/*-
 * SPDX-License-Identifier: BSD-2-Clause OR GPL-2.0
 *
 * Copyright (c) 2004, 2005 Topspin Communications. All rights reserved.
 * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.
 * Copyright (c) 2004 Voltaire, Inc. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses. You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include "ipoib.h"

#include <linux/delay.h>
#include <linux/completion.h>

#ifdef CONFIG_INFINIBAND_IPOIB_DEBUG
static int mcast_debug_level = 1;

module_param(mcast_debug_level, int, 0644);
MODULE_PARM_DESC(mcast_debug_level,
		 "Enable multicast debug tracing if > 0");
#endif

static DEFINE_MUTEX(mcast_mutex);

struct ipoib_mcast_iter {
	struct ipoib_dev_priv *priv;
	union ib_gid       mgid;
	unsigned long      created;
	unsigned int       queuelen;
	unsigned int       complete;
	unsigned int       send_only;
};

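/*
 * Free a multicast group entry.  Drops the address handle reference,
 * if one was created, drains any mbufs still queued waiting for the
 * join to complete (counting them as interface output errors), and
 * releases the entry itself.
 */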
static void ipoib_mcast_free(struct ipoib_mcast *mcast)
{
	struct ifnet *dev = mcast->priv->dev;
	int tx_dropped = 0;

	ipoib_dbg_mcast(mcast->priv, "deleting multicast group %16D\n",
	    mcast->mcmember.mgid.raw, ":");

	if (mcast->ah)
		ipoib_put_ah(mcast->ah);

	tx_dropped = mcast->pkt_queue.ifq_len;
	_IF_DRAIN(&mcast->pkt_queue);	/* XXX Locking. */

	if_inc_counter(dev, IFCOUNTER_OERRORS, tx_dropped);

	kfree(mcast);
}

static struct ipoib_mcast *ipoib_mcast_alloc(struct ipoib_dev_priv *priv,
					     int can_sleep)
{
	struct ipoib_mcast *mcast;

	mcast = kzalloc(sizeof *mcast, can_sleep ? GFP_KERNEL : GFP_ATOMIC);
	if (!mcast)
		return NULL;

	mcast->priv = priv;
	mcast->created = jiffies;
	mcast->backoff = 1;

	INIT_LIST_HEAD(&mcast->list);
	bzero(&mcast->pkt_queue, sizeof(mcast->pkt_queue));

	return mcast;
}

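/*
 * Look up a multicast group by MGID in the per-device red-black tree.
 * Returns NULL if the group is unknown; callers are expected to
 * serialize against tree updates via priv->lock.
 */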
static struct ipoib_mcast *__ipoib_mcast_find(struct ipoib_dev_priv *priv,
					      void *mgid)
{
	struct rb_node *n = priv->multicast_tree.rb_node;

	while (n) {
		struct ipoib_mcast *mcast;
		int ret;

		mcast = rb_entry(n, struct ipoib_mcast, rb_node);

		ret = memcmp(mgid, mcast->mcmember.mgid.raw,
			     sizeof (union ib_gid));
		if (ret < 0)
			n = n->rb_left;
		else if (ret > 0)
			n = n->rb_right;
		else
			return mcast;
	}

	return NULL;
}

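/*
 * Insert a multicast group entry into the red-black tree, keyed by
 * MGID.  Returns -EEXIST if an entry with the same MGID is already
 * present, 0 on success.  Same locking rules as __ipoib_mcast_find().
 */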
static int __ipoib_mcast_add(struct ipoib_dev_priv *priv,
			     struct ipoib_mcast *mcast)
{
	struct rb_node **n = &priv->multicast_tree.rb_node, *pn = NULL;

	while (*n) {
		struct ipoib_mcast *tmcast;
		int ret;

		pn = *n;
		tmcast = rb_entry(pn, struct ipoib_mcast, rb_node);

		ret = memcmp(mcast->mcmember.mgid.raw, tmcast->mcmember.mgid.raw,
			     sizeof (union ib_gid));
		if (ret < 0)
			n = &pn->rb_left;
		else if (ret > 0)
			n = &pn->rb_right;
		else
			return -EEXIST;
	}

	rb_link_node(&mcast->rb_node, pn, n);
	rb_insert_color(&mcast->rb_node, &priv->multicast_tree);

	return 0;
}

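/*
 * Complete a successful join: record the returned member record,
 * cache the broadcast group's Q_Key, attach the QP to the group
 * (unless this is a send-only entry), build an address handle for
 * transmits, and finally flush any packets queued while the join was
 * in progress.
 */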
static int ipoib_mcast_join_finish(struct ipoib_mcast *mcast,
				   struct ib_sa_mcmember_rec *mcmember)
{
	struct ipoib_dev_priv *priv = mcast->priv;
	struct ifnet *dev = priv->dev;
	struct ipoib_ah *ah;
	struct epoch_tracker et;
	int ret;
	int set_qkey = 0;

	mcast->mcmember = *mcmember;

	/* Set the cached Q_Key before we attach if it's the broadcast group */
	if (!memcmp(mcast->mcmember.mgid.raw, dev->if_broadcastaddr + 4,
		    sizeof (union ib_gid))) {
		spin_lock_irq(&priv->lock);
		if (!priv->broadcast) {
			spin_unlock_irq(&priv->lock);
			return -EAGAIN;
		}
		priv->qkey = be32_to_cpu(priv->broadcast->mcmember.qkey);
		spin_unlock_irq(&priv->lock);
		priv->tx_wr.remote_qkey = priv->qkey;
		set_qkey = 1;
	}

	if (!test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags)) {
		if (test_and_set_bit(IPOIB_MCAST_FLAG_ATTACHED, &mcast->flags)) {
			ipoib_warn(priv, "multicast group %16D already attached\n",
			    mcast->mcmember.mgid.raw, ":");

			return 0;
		}

		ret = ipoib_mcast_attach(priv, be16_to_cpu(mcast->mcmember.mlid),
					 &mcast->mcmember.mgid, set_qkey);
		if (ret < 0) {
			ipoib_warn(priv, "couldn't attach QP to multicast group %16D\n",
			    mcast->mcmember.mgid.raw, ":");

			clear_bit(IPOIB_MCAST_FLAG_ATTACHED, &mcast->flags);
			return ret;
		}
	}

	{
		struct ib_ah_attr av = {
			.dlid	       = be16_to_cpu(mcast->mcmember.mlid),
			.port_num      = priv->port,
			.sl	       = mcast->mcmember.sl,
			.ah_flags      = IB_AH_GRH,
			.static_rate   = mcast->mcmember.rate,
			.grh	       = {
				.flow_label    = be32_to_cpu(mcast->mcmember.flow_label),
				.hop_limit     = mcast->mcmember.hop_limit,
				.sgid_index    = 0,
				.traffic_class = mcast->mcmember.traffic_class
			}
		};
		av.grh.dgid = mcast->mcmember.mgid;

		ah = ipoib_create_ah(priv, priv->pd, &av);
		if (!ah) {
			ipoib_warn(priv, "ipoib_create_ah failed\n");
		} else {
			spin_lock_irq(&priv->lock);
			mcast->ah = ah;
			spin_unlock_irq(&priv->lock);

			ipoib_dbg_mcast(priv, "MGID %16D AV %p, LID 0x%04x, SL %d\n",
			    mcast->mcmember.mgid.raw, ":",
			    mcast->ah->ah,
			    be16_to_cpu(mcast->mcmember.mlid),
			    mcast->mcmember.sl);
		}
	}

	NET_EPOCH_ENTER(et);

	/* actually send any queued packets */
	while (mcast->pkt_queue.ifq_len) {
		struct mbuf *mb;
		_IF_DEQUEUE(&mcast->pkt_queue, mb);
		mb->m_pkthdr.rcvif = dev;

		if (dev->if_transmit(dev, mb))
			ipoib_warn(priv, "if_transmit failed to requeue packet\n");
	}

	NET_EPOCH_EXIT(et);
	return 0;
}

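/*
 * Completion callback for send-only joins.  On failure the queued
 * packets are dropped and the busy flag is cleared so that a later
 * ipoib_mcast_send() can retry the join.
 */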
static int
ipoib_mcast_sendonly_join_complete(int status,
				   struct ib_sa_multicast *multicast)
{
	struct ipoib_mcast *mcast = multicast->context;
	struct ipoib_dev_priv *priv = mcast->priv;

	/* We trap for port events ourselves. */
	if (status == -ENETRESET)
		return 0;

	if (!status)
		status = ipoib_mcast_join_finish(mcast, &multicast->rec);

	if (status) {
		if (mcast->logcount++ < 20)
			ipoib_dbg_mcast(priv, "multicast join failed for %16D, status %d\n",
			    mcast->mcmember.mgid.raw, ":", status);

		/* Flush out any queued packets */
		if_inc_counter(priv->dev, IFCOUNTER_OERRORS, mcast->pkt_queue.ifq_len);
		_IF_DRAIN(&mcast->pkt_queue);

		/* Clear the busy flag so we try again */
		status = test_and_clear_bit(IPOIB_MCAST_FLAG_BUSY,
					    &mcast->flags);
	}
	return status;
}

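/*
 * Start an asynchronous join for a group we only transmit to.  The
 * record is joined as a full member (join_state 1) because some
 * subnet managers do not support send-only membership yet.
 */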
static int ipoib_mcast_sendonly_join(struct ipoib_mcast *mcast)
{
	struct ipoib_dev_priv *priv = mcast->priv;
	struct ib_sa_mcmember_rec rec = {
#if 0				/* Some SMs don't support send-only yet */
		.join_state = 4
#else
		.join_state = 1
#endif
	};
	int ret = 0;

	if (!test_bit(IPOIB_FLAG_OPER_UP, &priv->flags)) {
		ipoib_dbg_mcast(priv, "device shutting down, no multicast joins\n");
		return -ENODEV;
	}

	if (test_and_set_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags)) {
		ipoib_dbg_mcast(priv, "multicast entry busy, skipping\n");
		return -EBUSY;
	}

	rec.mgid     = mcast->mcmember.mgid;
	rec.port_gid = priv->local_gid;
	rec.pkey     = cpu_to_be16(priv->pkey);

	mcast->mc = ib_sa_join_multicast(&ipoib_sa_client, priv->ca,
					 priv->port, &rec,
					 IB_SA_MCMEMBER_REC_MGID |
					 IB_SA_MCMEMBER_REC_PORT_GID |
					 IB_SA_MCMEMBER_REC_PKEY |
					 IB_SA_MCMEMBER_REC_JOIN_STATE,
					 GFP_ATOMIC,
					 ipoib_mcast_sendonly_join_complete,
					 mcast);
	if (IS_ERR(mcast->mc)) {
		ret = PTR_ERR(mcast->mc);
		clear_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags);
		ipoib_warn(priv, "ib_sa_join_multicast failed (ret = %d)\n",
			   ret);
	} else {
		ipoib_dbg_mcast(priv, "no multicast record for %16D, starting join\n",
		    mcast->mcmember.mgid.raw, ":");
	}

	return ret;
}

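/*
 * Deferred work that marks the interface link state up once the
 * underlying IB port reports ACTIVE.
 */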
void ipoib_mcast_carrier_on_task(struct work_struct *work)
{
	struct ipoib_dev_priv *priv = container_of(work, struct ipoib_dev_priv,
						   carrier_on_task);
	struct ib_port_attr attr;

	/*
	 * Keep the carrier off until the IB port is ACTIVE; a later
	 * successful broadcast join will queue this task again.
	 */
	if (ib_query_port(priv->ca, priv->port, &attr) ||
	    attr.state != IB_PORT_ACTIVE) {
		ipoib_dbg(priv, "Keeping carrier off until IB port is active\n");
		return;
	}
	if_link_state_change(priv->dev, LINK_STATE_UP);
}

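/*
 * Completion callback for ordinary (non send-only) joins.  On success
 * the backoff is reset, the join task is requeued to pick up the next
 * group, and carrier-on is scheduled once the broadcast group joins.
 * On failure the backoff is doubled, capped at
 * IPOIB_MAX_BACKOFF_SECONDS, and the join task is rescheduled.
 */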
static int ipoib_mcast_join_complete(int status,
				     struct ib_sa_multicast *multicast)
{
	struct ipoib_mcast *mcast = multicast->context;
	struct ipoib_dev_priv *priv = mcast->priv;

	ipoib_dbg_mcast(priv, "join completion for %16D (status %d)\n",
	    mcast->mcmember.mgid.raw, ":", status);

	/* We trap for port events ourselves. */
	if (status == -ENETRESET)
		return 0;

	if (!status)
		status = ipoib_mcast_join_finish(mcast, &multicast->rec);

	if (!status) {
		mcast->backoff = 1;
		mutex_lock(&mcast_mutex);
		if (test_bit(IPOIB_MCAST_RUN, &priv->flags))
			queue_delayed_work(ipoib_workqueue,
					   &priv->mcast_task, 0);
		mutex_unlock(&mcast_mutex);

		/*
		 * Defer the carrier-on work to ipoib_workqueue so it
		 * runs from process context rather than from this join
		 * completion callback.
		 */
		if (mcast == priv->broadcast)
			queue_work(ipoib_workqueue, &priv->carrier_on_task);

		return 0;
	}

	if (mcast->logcount++ < 20) {
		if (status == -ETIMEDOUT || status == -EAGAIN) {
			ipoib_dbg_mcast(priv, "multicast join failed for %16D, status %d\n",
			    mcast->mcmember.mgid.raw, ":", status);
		} else {
			ipoib_warn(priv, "multicast join failed for %16D, status %d\n",
			    mcast->mcmember.mgid.raw, ":", status);
		}
	}

	mcast->backoff *= 2;
	if (mcast->backoff > IPOIB_MAX_BACKOFF_SECONDS)
		mcast->backoff = IPOIB_MAX_BACKOFF_SECONDS;

	/* Clear the busy flag so we try again */
	status = test_and_clear_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags);

	mutex_lock(&mcast_mutex);
	spin_lock_irq(&priv->lock);
	if (test_bit(IPOIB_MCAST_RUN, &priv->flags))
		queue_delayed_work(ipoib_workqueue, &priv->mcast_task,
				   mcast->backoff * HZ);
	spin_unlock_irq(&priv->lock);
	mutex_unlock(&mcast_mutex);

	return status;
}

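/*
 * Kick off an asynchronous SA join for one group.  With create set,
 * the request also proposes the Q_Key, MTU, rate and the other
 * parameters inherited from the broadcast group, so the group is
 * created if it does not exist yet.
 */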
static void ipoib_mcast_join(struct ipoib_dev_priv *priv,
			     struct ipoib_mcast *mcast, int create)
{
	struct ib_sa_mcmember_rec rec = {
		.join_state = 1
	};
	ib_sa_comp_mask comp_mask;
	int ret = 0;

	ipoib_dbg_mcast(priv, "joining MGID %16D\n",
	    mcast->mcmember.mgid.raw, ":");

	rec.mgid     = mcast->mcmember.mgid;
	rec.port_gid = priv->local_gid;
	rec.pkey     = cpu_to_be16(priv->pkey);

	comp_mask =
		IB_SA_MCMEMBER_REC_MGID |
		IB_SA_MCMEMBER_REC_PORT_GID |
		IB_SA_MCMEMBER_REC_PKEY |
		IB_SA_MCMEMBER_REC_JOIN_STATE;

	if (create) {
		comp_mask |=
			IB_SA_MCMEMBER_REC_QKEY |
			IB_SA_MCMEMBER_REC_MTU_SELECTOR |
			IB_SA_MCMEMBER_REC_MTU |
			IB_SA_MCMEMBER_REC_TRAFFIC_CLASS |
			IB_SA_MCMEMBER_REC_RATE_SELECTOR |
			IB_SA_MCMEMBER_REC_RATE |
			IB_SA_MCMEMBER_REC_SL |
			IB_SA_MCMEMBER_REC_FLOW_LABEL |
			IB_SA_MCMEMBER_REC_HOP_LIMIT;

		rec.qkey	  = priv->broadcast->mcmember.qkey;
		rec.mtu_selector  = IB_SA_EQ;
		rec.mtu		  = priv->broadcast->mcmember.mtu;
		rec.traffic_class = priv->broadcast->mcmember.traffic_class;
		rec.rate_selector = IB_SA_EQ;
		rec.rate	  = priv->broadcast->mcmember.rate;
		rec.sl		  = priv->broadcast->mcmember.sl;
		rec.flow_label	  = priv->broadcast->mcmember.flow_label;
		rec.hop_limit	  = priv->broadcast->mcmember.hop_limit;
	}

	set_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags);
	mcast->mc = ib_sa_join_multicast(&ipoib_sa_client, priv->ca, priv->port,
					 &rec, comp_mask, GFP_KERNEL,
					 ipoib_mcast_join_complete, mcast);
	if (IS_ERR(mcast->mc)) {
		clear_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags);
		ret = PTR_ERR(mcast->mc);
		ipoib_warn(priv, "ib_sa_join_multicast failed, status %d\n", ret);

		mcast->backoff *= 2;
		if (mcast->backoff > IPOIB_MAX_BACKOFF_SECONDS)
			mcast->backoff = IPOIB_MAX_BACKOFF_SECONDS;

		mutex_lock(&mcast_mutex);
		if (test_bit(IPOIB_MCAST_RUN, &priv->flags))
			queue_delayed_work(ipoib_workqueue,
					   &priv->mcast_task,
					   mcast->backoff * HZ);
		mutex_unlock(&mcast_mutex);
	}
}

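/*
 * Workqueue task that drives the join state machine: refresh the
 * local GID and LID, join the broadcast group first, then join the
 * remaining groups one at a time (each completion requeues this
 * task), and finally update the multicast MTU.
 */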
void ipoib_mcast_join_task(struct work_struct *work)
{
	struct ipoib_dev_priv *priv =
		container_of(work, struct ipoib_dev_priv, mcast_task.work);
	struct ifnet *dev = priv->dev;
	struct ib_port_attr attr;

	ipoib_dbg_mcast(priv, "Running join task. flags 0x%lX\n", priv->flags);

	if (!test_bit(IPOIB_MCAST_RUN, &priv->flags))
		return;

	if (ib_query_port(priv->ca, priv->port, &attr) ||
	    attr.state != IB_PORT_ACTIVE) {
		ipoib_dbg(priv, "%s: port state is not ACTIVE (state = %d), suspending task\n",
		    __func__, attr.state);
		return;
	}

	if (ib_query_gid(priv->ca, priv->port, 0, &priv->local_gid, NULL))
		ipoib_warn(priv, "ib_query_gid() failed\n");
	else
		memcpy(IF_LLADDR(dev) + 4, priv->local_gid.raw, sizeof (union ib_gid));

	/* The port query above already succeeded, so reuse its LID. */
	priv->local_lid = attr.lid;

	if (!priv->broadcast) {
		struct ipoib_mcast *broadcast;

		if (!test_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags))
			return;

		broadcast = ipoib_mcast_alloc(priv, 1);
		if (!broadcast) {
			ipoib_warn(priv, "failed to allocate broadcast group\n");
			mutex_lock(&mcast_mutex);
			if (test_bit(IPOIB_MCAST_RUN, &priv->flags))
				queue_delayed_work(ipoib_workqueue,
						   &priv->mcast_task, HZ);
			mutex_unlock(&mcast_mutex);
			return;
		}

		spin_lock_irq(&priv->lock);
		memcpy(broadcast->mcmember.mgid.raw, dev->if_broadcastaddr + 4,
		       sizeof (union ib_gid));
		priv->broadcast = broadcast;

		__ipoib_mcast_add(priv, priv->broadcast);
		spin_unlock_irq(&priv->lock);
	}

	if (priv->broadcast &&
	    !test_bit(IPOIB_MCAST_FLAG_ATTACHED, &priv->broadcast->flags)) {
		if (!test_bit(IPOIB_MCAST_FLAG_BUSY, &priv->broadcast->flags))
			ipoib_mcast_join(priv, priv->broadcast, 0);
		return;
	}


	while (1) {
		struct ipoib_mcast *mcast = NULL;

		spin_lock_irq(&priv->lock);
		list_for_each_entry(mcast, &priv->multicast_list, list) {
			if (!test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags)
			    && !test_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags)
			    && !test_bit(IPOIB_MCAST_FLAG_ATTACHED, &mcast->flags)) {
				/* Found the next unjoined group */
				break;
			}
		}
		spin_unlock_irq(&priv->lock);

		if (&mcast->list == &priv->multicast_list) {
			/* All done */
			break;
		}

		ipoib_mcast_join(priv, mcast, 1);
		return;
	}

	spin_lock_irq(&priv->lock);
	if (priv->broadcast)
		priv->mcast_mtu = IPOIB_UD_MTU(ib_mtu_enum_to_int(priv->broadcast->mcmember.mtu));
	else
		priv->mcast_mtu = priv->admin_mtu;
	spin_unlock_irq(&priv->lock);

	if (!ipoib_cm_admin_enabled(priv))
		ipoib_change_mtu(priv, min(priv->mcast_mtu, priv->admin_mtu),
		    true);

	ipoib_dbg_mcast(priv, "successfully joined all multicast groups\n");

	clear_bit(IPOIB_MCAST_RUN, &priv->flags);
}

int ipoib_mcast_start_thread(struct ipoib_dev_priv *priv)
{
	ipoib_dbg_mcast(priv, "starting multicast thread flags 0x%lX\n",
	    priv->flags);

	mutex_lock(&mcast_mutex);
	if (!test_and_set_bit(IPOIB_MCAST_RUN, &priv->flags))
		queue_delayed_work(ipoib_workqueue, &priv->mcast_task, 0);
	mutex_unlock(&mcast_mutex);

	return 0;
}

int ipoib_mcast_stop_thread(struct ipoib_dev_priv *priv, int flush)
{

	ipoib_dbg_mcast(priv, "stopping multicast thread\n");

	mutex_lock(&mcast_mutex);
	clear_bit(IPOIB_MCAST_RUN, &priv->flags);
	cancel_delayed_work(&priv->mcast_task);
	mutex_unlock(&mcast_mutex);

	if (flush)
		flush_workqueue(ipoib_workqueue);

	return 0;
}

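/*
 * Leave a multicast group: cancel or free the SA join and, if the QP
 * was attached, detach it from the group.
 */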
static int ipoib_mcast_leave(struct ipoib_dev_priv *priv, struct ipoib_mcast *mcast)
{
	int ret = 0;

	if (test_and_clear_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags))
		ib_sa_free_multicast(mcast->mc);

	if (test_and_clear_bit(IPOIB_MCAST_FLAG_ATTACHED, &mcast->flags)) {
		ipoib_dbg_mcast(priv, "leaving MGID %16D\n",
		    mcast->mcmember.mgid.raw, ":");

		/* Remove ourselves from the multicast group */
		ret = ib_detach_mcast(priv->qp, &mcast->mcmember.mgid,
				      be16_to_cpu(mcast->mcmember.mlid));
		if (ret)
			ipoib_warn(priv, "ib_detach_mcast failed (result = %d)\n", ret);
	}

	return 0;
}

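/*
 * Transmit one mbuf to a multicast group.  If the group has no
 * address handle yet, the mbuf is queued (up to
 * IPOIB_MAX_MCAST_QUEUE) and a send-only join is started on demand;
 * queued packets are sent by ipoib_mcast_join_finish() once the join
 * completes.
 */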
void
ipoib_mcast_send(struct ipoib_dev_priv *priv, void *mgid, struct mbuf *mb)
{
	struct ifnet *dev = priv->dev;
	struct ipoib_mcast *mcast;

	if (!test_bit(IPOIB_FLAG_OPER_UP, &priv->flags) ||
	    !priv->broadcast ||
	    !test_bit(IPOIB_MCAST_FLAG_ATTACHED, &priv->broadcast->flags)) {
		if_inc_counter(dev, IFCOUNTER_OERRORS, 1);
		m_freem(mb);
		return;
	}

	mcast = __ipoib_mcast_find(priv, mgid);
	if (!mcast) {
		/* Let's create a new send only group now */
		ipoib_dbg_mcast(priv, "setting up send only multicast group for %16D\n",
		    mgid, ":");

		mcast = ipoib_mcast_alloc(priv, 0);
		if (!mcast) {
			ipoib_warn(priv, "unable to allocate memory for "
			    "multicast structure\n");
			if_inc_counter(dev, IFCOUNTER_OERRORS, 1);
			m_freem(mb);
			goto out;
		}

		set_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags);
		memcpy(mcast->mcmember.mgid.raw, mgid, sizeof (union ib_gid));
		__ipoib_mcast_add(priv, mcast);
		list_add_tail(&mcast->list, &priv->multicast_list);
	}

	if (!mcast->ah) {
		if (mcast->pkt_queue.ifq_len < IPOIB_MAX_MCAST_QUEUE) {
			_IF_ENQUEUE(&mcast->pkt_queue, mb);
		} else {
			if_inc_counter(dev, IFCOUNTER_OERRORS, 1);
			m_freem(mb);
		}

		if (test_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags))
			ipoib_dbg_mcast(priv, "no address vector, "
			    "but multicast join already started\n");
		else if (test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags))
			ipoib_mcast_sendonly_join(mcast);

		/*
		 * If lookup completes between here and out:, don't
		 * want to send packet twice.
		 */
		mcast = NULL;
	}

out:
	if (mcast && mcast->ah)
		ipoib_send(priv, mb, mcast->ah, IB_MULTICAST_QPN);
}

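/*
 * Tear down all multicast state for the device, including the
 * broadcast group: every entry is unlinked under the lock, then left
 * and freed outside of it.
 */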
void ipoib_mcast_dev_flush(struct ipoib_dev_priv *priv)
{
	LIST_HEAD(remove_list);
	struct ipoib_mcast *mcast, *tmcast;
	unsigned long flags;

	ipoib_dbg_mcast(priv, "flushing multicast list\n");

	spin_lock_irqsave(&priv->lock, flags);

	list_for_each_entry_safe(mcast, tmcast, &priv->multicast_list, list) {
		list_del(&mcast->list);
		rb_erase(&mcast->rb_node, &priv->multicast_tree);
		list_add_tail(&mcast->list, &remove_list);
	}

	if (priv->broadcast) {
		rb_erase(&priv->broadcast->rb_node, &priv->multicast_tree);
		list_add_tail(&priv->broadcast->list, &remove_list);
		priv->broadcast = NULL;
	}

	spin_unlock_irqrestore(&priv->lock, flags);

	list_for_each_entry_safe(mcast, tmcast, &remove_list, list) {
		ipoib_mcast_leave(priv, mcast);
		ipoib_mcast_free(mcast);
	}
}

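/*
 * Check that a link-layer multicast address is a well-formed IPoIB
 * group address: correct length and matching the reserved QPN,
 * prefix, scope, signature and P_Key bytes of the broadcast address.
 */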
static int ipoib_mcast_addr_is_valid(const u8 *addr, unsigned int addrlen,
				     const u8 *broadcast)
{
	if (addrlen != INFINIBAND_ALEN)
		return 0;
	/* reserved QPN, prefix, scope */
	if (memcmp(addr, broadcast, 6))
		return 0;
	/* signature lower, pkey */
	if (memcmp(addr + 7, broadcast + 7, 3))
		return 0;
	return 1;
}

void ipoib_mcast_restart_task(struct work_struct *work)
{
	struct ipoib_dev_priv *priv =
		container_of(work, struct ipoib_dev_priv, restart_task);
	ipoib_mcast_restart(priv);
}

struct ipoib_mcast_ctx {
	struct ipoib_dev_priv *priv;
	struct list_head remove_list;
};

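/*
 * if_foreach_llmaddr() callback used by ipoib_mcast_restart(): mark
 * groups that are still in the interface filter as FOUND, promote
 * send-only entries to full ones, and allocate entries for addresses
 * we have not seen before.
 */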
static u_int
ipoib_process_maddr(void *arg, struct sockaddr_dl *sdl, u_int cnt)
{
	struct ipoib_mcast_ctx *ctx = arg;
	struct ipoib_dev_priv *priv = ctx->priv;
	struct ipoib_mcast *mcast;
	struct ib_sa_mcmember_rec rec;
	union ib_gid mgid;
	uint8_t *addr;
	int addrlen;

	addr = LLADDR(sdl);
	addrlen = sdl->sdl_alen;
	if (!ipoib_mcast_addr_is_valid(addr, addrlen,
	    priv->dev->if_broadcastaddr))
		return (0);

	memcpy(mgid.raw, addr + 4, sizeof mgid);

	mcast = __ipoib_mcast_find(priv, &mgid);
	if (!mcast || test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags)) {
		struct ipoib_mcast *nmcast;

		/* ignore group which is directly joined by userspace */
		if (test_bit(IPOIB_FLAG_UMCAST, &priv->flags) &&
		    !ib_sa_get_mcmember_rec(priv->ca, priv->port, &mgid, &rec)) {
			ipoib_dbg_mcast(priv, "ignoring multicast entry for mgid %16D\n",
			    mgid.raw, ":");
			return (0);
		}

		/* Not found or send-only group, let's add a new entry */
		ipoib_dbg_mcast(priv, "adding multicast entry for mgid %16D\n",
		    mgid.raw, ":");

		nmcast = ipoib_mcast_alloc(priv, 0);
		if (!nmcast) {
			ipoib_warn(priv, "unable to allocate memory for multicast structure\n");
			return (0);
		}

		set_bit(IPOIB_MCAST_FLAG_FOUND, &nmcast->flags);

		nmcast->mcmember.mgid = mgid;

		if (mcast) {
			/* Destroy the send only entry */
			list_move_tail(&mcast->list, &ctx->remove_list);

			rb_replace_node(&mcast->rb_node,
					&nmcast->rb_node,
					&priv->multicast_tree);
		} else
			__ipoib_mcast_add(priv, nmcast);

		list_add_tail(&nmcast->list, &priv->multicast_list);
	}

	if (mcast)
		set_bit(IPOIB_MCAST_FLAG_FOUND, &mcast->flags);

	return (1);
}

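/*
 * Resynchronize our group list with the interface's multicast filter:
 * clear the FOUND flags, walk the filter via ipoib_process_maddr(),
 * remove entries that are no longer present, and restart the join
 * thread if the interface is administratively up.
 */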
void ipoib_mcast_restart(struct ipoib_dev_priv *priv)
{
	struct ipoib_mcast_ctx ctx = { priv,
	    { &ctx.remove_list, &ctx.remove_list }};
	struct ifnet *dev = priv->dev;
	struct ipoib_mcast *mcast, *tmcast;

	ipoib_dbg_mcast(priv, "restarting multicast task flags 0x%lX\n",
	    priv->flags);

	ipoib_mcast_stop_thread(priv, 0);

	spin_lock(&priv->lock);

	/*
	 * Unfortunately, the networking core only gives us a list of all of
	 * the multicast hardware addresses. We need to figure out which ones
	 * are new and which ones have been removed
	 */

	/* Clear out the found flag */
	list_for_each_entry(mcast, &priv->multicast_list, list)
		clear_bit(IPOIB_MCAST_FLAG_FOUND, &mcast->flags);

	/* Mark the entries that are still present and add any new ones */
	if_foreach_llmaddr(dev, ipoib_process_maddr, &ctx);

	/* Remove all of the entries that don't exist anymore */
	list_for_each_entry_safe(mcast, tmcast, &priv->multicast_list, list) {
		if (!test_bit(IPOIB_MCAST_FLAG_FOUND, &mcast->flags) &&
		    !test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags)) {
			ipoib_dbg_mcast(priv, "deleting multicast group %16D\n",
			    mcast->mcmember.mgid.raw, ":");

			rb_erase(&mcast->rb_node, &priv->multicast_tree);

			/* Move to the remove list */
			list_move_tail(&mcast->list, &ctx.remove_list);
		}
	}

	spin_unlock(&priv->lock);

	/* We have to cancel outside of the spinlock */
	list_for_each_entry_safe(mcast, tmcast, &ctx.remove_list, list) {
		ipoib_mcast_leave(mcast->priv, mcast);
		ipoib_mcast_free(mcast);
	}

	if (test_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags))
		ipoib_mcast_start_thread(priv);
}

#ifdef CONFIG_INFINIBAND_IPOIB_DEBUG

struct ipoib_mcast_iter *ipoib_mcast_iter_init(struct ipoib_dev_priv *priv)
{
	struct ipoib_mcast_iter *iter;

	iter = kmalloc(sizeof *iter, GFP_KERNEL);
	if (!iter)
		return NULL;

	iter->priv = priv;
	memset(iter->mgid.raw, 0, 16);

	if (ipoib_mcast_iter_next(iter)) {
		kfree(iter);
		return NULL;
	}

	return iter;
}

int ipoib_mcast_iter_next(struct ipoib_mcast_iter *iter)
{
	struct ipoib_dev_priv *priv = iter->priv;
	struct rb_node *n;
	struct ipoib_mcast *mcast;
	int ret = 1;

	spin_lock_irq(&priv->lock);

	n = rb_first(&priv->multicast_tree);

	while (n) {
		mcast = rb_entry(n, struct ipoib_mcast, rb_node);

		if (memcmp(iter->mgid.raw, mcast->mcmember.mgid.raw,
			   sizeof (union ib_gid)) < 0) {
			iter->mgid = mcast->mcmember.mgid;
			iter->created = mcast->created;
			iter->queuelen = mcast->pkt_queue.ifq_len;
			iter->complete = !!mcast->ah;
			iter->send_only = !!(mcast->flags & (1 << IPOIB_MCAST_FLAG_SENDONLY));

			ret = 0;

			break;
		}

		n = rb_next(n);
	}

	spin_unlock_irq(&priv->lock);

	return ret;
}

void ipoib_mcast_iter_read(struct ipoib_mcast_iter *iter,
			   union ib_gid *mgid,
			   unsigned long *created,
			   unsigned int *queuelen,
			   unsigned int *complete,
			   unsigned int *send_only)
{
	*mgid      = iter->mgid;
	*created   = iter->created;
	*queuelen  = iter->queuelen;
	*complete  = iter->complete;
	*send_only = iter->send_only;
}

#endif /* CONFIG_INFINIBAND_IPOIB_DEBUG */