ipoib_multicast.c revision e6ded99cbbbfef2cef537d717ad61d2f77f4dfd6
1/* 2 * Copyright (c) 2004, 2005 Topspin Communications. All rights reserved. 3 * 4 * This software is available to you under a choice of one of two 5 * licenses. You may choose to be licensed under the terms of the GNU 6 * General Public License (GPL) Version 2, available from the file 7 * COPYING in the main directory of this source tree, or the 8 * OpenIB.org BSD license below: 9 * 10 * Redistribution and use in source and binary forms, with or 11 * without modification, are permitted provided that the following 12 * conditions are met: 13 * 14 * - Redistributions of source code must retain the above 15 * copyright notice, this list of conditions and the following 16 * disclaimer. 17 * 18 * - Redistributions in binary form must reproduce the above 19 * copyright notice, this list of conditions and the following 20 * disclaimer in the documentation and/or other materials 21 * provided with the distribution. 22 * 23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 30 * SOFTWARE. 31 * 32 * $Id: ipoib_multicast.c 1362 2004-12-18 15:56:29Z roland $ 33 */ 34 35#include <linux/skbuff.h> 36#include <linux/rtnetlink.h> 37#include <linux/ip.h> 38#include <linux/in.h> 39#include <linux/igmp.h> 40#include <linux/inetdevice.h> 41#include <linux/delay.h> 42#include <linux/completion.h> 43 44#include "ipoib.h" 45 46#ifdef CONFIG_INFINIBAND_IPOIB_DEBUG 47static int mcast_debug_level; 48 49module_param(mcast_debug_level, int, 0644); 50MODULE_PARM_DESC(mcast_debug_level, 51 "Enable multicast debug tracing if > 0"); 52#endif 53 54static DECLARE_MUTEX(mcast_mutex); 55 56/* Used for all multicast joins (broadcast, IPv4 mcast and IPv6 mcast) */ 57struct ipoib_mcast { 58 struct ib_sa_mcmember_rec mcmember; 59 struct ipoib_ah *ah; 60 61 struct rb_node rb_node; 62 struct list_head list; 63 struct completion done; 64 65 int query_id; 66 struct ib_sa_query *query; 67 68 unsigned long created; 69 unsigned long backoff; 70 71 unsigned long flags; 72 unsigned char logcount; 73 74 struct list_head neigh_list; 75 76 struct sk_buff_head pkt_queue; 77 78 struct net_device *dev; 79}; 80 81struct ipoib_mcast_iter { 82 struct net_device *dev; 83 union ib_gid mgid; 84 unsigned long created; 85 unsigned int queuelen; 86 unsigned int complete; 87 unsigned int send_only; 88}; 89 90static void ipoib_mcast_free(struct ipoib_mcast *mcast) 91{ 92 struct net_device *dev = mcast->dev; 93 struct ipoib_dev_priv *priv = netdev_priv(dev); 94 struct ipoib_neigh *neigh, *tmp; 95 unsigned long flags; 96 LIST_HEAD(ah_list); 97 struct ipoib_ah *ah, *tah; 98 99 ipoib_dbg_mcast(netdev_priv(dev), 100 "deleting multicast group " IPOIB_GID_FMT "\n", 101 IPOIB_GID_ARG(mcast->mcmember.mgid)); 102 103 spin_lock_irqsave(&priv->lock, flags); 104 105 list_for_each_entry_safe(neigh, tmp, &mcast->neigh_list, list) { 106 if (neigh->ah) 107 list_add_tail(&neigh->ah->list, &ah_list); 108 *to_ipoib_neigh(neigh->neighbour) = NULL; 109 neigh->neighbour->ops->destructor = NULL; 110 kfree(neigh); 111 } 112 113 spin_unlock_irqrestore(&priv->lock, flags); 114 115 list_for_each_entry_safe(ah, tah, &ah_list, list) 116 ipoib_put_ah(ah); 117 118 if (mcast->ah) 119 ipoib_put_ah(mcast->ah); 120 121 while (!skb_queue_empty(&mcast->pkt_queue)) { 122 struct sk_buff *skb = skb_dequeue(&mcast->pkt_queue); 123 124 skb->dev = dev; 125 dev_kfree_skb_any(skb); 126 } 127 128 kfree(mcast); 129} 130 131static struct ipoib_mcast *ipoib_mcast_alloc(struct net_device *dev, 132 int can_sleep) 133{ 134 struct ipoib_mcast *mcast; 135 136 mcast = kmalloc(sizeof (*mcast), can_sleep ? GFP_KERNEL : GFP_ATOMIC); 137 if (!mcast) 138 return NULL; 139 140 memset(mcast, 0, sizeof (*mcast)); 141 142 init_completion(&mcast->done); 143 144 mcast->dev = dev; 145 mcast->created = jiffies; 146 mcast->backoff = HZ; 147 mcast->logcount = 0; 148 149 INIT_LIST_HEAD(&mcast->list); 150 INIT_LIST_HEAD(&mcast->neigh_list); 151 skb_queue_head_init(&mcast->pkt_queue); 152 153 mcast->ah = NULL; 154 mcast->query = NULL; 155 156 return mcast; 157} 158 159static struct ipoib_mcast *__ipoib_mcast_find(struct net_device *dev, union ib_gid *mgid) 160{ 161 struct ipoib_dev_priv *priv = netdev_priv(dev); 162 struct rb_node *n = priv->multicast_tree.rb_node; 163 164 while (n) { 165 struct ipoib_mcast *mcast; 166 int ret; 167 168 mcast = rb_entry(n, struct ipoib_mcast, rb_node); 169 170 ret = memcmp(mgid->raw, mcast->mcmember.mgid.raw, 171 sizeof (union ib_gid)); 172 if (ret < 0) 173 n = n->rb_left; 174 else if (ret > 0) 175 n = n->rb_right; 176 else 177 return mcast; 178 } 179 180 return NULL; 181} 182 183static int __ipoib_mcast_add(struct net_device *dev, struct ipoib_mcast *mcast) 184{ 185 struct ipoib_dev_priv *priv = netdev_priv(dev); 186 struct rb_node **n = &priv->multicast_tree.rb_node, *pn = NULL; 187 188 while (*n) { 189 struct ipoib_mcast *tmcast; 190 int ret; 191 192 pn = *n; 193 tmcast = rb_entry(pn, struct ipoib_mcast, rb_node); 194 195 ret = memcmp(mcast->mcmember.mgid.raw, tmcast->mcmember.mgid.raw, 196 sizeof (union ib_gid)); 197 if (ret < 0) 198 n = &pn->rb_left; 199 else if (ret > 0) 200 n = &pn->rb_right; 201 else 202 return -EEXIST; 203 } 204 205 rb_link_node(&mcast->rb_node, pn, n); 206 rb_insert_color(&mcast->rb_node, &priv->multicast_tree); 207 208 return 0; 209} 210 211static int ipoib_mcast_join_finish(struct ipoib_mcast *mcast, 212 struct ib_sa_mcmember_rec *mcmember) 213{ 214 struct net_device *dev = mcast->dev; 215 struct ipoib_dev_priv *priv = netdev_priv(dev); 216 int ret; 217 218 mcast->mcmember = *mcmember; 219 220 /* Set the cached Q_Key before we attach if it's the broadcast group */ 221 if (!memcmp(mcast->mcmember.mgid.raw, priv->dev->broadcast + 4, 222 sizeof (union ib_gid))) { 223 priv->qkey = be32_to_cpu(priv->broadcast->mcmember.qkey); 224 priv->tx_wr.wr.ud.remote_qkey = priv->qkey; 225 } 226 227 if (!test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags)) { 228 if (test_and_set_bit(IPOIB_MCAST_FLAG_ATTACHED, &mcast->flags)) { 229 ipoib_warn(priv, "multicast group " IPOIB_GID_FMT 230 " already attached\n", 231 IPOIB_GID_ARG(mcast->mcmember.mgid)); 232 233 return 0; 234 } 235 236 ret = ipoib_mcast_attach(dev, be16_to_cpu(mcast->mcmember.mlid), 237 &mcast->mcmember.mgid); 238 if (ret < 0) { 239 ipoib_warn(priv, "couldn't attach QP to multicast group " 240 IPOIB_GID_FMT "\n", 241 IPOIB_GID_ARG(mcast->mcmember.mgid)); 242 243 clear_bit(IPOIB_MCAST_FLAG_ATTACHED, &mcast->flags); 244 return ret; 245 } 246 } 247 248 { 249 struct ib_ah_attr av = { 250 .dlid = be16_to_cpu(mcast->mcmember.mlid), 251 .port_num = priv->port, 252 .sl = mcast->mcmember.sl, 253 .ah_flags = IB_AH_GRH, 254 .grh = { 255 .flow_label = be32_to_cpu(mcast->mcmember.flow_label), 256 .hop_limit = mcast->mcmember.hop_limit, 257 .sgid_index = 0, 258 .traffic_class = mcast->mcmember.traffic_class 259 } 260 }; 261 int path_rate = ib_sa_rate_enum_to_int(mcast->mcmember.rate); 262 263 av.grh.dgid = mcast->mcmember.mgid; 264 265 if (path_rate > 0 && priv->local_rate > path_rate) 266 av.static_rate = (priv->local_rate - 1) / path_rate; 267 268 ipoib_dbg_mcast(priv, "static_rate %d for local port %dX, mcmember %dX\n", 269 av.static_rate, priv->local_rate, 270 ib_sa_rate_enum_to_int(mcast->mcmember.rate)); 271 272 mcast->ah = ipoib_create_ah(dev, priv->pd, &av); 273 if (!mcast->ah) { 274 ipoib_warn(priv, "ib_address_create failed\n"); 275 } else { 276 ipoib_dbg_mcast(priv, "MGID " IPOIB_GID_FMT 277 " AV %p, LID 0x%04x, SL %d\n", 278 IPOIB_GID_ARG(mcast->mcmember.mgid), 279 mcast->ah->ah, 280 be16_to_cpu(mcast->mcmember.mlid), 281 mcast->mcmember.sl); 282 } 283 } 284 285 /* actually send any queued packets */ 286 while (!skb_queue_empty(&mcast->pkt_queue)) { 287 struct sk_buff *skb = skb_dequeue(&mcast->pkt_queue); 288 289 skb->dev = dev; 290 291 if (!skb->dst || !skb->dst->neighbour) { 292 /* put pseudoheader back on for next time */ 293 skb_push(skb, sizeof (struct ipoib_pseudoheader)); 294 } 295 296 if (dev_queue_xmit(skb)) 297 ipoib_warn(priv, "dev_queue_xmit failed to requeue packet\n"); 298 } 299 300 return 0; 301} 302 303static void 304ipoib_mcast_sendonly_join_complete(int status, 305 struct ib_sa_mcmember_rec *mcmember, 306 void *mcast_ptr) 307{ 308 struct ipoib_mcast *mcast = mcast_ptr; 309 struct net_device *dev = mcast->dev; 310 311 if (!status) 312 ipoib_mcast_join_finish(mcast, mcmember); 313 else { 314 if (mcast->logcount++ < 20) 315 ipoib_dbg_mcast(netdev_priv(dev), "multicast join failed for " 316 IPOIB_GID_FMT ", status %d\n", 317 IPOIB_GID_ARG(mcast->mcmember.mgid), status); 318 319 /* Flush out any queued packets */ 320 while (!skb_queue_empty(&mcast->pkt_queue)) { 321 struct sk_buff *skb = skb_dequeue(&mcast->pkt_queue); 322 323 skb->dev = dev; 324 325 dev_kfree_skb_any(skb); 326 } 327 328 /* Clear the busy flag so we try again */ 329 clear_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags); 330 } 331 332 complete(&mcast->done); 333} 334 335static int ipoib_mcast_sendonly_join(struct ipoib_mcast *mcast) 336{ 337 struct net_device *dev = mcast->dev; 338 struct ipoib_dev_priv *priv = netdev_priv(dev); 339 struct ib_sa_mcmember_rec rec = { 340#if 0 /* Some SMs don't support send-only yet */ 341 .join_state = 4 342#else 343 .join_state = 1 344#endif 345 }; 346 int ret = 0; 347 348 if (!test_bit(IPOIB_FLAG_OPER_UP, &priv->flags)) { 349 ipoib_dbg_mcast(priv, "device shutting down, no multicast joins\n"); 350 return -ENODEV; 351 } 352 353 if (test_and_set_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags)) { 354 ipoib_dbg_mcast(priv, "multicast entry busy, skipping\n"); 355 return -EBUSY; 356 } 357 358 rec.mgid = mcast->mcmember.mgid; 359 rec.port_gid = priv->local_gid; 360 rec.pkey = be16_to_cpu(priv->pkey); 361 362 ret = ib_sa_mcmember_rec_set(priv->ca, priv->port, &rec, 363 IB_SA_MCMEMBER_REC_MGID | 364 IB_SA_MCMEMBER_REC_PORT_GID | 365 IB_SA_MCMEMBER_REC_PKEY | 366 IB_SA_MCMEMBER_REC_JOIN_STATE, 367 1000, GFP_ATOMIC, 368 ipoib_mcast_sendonly_join_complete, 369 mcast, &mcast->query); 370 if (ret < 0) { 371 ipoib_warn(priv, "ib_sa_mcmember_rec_set failed (ret = %d)\n", 372 ret); 373 } else { 374 ipoib_dbg_mcast(priv, "no multicast record for " IPOIB_GID_FMT 375 ", starting join\n", 376 IPOIB_GID_ARG(mcast->mcmember.mgid)); 377 378 mcast->query_id = ret; 379 } 380 381 return ret; 382} 383 384static void ipoib_mcast_join_complete(int status, 385 struct ib_sa_mcmember_rec *mcmember, 386 void *mcast_ptr) 387{ 388 struct ipoib_mcast *mcast = mcast_ptr; 389 struct net_device *dev = mcast->dev; 390 struct ipoib_dev_priv *priv = netdev_priv(dev); 391 392 ipoib_dbg_mcast(priv, "join completion for " IPOIB_GID_FMT 393 " (status %d)\n", 394 IPOIB_GID_ARG(mcast->mcmember.mgid), status); 395 396 if (!status && !ipoib_mcast_join_finish(mcast, mcmember)) { 397 mcast->backoff = HZ; 398 down(&mcast_mutex); 399 if (test_bit(IPOIB_MCAST_RUN, &priv->flags)) 400 queue_work(ipoib_workqueue, &priv->mcast_task); 401 up(&mcast_mutex); 402 complete(&mcast->done); 403 return; 404 } 405 406 if (status == -EINTR) { 407 complete(&mcast->done); 408 return; 409 } 410 411 if (status && mcast->logcount++ < 20) { 412 if (status == -ETIMEDOUT || status == -EINTR) { 413 ipoib_dbg_mcast(priv, "multicast join failed for " IPOIB_GID_FMT 414 ", status %d\n", 415 IPOIB_GID_ARG(mcast->mcmember.mgid), 416 status); 417 } else { 418 ipoib_warn(priv, "multicast join failed for " 419 IPOIB_GID_FMT ", status %d\n", 420 IPOIB_GID_ARG(mcast->mcmember.mgid), 421 status); 422 } 423 } 424 425 mcast->backoff *= 2; 426 if (mcast->backoff > IPOIB_MAX_BACKOFF_SECONDS) 427 mcast->backoff = IPOIB_MAX_BACKOFF_SECONDS; 428 429 mcast->query = NULL; 430 431 down(&mcast_mutex); 432 if (test_bit(IPOIB_MCAST_RUN, &priv->flags)) { 433 if (status == -ETIMEDOUT) 434 queue_work(ipoib_workqueue, &priv->mcast_task); 435 else 436 queue_delayed_work(ipoib_workqueue, &priv->mcast_task, 437 mcast->backoff * HZ); 438 } else 439 complete(&mcast->done); 440 up(&mcast_mutex); 441 442 return; 443} 444 445static void ipoib_mcast_join(struct net_device *dev, struct ipoib_mcast *mcast, 446 int create) 447{ 448 struct ipoib_dev_priv *priv = netdev_priv(dev); 449 struct ib_sa_mcmember_rec rec = { 450 .join_state = 1 451 }; 452 ib_sa_comp_mask comp_mask; 453 int ret = 0; 454 455 ipoib_dbg_mcast(priv, "joining MGID " IPOIB_GID_FMT "\n", 456 IPOIB_GID_ARG(mcast->mcmember.mgid)); 457 458 rec.mgid = mcast->mcmember.mgid; 459 rec.port_gid = priv->local_gid; 460 rec.pkey = be16_to_cpu(priv->pkey); 461 462 comp_mask = 463 IB_SA_MCMEMBER_REC_MGID | 464 IB_SA_MCMEMBER_REC_PORT_GID | 465 IB_SA_MCMEMBER_REC_PKEY | 466 IB_SA_MCMEMBER_REC_JOIN_STATE; 467 468 if (create) { 469 comp_mask |= 470 IB_SA_MCMEMBER_REC_QKEY | 471 IB_SA_MCMEMBER_REC_SL | 472 IB_SA_MCMEMBER_REC_FLOW_LABEL | 473 IB_SA_MCMEMBER_REC_TRAFFIC_CLASS; 474 475 rec.qkey = priv->broadcast->mcmember.qkey; 476 rec.sl = priv->broadcast->mcmember.sl; 477 rec.flow_label = priv->broadcast->mcmember.flow_label; 478 rec.traffic_class = priv->broadcast->mcmember.traffic_class; 479 } 480 481 ret = ib_sa_mcmember_rec_set(priv->ca, priv->port, &rec, comp_mask, 482 mcast->backoff * 1000, GFP_ATOMIC, 483 ipoib_mcast_join_complete, 484 mcast, &mcast->query); 485 486 if (ret < 0) { 487 ipoib_warn(priv, "ib_sa_mcmember_rec_set failed, status %d\n", ret); 488 489 mcast->backoff *= 2; 490 if (mcast->backoff > IPOIB_MAX_BACKOFF_SECONDS) 491 mcast->backoff = IPOIB_MAX_BACKOFF_SECONDS; 492 493 down(&mcast_mutex); 494 if (test_bit(IPOIB_MCAST_RUN, &priv->flags)) 495 queue_delayed_work(ipoib_workqueue, 496 &priv->mcast_task, 497 mcast->backoff); 498 up(&mcast_mutex); 499 } else 500 mcast->query_id = ret; 501} 502 503void ipoib_mcast_join_task(void *dev_ptr) 504{ 505 struct net_device *dev = dev_ptr; 506 struct ipoib_dev_priv *priv = netdev_priv(dev); 507 508 if (!test_bit(IPOIB_MCAST_RUN, &priv->flags)) 509 return; 510 511 if (ib_query_gid(priv->ca, priv->port, 0, &priv->local_gid)) 512 ipoib_warn(priv, "ib_gid_entry_get() failed\n"); 513 else 514 memcpy(priv->dev->dev_addr + 4, priv->local_gid.raw, sizeof (union ib_gid)); 515 516 { 517 struct ib_port_attr attr; 518 519 if (!ib_query_port(priv->ca, priv->port, &attr)) { 520 priv->local_lid = attr.lid; 521 priv->local_rate = attr.active_speed * 522 ib_width_enum_to_int(attr.active_width); 523 } else 524 ipoib_warn(priv, "ib_query_port failed\n"); 525 } 526 527 if (!priv->broadcast) { 528 priv->broadcast = ipoib_mcast_alloc(dev, 1); 529 if (!priv->broadcast) { 530 ipoib_warn(priv, "failed to allocate broadcast group\n"); 531 down(&mcast_mutex); 532 if (test_bit(IPOIB_MCAST_RUN, &priv->flags)) 533 queue_delayed_work(ipoib_workqueue, 534 &priv->mcast_task, HZ); 535 up(&mcast_mutex); 536 return; 537 } 538 539 memcpy(priv->broadcast->mcmember.mgid.raw, priv->dev->broadcast + 4, 540 sizeof (union ib_gid)); 541 542 spin_lock_irq(&priv->lock); 543 __ipoib_mcast_add(dev, priv->broadcast); 544 spin_unlock_irq(&priv->lock); 545 } 546 547 if (!test_bit(IPOIB_MCAST_FLAG_ATTACHED, &priv->broadcast->flags)) { 548 ipoib_mcast_join(dev, priv->broadcast, 0); 549 return; 550 } 551 552 while (1) { 553 struct ipoib_mcast *mcast = NULL; 554 555 spin_lock_irq(&priv->lock); 556 list_for_each_entry(mcast, &priv->multicast_list, list) { 557 if (!test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags) 558 && !test_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags) 559 && !test_bit(IPOIB_MCAST_FLAG_ATTACHED, &mcast->flags)) { 560 /* Found the next unjoined group */ 561 break; 562 } 563 } 564 spin_unlock_irq(&priv->lock); 565 566 if (&mcast->list == &priv->multicast_list) { 567 /* All done */ 568 break; 569 } 570 571 ipoib_mcast_join(dev, mcast, 1); 572 return; 573 } 574 575 priv->mcast_mtu = ib_mtu_enum_to_int(priv->broadcast->mcmember.mtu) - 576 IPOIB_ENCAP_LEN; 577 dev->mtu = min(priv->mcast_mtu, priv->admin_mtu); 578 579 ipoib_dbg_mcast(priv, "successfully joined all multicast groups\n"); 580 581 clear_bit(IPOIB_MCAST_RUN, &priv->flags); 582 netif_carrier_on(dev); 583} 584 585int ipoib_mcast_start_thread(struct net_device *dev) 586{ 587 struct ipoib_dev_priv *priv = netdev_priv(dev); 588 589 ipoib_dbg_mcast(priv, "starting multicast thread\n"); 590 591 down(&mcast_mutex); 592 if (!test_and_set_bit(IPOIB_MCAST_RUN, &priv->flags)) 593 queue_work(ipoib_workqueue, &priv->mcast_task); 594 up(&mcast_mutex); 595 596 return 0; 597} 598 599int ipoib_mcast_stop_thread(struct net_device *dev) 600{ 601 struct ipoib_dev_priv *priv = netdev_priv(dev); 602 struct ipoib_mcast *mcast; 603 604 ipoib_dbg_mcast(priv, "stopping multicast thread\n"); 605 606 down(&mcast_mutex); 607 clear_bit(IPOIB_MCAST_RUN, &priv->flags); 608 cancel_delayed_work(&priv->mcast_task); 609 up(&mcast_mutex); 610 611 flush_workqueue(ipoib_workqueue); 612 613 if (priv->broadcast && priv->broadcast->query) { 614 ib_sa_cancel_query(priv->broadcast->query_id, priv->broadcast->query); 615 priv->broadcast->query = NULL; 616 ipoib_dbg_mcast(priv, "waiting for bcast\n"); 617 wait_for_completion(&priv->broadcast->done); 618 } 619 620 list_for_each_entry(mcast, &priv->multicast_list, list) { 621 if (mcast->query) { 622 ib_sa_cancel_query(mcast->query_id, mcast->query); 623 mcast->query = NULL; 624 ipoib_dbg_mcast(priv, "waiting for MGID " IPOIB_GID_FMT "\n", 625 IPOIB_GID_ARG(mcast->mcmember.mgid)); 626 wait_for_completion(&mcast->done); 627 } 628 } 629 630 return 0; 631} 632 633static int ipoib_mcast_leave(struct net_device *dev, struct ipoib_mcast *mcast) 634{ 635 struct ipoib_dev_priv *priv = netdev_priv(dev); 636 struct ib_sa_mcmember_rec rec = { 637 .join_state = 1 638 }; 639 int ret = 0; 640 641 if (!test_and_clear_bit(IPOIB_MCAST_FLAG_ATTACHED, &mcast->flags)) 642 return 0; 643 644 ipoib_dbg_mcast(priv, "leaving MGID " IPOIB_GID_FMT "\n", 645 IPOIB_GID_ARG(mcast->mcmember.mgid)); 646 647 rec.mgid = mcast->mcmember.mgid; 648 rec.port_gid = priv->local_gid; 649 rec.pkey = be16_to_cpu(priv->pkey); 650 651 /* Remove ourselves from the multicast group */ 652 ret = ipoib_mcast_detach(dev, be16_to_cpu(mcast->mcmember.mlid), 653 &mcast->mcmember.mgid); 654 if (ret) 655 ipoib_warn(priv, "ipoib_mcast_detach failed (result = %d)\n", ret); 656 657 /* 658 * Just make one shot at leaving and don't wait for a reply; 659 * if we fail, too bad. 660 */ 661 ret = ib_sa_mcmember_rec_delete(priv->ca, priv->port, &rec, 662 IB_SA_MCMEMBER_REC_MGID | 663 IB_SA_MCMEMBER_REC_PORT_GID | 664 IB_SA_MCMEMBER_REC_PKEY | 665 IB_SA_MCMEMBER_REC_JOIN_STATE, 666 0, GFP_ATOMIC, NULL, 667 mcast, &mcast->query); 668 if (ret < 0) 669 ipoib_warn(priv, "ib_sa_mcmember_rec_delete failed " 670 "for leave (result = %d)\n", ret); 671 672 return 0; 673} 674 675void ipoib_mcast_send(struct net_device *dev, union ib_gid *mgid, 676 struct sk_buff *skb) 677{ 678 struct ipoib_dev_priv *priv = netdev_priv(dev); 679 struct ipoib_mcast *mcast; 680 681 /* 682 * We can only be called from ipoib_start_xmit, so we're 683 * inside tx_lock -- no need to save/restore flags. 684 */ 685 spin_lock(&priv->lock); 686 687 mcast = __ipoib_mcast_find(dev, mgid); 688 if (!mcast) { 689 /* Let's create a new send only group now */ 690 ipoib_dbg_mcast(priv, "setting up send only multicast group for " 691 IPOIB_GID_FMT "\n", IPOIB_GID_ARG(*mgid)); 692 693 mcast = ipoib_mcast_alloc(dev, 0); 694 if (!mcast) { 695 ipoib_warn(priv, "unable to allocate memory for " 696 "multicast structure\n"); 697 dev_kfree_skb_any(skb); 698 goto out; 699 } 700 701 set_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags); 702 mcast->mcmember.mgid = *mgid; 703 __ipoib_mcast_add(dev, mcast); 704 list_add_tail(&mcast->list, &priv->multicast_list); 705 } 706 707 if (!mcast->ah) { 708 if (skb_queue_len(&mcast->pkt_queue) < IPOIB_MAX_MCAST_QUEUE) 709 skb_queue_tail(&mcast->pkt_queue, skb); 710 else 711 dev_kfree_skb_any(skb); 712 713 if (mcast->query) 714 ipoib_dbg_mcast(priv, "no address vector, " 715 "but multicast join already started\n"); 716 else if (test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags)) 717 ipoib_mcast_sendonly_join(mcast); 718 719 /* 720 * If lookup completes between here and out:, don't 721 * want to send packet twice. 722 */ 723 mcast = NULL; 724 } 725 726out: 727 if (mcast && mcast->ah) { 728 if (skb->dst && 729 skb->dst->neighbour && 730 !*to_ipoib_neigh(skb->dst->neighbour)) { 731 struct ipoib_neigh *neigh = kmalloc(sizeof *neigh, GFP_ATOMIC); 732 733 if (neigh) { 734 kref_get(&mcast->ah->ref); 735 neigh->ah = mcast->ah; 736 neigh->neighbour = skb->dst->neighbour; 737 *to_ipoib_neigh(skb->dst->neighbour) = neigh; 738 list_add_tail(&neigh->list, &mcast->neigh_list); 739 } 740 } 741 742 ipoib_send(dev, skb, mcast->ah, IB_MULTICAST_QPN); 743 } 744 745 spin_unlock(&priv->lock); 746} 747 748void ipoib_mcast_dev_flush(struct net_device *dev) 749{ 750 struct ipoib_dev_priv *priv = netdev_priv(dev); 751 LIST_HEAD(remove_list); 752 struct ipoib_mcast *mcast, *tmcast, *nmcast; 753 unsigned long flags; 754 755 ipoib_dbg_mcast(priv, "flushing multicast list\n"); 756 757 spin_lock_irqsave(&priv->lock, flags); 758 list_for_each_entry_safe(mcast, tmcast, &priv->multicast_list, list) { 759 nmcast = ipoib_mcast_alloc(dev, 0); 760 if (nmcast) { 761 nmcast->flags = 762 mcast->flags & (1 << IPOIB_MCAST_FLAG_SENDONLY); 763 764 nmcast->mcmember.mgid = mcast->mcmember.mgid; 765 766 /* Add the new group in before the to-be-destroyed group */ 767 list_add_tail(&nmcast->list, &mcast->list); 768 list_del_init(&mcast->list); 769 770 rb_replace_node(&mcast->rb_node, &nmcast->rb_node, 771 &priv->multicast_tree); 772 773 list_add_tail(&mcast->list, &remove_list); 774 } else { 775 ipoib_warn(priv, "could not reallocate multicast group " 776 IPOIB_GID_FMT "\n", 777 IPOIB_GID_ARG(mcast->mcmember.mgid)); 778 } 779 } 780 781 if (priv->broadcast) { 782 nmcast = ipoib_mcast_alloc(dev, 0); 783 if (nmcast) { 784 nmcast->mcmember.mgid = priv->broadcast->mcmember.mgid; 785 786 rb_replace_node(&priv->broadcast->rb_node, 787 &nmcast->rb_node, 788 &priv->multicast_tree); 789 790 list_add_tail(&priv->broadcast->list, &remove_list); 791 } 792 793 priv->broadcast = nmcast; 794 } 795 796 spin_unlock_irqrestore(&priv->lock, flags); 797 798 list_for_each_entry_safe(mcast, tmcast, &remove_list, list) { 799 ipoib_mcast_leave(dev, mcast); 800 ipoib_mcast_free(mcast); 801 } 802} 803 804void ipoib_mcast_dev_down(struct net_device *dev) 805{ 806 struct ipoib_dev_priv *priv = netdev_priv(dev); 807 unsigned long flags; 808 809 /* Delete broadcast since it will be recreated */ 810 if (priv->broadcast) { 811 ipoib_dbg_mcast(priv, "deleting broadcast group\n"); 812 813 spin_lock_irqsave(&priv->lock, flags); 814 rb_erase(&priv->broadcast->rb_node, &priv->multicast_tree); 815 spin_unlock_irqrestore(&priv->lock, flags); 816 ipoib_mcast_leave(dev, priv->broadcast); 817 ipoib_mcast_free(priv->broadcast); 818 priv->broadcast = NULL; 819 } 820} 821 822void ipoib_mcast_restart_task(void *dev_ptr) 823{ 824 struct net_device *dev = dev_ptr; 825 struct ipoib_dev_priv *priv = netdev_priv(dev); 826 struct dev_mc_list *mclist; 827 struct ipoib_mcast *mcast, *tmcast; 828 LIST_HEAD(remove_list); 829 unsigned long flags; 830 831 ipoib_dbg_mcast(priv, "restarting multicast task\n"); 832 833 ipoib_mcast_stop_thread(dev); 834 835 spin_lock_irqsave(&priv->lock, flags); 836 837 /* 838 * Unfortunately, the networking core only gives us a list of all of 839 * the multicast hardware addresses. We need to figure out which ones 840 * are new and which ones have been removed 841 */ 842 843 /* Clear out the found flag */ 844 list_for_each_entry(mcast, &priv->multicast_list, list) 845 clear_bit(IPOIB_MCAST_FLAG_FOUND, &mcast->flags); 846 847 /* Mark all of the entries that are found or don't exist */ 848 for (mclist = dev->mc_list; mclist; mclist = mclist->next) { 849 union ib_gid mgid; 850 851 memcpy(mgid.raw, mclist->dmi_addr + 4, sizeof mgid); 852 853 /* Add in the P_Key */ 854 mgid.raw[4] = (priv->pkey >> 8) & 0xff; 855 mgid.raw[5] = priv->pkey & 0xff; 856 857 mcast = __ipoib_mcast_find(dev, &mgid); 858 if (!mcast || test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags)) { 859 struct ipoib_mcast *nmcast; 860 861 /* Not found or send-only group, let's add a new entry */ 862 ipoib_dbg_mcast(priv, "adding multicast entry for mgid " 863 IPOIB_GID_FMT "\n", IPOIB_GID_ARG(mgid)); 864 865 nmcast = ipoib_mcast_alloc(dev, 0); 866 if (!nmcast) { 867 ipoib_warn(priv, "unable to allocate memory for multicast structure\n"); 868 continue; 869 } 870 871 set_bit(IPOIB_MCAST_FLAG_FOUND, &nmcast->flags); 872 873 nmcast->mcmember.mgid = mgid; 874 875 if (mcast) { 876 /* Destroy the send only entry */ 877 list_del(&mcast->list); 878 list_add_tail(&mcast->list, &remove_list); 879 880 rb_replace_node(&mcast->rb_node, 881 &nmcast->rb_node, 882 &priv->multicast_tree); 883 } else 884 __ipoib_mcast_add(dev, nmcast); 885 886 list_add_tail(&nmcast->list, &priv->multicast_list); 887 } 888 889 if (mcast) 890 set_bit(IPOIB_MCAST_FLAG_FOUND, &mcast->flags); 891 } 892 893 /* Remove all of the entries don't exist anymore */ 894 list_for_each_entry_safe(mcast, tmcast, &priv->multicast_list, list) { 895 if (!test_bit(IPOIB_MCAST_FLAG_FOUND, &mcast->flags) && 896 !test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags)) { 897 ipoib_dbg_mcast(priv, "deleting multicast group " IPOIB_GID_FMT "\n", 898 IPOIB_GID_ARG(mcast->mcmember.mgid)); 899 900 rb_erase(&mcast->rb_node, &priv->multicast_tree); 901 902 /* Move to the remove list */ 903 list_del(&mcast->list); 904 list_add_tail(&mcast->list, &remove_list); 905 } 906 } 907 spin_unlock_irqrestore(&priv->lock, flags); 908 909 /* We have to cancel outside of the spinlock */ 910 list_for_each_entry_safe(mcast, tmcast, &remove_list, list) { 911 ipoib_mcast_leave(mcast->dev, mcast); 912 ipoib_mcast_free(mcast); 913 } 914 915 if (test_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags)) 916 ipoib_mcast_start_thread(dev); 917} 918 919struct ipoib_mcast_iter *ipoib_mcast_iter_init(struct net_device *dev) 920{ 921 struct ipoib_mcast_iter *iter; 922 923 iter = kmalloc(sizeof *iter, GFP_KERNEL); 924 if (!iter) 925 return NULL; 926 927 iter->dev = dev; 928 memset(iter->mgid.raw, 0, sizeof iter->mgid); 929 930 if (ipoib_mcast_iter_next(iter)) { 931 ipoib_mcast_iter_free(iter); 932 return NULL; 933 } 934 935 return iter; 936} 937 938void ipoib_mcast_iter_free(struct ipoib_mcast_iter *iter) 939{ 940 kfree(iter); 941} 942 943int ipoib_mcast_iter_next(struct ipoib_mcast_iter *iter) 944{ 945 struct ipoib_dev_priv *priv = netdev_priv(iter->dev); 946 struct rb_node *n; 947 struct ipoib_mcast *mcast; 948 int ret = 1; 949 950 spin_lock_irq(&priv->lock); 951 952 n = rb_first(&priv->multicast_tree); 953 954 while (n) { 955 mcast = rb_entry(n, struct ipoib_mcast, rb_node); 956 957 if (memcmp(iter->mgid.raw, mcast->mcmember.mgid.raw, 958 sizeof (union ib_gid)) < 0) { 959 iter->mgid = mcast->mcmember.mgid; 960 iter->created = mcast->created; 961 iter->queuelen = skb_queue_len(&mcast->pkt_queue); 962 iter->complete = !!mcast->ah; 963 iter->send_only = !!(mcast->flags & (1 << IPOIB_MCAST_FLAG_SENDONLY)); 964 965 ret = 0; 966 967 break; 968 } 969 970 n = rb_next(n); 971 } 972 973 spin_unlock_irq(&priv->lock); 974 975 return ret; 976} 977 978void ipoib_mcast_iter_read(struct ipoib_mcast_iter *iter, 979 union ib_gid *mgid, 980 unsigned long *created, 981 unsigned int *queuelen, 982 unsigned int *complete, 983 unsigned int *send_only) 984{ 985 *mgid = iter->mgid; 986 *created = iter->created; 987 *queuelen = iter->queuelen; 988 *complete = iter->complete; 989 *send_only = iter->send_only; 990} 991