Commit a3bada70660fb020430135ec8a774ae1ea6bc9a9

Authored by Jon Maloy
Committed by David S. Miller
1 parent 399574d419

tipc: guarantee delivery of last broadcast before DOWN event

The following scenario is possible:
- A user sends a broadcast message, and thereafter immediately leaves
  the group.
- The LEAVE message, following a different path than the broadcast,
  arrives ahead of the broadcast, and the sending member is removed
  from the receiver's list.
- The broadcast message arrives, but is dropped because the sender
  is now unknown to the recipient.

We fix this by sequence numbering membership events, just like ordinary
unicast messages. Currently, when a JOIN is sent to a peer, it contains
a synchronization point - the sequence number of the next broadcast to
be sent - giving the receiver a starting point for synchronization. We
now let LEAVE messages carry such an "end synchronization" point too,
so that the recipient can delay the removal of the sending member until
it knows that all messages from it have been received.
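
As an illustration, here is a minimal user-space C sketch of the two
sync points. The struct and function names (member, grp_event, on_join,
on_leave) are made up for the example and are not the kernel's actual
tipc structures or API:

  #include <stdbool.h>
  #include <stdint.h>

  struct member {
          uint16_t bc_rcv_nxt; /* next broadcast seqno expected from peer */
          uint16_t bc_syncpt;  /* sync point carried in JOIN/LEAVE        */
  };

  struct grp_event {
          bool     down;       /* DOWN (WITHDRAW) membership event        */
          uint16_t bc_seqno;   /* seqno stamped on the event              */
  };

  /* JOIN carries a start point: the seqno of the first broadcast that
   * will be received from this member. */
  static void on_join(struct member *m, uint16_t syncpt)
  {
          m->bc_syncpt = syncpt;
          m->bc_rcv_nxt = syncpt;
  }

  /* LEAVE now carries an end point: the seqno of the first broadcast
   * that will NOT be sent by this member.  Stamping it on the DOWN
   * event lets the event be reordered like an ordinary unicast. */
  static void on_leave(struct member *m, struct grp_event *evt,
                       uint16_t syncpt)
  {
          m->bc_syncpt = syncpt;
          evt->down = true;
          evt->bc_seqno = syncpt;
  }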

The received synchronization points are added as sequence numbers to
the generated membership events, making it possible to handle them
almost the same way as regular unicasts in the receiving filter
function. In particular, a DOWN event with a sequence number that is
too high will be kept in the reordering queue until the missing
broadcast(s) have arrived and been delivered.
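
Continuing the sketch above, the hold-back rule in the receiving filter
could be modeled as below; again this is only an illustration of the
idea, not the actual kernel filter code:

  #include <stdio.h>

  /* A DOWN event may be delivered only once bc_rcv_nxt has caught up
   * with the event's seqno; until then it stays in the defer queue. */
  static bool event_deliverable(const struct member *m,
                                const struct grp_event *evt)
  {
          /* u16 wrap-around arithmetic, like the bc_* fields above */
          return (int16_t)(evt->bc_seqno - m->bc_rcv_nxt) <= 0;
  }

  int main(void)
  {
          struct member m;
          struct grp_event down;

          on_join(&m, 10);         /* expect broadcasts 10, 11, ...      */
          on_leave(&m, &down, 12); /* peer left after sending 10 and 11  */

          m.bc_rcv_nxt = 11;       /* broadcast 10 delivered, 11 missing */
          printf("deliver DOWN? %d\n", event_deliverable(&m, &down)); /* 0 */

          m.bc_rcv_nxt = 12;       /* broadcast 11 delivered as well     */
          printf("deliver DOWN? %d\n", event_deliverable(&m, &down)); /* 1 */
          return 0;
  }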

Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Acked-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

Showing 1 changed file with 32 additions and 13 deletions

... ... @@ -71,6 +71,7 @@
71 71 u16 advertised;
72 72 u16 window;
73 73 u16 bc_rcv_nxt;
  74 + u16 bc_syncpt;
74 75 u16 bc_acked;
75 76 bool usr_pending;
76 77 };
... ... @@ -410,7 +411,7 @@
410 411 struct sk_buff *_skb, *tmp;
411 412 int mtyp = msg_type(hdr);
412 413  
413   - /* Bcast may be bypassed by unicast or other bcast, - sort it in */
  414 + /* Bcast/mcast may be bypassed by ucast or other bcast, - sort it in */
414 415 if (mtyp == TIPC_GRP_BCAST_MSG || mtyp == TIPC_GRP_MCAST_MSG) {
415 416 skb_queue_walk_safe(defq, _skb, tmp) {
416 417 _hdr = buf_msg(_skb);
... ... @@ -431,7 +432,7 @@
431 432 struct sk_buff_head *xmitq)
432 433 {
433 434 struct sk_buff *skb = __skb_dequeue(inputq);
434   - bool ack, deliver, update;
  435 + bool ack, deliver, update, leave = false;
435 436 struct sk_buff_head *defq;
436 437 struct tipc_member *m;
437 438 struct tipc_msg *hdr;
... ... @@ -448,13 +449,6 @@
448 449 if (!msg_in_group(hdr))
449 450 goto drop;
450 451  
451   - if (msg_is_grp_evt(hdr)) {
452   - if (!grp->events)
453   - goto drop;
454   - __skb_queue_tail(inputq, skb);
455   - return;
456   - }
457   -
458 452 m = tipc_group_find_member(grp, node, port);
459 453 if (!tipc_group_is_receiver(m))
460 454 goto drop;
... ... @@ -490,6 +484,12 @@
490 484 break;
491 485 case TIPC_GRP_UCAST_MSG:
492 486 break;
  487 + case TIPC_GRP_MEMBER_EVT:
  488 + if (m->state == MBR_LEAVING)
  489 + leave = true;
  490 + if (!grp->events)
  491 + deliver = false;
  492 + break;
493 493 default:
494 494 break;
495 495 }
... ... @@ -504,6 +504,11 @@
504 504 if (ack)
505 505 tipc_group_proto_xmit(grp, m, GRP_ACK_MSG, xmitq);
506 506  
  507 + if (leave) {
  508 + tipc_group_delete_member(grp, m);
  509 + __skb_queue_purge(defq);
  510 + break;
  511 + }
507 512 if (!update)
508 513 continue;
509 514  
... ... @@ -561,6 +566,8 @@
561 566 msg_set_grp_bc_syncpt(hdr, grp->bc_snd_nxt);
562 567 msg_set_adv_win(hdr, adv);
563 568 m->advertised += adv;
  569 + } else if (mtyp == GRP_LEAVE_MSG) {
  570 + msg_set_grp_bc_syncpt(hdr, grp->bc_snd_nxt);
564 571 } else if (mtyp == GRP_ADV_MSG) {
565 572 msg_set_adv_win(hdr, adv);
566 573 m->advertised += adv;
... ... @@ -577,6 +584,7 @@
577 584 u32 node = msg_orignode(hdr);
578 585 u32 port = msg_origport(hdr);
579 586 struct tipc_member *m;
  587 + struct tipc_msg *ehdr;
580 588  
581 589 if (!grp)
582 590 return;
... ... @@ -590,7 +598,8 @@
590 598 MBR_QUARANTINED);
591 599 if (!m)
592 600 return;
593   - m->bc_rcv_nxt = msg_grp_bc_syncpt(hdr);
  601 + m->bc_syncpt = msg_grp_bc_syncpt(hdr);
  602 + m->bc_rcv_nxt = m->bc_syncpt;
594 603 m->window += msg_adv_win(hdr);
595 604  
596 605 /* Wait until PUBLISH event is received */
... ... @@ -601,6 +610,8 @@
601 610 *usr_wakeup = true;
602 611 m->usr_pending = false;
603 612 tipc_group_proto_xmit(grp, m, GRP_ADV_MSG, xmitq);
  613 + ehdr = buf_msg(m->event_msg);
  614 + msg_set_grp_bc_seqno(ehdr, m->bc_syncpt);
604 615 __skb_queue_tail(inputq, m->event_msg);
605 616 }
606 617 if (m->window < ADV_IDLE)
... ... @@ -611,6 +622,7 @@
611 622 case GRP_LEAVE_MSG:
612 623 if (!m)
613 624 return;
  625 + m->bc_syncpt = msg_grp_bc_syncpt(hdr);
614 626  
615 627 /* Wait until WITHDRAW event is received */
616 628 if (m->state != MBR_LEAVING) {
617 629  
... ... @@ -618,9 +630,10 @@
618 630 return;
619 631 }
620 632 /* Otherwise deliver already received WITHDRAW event */
  633 + ehdr = buf_msg(m->event_msg);
  634 + msg_set_grp_bc_seqno(ehdr, m->bc_syncpt);
621 635 __skb_queue_tail(inputq, m->event_msg);
622 636 *usr_wakeup = true;
623   - tipc_group_delete_member(grp, m);
624 637 list_del_init(&m->congested);
625 638 return;
626 639 case GRP_ADV_MSG:
... ... @@ -662,6 +675,7 @@
662 675 int event = evt->event;
663 676 struct tipc_member *m;
664 677 struct net *net;
  678 + bool node_up;
665 679 u32 self;
666 680  
667 681 if (!grp)
... ... @@ -695,6 +709,7 @@
695 709 m->event_msg = skb;
696 710 m->state = MBR_PUBLISHED;
697 711 } else {
  712 + msg_set_grp_bc_seqno(hdr, m->bc_syncpt);
698 713 __skb_queue_tail(inputq, skb);
699 714 m->state = MBR_JOINED;
700 715 *usr_wakeup = true;
701 716  
702 717  
703 718  
... ... @@ -715,14 +730,18 @@
715 730  
716 731 *usr_wakeup = true;
717 732 m->usr_pending = false;
  733 + node_up = tipc_node_is_up(net, node);
718 734  
719 735 /* Hold back event if more messages might be expected */
720   - if (m->state != MBR_LEAVING && tipc_node_is_up(net, node)) {
  736 + if (m->state != MBR_LEAVING && node_up) {
721 737 m->event_msg = skb;
722 738 m->state = MBR_LEAVING;
723 739 } else {
  740 + if (node_up)
  741 + msg_set_grp_bc_seqno(hdr, m->bc_syncpt);
  742 + else
  743 + msg_set_grp_bc_seqno(hdr, m->bc_rcv_nxt);
724 744 __skb_queue_tail(inputq, skb);
725   - tipc_group_delete_member(grp, m);
726 745 }
727 746 list_del_init(&m->congested);
728 747 }