Commit f8d9605243280f1870dd2c6c37a735b925c15f3c

Authored by Thomas Graf
Committed by David S. Miller
1 parent 31cb852809

sctp: Enforce retransmission limit during shutdown

When initiating a graceful shutdown while data chunks are still
on the retransmission queue and the peer is in zero window
mode, the shutdown never completes because the
retransmission error count is reset periodically by the
following two rules:

 - Do not timeout association while doing zero window probe.
 - Reset overall error count when a heartbeat request has
   been acknowledged.

The graceful shutdown will wait for all outstanding TSN to
be acknowledged before sending the SHUTDOWN request. This
never happens because the peer, being in zero window mode, does
not acknowledge the continuously retransmitted data chunks.
Although the error counter is incremented for each failed
retransmission, the reception of the SACK announcing the zero
window clears the error count again immediately. Also, heartbeat
requests continue to be sent periodically. The peer acknowledges
these requests, causing the error counter to be reset as well.

This patch changes behaviour to only reset the overall error
counter for the above rules while not in shutdown. After
reaching the maximum number of retransmission attempts, the
T5 shutdown guard timer is scheduled to give the receiver
some additional time to recover. The timer is stopped as soon
as the receiver acknowledges any data.

The issue can be easily reproduced by establishing an SCTP
association over the loopback device, constantly queueing
data at the sender while not reading any at the receiver.
Wait for the window to reach zero, then initiate a shutdown
by killing both processes simultaneously. The association
will never be freed and the chunks on the retransmission
queue will be retransmitted indefinitely.

Signed-off-by: Thomas Graf <tgraf@infradead.org>
Acked-by: Vlad Yasevich <vladislav.yasevich@hp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

Showing 5 changed files with 62 additions and 13 deletions Side-by-side Diff

include/net/sctp/command.h
... ... @@ -63,6 +63,7 @@
63 63 SCTP_CMD_ECN_ECNE, /* Do delayed ECNE processing. */
64 64 SCTP_CMD_ECN_CWR, /* Do delayed CWR processing. */
65 65 SCTP_CMD_TIMER_START, /* Start a timer. */
  66 + SCTP_CMD_TIMER_START_ONCE, /* Start a timer once */
66 67 SCTP_CMD_TIMER_RESTART, /* Restart a timer. */
67 68 SCTP_CMD_TIMER_STOP, /* Stop a timer. */
68 69 SCTP_CMD_INIT_CHOOSE_TRANSPORT, /* Choose transport for an INIT. */
net/sctp/outqueue.c
... ... @@ -1582,6 +1582,8 @@
1582 1582 #endif /* SCTP_DEBUG */
1583 1583 if (transport) {
1584 1584 if (bytes_acked) {
  1585 + struct sctp_association *asoc = transport->asoc;
  1586 +
1585 1587 /* We may have counted DATA that was migrated
1586 1588 * to this transport due to DEL-IP operation.
1587 1589 * Subtract those bytes, since the were never
... ... @@ -1600,6 +1602,17 @@
1600 1602 transport->error_count = 0;
1601 1603 transport->asoc->overall_error_count = 0;
1602 1604  
  1605 + /*
  1606 + * While in SHUTDOWN PENDING, we may have started
  1607 + * the T5 shutdown guard timer after reaching the
  1608 + * retransmission limit. Stop that timer as soon
  1609 + * as the receiver acknowledged any data.
  1610 + */
  1611 + if (asoc->state == SCTP_STATE_SHUTDOWN_PENDING &&
  1612 + del_timer(&asoc->timers
  1613 + [SCTP_EVENT_TIMEOUT_T5_SHUTDOWN_GUARD]))
  1614 + sctp_association_put(asoc);
  1615 +
1603 1616 /* Mark the destination transport address as
1604 1617 * active if it is not so marked.
1605 1618 */
1606 1619  
... ... @@ -1629,10 +1642,15 @@
1629 1642 * A sender is doing zero window probing when the
1630 1643 * receiver's advertised window is zero, and there is
1631 1644 * only one data chunk in flight to the receiver.
  1645 + *
  1646 + * Allow the association to timeout while in SHUTDOWN
  1647 + * PENDING or SHUTDOWN RECEIVED in case the receiver
  1648 + * stays in zero window mode forever.
1632 1649 */
1633 1650 if (!q->asoc->peer.rwnd &&
1634 1651 !list_empty(&tlist) &&
1635   - (sack_ctsn+2 == q->asoc->next_tsn)) {
  1652 + (sack_ctsn+2 == q->asoc->next_tsn) &&
  1653 + q->asoc->state < SCTP_STATE_SHUTDOWN_PENDING) {
1636 1654 SCTP_DEBUG_PRINTK("%s: SACK received for zero "
1637 1655 "window probe: %u\n",
1638 1656 __func__, sack_ctsn);
net/sctp/sm_sideeffect.c
... ... @@ -670,11 +670,20 @@
670 670 /* 8.3 Upon the receipt of the HEARTBEAT ACK, the sender of the
671 671 * HEARTBEAT should clear the error counter of the destination
672 672 * transport address to which the HEARTBEAT was sent.
673   - * The association's overall error count is also cleared.
674 673 */
675 674 t->error_count = 0;
676   - t->asoc->overall_error_count = 0;
677 675  
  676 + /*
  677 + * Although RFC4960 specifies that the overall error count must
  678 + * be cleared when a HEARTBEAT ACK is received, we make an
  679 + * exception while in SHUTDOWN PENDING. If the peer keeps its
  680 + * window shut forever, we may never be able to transmit our
  681 + * outstanding data and rely on the retransmission limit be reached
  682 + * to shutdown the association.
  683 + */
  684 + if (t->asoc->state != SCTP_STATE_SHUTDOWN_PENDING)
  685 + t->asoc->overall_error_count = 0;
  686 +
678 687 /* Clear the hb_sent flag to signal that we had a good
679 688 * acknowledgement.
680 689 */
... ... @@ -1436,6 +1445,13 @@
1436 1445 case SCTP_CMD_SETUP_T2:
1437 1446 sctp_cmd_setup_t2(commands, asoc, cmd->obj.ptr);
1438 1447 break;
  1448 +
  1449 + case SCTP_CMD_TIMER_START_ONCE:
  1450 + timer = &asoc->timers[cmd->obj.to];
  1451 +
  1452 + if (timer_pending(timer))
  1453 + break;
  1454 + /* fall through */
1439 1455  
1440 1456 case SCTP_CMD_TIMER_START:
1441 1457 timer = &asoc->timers[cmd->obj.to];
net/sctp/sm_statefuns.c
... ... @@ -5154,7 +5154,7 @@
5154 5154 * The sender of the SHUTDOWN MAY also start an overall guard timer
5155 5155 * 'T5-shutdown-guard' to bound the overall time for shutdown sequence.
5156 5156 */
5157   - sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_START,
  5157 + sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_RESTART,
5158 5158 SCTP_TO(SCTP_EVENT_TIMEOUT_T5_SHUTDOWN_GUARD));
5159 5159  
5160 5160 if (asoc->autoclose)
... ... @@ -5299,14 +5299,28 @@
5299 5299 SCTP_INC_STATS(SCTP_MIB_T3_RTX_EXPIREDS);
5300 5300  
5301 5301 if (asoc->overall_error_count >= asoc->max_retrans) {
5302   - sctp_add_cmd_sf(commands, SCTP_CMD_SET_SK_ERR,
5303   - SCTP_ERROR(ETIMEDOUT));
5304   - /* CMD_ASSOC_FAILED calls CMD_DELETE_TCB. */
5305   - sctp_add_cmd_sf(commands, SCTP_CMD_ASSOC_FAILED,
5306   - SCTP_PERR(SCTP_ERROR_NO_ERROR));
5307   - SCTP_INC_STATS(SCTP_MIB_ABORTEDS);
5308   - SCTP_DEC_STATS(SCTP_MIB_CURRESTAB);
5309   - return SCTP_DISPOSITION_DELETE_TCB;
  5302 + if (asoc->state == SCTP_STATE_SHUTDOWN_PENDING) {
  5303 + /*
  5304 + * We are here likely because the receiver had its rwnd
  5305 + * closed for a while and we have not been able to
  5306 + * transmit the locally queued data within the maximum
  5307 + * retransmission attempts limit. Start the T5
  5308 + * shutdown guard timer to give the receiver one last
  5309 + * chance and some additional time to recover before
  5310 + * aborting.
  5311 + */
  5312 + sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_START_ONCE,
  5313 + SCTP_TO(SCTP_EVENT_TIMEOUT_T5_SHUTDOWN_GUARD));
  5314 + } else {
  5315 + sctp_add_cmd_sf(commands, SCTP_CMD_SET_SK_ERR,
  5316 + SCTP_ERROR(ETIMEDOUT));
  5317 + /* CMD_ASSOC_FAILED calls CMD_DELETE_TCB. */
  5318 + sctp_add_cmd_sf(commands, SCTP_CMD_ASSOC_FAILED,
  5319 + SCTP_PERR(SCTP_ERROR_NO_ERROR));
  5320 + SCTP_INC_STATS(SCTP_MIB_ABORTEDS);
  5321 + SCTP_DEC_STATS(SCTP_MIB_CURRESTAB);
  5322 + return SCTP_DISPOSITION_DELETE_TCB;
  5323 + }
5310 5324 }
5311 5325  
5312 5326 /* E1) For the destination address for which the timer
net/sctp/sm_statetable.c
... ... @@ -827,7 +827,7 @@
827 827 /* SCTP_STATE_ESTABLISHED */ \
828 828 TYPE_SCTP_FUNC(sctp_sf_timer_ignore), \
829 829 /* SCTP_STATE_SHUTDOWN_PENDING */ \
830   - TYPE_SCTP_FUNC(sctp_sf_timer_ignore), \
  830 + TYPE_SCTP_FUNC(sctp_sf_t5_timer_expire), \
831 831 /* SCTP_STATE_SHUTDOWN_SENT */ \
832 832 TYPE_SCTP_FUNC(sctp_sf_t5_timer_expire), \
833 833 /* SCTP_STATE_SHUTDOWN_RECEIVED */ \