Commit 3b1d6210a9577369103330b0d802b0bf74b65e7f

Authored by Paolo Abeni
Committed by David S. Miller
1 parent 3f8e0aae17

mptcp: implement and use MPTCP-level retransmission

On a timeout event, schedule a work item to perform the retransmission.
The retransmission code closely resembles the sendmsg() implementation
and re-uses mptcp_sendmsg_frag(), passing a dummy msghdr - needed only
for its flags - and peeking the relevant dfrag from the rtx head.

Signed-off-by: Paolo Abeni <pabeni@redhat.com>
Signed-off-by: Mat Martineau <mathew.j.martineau@linux.intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

Showing 2 changed files with 95 additions and 4 deletions Side-by-side Diff

net/mptcp/protocol.c
... ... @@ -283,6 +283,10 @@
283 283 void mptcp_data_acked(struct sock *sk)
284 284 {
285 285 mptcp_reset_timer(sk);
  286 +
  287 + if (!sk_stream_is_writeable(sk) &&
  288 + schedule_work(&mptcp_sk(sk)->work))
  289 + sock_hold(sk);
286 290 }
287 291  
288 292 static void mptcp_stop_timer(struct sock *sk)
289 293  
... ... @@ -900,10 +904,13 @@
900 904 {
901 905 struct mptcp_sock *msk = mptcp_sk(sk);
902 906  
903   - if (atomic64_read(&msk->snd_una) == msk->write_seq)
  907 + if (atomic64_read(&msk->snd_una) == msk->write_seq) {
904 908 mptcp_stop_timer(sk);
905   - else
906   - mptcp_reset_timer(sk);
  909 + } else {
  910 + set_bit(MPTCP_WORK_RTX, &msk->flags);
  911 + if (schedule_work(&msk->work))
  912 + sock_hold(sk);
  913 + }
907 914 }
908 915  
909 916 static void mptcp_retransmit_timer(struct timer_list *t)
... ... @@ -925,6 +932,37 @@
925 932 sock_put(sk);
926 933 }
927 934  
  935 +/* Find an idle subflow. Return NULL if there is unacked data at tcp
  936 + * level.
  937 + *
  938 + * A backup subflow is returned only if that is the only kind available.
  939 + */
  940 +static struct sock *mptcp_subflow_get_retrans(const struct mptcp_sock *msk)
  941 +{
  942 + struct mptcp_subflow_context *subflow;
  943 + struct sock *backup = NULL;
  944 +
  945 + sock_owned_by_me((const struct sock *)msk);
  946 +
  947 + mptcp_for_each_subflow(msk, subflow) {
  948 + struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
  949 +
  950 + /* still data outstanding at TCP level? Don't retransmit. */
  951 + if (!tcp_write_queue_empty(ssk))
  952 + return NULL;
  953 +
  954 + if (subflow->backup) {
  955 + if (!backup)
  956 + backup = ssk;
  957 + continue;
  958 + }
  959 +
  960 + return ssk;
  961 + }
  962 +
  963 + return backup;
  964 +}
  965 +
928 966 /* subflow sockets can be either outgoing (connect) or incoming
929 967 * (accept).
930 968 *
931 969  
932 970  
... ... @@ -958,11 +996,62 @@
958 996 static void mptcp_worker(struct work_struct *work)
959 997 {
960 998 struct mptcp_sock *msk = container_of(work, struct mptcp_sock, work);
961   - struct sock *sk = &msk->sk.icsk_inet.sk;
  999 + struct sock *ssk, *sk = &msk->sk.icsk_inet.sk;
  1000 + int orig_len, orig_offset, ret, mss_now = 0, size_goal = 0;
  1001 + struct mptcp_data_frag *dfrag;
  1002 + u64 orig_write_seq;
  1003 + size_t copied = 0;
  1004 + struct msghdr msg;
  1005 + long timeo = 0;
962 1006  
963 1007 lock_sock(sk);
  1008 + mptcp_clean_una(sk);
964 1009 __mptcp_flush_join_list(msk);
965 1010 __mptcp_move_skbs(msk);
  1011 +
  1012 + if (!test_and_clear_bit(MPTCP_WORK_RTX, &msk->flags))
  1013 + goto unlock;
  1014 +
  1015 + dfrag = mptcp_rtx_head(sk);
  1016 + if (!dfrag)
  1017 + goto unlock;
  1018 +
  1019 + ssk = mptcp_subflow_get_retrans(msk);
  1020 + if (!ssk)
  1021 + goto reset_unlock;
  1022 +
  1023 + lock_sock(ssk);
  1024 +
  1025 + msg.msg_flags = MSG_DONTWAIT;
  1026 + orig_len = dfrag->data_len;
  1027 + orig_offset = dfrag->offset;
  1028 + orig_write_seq = dfrag->data_seq;
  1029 + while (dfrag->data_len > 0) {
  1030 + ret = mptcp_sendmsg_frag(sk, ssk, &msg, dfrag, &timeo, &mss_now,
  1031 + &size_goal);
  1032 + if (ret < 0)
  1033 + break;
  1034 +
  1035 + copied += ret;
  1036 + dfrag->data_len -= ret;
  1037 + dfrag->offset += ret;
  1038 + }
  1039 + if (copied)
  1040 + tcp_push(ssk, msg.msg_flags, mss_now, tcp_sk(ssk)->nonagle,
  1041 + size_goal);
  1042 +
  1043 + dfrag->data_seq = orig_write_seq;
  1044 + dfrag->offset = orig_offset;
  1045 + dfrag->data_len = orig_len;
  1046 +
  1047 + mptcp_set_timeout(sk, ssk);
  1048 + release_sock(ssk);
  1049 +
  1050 +reset_unlock:
  1051 + if (!mptcp_timer_pending(sk))
  1052 + mptcp_reset_timer(sk);
  1053 +
  1054 +unlock:
966 1055 release_sock(sk);
967 1056 sock_put(sk);
968 1057 }
... ... @@ -1124,6 +1213,7 @@
1124 1213 lock_sock(sk);
1125 1214 __mptcp_clear_xmit(sk);
1126 1215 release_sock(sk);
  1216 + mptcp_cancel_work(sk);
1127 1217 return tcp_disconnect(sk, flags);
1128 1218 }
1129 1219  
net/mptcp/protocol.h
... ... @@ -88,6 +88,7 @@
88 88 /* MPTCP socket flags */
89 89 #define MPTCP_DATA_READY 0
90 90 #define MPTCP_SEND_SPACE 1
  91 +#define MPTCP_WORK_RTX 2
91 92  
92 93 static inline __be32 mptcp_option(u8 subopt, u8 len, u8 nib, u8 field)
93 94 {