Commit 66b13d99d96a1a69f47a6bc3dc47f45955967377
Committed by
David S. Miller
1 parent
318cf7aaa0
Exists in
master
and in
6 other branches
ipv4: tcp: fix TOS value in ACK messages sent from TIME_WAIT
There is a long-standing bug in the Linux TCP stack, about ACK messages sent on behalf of TIME_WAIT sockets.

In the IP header of the ACK message, we choose to reflect the TOS field of the incoming message, and this might break some setups.

Examples of things that were broken:
- Routing using TOS as a selector
- Firewalls
- Traffic classification / shaping

We now remember the inet tos field in the timewait structure and use it in ACK generation and route lookup.

Notes:
- We still reflect incoming TOS in RST messages.
- We could extend MuraliRaja Muniraju's patch to report the TOS value in netlink messages for TIME_WAIT sockets.
- A patch is needed for IPv6.

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Showing 5 changed files with 15 additions and 9 deletions Side-by-side Diff
include/net/inet_timewait_sock.h
... | ... | @@ -126,7 +126,8 @@ |
126 | 126 | /* And these are ours. */ |
127 | 127 | unsigned int tw_ipv6only : 1, |
128 | 128 | tw_transparent : 1, |
129 | - tw_pad : 14, /* 14 bits hole */ | |
129 | + tw_pad : 6, /* 6 bits hole */ | |
130 | + tw_tos : 8, | |
130 | 131 | tw_ipv6_offset : 16; |
131 | 132 | kmemcheck_bitfield_end(flags); |
132 | 133 | unsigned long tw_ttd; |
include/net/ip.h
... | ... | @@ -165,6 +165,7 @@ |
165 | 165 | int csumoffset; /* u16 offset of csum in iov[0].iov_base */ |
166 | 166 | /* -1 if not needed */ |
167 | 167 | int bound_dev_if; |
168 | + u8 tos; | |
168 | 169 | }; |
169 | 170 | |
170 | 171 | #define IP_REPLY_ARG_NOSRCCHECK 1 |
... | ... | @@ -175,7 +176,7 @@ |
175 | 176 | } |
176 | 177 | |
177 | 178 | void ip_send_reply(struct sock *sk, struct sk_buff *skb, __be32 daddr, |
178 | - struct ip_reply_arg *arg, unsigned int len); | |
179 | + const struct ip_reply_arg *arg, unsigned int len); | |
179 | 180 | |
180 | 181 | struct ipv4_config { |
181 | 182 | int log_martians; |
net/ipv4/inet_timewait_sock.c
... | ... | @@ -183,6 +183,7 @@ |
183 | 183 | tw->tw_daddr = inet->inet_daddr; |
184 | 184 | tw->tw_rcv_saddr = inet->inet_rcv_saddr; |
185 | 185 | tw->tw_bound_dev_if = sk->sk_bound_dev_if; |
186 | + tw->tw_tos = inet->tos; | |
186 | 187 | tw->tw_num = inet->inet_num; |
187 | 188 | tw->tw_state = TCP_TIME_WAIT; |
188 | 189 | tw->tw_substate = state; |
net/ipv4/ip_output.c
... | ... | @@ -1466,7 +1466,7 @@ |
1466 | 1466 | * structure to pass arguments. |
1467 | 1467 | */ |
1468 | 1468 | void ip_send_reply(struct sock *sk, struct sk_buff *skb, __be32 daddr, |
1469 | - struct ip_reply_arg *arg, unsigned int len) | |
1469 | + const struct ip_reply_arg *arg, unsigned int len) | |
1470 | 1470 | { |
1471 | 1471 | struct inet_sock *inet = inet_sk(sk); |
1472 | 1472 | struct ip_options_data replyopts; |
... | ... | @@ -1489,7 +1489,7 @@ |
1489 | 1489 | } |
1490 | 1490 | |
1491 | 1491 | flowi4_init_output(&fl4, arg->bound_dev_if, 0, |
1492 | - RT_TOS(ip_hdr(skb)->tos), | |
1492 | + RT_TOS(arg->tos), | |
1493 | 1493 | RT_SCOPE_UNIVERSE, sk->sk_protocol, |
1494 | 1494 | ip_reply_arg_flowi_flags(arg), |
1495 | 1495 | daddr, rt->rt_spec_dst, |
... | ... | @@ -1506,7 +1506,7 @@ |
1506 | 1506 | with locally disabled BH and that sk cannot be already spinlocked. |
1507 | 1507 | */ |
1508 | 1508 | bh_lock_sock(sk); |
1509 | - inet->tos = ip_hdr(skb)->tos; | |
1509 | + inet->tos = arg->tos; | |
1510 | 1510 | sk->sk_priority = skb->priority; |
1511 | 1511 | sk->sk_protocol = ip_hdr(skb)->protocol; |
1512 | 1512 | sk->sk_bound_dev_if = arg->bound_dev_if; |
net/ipv4/tcp_ipv4.c
... | ... | @@ -652,6 +652,7 @@ |
652 | 652 | arg.flags = (sk && inet_sk(sk)->transparent) ? IP_REPLY_ARG_NOSRCCHECK : 0; |
653 | 653 | |
654 | 654 | net = dev_net(skb_dst(skb)->dev); |
655 | + arg.tos = ip_hdr(skb)->tos; | |
655 | 656 | ip_send_reply(net->ipv4.tcp_sock, skb, ip_hdr(skb)->saddr, |
656 | 657 | &arg, arg.iov[0].iov_len); |
657 | 658 | |
... | ... | @@ -666,7 +667,7 @@ |
666 | 667 | static void tcp_v4_send_ack(struct sk_buff *skb, u32 seq, u32 ack, |
667 | 668 | u32 win, u32 ts, int oif, |
668 | 669 | struct tcp_md5sig_key *key, |
669 | - int reply_flags) | |
670 | + int reply_flags, u8 tos) | |
670 | 671 | { |
671 | 672 | const struct tcphdr *th = tcp_hdr(skb); |
672 | 673 | struct { |
... | ... | @@ -726,7 +727,7 @@ |
726 | 727 | arg.csumoffset = offsetof(struct tcphdr, check) / 2; |
727 | 728 | if (oif) |
728 | 729 | arg.bound_dev_if = oif; |
729 | - | |
730 | + arg.tos = tos; | |
730 | 731 | ip_send_reply(net->ipv4.tcp_sock, skb, ip_hdr(skb)->saddr, |
731 | 732 | &arg, arg.iov[0].iov_len); |
732 | 733 | |
... | ... | @@ -743,7 +744,8 @@ |
743 | 744 | tcptw->tw_ts_recent, |
744 | 745 | tw->tw_bound_dev_if, |
745 | 746 | tcp_twsk_md5_key(tcptw), |
746 | - tw->tw_transparent ? IP_REPLY_ARG_NOSRCCHECK : 0 | |
747 | + tw->tw_transparent ? IP_REPLY_ARG_NOSRCCHECK : 0, | |
748 | + tw->tw_tos | |
747 | 749 | ); |
748 | 750 | |
749 | 751 | inet_twsk_put(tw); |
... | ... | @@ -757,7 +759,8 @@ |
757 | 759 | req->ts_recent, |
758 | 760 | 0, |
759 | 761 | tcp_v4_md5_do_lookup(sk, ip_hdr(skb)->daddr), |
760 | - inet_rsk(req)->no_srccheck ? IP_REPLY_ARG_NOSRCCHECK : 0); | |
762 | + inet_rsk(req)->no_srccheck ? IP_REPLY_ARG_NOSRCCHECK : 0, | |
763 | + ip_hdr(skb)->tos); | |
761 | 764 | } |
762 | 765 | |
763 | 766 | /* |