Commit 628a5c561890a9a9a74dea017873530584aab06e
Committed by
David S. Miller
1 parent
b881ef7603
Exists in
master
and in
4 other branches
[INET]: Add IP(V6)_PMTUDISC_PROBE
Add IP(V6)_PMTUDISC_PROBE value for IP(V6)_MTU_DISCOVER. This option forces us not to fragment, but does not make use of the kernel path MTU discovery. That is, it allows for user-mode MTU probing (or, packetization-layer path MTU discovery). This is particularly useful for diagnostic utilities, like traceroute/tracepath.

Signed-off-by: John Heffner <jheffner@psc.edu>
Signed-off-by: David S. Miller <davem@davemloft.net>
Showing 6 changed files with 31 additions and 10 deletions Side-by-side Diff
include/linux/in.h
... | ... | @@ -83,6 +83,7 @@ |
83 | 83 | #define IP_PMTUDISC_DONT 0 /* Never send DF frames */ |
84 | 84 | #define IP_PMTUDISC_WANT 1 /* Use per route hints */ |
85 | 85 | #define IP_PMTUDISC_DO 2 /* Always DF */ |
86 | +#define IP_PMTUDISC_PROBE 3 /* Ignore dst pmtu */ | |
86 | 87 | |
87 | 88 | #define IP_MULTICAST_IF 32 |
88 | 89 | #define IP_MULTICAST_TTL 33 |
include/linux/in6.h
net/ipv4/ip_output.c
... | ... | @@ -189,6 +189,14 @@ |
189 | 189 | return -EINVAL; |
190 | 190 | } |
191 | 191 | |
192 | +static inline int ip_skb_dst_mtu(struct sk_buff *skb) | |
193 | +{ | |
194 | + struct inet_sock *inet = skb->sk ? inet_sk(skb->sk) : NULL; | |
195 | + | |
196 | + return (inet && inet->pmtudisc == IP_PMTUDISC_PROBE) ? | |
197 | + skb->dst->dev->mtu : dst_mtu(skb->dst); | |
198 | +} | |
199 | + | |
192 | 200 | static inline int ip_finish_output(struct sk_buff *skb) |
193 | 201 | { |
194 | 202 | #if defined(CONFIG_NETFILTER) && defined(CONFIG_XFRM) |
... | ... | @@ -198,7 +206,7 @@ |
198 | 206 | return dst_output(skb); |
199 | 207 | } |
200 | 208 | #endif |
201 | - if (skb->len > dst_mtu(skb->dst) && !skb_is_gso(skb)) | |
209 | + if (skb->len > ip_skb_dst_mtu(skb) && !skb_is_gso(skb)) | |
202 | 210 | return ip_fragment(skb, ip_finish_output2); |
203 | 211 | else |
204 | 212 | return ip_finish_output2(skb); |
... | ... | @@ -422,7 +430,7 @@ |
422 | 430 | if (unlikely((iph->frag_off & htons(IP_DF)) && !skb->local_df)) { |
423 | 431 | IP_INC_STATS(IPSTATS_MIB_FRAGFAILS); |
424 | 432 | icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, |
425 | - htonl(dst_mtu(&rt->u.dst))); | |
433 | + htonl(ip_skb_dst_mtu(skb))); | |
426 | 434 | kfree_skb(skb); |
427 | 435 | return -EMSGSIZE; |
428 | 436 | } |
... | ... | @@ -787,7 +795,9 @@ |
787 | 795 | inet->cork.addr = ipc->addr; |
788 | 796 | } |
789 | 797 | dst_hold(&rt->u.dst); |
790 | - inet->cork.fragsize = mtu = dst_mtu(rt->u.dst.path); | |
798 | + inet->cork.fragsize = mtu = inet->pmtudisc == IP_PMTUDISC_PROBE ? | |
799 | + rt->u.dst.dev->mtu : | |
800 | + dst_mtu(rt->u.dst.path); | |
791 | 801 | inet->cork.rt = rt; |
792 | 802 | inet->cork.length = 0; |
793 | 803 | sk->sk_sndmsg_page = NULL; |
794 | 804 | |
... | ... | @@ -1203,13 +1213,13 @@ |
1203 | 1213 | * to fragment the frame generated here. No matter, what transforms |
1204 | 1214 | * how transforms change size of the packet, it will come out. |
1205 | 1215 | */ |
1206 | - if (inet->pmtudisc != IP_PMTUDISC_DO) | |
1216 | + if (inet->pmtudisc < IP_PMTUDISC_DO) | |
1207 | 1217 | skb->local_df = 1; |
1208 | 1218 | |
1209 | 1219 | /* DF bit is set when we want to see DF on outgoing frames. |
1210 | 1220 | * If local_df is set too, we still allow to fragment this frame |
1211 | 1221 | * locally. */ |
1212 | - if (inet->pmtudisc == IP_PMTUDISC_DO || | |
1222 | + if (inet->pmtudisc >= IP_PMTUDISC_DO || | |
1213 | 1223 | (skb->len <= dst_mtu(&rt->u.dst) && |
1214 | 1224 | ip_dont_fragment(sk, &rt->u.dst))) |
1215 | 1225 | df = htons(IP_DF); |
net/ipv4/ip_sockglue.c
net/ipv6/ip6_output.c
... | ... | @@ -137,9 +137,17 @@ |
137 | 137 | return NF_HOOK(PF_INET6, NF_IP6_POST_ROUTING, skb,NULL, skb->dev,ip6_output_finish); |
138 | 138 | } |
139 | 139 | |
140 | +static inline int ip6_skb_dst_mtu(struct sk_buff *skb) | |
141 | +{ | |
142 | + struct ipv6_pinfo *np = skb->sk ? inet6_sk(skb->sk) : NULL; | |
143 | + | |
144 | + return (np && np->pmtudisc == IPV6_PMTUDISC_PROBE) ? | |
145 | + skb->dst->dev->mtu : dst_mtu(skb->dst); | |
146 | +} | |
147 | + | |
140 | 148 | int ip6_output(struct sk_buff *skb) |
141 | 149 | { |
142 | - if ((skb->len > dst_mtu(skb->dst) && !skb_is_gso(skb)) || | |
150 | + if ((skb->len > ip6_skb_dst_mtu(skb) && !skb_is_gso(skb)) || | |
143 | 151 | dst_allfrag(skb->dst)) |
144 | 152 | return ip6_fragment(skb, ip6_output2); |
145 | 153 | else |
... | ... | @@ -566,7 +574,7 @@ |
566 | 574 | hlen = ip6_find_1stfragopt(skb, &prevhdr); |
567 | 575 | nexthdr = *prevhdr; |
568 | 576 | |
569 | - mtu = dst_mtu(&rt->u.dst); | |
577 | + mtu = ip6_skb_dst_mtu(skb); | |
570 | 578 | |
571 | 579 | /* We must not fragment if the socket is set to force MTU discovery |
572 | 580 | * or if the skb it not generated by a local socket. (This last |
... | ... | @@ -1063,7 +1071,8 @@ |
1063 | 1071 | inet->cork.fl = *fl; |
1064 | 1072 | np->cork.hop_limit = hlimit; |
1065 | 1073 | np->cork.tclass = tclass; |
1066 | - mtu = dst_mtu(rt->u.dst.path); | |
1074 | + mtu = np->pmtudisc == IPV6_PMTUDISC_PROBE ? | |
1075 | + rt->u.dst.dev->mtu : dst_mtu(rt->u.dst.path); | |
1067 | 1076 | if (np->frag_size < mtu) { |
1068 | 1077 | if (np->frag_size) |
1069 | 1078 | mtu = np->frag_size; |