Commit 628a5c561890a9a9a74dea017873530584aab06e

Authored by John Heffner
Committed by David S. Miller
1 parent b881ef7603

[INET]: Add IP(V6)_PMTUDISC_PROBE

Add IP(V6)_PMTUDISC_PROBE value for IP(V6)_MTU_DISCOVER.  This option forces
us not to fragment, but does not make use of the kernel path MTU discovery.
That is, it allows for user-mode MTU probing (or, packetization-layer path
MTU discovery).  This is particularly useful for diagnostic utilities, like
traceroute/tracepath.

Signed-off-by: John Heffner <jheffner@psc.edu>
Signed-off-by: David S. Miller <davem@davemloft.net>

Showing 6 changed files with 31 additions and 10 deletions Side-by-side Diff

... ... @@ -83,6 +83,7 @@
83 83 #define IP_PMTUDISC_DONT 0 /* Never send DF frames */
84 84 #define IP_PMTUDISC_WANT 1 /* Use per route hints */
85 85 #define IP_PMTUDISC_DO 2 /* Always DF */
  86 +#define IP_PMTUDISC_PROBE 3 /* Ignore dst pmtu */
86 87  
87 88 #define IP_MULTICAST_IF 32
88 89 #define IP_MULTICAST_TTL 33
... ... @@ -179,6 +179,7 @@
179 179 #define IPV6_PMTUDISC_DONT 0
180 180 #define IPV6_PMTUDISC_WANT 1
181 181 #define IPV6_PMTUDISC_DO 2
  182 +#define IPV6_PMTUDISC_PROBE 3
182 183  
183 184 /* Flowlabel */
184 185 #define IPV6_FLOWLABEL_MGR 32
net/ipv4/ip_output.c
... ... @@ -189,6 +189,14 @@
189 189 return -EINVAL;
190 190 }
191 191  
  192 +static inline int ip_skb_dst_mtu(struct sk_buff *skb)
  193 +{
  194 + struct inet_sock *inet = skb->sk ? inet_sk(skb->sk) : NULL;
  195 +
  196 + return (inet && inet->pmtudisc == IP_PMTUDISC_PROBE) ?
  197 + skb->dst->dev->mtu : dst_mtu(skb->dst);
  198 +}
  199 +
192 200 static inline int ip_finish_output(struct sk_buff *skb)
193 201 {
194 202 #if defined(CONFIG_NETFILTER) && defined(CONFIG_XFRM)
... ... @@ -198,7 +206,7 @@
198 206 return dst_output(skb);
199 207 }
200 208 #endif
201   - if (skb->len > dst_mtu(skb->dst) && !skb_is_gso(skb))
  209 + if (skb->len > ip_skb_dst_mtu(skb) && !skb_is_gso(skb))
202 210 return ip_fragment(skb, ip_finish_output2);
203 211 else
204 212 return ip_finish_output2(skb);
... ... @@ -422,7 +430,7 @@
422 430 if (unlikely((iph->frag_off & htons(IP_DF)) && !skb->local_df)) {
423 431 IP_INC_STATS(IPSTATS_MIB_FRAGFAILS);
424 432 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED,
425   - htonl(dst_mtu(&rt->u.dst)));
  433 + htonl(ip_skb_dst_mtu(skb)));
426 434 kfree_skb(skb);
427 435 return -EMSGSIZE;
428 436 }
... ... @@ -787,7 +795,9 @@
787 795 inet->cork.addr = ipc->addr;
788 796 }
789 797 dst_hold(&rt->u.dst);
790   - inet->cork.fragsize = mtu = dst_mtu(rt->u.dst.path);
  798 + inet->cork.fragsize = mtu = inet->pmtudisc == IP_PMTUDISC_PROBE ?
  799 + rt->u.dst.dev->mtu :
  800 + dst_mtu(rt->u.dst.path);
791 801 inet->cork.rt = rt;
792 802 inet->cork.length = 0;
793 803 sk->sk_sndmsg_page = NULL;
794 804  
... ... @@ -1203,13 +1213,13 @@
1203 1213 * to fragment the frame generated here. No matter, what transforms
1204 1214 * how transforms change size of the packet, it will come out.
1205 1215 */
1206   - if (inet->pmtudisc != IP_PMTUDISC_DO)
  1216 + if (inet->pmtudisc < IP_PMTUDISC_DO)
1207 1217 skb->local_df = 1;
1208 1218  
1209 1219 /* DF bit is set when we want to see DF on outgoing frames.
1210 1220 * If local_df is set too, we still allow to fragment this frame
1211 1221 * locally. */
1212   - if (inet->pmtudisc == IP_PMTUDISC_DO ||
  1222 + if (inet->pmtudisc >= IP_PMTUDISC_DO ||
1213 1223 (skb->len <= dst_mtu(&rt->u.dst) &&
1214 1224 ip_dont_fragment(sk, &rt->u.dst)))
1215 1225 df = htons(IP_DF);
net/ipv4/ip_sockglue.c
... ... @@ -542,7 +542,7 @@
542 542 inet->hdrincl = val ? 1 : 0;
543 543 break;
544 544 case IP_MTU_DISCOVER:
545   - if (val<0 || val>2)
  545 + if (val<0 || val>3)
546 546 goto e_inval;
547 547 inet->pmtudisc = val;
548 548 break;
net/ipv6/ip6_output.c
... ... @@ -137,9 +137,17 @@
137 137 return NF_HOOK(PF_INET6, NF_IP6_POST_ROUTING, skb,NULL, skb->dev,ip6_output_finish);
138 138 }
139 139  
  140 +static inline int ip6_skb_dst_mtu(struct sk_buff *skb)
  141 +{
  142 + struct ipv6_pinfo *np = skb->sk ? inet6_sk(skb->sk) : NULL;
  143 +
  144 + return (np && np->pmtudisc == IPV6_PMTUDISC_PROBE) ?
  145 + skb->dst->dev->mtu : dst_mtu(skb->dst);
  146 +}
  147 +
140 148 int ip6_output(struct sk_buff *skb)
141 149 {
142   - if ((skb->len > dst_mtu(skb->dst) && !skb_is_gso(skb)) ||
  150 + if ((skb->len > ip6_skb_dst_mtu(skb) && !skb_is_gso(skb)) ||
143 151 dst_allfrag(skb->dst))
144 152 return ip6_fragment(skb, ip6_output2);
145 153 else
... ... @@ -566,7 +574,7 @@
566 574 hlen = ip6_find_1stfragopt(skb, &prevhdr);
567 575 nexthdr = *prevhdr;
568 576  
569   - mtu = dst_mtu(&rt->u.dst);
  577 + mtu = ip6_skb_dst_mtu(skb);
570 578  
571 579 /* We must not fragment if the socket is set to force MTU discovery
572 580 * or if the skb it not generated by a local socket. (This last
... ... @@ -1063,7 +1071,8 @@
1063 1071 inet->cork.fl = *fl;
1064 1072 np->cork.hop_limit = hlimit;
1065 1073 np->cork.tclass = tclass;
1066   - mtu = dst_mtu(rt->u.dst.path);
  1074 + mtu = np->pmtudisc == IPV6_PMTUDISC_PROBE ?
  1075 + rt->u.dst.dev->mtu : dst_mtu(rt->u.dst.path);
1067 1076 if (np->frag_size < mtu) {
1068 1077 if (np->frag_size)
1069 1078 mtu = np->frag_size;
net/ipv6/ipv6_sockglue.c
... ... @@ -694,7 +694,7 @@
694 694 retv = ip6_ra_control(sk, val, NULL);
695 695 break;
696 696 case IPV6_MTU_DISCOVER:
697   - if (val<0 || val>2)
  697 + if (val<0 || val>3)
698 698 goto e_inval;
699 699 np->pmtudisc = val;
700 700 retv = 0;