ipv4: fix path MTU discovery with connection tracking
IPv4 conntrack defragments incoming packets at the PRE_ROUTING hook and (in case of forwarded packets) refragments them at POST_ROUTING independent of the IP_DF flag. Refragmentation uses the dst_mtu() of the local route without caring about the original fragment sizes, thereby breaking PMTUD.

This patch fixes this by keeping track of the largest received fragment with IP_DF set and generating an ICMP fragmentation required error during refragmentation if that size exceeds the MTU.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Acked-by: Eric Dumazet <edumazet@google.com>
Acked-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
		
					parent
					
						
							
								0fa7fa98db
							
						
					
				
			
			
				commit
				
					
						5f2d04f1f9
					
				
			
		
					 4 changed files with 14 additions and 2 deletions
				
			
		|  | @ -29,6 +29,8 @@ struct inet_frag_queue { | ||||||
| #define INET_FRAG_COMPLETE	4 | #define INET_FRAG_COMPLETE	4 | ||||||
| #define INET_FRAG_FIRST_IN	2 | #define INET_FRAG_FIRST_IN	2 | ||||||
| #define INET_FRAG_LAST_IN	1 | #define INET_FRAG_LAST_IN	1 | ||||||
|  | 
 | ||||||
|  | 	u16			max_size; | ||||||
| }; | }; | ||||||
| 
 | 
 | ||||||
| #define INETFRAGS_HASHSZ		64 | #define INETFRAGS_HASHSZ		64 | ||||||
|  |  | ||||||
|  | @ -42,6 +42,8 @@ struct inet_skb_parm { | ||||||
| #define IPSKB_XFRM_TRANSFORMED	4 | #define IPSKB_XFRM_TRANSFORMED	4 | ||||||
| #define IPSKB_FRAG_COMPLETE	8 | #define IPSKB_FRAG_COMPLETE	8 | ||||||
| #define IPSKB_REROUTED		16 | #define IPSKB_REROUTED		16 | ||||||
|  | 
 | ||||||
|  | 	u16			frag_max_size; | ||||||
| }; | }; | ||||||
| 
 | 
 | ||||||
| static inline unsigned int ip_hdrlen(const struct sk_buff *skb) | static inline unsigned int ip_hdrlen(const struct sk_buff *skb) | ||||||
|  |  | ||||||
|  | @ -523,6 +523,10 @@ found: | ||||||
| 	if (offset == 0) | 	if (offset == 0) | ||||||
| 		qp->q.last_in |= INET_FRAG_FIRST_IN; | 		qp->q.last_in |= INET_FRAG_FIRST_IN; | ||||||
| 
 | 
 | ||||||
|  | 	if (ip_hdr(skb)->frag_off & htons(IP_DF) && | ||||||
|  | 	    skb->len + ihl > qp->q.max_size) | ||||||
|  | 		qp->q.max_size = skb->len + ihl; | ||||||
|  | 
 | ||||||
| 	if (qp->q.last_in == (INET_FRAG_FIRST_IN | INET_FRAG_LAST_IN) && | 	if (qp->q.last_in == (INET_FRAG_FIRST_IN | INET_FRAG_LAST_IN) && | ||||||
| 	    qp->q.meat == qp->q.len) | 	    qp->q.meat == qp->q.len) | ||||||
| 		return ip_frag_reasm(qp, prev, dev); | 		return ip_frag_reasm(qp, prev, dev); | ||||||
|  | @ -646,9 +650,11 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev, | ||||||
| 	head->next = NULL; | 	head->next = NULL; | ||||||
| 	head->dev = dev; | 	head->dev = dev; | ||||||
| 	head->tstamp = qp->q.stamp; | 	head->tstamp = qp->q.stamp; | ||||||
|  | 	IPCB(head)->frag_max_size = qp->q.max_size; | ||||||
| 
 | 
 | ||||||
| 	iph = ip_hdr(head); | 	iph = ip_hdr(head); | ||||||
| 	iph->frag_off = 0; | 	/* max_size != 0 implies at least one fragment had IP_DF set */ | ||||||
|  | 	iph->frag_off = qp->q.max_size ? htons(IP_DF) : 0; | ||||||
| 	iph->tot_len = htons(len); | 	iph->tot_len = htons(len); | ||||||
| 	iph->tos |= ecn; | 	iph->tos |= ecn; | ||||||
| 	IP_INC_STATS_BH(net, IPSTATS_MIB_REASMOKS); | 	IP_INC_STATS_BH(net, IPSTATS_MIB_REASMOKS); | ||||||
|  |  | ||||||
|  | @ -467,7 +467,9 @@ int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)) | ||||||
| 
 | 
 | ||||||
| 	iph = ip_hdr(skb); | 	iph = ip_hdr(skb); | ||||||
| 
 | 
 | ||||||
| 	if (unlikely((iph->frag_off & htons(IP_DF)) && !skb->local_df)) { | 	if (unlikely(((iph->frag_off & htons(IP_DF)) && !skb->local_df) || | ||||||
|  | 		     (IPCB(skb)->frag_max_size && | ||||||
|  | 		      IPCB(skb)->frag_max_size > dst_mtu(&rt->dst)))) { | ||||||
| 		IP_INC_STATS(dev_net(dev), IPSTATS_MIB_FRAGFAILS); | 		IP_INC_STATS(dev_net(dev), IPSTATS_MIB_FRAGFAILS); | ||||||
| 		icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, | 		icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, | ||||||
| 			  htonl(ip_skb_dst_mtu(skb))); | 			  htonl(ip_skb_dst_mtu(skb))); | ||||||
|  |  | ||||||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue
	
	 Patrick McHardy
				Patrick McHardy