ipv6: Implement automatic flow label generation on transmit
Automatically generate flow labels for IPv6 packets on transmit.
The flow label is computed based on skb_get_hash. The flow label will
only be set automatically when it would otherwise be zero (i.e. the flow
label manager hasn't set one). This supports the transmit side
functionality of RFC 6438.
Added an IPv6 sysctl auto_flowlabels to enable/disable this behavior
system wide, and added IPV6_AUTOFLOWLABEL socket option to enable this
functionality per socket.
By default, auto flowlabels are disabled to avoid possible conflicts
with flow label manager, however if this feature proves useful we
may want to enable it by default.
It should also be noted that FreeBSD has already implemented automatic
flow labels (including the sysctl and socket option). In FreeBSD,
automatic flow labels default to enabled.
Performance impact:
Running super_netperf with 200 flows for TCP_RR and UDP_RR for
IPv6. Note that in the UDP case, __skb_get_hash will be called for
every packet, which explains the slight regression. In the TCP case
the hash is saved in the socket so there is no regression.
Automatic flow labels disabled:
  TCP_RR:
    86.53% CPU utilization
    127/195/322 90/95/99% latencies
    1.40498e+06 tps
  UDP_RR:
    90.70% CPU utilization
    118/168/243 90/95/99% latencies
    1.50309e+06 tps
Automatic flow labels enabled:
  TCP_RR:
    85.90% CPU utilization
    128/199/337 90/95/99% latencies
    1.40051e+06 tps
  UDP_RR:
    92.61% CPU utilization
    115/164/236 90/95/99% latencies
    1.4687e+06 tps
Signed-off-by: Tom Herbert <therbert@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
	
	
This commit is contained in:
		
					parent
					
						
							
								19469a873b
							
						
					
				
			
			
				commit
				
					
						cb1ce2ef38
					
				
			
		
					 11 changed files with 62 additions and 6 deletions
				
			
		|  | @ -1132,6 +1132,15 @@ flowlabel_consistency - BOOLEAN | ||||||
| 	FALSE: disabled | 	FALSE: disabled | ||||||
| 	Default: TRUE | 	Default: TRUE | ||||||
| 
 | 
 | ||||||
|  | auto_flowlabels - BOOLEAN | ||||||
|  | 	Automatically generate flow labels based on a flow hash | ||||||
|  | 	of the packet. This allows intermediate devices, such as routers, | ||||||
|  | 	to identify packet flows for mechanisms like Equal Cost Multipath | ||||||
|  | 	Routing (see RFC 6438). | ||||||
|  | 	TRUE: enabled | ||||||
|  | 	FALSE: disabled | ||||||
|  | 	Default: FALSE | ||||||
|  | 
 | ||||||
| anycast_src_echo_reply - BOOLEAN | anycast_src_echo_reply - BOOLEAN | ||||||
| 	Controls the use of anycast addresses as source addresses for ICMPv6 | 	Controls the use of anycast addresses as source addresses for ICMPv6 | ||||||
| 	echo reply | 	echo reply | ||||||
|  |  | ||||||
|  | @ -199,7 +199,8 @@ struct ipv6_pinfo { | ||||||
| 						 * 010: prefer public address | 						 * 010: prefer public address | ||||||
| 						 * 100: prefer care-of address | 						 * 100: prefer care-of address | ||||||
| 						 */ | 						 */ | ||||||
| 				dontfrag:1; | 				dontfrag:1, | ||||||
|  | 				autoflowlabel:1; | ||||||
| 	__u8			min_hopcount; | 	__u8			min_hopcount; | ||||||
| 	__u8			tclass; | 	__u8			tclass; | ||||||
| 	__be32			rcv_flowinfo; | 	__be32			rcv_flowinfo; | ||||||
|  |  | ||||||
|  | @ -699,6 +699,26 @@ static inline void ip6_set_txhash(struct sock *sk) | ||||||
| 	sk->sk_txhash = flow_hash_from_keys(&keys); | 	sk->sk_txhash = flow_hash_from_keys(&keys); | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
|  | static inline __be32 ip6_make_flowlabel(struct net *net, struct sk_buff *skb, | ||||||
|  | 					__be32 flowlabel, bool autolabel) | ||||||
|  | { | ||||||
|  | 	if (!flowlabel && (autolabel || net->ipv6.sysctl.auto_flowlabels)) { | ||||||
|  | 		__be32 hash; | ||||||
|  | 
 | ||||||
|  | 		hash = skb_get_hash(skb); | ||||||
|  | 
 | ||||||
|  | 		/* Since this is being sent on the wire obfuscate hash a bit
 | ||||||
|  | 		 * to minimize possibility that any useful information to an | ||||||
|  | 		 * attacker is leaked. Only lower 20 bits are relevant. | ||||||
|  | 		 */ | ||||||
|  | 		hash ^= hash >> 12; | ||||||
|  | 
 | ||||||
|  | 		flowlabel = hash & IPV6_FLOWLABEL_MASK; | ||||||
|  | 	} | ||||||
|  | 
 | ||||||
|  | 	return flowlabel; | ||||||
|  | } | ||||||
|  | 
 | ||||||
| /*
 | /*
 | ||||||
|  *	Header manipulation |  *	Header manipulation | ||||||
|  */ |  */ | ||||||
|  |  | ||||||
|  | @ -28,6 +28,7 @@ struct netns_sysctl_ipv6 { | ||||||
| 	int ip6_rt_mtu_expires; | 	int ip6_rt_mtu_expires; | ||||||
| 	int ip6_rt_min_advmss; | 	int ip6_rt_min_advmss; | ||||||
| 	int flowlabel_consistency; | 	int flowlabel_consistency; | ||||||
|  | 	int auto_flowlabels; | ||||||
| 	int icmpv6_time; | 	int icmpv6_time; | ||||||
| 	int anycast_src_echo_reply; | 	int anycast_src_echo_reply; | ||||||
| 	int fwmark_reflect; | 	int fwmark_reflect; | ||||||
|  |  | ||||||
|  | @ -233,6 +233,7 @@ struct in6_flowlabel_req { | ||||||
| #if 0	/* not yet */
 | #if 0	/* not yet */
 | ||||||
| #define IPV6_USE_MIN_MTU	63 | #define IPV6_USE_MIN_MTU	63 | ||||||
| #endif | #endif | ||||||
|  | #define IPV6_AUTOFLOWLABEL	64 | ||||||
| 
 | 
 | ||||||
| /*
 | /*
 | ||||||
|  * Netfilter (1) |  * Netfilter (1) | ||||||
|  |  | ||||||
|  | @ -765,6 +765,7 @@ static int __net_init inet6_net_init(struct net *net) | ||||||
| 	net->ipv6.sysctl.bindv6only = 0; | 	net->ipv6.sysctl.bindv6only = 0; | ||||||
| 	net->ipv6.sysctl.icmpv6_time = 1*HZ; | 	net->ipv6.sysctl.icmpv6_time = 1*HZ; | ||||||
| 	net->ipv6.sysctl.flowlabel_consistency = 1; | 	net->ipv6.sysctl.flowlabel_consistency = 1; | ||||||
|  | 	net->ipv6.sysctl.auto_flowlabels = 0; | ||||||
| 	atomic_set(&net->ipv6.rt_genid, 0); | 	atomic_set(&net->ipv6.rt_genid, 0); | ||||||
| 
 | 
 | ||||||
| 	err = ipv6_init_mibs(net); | 	err = ipv6_init_mibs(net); | ||||||
|  |  | ||||||
|  | @ -723,7 +723,8 @@ static netdev_tx_t ip6gre_xmit2(struct sk_buff *skb, | ||||||
| 	 *	Push down and install the IP header. | 	 *	Push down and install the IP header. | ||||||
| 	 */ | 	 */ | ||||||
| 	ipv6h = ipv6_hdr(skb); | 	ipv6h = ipv6_hdr(skb); | ||||||
| 	ip6_flow_hdr(ipv6h, INET_ECN_encapsulate(0, dsfield), fl6->flowlabel); | 	ip6_flow_hdr(ipv6h, INET_ECN_encapsulate(0, dsfield), | ||||||
|  | 		     ip6_make_flowlabel(net, skb, fl6->flowlabel, false)); | ||||||
| 	ipv6h->hop_limit = tunnel->parms.hop_limit; | 	ipv6h->hop_limit = tunnel->parms.hop_limit; | ||||||
| 	ipv6h->nexthdr = proto; | 	ipv6h->nexthdr = proto; | ||||||
| 	ipv6h->saddr = fl6->saddr; | 	ipv6h->saddr = fl6->saddr; | ||||||
|  | @ -1174,7 +1175,9 @@ static int ip6gre_header(struct sk_buff *skb, struct net_device *dev, | ||||||
| 	struct ipv6hdr *ipv6h = (struct ipv6hdr *)skb_push(skb, t->hlen); | 	struct ipv6hdr *ipv6h = (struct ipv6hdr *)skb_push(skb, t->hlen); | ||||||
| 	__be16 *p = (__be16 *)(ipv6h+1); | 	__be16 *p = (__be16 *)(ipv6h+1); | ||||||
| 
 | 
 | ||||||
| 	ip6_flow_hdr(ipv6h, 0, t->fl.u.ip6.flowlabel); | 	ip6_flow_hdr(ipv6h, 0, | ||||||
|  | 		     ip6_make_flowlabel(dev_net(dev), skb, | ||||||
|  | 					t->fl.u.ip6.flowlabel, false)); | ||||||
| 	ipv6h->hop_limit = t->parms.hop_limit; | 	ipv6h->hop_limit = t->parms.hop_limit; | ||||||
| 	ipv6h->nexthdr = NEXTHDR_GRE; | 	ipv6h->nexthdr = NEXTHDR_GRE; | ||||||
| 	ipv6h->saddr = t->parms.laddr; | 	ipv6h->saddr = t->parms.laddr; | ||||||
|  |  | ||||||
|  | @ -205,7 +205,8 @@ int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6, | ||||||
| 	if (hlimit < 0) | 	if (hlimit < 0) | ||||||
| 		hlimit = ip6_dst_hoplimit(dst); | 		hlimit = ip6_dst_hoplimit(dst); | ||||||
| 
 | 
 | ||||||
| 	ip6_flow_hdr(hdr, tclass, fl6->flowlabel); | 	ip6_flow_hdr(hdr, tclass, ip6_make_flowlabel(net, skb, fl6->flowlabel, | ||||||
|  | 						     np->autoflowlabel)); | ||||||
| 
 | 
 | ||||||
| 	hdr->payload_len = htons(seg_len); | 	hdr->payload_len = htons(seg_len); | ||||||
| 	hdr->nexthdr = proto; | 	hdr->nexthdr = proto; | ||||||
|  | @ -1569,7 +1570,9 @@ int ip6_push_pending_frames(struct sock *sk) | ||||||
| 	skb_reset_network_header(skb); | 	skb_reset_network_header(skb); | ||||||
| 	hdr = ipv6_hdr(skb); | 	hdr = ipv6_hdr(skb); | ||||||
| 
 | 
 | ||||||
| 	ip6_flow_hdr(hdr, np->cork.tclass, fl6->flowlabel); | 	ip6_flow_hdr(hdr, np->cork.tclass, | ||||||
|  | 		     ip6_make_flowlabel(net, skb, fl6->flowlabel, | ||||||
|  | 					np->autoflowlabel)); | ||||||
| 	hdr->hop_limit = np->cork.hop_limit; | 	hdr->hop_limit = np->cork.hop_limit; | ||||||
| 	hdr->nexthdr = proto; | 	hdr->nexthdr = proto; | ||||||
| 	hdr->saddr = fl6->saddr; | 	hdr->saddr = fl6->saddr; | ||||||
|  |  | ||||||
|  | @ -1046,7 +1046,8 @@ static int ip6_tnl_xmit2(struct sk_buff *skb, | ||||||
| 	skb_push(skb, sizeof(struct ipv6hdr)); | 	skb_push(skb, sizeof(struct ipv6hdr)); | ||||||
| 	skb_reset_network_header(skb); | 	skb_reset_network_header(skb); | ||||||
| 	ipv6h = ipv6_hdr(skb); | 	ipv6h = ipv6_hdr(skb); | ||||||
| 	ip6_flow_hdr(ipv6h, INET_ECN_encapsulate(0, dsfield), fl6->flowlabel); | 	ip6_flow_hdr(ipv6h, INET_ECN_encapsulate(0, dsfield), | ||||||
|  | 		     ip6_make_flowlabel(net, skb, fl6->flowlabel, false)); | ||||||
| 	ipv6h->hop_limit = t->parms.hop_limit; | 	ipv6h->hop_limit = t->parms.hop_limit; | ||||||
| 	ipv6h->nexthdr = proto; | 	ipv6h->nexthdr = proto; | ||||||
| 	ipv6h->saddr = fl6->saddr; | 	ipv6h->saddr = fl6->saddr; | ||||||
|  |  | ||||||
|  | @ -834,6 +834,10 @@ pref_skip_coa: | ||||||
| 		np->dontfrag = valbool; | 		np->dontfrag = valbool; | ||||||
| 		retv = 0; | 		retv = 0; | ||||||
| 		break; | 		break; | ||||||
|  | 	case IPV6_AUTOFLOWLABEL: | ||||||
|  | 		np->autoflowlabel = valbool; | ||||||
|  | 		retv = 0; | ||||||
|  | 		break; | ||||||
| 	} | 	} | ||||||
| 
 | 
 | ||||||
| 	release_sock(sk); | 	release_sock(sk); | ||||||
|  | @ -1273,6 +1277,10 @@ static int do_ipv6_getsockopt(struct sock *sk, int level, int optname, | ||||||
| 		val = np->dontfrag; | 		val = np->dontfrag; | ||||||
| 		break; | 		break; | ||||||
| 
 | 
 | ||||||
|  | 	case IPV6_AUTOFLOWLABEL: | ||||||
|  | 		val = np->autoflowlabel; | ||||||
|  | 		break; | ||||||
|  | 
 | ||||||
| 	default: | 	default: | ||||||
| 		return -ENOPROTOOPT; | 		return -ENOPROTOOPT; | ||||||
| 	} | 	} | ||||||
|  |  | ||||||
|  | @ -38,6 +38,13 @@ static struct ctl_table ipv6_table_template[] = { | ||||||
| 		.mode		= 0644, | 		.mode		= 0644, | ||||||
| 		.proc_handler	= proc_dointvec | 		.proc_handler	= proc_dointvec | ||||||
| 	}, | 	}, | ||||||
|  | 	{ | ||||||
|  | 		.procname	= "auto_flowlabels", | ||||||
|  | 		.data		= &init_net.ipv6.sysctl.auto_flowlabels, | ||||||
|  | 		.maxlen		= sizeof(int), | ||||||
|  | 		.mode		= 0644, | ||||||
|  | 		.proc_handler	= proc_dointvec | ||||||
|  | 	}, | ||||||
| 	{ | 	{ | ||||||
| 		.procname	= "fwmark_reflect", | 		.procname	= "fwmark_reflect", | ||||||
| 		.data		= &init_net.ipv6.sysctl.fwmark_reflect, | 		.data		= &init_net.ipv6.sysctl.fwmark_reflect, | ||||||
|  | @ -74,6 +81,7 @@ static int __net_init ipv6_sysctl_net_init(struct net *net) | ||||||
| 	ipv6_table[0].data = &net->ipv6.sysctl.bindv6only; | 	ipv6_table[0].data = &net->ipv6.sysctl.bindv6only; | ||||||
| 	ipv6_table[1].data = &net->ipv6.sysctl.anycast_src_echo_reply; | 	ipv6_table[1].data = &net->ipv6.sysctl.anycast_src_echo_reply; | ||||||
| 	ipv6_table[2].data = &net->ipv6.sysctl.flowlabel_consistency; | 	ipv6_table[2].data = &net->ipv6.sysctl.flowlabel_consistency; | ||||||
|  | 	ipv6_table[3].data = &net->ipv6.sysctl.auto_flowlabels; | ||||||
| 
 | 
 | ||||||
| 	ipv6_route_table = ipv6_route_sysctl_init(net); | 	ipv6_route_table = ipv6_route_sysctl_init(net); | ||||||
| 	if (!ipv6_route_table) | 	if (!ipv6_route_table) | ||||||
|  |  | ||||||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue
	
	 Tom Herbert
				Tom Herbert