 aacd9289af
			
		
	
	
	aacd9289af
	
	
	
		
			
			We must try harder to get unique (addr, port) pairs when
doing port autoselection for sockets with SO_REUSEADDR
option set.
We achieve this by adding a relaxation parameter to
inet_csk_bind_conflict. When 'relax' parameter is off
we return a conflict whenever the current searched
pair (addr, port) is not unique.
This tries to address the problems reported in patch:
	8d238b25b1
	Revert "tcp: bind() fix when many ports are bound"
Tests were run creating and binding(0) many sockets
on 100 IPs. The results are, on average:
	* 60000 sockets, 600 ports / IP:
		* 0.210 s, 620 (IP, port) duplicates without patch
		* 0.219 s, no duplicates with patch
	* 100000 sockets, 1000 ports / IP:
		* 0.371 s, 1720 duplicates without patch
		* 0.373 s, no duplicates with patch
	* 200000 sockets, 2000 ports / IP:
		* 0.766 s, 6900 duplicates without patch
		* 0.768 s, no duplicates with patch
	* 500000 sockets, 5000 ports / IP:
		* 2.227 s, 41500 duplicates without patch
		* 2.284 s, no duplicates with patch
Signed-off-by: Alex Copot <alex.mihai.c@gmail.com>
Signed-off-by: Daniel Baluta <dbaluta@ixiacom.com>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
		
	
			
		
			
				
	
	
		
			255 lines
		
	
	
	
		
			6.8 KiB
			
		
	
	
	
		
			C
		
	
	
	
	
	
			
		
		
	
	
			255 lines
		
	
	
	
		
			6.8 KiB
			
		
	
	
	
		
			C
		
	
	
	
	
	
| /*
 | |
|  * INET        An implementation of the TCP/IP protocol suite for the LINUX
 | |
|  *             operating system.  INET is implemented using the  BSD Socket
 | |
|  *             interface as the means of communication with the user level.
 | |
|  *
 | |
|  *             Support for INET6 connection oriented protocols.
 | |
|  *
 | |
|  * Authors:    See the TCPv6 sources
 | |
|  *
 | |
|  *             This program is free software; you can redistribute it and/or
 | |
|  *             modify it under the terms of the GNU General Public License
 | |
|  *             as published by the Free Software Foundation; either version
 | |
|  *             2 of the License, or(at your option) any later version.
 | |
|  */
 | |
| 
 | |
| #include <linux/module.h>
 | |
| #include <linux/in6.h>
 | |
| #include <linux/ipv6.h>
 | |
| #include <linux/jhash.h>
 | |
| #include <linux/slab.h>
 | |
| 
 | |
| #include <net/addrconf.h>
 | |
| #include <net/inet_connection_sock.h>
 | |
| #include <net/inet_ecn.h>
 | |
| #include <net/inet_hashtables.h>
 | |
| #include <net/ip6_route.h>
 | |
| #include <net/sock.h>
 | |
| #include <net/inet6_connection_sock.h>
 | |
| 
 | |
| int inet6_csk_bind_conflict(const struct sock *sk,
 | |
| 			    const struct inet_bind_bucket *tb, bool relax)
 | |
| {
 | |
| 	const struct sock *sk2;
 | |
| 	const struct hlist_node *node;
 | |
| 
 | |
| 	/* We must walk the whole port owner list in this case. -DaveM */
 | |
| 	/*
 | |
| 	 * See comment in inet_csk_bind_conflict about sock lookup
 | |
| 	 * vs net namespaces issues.
 | |
| 	 */
 | |
| 	sk_for_each_bound(sk2, node, &tb->owners) {
 | |
| 		if (sk != sk2 &&
 | |
| 		    (!sk->sk_bound_dev_if ||
 | |
| 		     !sk2->sk_bound_dev_if ||
 | |
| 		     sk->sk_bound_dev_if == sk2->sk_bound_dev_if) &&
 | |
| 		    (!sk->sk_reuse || !sk2->sk_reuse ||
 | |
| 		     sk2->sk_state == TCP_LISTEN) &&
 | |
| 		     ipv6_rcv_saddr_equal(sk, sk2))
 | |
| 			break;
 | |
| 	}
 | |
| 
 | |
| 	return node != NULL;
 | |
| }
 | |
| 
 | |
| EXPORT_SYMBOL_GPL(inet6_csk_bind_conflict);
 | |
| 
 | |
| struct dst_entry *inet6_csk_route_req(struct sock *sk,
 | |
| 				      const struct request_sock *req)
 | |
| {
 | |
| 	struct inet6_request_sock *treq = inet6_rsk(req);
 | |
| 	struct ipv6_pinfo *np = inet6_sk(sk);
 | |
| 	struct in6_addr *final_p, final;
 | |
| 	struct dst_entry *dst;
 | |
| 	struct flowi6 fl6;
 | |
| 
 | |
| 	memset(&fl6, 0, sizeof(fl6));
 | |
| 	fl6.flowi6_proto = IPPROTO_TCP;
 | |
| 	fl6.daddr = treq->rmt_addr;
 | |
| 	final_p = fl6_update_dst(&fl6, np->opt, &final);
 | |
| 	fl6.saddr = treq->loc_addr;
 | |
| 	fl6.flowi6_oif = sk->sk_bound_dev_if;
 | |
| 	fl6.flowi6_mark = sk->sk_mark;
 | |
| 	fl6.fl6_dport = inet_rsk(req)->rmt_port;
 | |
| 	fl6.fl6_sport = inet_rsk(req)->loc_port;
 | |
| 	security_req_classify_flow(req, flowi6_to_flowi(&fl6));
 | |
| 
 | |
| 	dst = ip6_dst_lookup_flow(sk, &fl6, final_p, false);
 | |
| 	if (IS_ERR(dst))
 | |
| 		return NULL;
 | |
| 
 | |
| 	return dst;
 | |
| }
 | |
| 
 | |
| /*
 | |
|  * request_sock (formerly open request) hash tables.
 | |
|  */
 | |
| static u32 inet6_synq_hash(const struct in6_addr *raddr, const __be16 rport,
 | |
| 			   const u32 rnd, const u32 synq_hsize)
 | |
| {
 | |
| 	u32 c;
 | |
| 
 | |
| 	c = jhash_3words((__force u32)raddr->s6_addr32[0],
 | |
| 			 (__force u32)raddr->s6_addr32[1],
 | |
| 			 (__force u32)raddr->s6_addr32[2],
 | |
| 			 rnd);
 | |
| 
 | |
| 	c = jhash_2words((__force u32)raddr->s6_addr32[3],
 | |
| 			 (__force u32)rport,
 | |
| 			 c);
 | |
| 
 | |
| 	return c & (synq_hsize - 1);
 | |
| }
 | |
| 
 | |
| struct request_sock *inet6_csk_search_req(const struct sock *sk,
 | |
| 					  struct request_sock ***prevp,
 | |
| 					  const __be16 rport,
 | |
| 					  const struct in6_addr *raddr,
 | |
| 					  const struct in6_addr *laddr,
 | |
| 					  const int iif)
 | |
| {
 | |
| 	const struct inet_connection_sock *icsk = inet_csk(sk);
 | |
| 	struct listen_sock *lopt = icsk->icsk_accept_queue.listen_opt;
 | |
| 	struct request_sock *req, **prev;
 | |
| 
 | |
| 	for (prev = &lopt->syn_table[inet6_synq_hash(raddr, rport,
 | |
| 						     lopt->hash_rnd,
 | |
| 						     lopt->nr_table_entries)];
 | |
| 	     (req = *prev) != NULL;
 | |
| 	     prev = &req->dl_next) {
 | |
| 		const struct inet6_request_sock *treq = inet6_rsk(req);
 | |
| 
 | |
| 		if (inet_rsk(req)->rmt_port == rport &&
 | |
| 		    req->rsk_ops->family == AF_INET6 &&
 | |
| 		    ipv6_addr_equal(&treq->rmt_addr, raddr) &&
 | |
| 		    ipv6_addr_equal(&treq->loc_addr, laddr) &&
 | |
| 		    (!treq->iif || treq->iif == iif)) {
 | |
| 			WARN_ON(req->sk != NULL);
 | |
| 			*prevp = prev;
 | |
| 			return req;
 | |
| 		}
 | |
| 	}
 | |
| 
 | |
| 	return NULL;
 | |
| }
 | |
| 
 | |
| EXPORT_SYMBOL_GPL(inet6_csk_search_req);
 | |
| 
 | |
| void inet6_csk_reqsk_queue_hash_add(struct sock *sk,
 | |
| 				    struct request_sock *req,
 | |
| 				    const unsigned long timeout)
 | |
| {
 | |
| 	struct inet_connection_sock *icsk = inet_csk(sk);
 | |
| 	struct listen_sock *lopt = icsk->icsk_accept_queue.listen_opt;
 | |
| 	const u32 h = inet6_synq_hash(&inet6_rsk(req)->rmt_addr,
 | |
| 				      inet_rsk(req)->rmt_port,
 | |
| 				      lopt->hash_rnd, lopt->nr_table_entries);
 | |
| 
 | |
| 	reqsk_queue_hash_req(&icsk->icsk_accept_queue, h, req, timeout);
 | |
| 	inet_csk_reqsk_queue_added(sk, timeout);
 | |
| }
 | |
| 
 | |
| EXPORT_SYMBOL_GPL(inet6_csk_reqsk_queue_hash_add);
 | |
| 
 | |
| void inet6_csk_addr2sockaddr(struct sock *sk, struct sockaddr * uaddr)
 | |
| {
 | |
| 	struct ipv6_pinfo *np = inet6_sk(sk);
 | |
| 	struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *) uaddr;
 | |
| 
 | |
| 	sin6->sin6_family = AF_INET6;
 | |
| 	sin6->sin6_addr = np->daddr;
 | |
| 	sin6->sin6_port	= inet_sk(sk)->inet_dport;
 | |
| 	/* We do not store received flowlabel for TCP */
 | |
| 	sin6->sin6_flowinfo = 0;
 | |
| 	sin6->sin6_scope_id = 0;
 | |
| 	if (sk->sk_bound_dev_if &&
 | |
| 	    ipv6_addr_type(&sin6->sin6_addr) & IPV6_ADDR_LINKLOCAL)
 | |
| 		sin6->sin6_scope_id = sk->sk_bound_dev_if;
 | |
| }
 | |
| 
 | |
| EXPORT_SYMBOL_GPL(inet6_csk_addr2sockaddr);
 | |
| 
 | |
/*
 * Store @dst on @sk through __ip6_dst_store().  With CONFIG_XFRM the
 * route is additionally stamped with the current flow_cache_genid so
 * that __inet6_csk_dst_check() can detect a later generation change.
 */
static inline
void __inet6_csk_dst_store(struct sock *sk, struct dst_entry *dst,
			   struct in6_addr *daddr, struct in6_addr *saddr)
{
#ifdef CONFIG_XFRM
	struct rt6_info *rt = (struct rt6_info *)dst;
#endif

	__ip6_dst_store(sk, dst, daddr, saddr);

#ifdef CONFIG_XFRM
	rt->rt6i_flow_cache_genid = atomic_read(&flow_cache_genid);
#endif
}
 | |
| 
 | |
| static inline
 | |
| struct dst_entry *__inet6_csk_dst_check(struct sock *sk, u32 cookie)
 | |
| {
 | |
| 	struct dst_entry *dst;
 | |
| 
 | |
| 	dst = __sk_dst_check(sk, cookie);
 | |
| 
 | |
| #ifdef CONFIG_XFRM
 | |
| 	if (dst) {
 | |
| 		struct rt6_info *rt = (struct rt6_info *)dst;
 | |
| 		if (rt->rt6i_flow_cache_genid != atomic_read(&flow_cache_genid)) {
 | |
| 			__sk_dst_reset(sk);
 | |
| 			dst = NULL;
 | |
| 		}
 | |
| 	}
 | |
| #endif
 | |
| 
 | |
| 	return dst;
 | |
| }
 | |
| 
 | |
| int inet6_csk_xmit(struct sk_buff *skb, struct flowi *fl_unused)
 | |
| {
 | |
| 	struct sock *sk = skb->sk;
 | |
| 	struct inet_sock *inet = inet_sk(sk);
 | |
| 	struct ipv6_pinfo *np = inet6_sk(sk);
 | |
| 	struct flowi6 fl6;
 | |
| 	struct dst_entry *dst;
 | |
| 	struct in6_addr *final_p, final;
 | |
| 	int res;
 | |
| 
 | |
| 	memset(&fl6, 0, sizeof(fl6));
 | |
| 	fl6.flowi6_proto = sk->sk_protocol;
 | |
| 	fl6.daddr = np->daddr;
 | |
| 	fl6.saddr = np->saddr;
 | |
| 	fl6.flowlabel = np->flow_label;
 | |
| 	IP6_ECN_flow_xmit(sk, fl6.flowlabel);
 | |
| 	fl6.flowi6_oif = sk->sk_bound_dev_if;
 | |
| 	fl6.flowi6_mark = sk->sk_mark;
 | |
| 	fl6.fl6_sport = inet->inet_sport;
 | |
| 	fl6.fl6_dport = inet->inet_dport;
 | |
| 	security_sk_classify_flow(sk, flowi6_to_flowi(&fl6));
 | |
| 
 | |
| 	final_p = fl6_update_dst(&fl6, np->opt, &final);
 | |
| 
 | |
| 	dst = __inet6_csk_dst_check(sk, np->dst_cookie);
 | |
| 
 | |
| 	if (dst == NULL) {
 | |
| 		dst = ip6_dst_lookup_flow(sk, &fl6, final_p, false);
 | |
| 
 | |
| 		if (IS_ERR(dst)) {
 | |
| 			sk->sk_err_soft = -PTR_ERR(dst);
 | |
| 			sk->sk_route_caps = 0;
 | |
| 			kfree_skb(skb);
 | |
| 			return PTR_ERR(dst);
 | |
| 		}
 | |
| 
 | |
| 		__inet6_csk_dst_store(sk, dst, NULL, NULL);
 | |
| 	}
 | |
| 
 | |
| 	rcu_read_lock();
 | |
| 	skb_dst_set_noref(skb, dst);
 | |
| 
 | |
| 	/* Restore final destination back after routing done */
 | |
| 	fl6.daddr = np->daddr;
 | |
| 
 | |
| 	res = ip6_xmit(sk, skb, &fl6, np->opt, np->tclass);
 | |
| 	rcu_read_unlock();
 | |
| 	return res;
 | |
| }
 | |
| EXPORT_SYMBOL_GPL(inet6_csk_xmit);
 |