| 
									
										
										
										
											2005-04-16 15:20:36 -07:00
										 |  |  | /*
 | 
					
						
							|  |  |  |  *		INETPEER - A storage for permanent information about peers | 
					
						
							|  |  |  |  * | 
					
						
							|  |  |  |  *  Authors:	Andrey V. Savochkin <saw@msu.ru> | 
					
						
							|  |  |  |  */ | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | #ifndef _NET_INETPEER_H
 | 
					
						
							|  |  |  | #define _NET_INETPEER_H
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | #include <linux/types.h>
 | 
					
						
							|  |  |  | #include <linux/init.h>
 | 
					
						
							|  |  |  | #include <linux/jiffies.h>
 | 
					
						
							|  |  |  | #include <linux/spinlock.h>
 | 
					
						
							| 
									
										
										
										
											2011-01-26 20:55:53 -08:00
										 |  |  | #include <linux/rtnetlink.h>
 | 
					
						
							| 
									
										
										
										
											2010-11-30 12:20:00 -08:00
										 |  |  | #include <net/ipv6.h>
 | 
					
						
							| 
									
										
										
										
											2011-07-26 16:09:06 -07:00
										 |  |  | #include <linux/atomic.h>
 | 
					
						
							| 
									
										
										
										
											2005-04-16 15:20:36 -07:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2011-02-09 14:30:26 -08:00
										 |  |  | struct inetpeer_addr_base { | 
					
						
							| 
									
										
										
										
											2010-11-30 11:53:55 -08:00
										 |  |  | 	union { | 
					
						
							| 
									
										
										
										
											2011-02-09 14:30:26 -08:00
										 |  |  | 		__be32			a4; | 
					
						
							|  |  |  | 		__be32			a6[4]; | 
					
						
							| 
									
										
										
										
											2010-11-30 11:53:55 -08:00
										 |  |  | 	}; | 
					
						
							| 
									
										
										
										
											2011-02-09 14:30:26 -08:00
										 |  |  | }; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | struct inetpeer_addr { | 
					
						
							|  |  |  | 	struct inetpeer_addr_base	addr; | 
					
						
							|  |  |  | 	__u16				family; | 
					
						
							| 
									
										
										
										
											2010-12-01 17:28:18 -08:00
										 |  |  | }; | 
					
						
							| 
									
										
										
										
											2010-11-30 11:53:55 -08:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2009-11-03 03:26:03 +00:00
										 |  |  | struct inet_peer { | 
					
						
							| 
									
										
										
										
											2006-10-20 00:28:35 -07:00
										 |  |  | 	/* group together avl_left,avl_right,v4daddr to speedup lookups */ | 
					
						
							| 
									
										
										
										
											2010-10-25 23:55:38 +00:00
										 |  |  | 	struct inet_peer __rcu	*avl_left, *avl_right; | 
					
						
							| 
									
										
										
										
											2010-12-01 17:28:18 -08:00
										 |  |  | 	struct inetpeer_addr	daddr; | 
					
						
							| 
									
										
										
										
											2009-11-12 09:33:09 +00:00
										 |  |  | 	__u32			avl_height; | 
					
						
							| 
									
										
										
										
											2011-06-08 23:31:27 -07:00
										 |  |  | 
 | 
					
						
							|  |  |  | 	u32			metrics[RTAX_MAX]; | 
					
						
							|  |  |  | 	u32			rate_tokens;	/* rate limiting for ICMP */ | 
					
						
							|  |  |  | 	unsigned long		rate_last; | 
					
						
							| 
									
										
										
										
											2012-06-05 03:00:18 +00:00
										 |  |  | 	union { | 
					
						
							|  |  |  | 		struct list_head	gc_list; | 
					
						
							|  |  |  | 		struct rcu_head     gc_rcu; | 
					
						
							|  |  |  | 	}; | 
					
						
							| 
									
										
										
										
											2010-06-16 04:52:13 +00:00
										 |  |  | 	/*
 | 
					
						
							| 
									
										
											  
											
												inetpeer: get rid of ip_id_count
Ideally, we would need to generate IP ID using a per destination IP
generator.
linux kernels used inet_peer cache for this purpose, but this had a huge
cost on servers disabling MTU discovery.
1) each inet_peer struct consumes 192 bytes
2) inetpeer cache uses a binary tree of inet_peer structs,
   with a nominal size of ~66000 elements under load.
3) lookups in this tree are hitting a lot of cache lines, as tree depth
   is about 20.
4) If server deals with many tcp flows, we have a high probability of
   not finding the inet_peer, allocating a fresh one, inserting it in
   the tree with same initial ip_id_count, (cf secure_ip_id())
5) We garbage collect inet_peer aggressively.
IP ID generation do not have to be 'perfect'
Goal is trying to avoid duplicates in a short period of time,
so that reassembly units have a chance to complete reassembly of
fragments belonging to one message before receiving other fragments
with a recycled ID.
We simply use an array of generators, and a Jenkin hash using the dst IP
as a key.
ipv6_select_ident() is put back into net/ipv6/ip6_output.c where it
belongs (it is only used from this file)
secure_ip_id() and secure_ipv6_id() no longer are needed.
Rename ip_select_ident_more() to ip_select_ident_segs() to avoid
unnecessary decrement/increment of the number of segments.
Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
											
										 
											2014-06-02 05:26:03 -07:00
										 |  |  | 	 * Once inet_peer is queued for deletion (refcnt == -1), following field | 
					
						
							|  |  |  | 	 * is not available: rid | 
					
						
							| 
									
										
										
										
											2011-01-26 20:55:53 -08:00
										 |  |  | 	 * We can share memory with rcu_head to help keep inet_peer small. | 
					
						
							| 
									
										
										
										
											2010-06-16 04:52:13 +00:00
										 |  |  | 	 */ | 
					
						
							|  |  |  | 	union { | 
					
						
							|  |  |  | 		struct { | 
					
						
							| 
									
										
										
										
											2011-02-09 15:36:47 -08:00
										 |  |  | 			atomic_t			rid;		/* Frag reception counter */ | 
					
						
							| 
									
										
										
										
											2010-06-16 04:52:13 +00:00
										 |  |  | 		}; | 
					
						
							|  |  |  | 		struct rcu_head         rcu; | 
					
						
							| 
									
										
										
										
											2011-06-08 13:35:34 +00:00
										 |  |  | 		struct inet_peer	*gc_next; | 
					
						
							| 
									
										
										
										
											2010-06-16 04:52:13 +00:00
										 |  |  | 	}; | 
					
						
							| 
									
										
										
										
											2011-06-08 23:31:27 -07:00
										 |  |  | 
 | 
					
						
							|  |  |  | 	/* following fields might be frequently dirtied */ | 
					
						
							|  |  |  | 	__u32			dtime;	/* the time of last use of not referenced entries */ | 
					
						
							|  |  |  | 	atomic_t		refcnt; | 
					
						
							| 
									
										
										
										
											2005-04-16 15:20:36 -07:00
										 |  |  | }; | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2012-06-09 16:27:05 -07:00
										 |  |  | struct inet_peer_base { | 
					
						
							|  |  |  | 	struct inet_peer __rcu	*root; | 
					
						
							|  |  |  | 	seqlock_t		lock; | 
					
						
							| 
									
										
										
										
											2012-06-10 00:24:21 -07:00
										 |  |  | 	u32			flush_seq; | 
					
						
							| 
									
										
										
										
											2012-06-09 16:27:05 -07:00
										 |  |  | 	int			total; | 
					
						
							|  |  |  | }; | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2012-06-09 22:36:36 -07:00
										 /* Low-order tag bit: set when a pointer word holds an inet_peer_base, clear for an inet_peer. */
										 #define INETPEER_BASE_BIT	1UL
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | static inline struct inet_peer *inetpeer_ptr(unsigned long val) | 
					
						
							|  |  |  | { | 
					
						
							|  |  |  | 	BUG_ON(val & INETPEER_BASE_BIT); | 
					
						
							|  |  |  | 	return (struct inet_peer *) val; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | static inline struct inet_peer_base *inetpeer_base_ptr(unsigned long val) | 
					
						
							|  |  |  | { | 
					
						
							|  |  |  | 	if (!(val & INETPEER_BASE_BIT)) | 
					
						
							|  |  |  | 		return NULL; | 
					
						
							|  |  |  | 	val &= ~INETPEER_BASE_BIT; | 
					
						
							|  |  |  | 	return (struct inet_peer_base *) val; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | static inline bool inetpeer_ptr_is_peer(unsigned long val) | 
					
						
							|  |  |  | { | 
					
						
							|  |  |  | 	return !(val & INETPEER_BASE_BIT); | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							/*
							 * Plain (non-atomic) store of a peer pointer into a pointer word.
							 * The previous contents of *val are overwritten unconditionally.
							 */
							static inline void __inetpeer_ptr_set_peer(unsigned long *val, struct inet_peer *peer)
							{
								unsigned long word = (unsigned long) peer;

								/* Storing the bare pointer value implicitly clears INETPEER_BASE_BIT */
								*val = word;
							}
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | static inline bool inetpeer_ptr_set_peer(unsigned long *ptr, struct inet_peer *peer) | 
					
						
							|  |  |  | { | 
					
						
							|  |  |  | 	unsigned long val = (unsigned long) peer; | 
					
						
							|  |  |  | 	unsigned long orig = *ptr; | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2012-06-11 04:13:57 -07:00
										 |  |  | 	if (!(orig & INETPEER_BASE_BIT) || | 
					
						
							| 
									
										
										
										
											2012-06-09 22:36:36 -07:00
										 |  |  | 	    cmpxchg(ptr, orig, val) != orig) | 
					
						
							|  |  |  | 		return false; | 
					
						
							|  |  |  | 	return true; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | static inline void inetpeer_init_ptr(unsigned long *ptr, struct inet_peer_base *base) | 
					
						
							|  |  |  | { | 
					
						
							|  |  |  | 	*ptr = (unsigned long) base | INETPEER_BASE_BIT; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | static inline void inetpeer_transfer_peer(unsigned long *to, unsigned long *from) | 
					
						
							|  |  |  | { | 
					
						
							|  |  |  | 	unsigned long val = *from; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	*to = val; | 
					
						
							|  |  |  | 	if (inetpeer_ptr_is_peer(val)) { | 
					
						
							|  |  |  | 		struct inet_peer *peer = inetpeer_ptr(val); | 
					
						
							|  |  |  | 		atomic_inc(&peer->refcnt); | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2013-09-21 10:22:41 -07:00
										 |  |  | void inet_peer_base_init(struct inet_peer_base *); | 
					
						
							| 
									
										
										
										
											2012-06-09 16:27:05 -07:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2013-09-21 10:22:41 -07:00
										 |  |  | void inet_initpeers(void) __init; | 
					
						
							| 
									
										
										
										
											2005-04-16 15:20:36 -07:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2011-01-27 13:52:16 -08:00
										 /* All-ones u32 marker; inet_metrics_new() compares metrics[RTAX_LOCK-1] against it. */
										 #define INETPEER_METRICS_NEW	(~((u32) 0))
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | static inline bool inet_metrics_new(const struct inet_peer *p) | 
					
						
							|  |  |  | { | 
					
						
							|  |  |  | 	return p->metrics[RTAX_LOCK-1] == INETPEER_METRICS_NEW; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2005-04-16 15:20:36 -07:00
										 /*
										  * Can be called with or without local BH being disabled.
										  */
										 struct inet_peer *
										 inet_getpeer(struct inet_peer_base *base,
										 	     const struct inetpeer_addr *daddr,
										 	     int create);
					
						
							| 
									
										
										
										
											2010-11-30 11:54:19 -08:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2012-06-09 19:12:36 -07:00
										 |  |  | static inline struct inet_peer *inet_getpeer_v4(struct inet_peer_base *base, | 
					
						
							| 
									
										
										
										
											2012-06-08 01:21:40 +00:00
										 |  |  | 						__be32 v4daddr, | 
					
						
							|  |  |  | 						int create) | 
					
						
							| 
									
										
										
										
											2010-11-30 11:54:19 -08:00
										 |  |  | { | 
					
						
							| 
									
										
										
										
											2010-12-01 17:28:18 -08:00
										 |  |  | 	struct inetpeer_addr daddr; | 
					
						
							| 
									
										
										
										
											2010-11-30 11:54:19 -08:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2011-02-09 14:30:26 -08:00
										 |  |  | 	daddr.addr.a4 = v4daddr; | 
					
						
							| 
									
										
										
										
											2010-11-30 11:54:19 -08:00
										 |  |  | 	daddr.family = AF_INET; | 
					
						
							| 
									
										
										
										
											2012-06-09 19:12:36 -07:00
										 |  |  | 	return inet_getpeer(base, &daddr, create); | 
					
						
							| 
									
										
										
										
											2010-11-30 11:54:19 -08:00
										 |  |  | } | 
					
						
							| 
									
										
										
										
											2005-04-16 15:20:36 -07:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2012-06-09 19:12:36 -07:00
										 |  |  | static inline struct inet_peer *inet_getpeer_v6(struct inet_peer_base *base, | 
					
						
							| 
									
										
										
										
											2012-06-08 01:21:40 +00:00
										 |  |  | 						const struct in6_addr *v6daddr, | 
					
						
							|  |  |  | 						int create) | 
					
						
							| 
									
										
										
										
											2010-11-30 12:20:00 -08:00
										 |  |  | { | 
					
						
							| 
									
										
										
										
											2010-12-01 17:28:18 -08:00
										 |  |  | 	struct inetpeer_addr daddr; | 
					
						
							| 
									
										
										
										
											2010-11-30 12:20:00 -08:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2011-11-21 03:39:03 +00:00
										 |  |  | 	*(struct in6_addr *)daddr.addr.a6 = *v6daddr; | 
					
						
							| 
									
										
										
										
											2010-11-30 12:20:00 -08:00
										 |  |  | 	daddr.family = AF_INET6; | 
					
						
							| 
									
										
										
										
											2012-06-09 19:12:36 -07:00
										 |  |  | 	return inet_getpeer(base, &daddr, create); | 
					
						
							| 
									
										
										
										
											2010-11-30 12:20:00 -08:00
										 |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2005-04-16 15:20:36 -07:00
										 /*
										  * Can be called from BH context or outside.
										  */
										 void inet_putpeer(struct inet_peer *p);

										 bool inet_peer_xrlim_allow(struct inet_peer *peer, int timeout);

										 void inetpeer_invalidate_tree(struct inet_peer_base *base);
					
						
							| 
									
										
										
										
											2012-03-06 21:20:26 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2010-06-16 04:52:13 +00:00
										 |  |  | /*
 | 
					
						
							| 
									
										
											  
											
												inetpeer: get rid of ip_id_count
Ideally, we would need to generate IP ID using a per destination IP
generator.
linux kernels used inet_peer cache for this purpose, but this had a huge
cost on servers disabling MTU discovery.
1) each inet_peer struct consumes 192 bytes
2) inetpeer cache uses a binary tree of inet_peer structs,
   with a nominal size of ~66000 elements under load.
3) lookups in this tree are hitting a lot of cache lines, as tree depth
   is about 20.
4) If server deals with many tcp flows, we have a high probability of
   not finding the inet_peer, allocating a fresh one, inserting it in
   the tree with same initial ip_id_count, (cf secure_ip_id())
5) We garbage collect inet_peer aggressively.
IP ID generation do not have to be 'perfect'
Goal is trying to avoid duplicates in a short period of time,
so that reassembly units have a chance to complete reassembly of
fragments belonging to one message before receiving other fragments
with a recycled ID.
We simply use an array of generators, and a Jenkin hash using the dst IP
as a key.
ipv6_select_ident() is put back into net/ipv6/ip6_output.c where it
belongs (it is only used from this file)
secure_ip_id() and secure_ipv6_id() no longer are needed.
Rename ip_select_ident_more() to ip_select_ident_segs() to avoid
unnecessary decrement/increment of the number of segments.
Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
											
										 
											2014-06-02 05:26:03 -07:00
										 |  |  |  * temporary check to make sure we dont access rid, tcp_ts, | 
					
						
							| 
									
										
										
										
											2010-06-16 04:52:13 +00:00
										 |  |  |  * tcp_ts_stamp if no refcount is taken on inet_peer | 
					
						
							|  |  |  |  */ | 
					
						
							|  |  |  | static inline void inet_peer_refcheck(const struct inet_peer *p) | 
					
						
							|  |  |  | { | 
					
						
							|  |  |  | 	WARN_ON_ONCE(atomic_read(&p->refcnt) <= 0); | 
					
						
							|  |  |  | } | 
					
						
							| 
									
										
										
										
											2005-04-16 15:20:36 -07:00
										 |  |  | #endif /* _NET_INETPEER_H */
 |