net: more accurate skb truesize
skb truesize currently accounts for the sk_buff struct and part of the skb head.
kmalloc() roundings are also ignored.

Considering that skb_shared_info is larger than sk_buff, it's time to
take it into account for better memory accounting.

This patch introduces the SKB_TRUESIZE(X) macro to centralize various
assumptions into a single place.

At skb alloc phase, we put the skb_shared_info struct at the exact end
of the skb head, to allow a better use of memory (lowering the number
of reallocations), since kmalloc() gives us power-of-two memory blocks.

Unless SLUB/SLAB debug is active, both skb->head and skb_shared_info
are aligned to cache lines, as before.

Note: this patch might trigger performance regressions because of
misconfigured protocol stacks, hitting per-socket or global memory
limits that were previously not reached. But it's a necessary step
toward more accurate memory accounting.

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
CC: Andi Kleen <ak@linux.intel.com>
CC: Ben Hutchings <bhutchings@solarflare.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:

parent 97ba0eb64c
commit 87fb4b7b53

8 changed files with 32 additions and 19 deletions
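To make the numbers concrete before the per-file changes, here is a small userspace sketch of the arithmetic behind the new SKB_TRUESIZE(X) macro. The struct sizes and cache-line size below are illustrative assumptions (real values depend on kernel version, architecture and config), not figures taken from this commit:

#include <stdio.h>

/* Illustrative assumptions only -- real values vary by kernel
 * version, architecture and config.
 */
#define SMP_CACHE_BYTES		64	/* assumed cache line size */
#define SIZEOF_SK_BUFF		240	/* assumed sizeof(struct sk_buff) */
#define SIZEOF_SHARED_INFO	320	/* assumed sizeof(struct skb_shared_info) */

/* Round X up to a cache-line multiple, like the kernel's SKB_DATA_ALIGN */
#define SKB_DATA_ALIGN(X) (((X) + (SMP_CACHE_BYTES - 1)) & \
			   ~(SMP_CACHE_BYTES - 1))

/* Userspace mirror of the SKB_TRUESIZE(X) macro introduced below */
#define SKB_TRUESIZE(X) ((X) +					\
			 SKB_DATA_ALIGN(SIZEOF_SK_BUFF) +	\
			 SKB_DATA_ALIGN(SIZEOF_SHARED_INFO))

int main(void)
{
	/* Old accounting charged only data + sizeof(struct sk_buff) */
	printf("old truesize(256) = %d\n", 256 + SIZEOF_SK_BUFF);
	/* New accounting also charges the cache-line-aligned sk_buff
	 * and skb_shared_info.
	 */
	printf("new truesize(256) = %d\n", SKB_TRUESIZE(256));
	return 0;
}

With these assumed sizes the charge for a 256-byte skb grows from 496 to 832 bytes; the old figure simply ignored skb_shared_info and the alignment padding.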
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -46,6 +46,11 @@
 #define SKB_MAX_HEAD(X)		(SKB_MAX_ORDER((X), 0))
 #define SKB_MAX_ALLOC		(SKB_MAX_ORDER(0, 2))
 
+/* return minimum truesize of one skb containing X bytes of data */
+#define SKB_TRUESIZE(X) ((X) +						\
+			 SKB_DATA_ALIGN(sizeof(struct sk_buff)) +	\
+			 SKB_DATA_ALIGN(sizeof(struct skb_shared_info)))
+
 /* A. Checksumming of received packets by device.
  *
  *	NONE: device failed to checksum this packet.
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -184,11 +184,20 @@ struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask,
 		goto out;
 	prefetchw(skb);
 
 	size = SKB_DATA_ALIGN(size);
-	data = kmalloc_node_track_caller(size + sizeof(struct skb_shared_info),
-			gfp_mask, node);
+	/* We do our best to align skb_shared_info on a separate cache
+	 * line. It usually works because kmalloc(X > SMP_CACHE_BYTES) gives
+	 * aligned memory blocks, unless SLUB/SLAB debug is enabled.
+	 * Both skb->head and skb_shared_info are cache line aligned.
+	 */
+	size += SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
+	data = kmalloc_node_track_caller(size, gfp_mask, node);
 	if (!data)
 		goto nodata;
+	/* kmalloc(size) might give us more room than requested.
+	 * Put skb_shared_info exactly at the end of allocated zone,
+	 * to allow max possible filling before reallocation.
+	 */
+	size = SKB_WITH_OVERHEAD(ksize(data));
 	prefetchw(data + size);
 
 	/*
@@ -197,7 +206,8 @@ struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask,
 	 * the tail pointer in struct sk_buff!
 	 */
 	memset(skb, 0, offsetof(struct sk_buff, tail));
-	skb->truesize = size + sizeof(struct sk_buff);
+	/* Account for allocated memory : skb + skb->head */
+	skb->truesize = SKB_TRUESIZE(size);
 	atomic_set(&skb->users, 1);
 	skb->head = data;
 	skb->data = data;
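The second comment above describes the central trick: ask the allocator how large the returned block really is (ksize()) and put skb_shared_info at its exact end, so kmalloc() rounding slack becomes usable packet room instead of waste. Below is a hedged userspace analogue using glibc's malloc_usable_size(); struct meta is a made-up stand-in for skb_shared_info, and the cache-line alignment the kernel applies is omitted for brevity:

#include <malloc.h>
#include <stdio.h>
#include <stdlib.h>

/* Hypothetical stand-in for skb_shared_info */
struct meta {
	int refcnt;
	int nr_frags;
};

int main(void)
{
	size_t request = 1000;	/* caller asked for this much data room */
	char *buf = malloc(request + sizeof(struct meta));

	if (!buf)
		return 1;

	/* malloc() may round the block up; ask for the real size, the
	 * way __alloc_skb() now calls ksize(data).
	 */
	size_t usable = malloc_usable_size(buf);

	/* Place the metadata at the exact end of the usable block, so
	 * any rounding slack extends the data area.
	 */
	size_t data_room = usable - sizeof(struct meta);
	struct meta *m = (struct meta *)(buf + data_room);

	m->refcnt = 1;
	m->nr_frags = 0;

	printf("requested %zu, usable %zu, data room %zu\n",
	       request, usable, data_room);
	free(buf);
	return 0;
}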
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -207,7 +207,7 @@ static struct lock_class_key af_callback_keys[AF_MAX];
  * not depend upon such differences.
  */
 #define _SK_MEM_PACKETS		256
-#define _SK_MEM_OVERHEAD	(sizeof(struct sk_buff) + 256)
+#define _SK_MEM_OVERHEAD	SKB_TRUESIZE(256)
 #define SK_WMEM_MAX		(_SK_MEM_OVERHEAD * _SK_MEM_PACKETS)
 #define SK_RMEM_MAX		(_SK_MEM_OVERHEAD * _SK_MEM_PACKETS)
 
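With the illustrative sizes from the first sketch, the per-packet overhead in this default changes from sizeof(struct sk_buff) + 256 = 240 + 256 = 496 bytes to SKB_TRUESIZE(256) = 256 + 256 + 320 = 832 bytes, so the SK_WMEM_MAX / SK_RMEM_MAX defaults grow from roughly 496 * 256 = 126976 bytes (~124 KB) to 832 * 256 = 212992 bytes (~208 KB). The actual figures depend on the real struct sizes.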
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -1152,10 +1152,9 @@ static int __net_init icmp_sk_init(struct net *net)
 		net->ipv4.icmp_sk[i] = sk;
 
 		/* Enough space for 2 64K ICMP packets, including
-		 * sk_buff struct overhead.
+		 * sk_buff/skb_shared_info struct overhead.
 		 */
-		sk->sk_sndbuf =
-			(2 * ((64 * 1024) + sizeof(struct sk_buff)));
+		sk->sk_sndbuf =	2 * SKB_TRUESIZE(64 * 1024);
 
 		/*
 		 * Speedup sock_wfree()
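Using the same assumed sizes, SKB_TRUESIZE(64 * 1024) = 65536 + 256 + 320 = 66112, so each ICMP socket gets a sndbuf of 2 * 66112 = 132224 bytes, only marginally above the old 2 * (65536 + 240) = 131552; the change here is about accounting correctness rather than buffer growth.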
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -265,8 +265,7 @@ static inline int TCP_ECN_rcv_ecn_echo(struct tcp_sock *tp, struct tcphdr *th)
 
 static void tcp_fixup_sndbuf(struct sock *sk)
 {
-	int sndmem = tcp_sk(sk)->rx_opt.mss_clamp + MAX_TCP_HEADER + 16 +
-		     sizeof(struct sk_buff);
+	int sndmem = SKB_TRUESIZE(tcp_sk(sk)->rx_opt.mss_clamp + MAX_TCP_HEADER);
 
 	if (sk->sk_sndbuf < 3 * sndmem) {
 		sk->sk_sndbuf = 3 * sndmem;
@@ -349,7 +348,7 @@ static void tcp_grow_window(struct sock *sk, struct sk_buff *skb)
 static void tcp_fixup_rcvbuf(struct sock *sk)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
-	int rcvmem = tp->advmss + MAX_TCP_HEADER + 16 + sizeof(struct sk_buff);
+	int rcvmem = SKB_TRUESIZE(tp->advmss + MAX_TCP_HEADER);
 
 	/* Try to select rcvbuf so that 4 mss-sized segments
 	 * will fit to window and corresponding skbs will fit to our rcvbuf.
@@ -540,8 +539,7 @@ void tcp_rcv_space_adjust(struct sock *sk)
 			space /= tp->advmss;
 			if (!space)
 				space = 1;
-			rcvmem = (tp->advmss + MAX_TCP_HEADER +
-				  16 + sizeof(struct sk_buff));
+			rcvmem = SKB_TRUESIZE(tp->advmss + MAX_TCP_HEADER);
 			while (tcp_win_from_space(rcvmem) < tp->advmss)
 				rcvmem += 128;
 			space *= rcvmem;
@@ -4950,8 +4948,10 @@ static void tcp_new_space(struct sock *sk)
 	struct tcp_sock *tp = tcp_sk(sk);
 
 	if (tcp_should_expand_sndbuf(sk)) {
-		int sndmem = max_t(u32, tp->rx_opt.mss_clamp, tp->mss_cache) +
-			MAX_TCP_HEADER + 16 + sizeof(struct sk_buff);
+		int sndmem = SKB_TRUESIZE(max_t(u32,
+						tp->rx_opt.mss_clamp,
+						tp->mss_cache) +
+					  MAX_TCP_HEADER);
 		int demanded = max_t(unsigned int, tp->snd_cwnd,
 				     tp->reordering + 1);
 		sndmem *= 2 * demanded;
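The tcp_rcv_space_adjust() hunk deserves a closer look: rcvmem starts at the truesize of one MSS-sized segment and is bumped in 128-byte steps until the window derived from it can hold a full MSS. Here is a hedged userspace sketch, reusing the assumed sizes from the first example; MAX_TCP_HEADER is likewise an assumed value, and tcp_win_from_space() is simplified to the default tcp_adv_win_scale = 2 behavior (window = 3/4 of buffer space):

#include <stdio.h>

/* Same illustrative assumptions as the earlier sketch */
#define SMP_CACHE_BYTES		64
#define SIZEOF_SK_BUFF		240
#define SIZEOF_SHARED_INFO	320
#define MAX_TCP_HEADER		160	/* assumed; depends on kernel config */

#define SKB_DATA_ALIGN(X) (((X) + (SMP_CACHE_BYTES - 1)) & \
			   ~(SMP_CACHE_BYTES - 1))
#define SKB_TRUESIZE(X) ((X) +					\
			 SKB_DATA_ALIGN(SIZEOF_SK_BUFF) +	\
			 SKB_DATA_ALIGN(SIZEOF_SHARED_INFO))

/* Simplified tcp_win_from_space() for tcp_adv_win_scale = 2:
 * a quarter of the buffer is reserved as overhead.
 */
static int win_from_space(int space)
{
	return space - (space >> 2);
}

int main(void)
{
	int advmss = 1460;	/* typical Ethernet MSS */
	int rcvmem = SKB_TRUESIZE(advmss + MAX_TCP_HEADER);

	/* Grow the per-segment charge until the window it implies can
	 * hold one full MSS, mirroring tcp_rcv_space_adjust().
	 */
	while (win_from_space(rcvmem) < advmss)
		rcvmem += 128;

	printf("per-segment rcvmem charge = %d bytes\n", rcvmem);
	printf("rcvbuf for 4 segments = %d bytes\n", 4 * rcvmem);
	return 0;
}

With these numbers the initial charge (2196 bytes) already yields a window larger than one MSS, so the loop does not iterate; with a bigger advmss or a smaller tcp_adv_win_scale it would.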
--- a/net/ipv6/icmp.c
+++ b/net/ipv6/icmp.c
@@ -835,8 +835,7 @@ static int __net_init icmpv6_sk_init(struct net *net)
 		/* Enough space for 2 64K ICMP packets, including
 		 * sk_buff struct overhead.
 		 */
-		sk->sk_sndbuf =
-			(2 * ((64 * 1024) + sizeof(struct sk_buff)));
+		sk->sk_sndbuf = 2 * SKB_TRUESIZE(64 * 1024);
 	}
 	return 0;
 
--- a/net/iucv/af_iucv.c
+++ b/net/iucv/af_iucv.c
@@ -1819,7 +1819,7 @@ static void iucv_callback_rx(struct iucv_path *path, struct iucv_message *msg)
 		goto save_message;
 
 	len = atomic_read(&sk->sk_rmem_alloc);
-	len += iucv_msg_length(msg) + sizeof(struct sk_buff);
+	len += SKB_TRUESIZE(iucv_msg_length(msg));
 	if (len > sk->sk_rcvbuf)
 		goto save_message;
 
--- a/net/sctp/protocol.c
+++ b/net/sctp/protocol.c
@@ -1299,7 +1299,7 @@ SCTP_STATIC __init int sctp_init(void)
 	max_share = min(4UL*1024*1024, limit);
 
 	sysctl_sctp_rmem[0] = SK_MEM_QUANTUM; /* give each asoc 1 page min */
-	sysctl_sctp_rmem[1] = (1500 *(sizeof(struct sk_buff) + 1));
+	sysctl_sctp_rmem[1] = 1500 * SKB_TRUESIZE(1);
 	sysctl_sctp_rmem[2] = max(sysctl_sctp_rmem[1], max_share);
 
 	sysctl_sctp_wmem[0] = SK_MEM_QUANTUM;
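With the same assumed sizes, SKB_TRUESIZE(1) = 1 + 256 + 320 = 577, so sysctl_sctp_rmem[1] moves from 1500 * (sizeof(struct sk_buff) + 1) = 1500 * 241 = 361500 bytes (~353 KB) to 1500 * 577 = 865500 bytes (~845 KB) before the max_share clamp; a concrete example of how the new accounting shifts existing defaults, per the caveat in the commit message.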