 645ca708f9
			
		
	
	
	645ca708f9
	
	
	
		
			
			UDP sockets are hashed in a 128-slot hash table. This hash table is protected by *one* rwlock. This rwlock is readlocked each time an incoming UDP message is handled. This rwlock is writelocked each time a socket must be inserted in the hash table (bind time), or deleted from this table (close time). This is not scalable on SMP machines: 1) Even in read mode, lock() and unlock() are atomic operations and must dirty a contended cache line, shared by all cpus. 2) A writer might be starved if many readers are 'in flight'. This can happen on a machine with some NIC receiving many UDP messages. User process can be delayed a long time at socket creation/dismantle time. This patch prepares RCU migration, by introducing 'struct udp_table and struct udp_hslot', and using one spinlock per chain, to reduce contention on the central rwlock. Introducing one spinlock per chain reduces latencies for port randomization on heavily loaded UDP servers. This also speeds up bindings to specific ports. udp_lib_unhash() was uninlined, having become too big. Some cleanups were done to ease review of the following patch (RCUification of UDP Unicast lookups). Signed-off-by: Eric Dumazet <dada1@cosmosbay.com> Signed-off-by: David S. Miller <davem@davemloft.net>
		
			
				
	
	
		
			121 lines
		
	
	
	
		
			3.4 KiB
			
		
	
	
	
		
			C
		
	
	
	
	
	
			
		
		
	
	
			121 lines
		
	
	
	
		
			3.4 KiB
			
		
	
	
	
		
			C
		
	
	
	
	
	
| /*
 | |
|  *	Definitions for the UDP-Lite (RFC 3828) code.
 | |
|  */
 | |
| #ifndef _UDPLITE_H
 | |
| #define _UDPLITE_H
 | |
| 
 | |
| #include <net/ip6_checksum.h>
 | |
| 
 | |
| /* UDP-Lite socket options */
 | |
| #define UDPLITE_SEND_CSCOV   10 /* sender partial coverage (as sent)      */
 | |
| #define UDPLITE_RECV_CSCOV   11 /* receiver partial coverage (threshold ) */
 | |
| 
 | |
| extern struct proto 		udplite_prot;
 | |
| extern struct udp_table		udplite_table;
 | |
| 
 | |
/*
 *	getfrag routine for UDP-Lite.
 *
 *	Checksum computation is done entirely in software, so this routine
 *	only has to copy data: `len' bytes starting at `offset' are copied
 *	from the iovec passed in `from' to the buffer `to'. The `odd' and
 *	`skb' arguments are not used here.
 *
 *	Returns whatever memcpy_fromiovecend() returns.
 */
static __inline__ int udplite_getfrag(void *from, char *to, int offset,
				      int len, int odd, struct sk_buff *skb)
{
	struct iovec *iov = from;

	return memcpy_fromiovecend(to, iov, offset, len);
}
 | |
| 
 | |
| /* Designate sk as UDP-Lite socket */
 | |
| static inline int udplite_sk_init(struct sock *sk)
 | |
| {
 | |
| 	udp_sk(sk)->pcflag = UDPLITE_BIT;
 | |
| 	return 0;
 | |
| }
 | |
| 
 | |
| /*
 | |
|  * 	Checksumming routines
 | |
|  */
 | |
| static inline int udplite_checksum_init(struct sk_buff *skb, struct udphdr *uh)
 | |
| {
 | |
| 	u16 cscov;
 | |
| 
 | |
|         /* In UDPv4 a zero checksum means that the transmitter generated no
 | |
|          * checksum. UDP-Lite (like IPv6) mandates checksums, hence packets
 | |
|          * with a zero checksum field are illegal.                            */
 | |
| 	if (uh->check == 0) {
 | |
| 		LIMIT_NETDEBUG(KERN_DEBUG "UDPLITE: zeroed checksum field\n");
 | |
| 		return 1;
 | |
| 	}
 | |
| 
 | |
| 	cscov = ntohs(uh->len);
 | |
| 
 | |
| 	if (cscov == 0)		 /* Indicates that full coverage is required. */
 | |
| 		;
 | |
| 	else if (cscov < 8  || cscov > skb->len) {
 | |
| 		/*
 | |
| 		 * Coverage length violates RFC 3828: log and discard silently.
 | |
| 		 */
 | |
| 		LIMIT_NETDEBUG(KERN_DEBUG "UDPLITE: bad csum coverage %d/%d\n",
 | |
| 			       cscov, skb->len);
 | |
| 		return 1;
 | |
| 
 | |
| 	} else if (cscov < skb->len) {
 | |
|         	UDP_SKB_CB(skb)->partial_cov = 1;
 | |
| 		UDP_SKB_CB(skb)->cscov = cscov;
 | |
| 		if (skb->ip_summed == CHECKSUM_COMPLETE)
 | |
| 			skb->ip_summed = CHECKSUM_NONE;
 | |
|         }
 | |
| 
 | |
| 	return 0;
 | |
| }
 | |
| 
 | |
| static inline int udplite_sender_cscov(struct udp_sock *up, struct udphdr *uh)
 | |
| {
 | |
| 	int cscov = up->len;
 | |
| 
 | |
| 	/*
 | |
| 	 * Sender has set `partial coverage' option on UDP-Lite socket
 | |
| 	 */
 | |
| 	if (up->pcflag & UDPLITE_SEND_CC)    {
 | |
| 		if (up->pcslen < up->len) {
 | |
| 		/* up->pcslen == 0 means that full coverage is required,
 | |
| 		 * partial coverage only if  0 < up->pcslen < up->len */
 | |
| 			if (0 < up->pcslen) {
 | |
| 			       cscov = up->pcslen;
 | |
| 			}
 | |
| 			uh->len = htons(up->pcslen);
 | |
| 		}
 | |
| 	/*
 | |
| 	 * NOTE: Causes for the error case  `up->pcslen > up->len':
 | |
| 	 *        (i)  Application error (will not be penalized).
 | |
| 	 *       (ii)  Payload too big for send buffer: data is split
 | |
| 	 *             into several packets, each with its own header.
 | |
| 	 *             In this case (e.g. last segment), coverage may
 | |
| 	 *             exceed packet length.
 | |
| 	 *       Since packets with coverage length > packet length are
 | |
| 	 *       illegal, we fall back to the defaults here.
 | |
| 	 */
 | |
| 	}
 | |
| 	return cscov;
 | |
| }
 | |
| 
 | |
| static inline __wsum udplite_csum_outgoing(struct sock *sk, struct sk_buff *skb)
 | |
| {
 | |
| 	int cscov = udplite_sender_cscov(udp_sk(sk), udp_hdr(skb));
 | |
| 	__wsum csum = 0;
 | |
| 
 | |
| 	skb->ip_summed = CHECKSUM_NONE;     /* no HW support for checksumming */
 | |
| 
 | |
| 	skb_queue_walk(&sk->sk_write_queue, skb) {
 | |
| 		const int off = skb_transport_offset(skb);
 | |
| 		const int len = skb->len - off;
 | |
| 
 | |
| 		csum = skb_checksum(skb, off, (cscov > len)? len : cscov, csum);
 | |
| 
 | |
| 		if ((cscov -= len) <= 0)
 | |
| 			break;
 | |
| 	}
 | |
| 	return csum;
 | |
| }
 | |
| 
 | |
| extern void	udplite4_register(void);
 | |
| extern int 	udplite_get_port(struct sock *sk, unsigned short snum,
 | |
| 			int (*scmp)(const struct sock *, const struct sock *));
 | |
| #endif	/* _UDPLITE_H */
 |