igmp: add a hash table to speedup ip_check_mc_rcu()
After IP route cache removal, multicast applications using a lot of multicast addresses hit an O(N) behavior in ip_check_mc_rcu(). Add a per in_device hash table to get faster lookups. This hash table is created only if the number of items in mc_list is at least 4. Reported-by: Shawn Bohrer <sbohrer@rgmadvisors.com> Signed-off-by: Eric Dumazet <edumazet@google.com> Tested-by: Shawn Bohrer <sbohrer@rgmadvisors.com> Reviewed-by: Cong Wang <xiyou.wangcong@gmail.com> Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
		
					parent
					
						
							
								64153ce0a7
							
						
					
				
			
			
				commit
				
					
						e989707135
					
				
			
		
					 4 changed files with 77 additions and 3 deletions
				
			
		|  | @ -84,6 +84,7 @@ struct ip_mc_list { | ||||||
| 		struct ip_mc_list *next; | 		struct ip_mc_list *next; | ||||||
| 		struct ip_mc_list __rcu *next_rcu; | 		struct ip_mc_list __rcu *next_rcu; | ||||||
| 	}; | 	}; | ||||||
|  | 	struct ip_mc_list __rcu *next_hash; | ||||||
| 	struct timer_list	timer; | 	struct timer_list	timer; | ||||||
| 	int			users; | 	int			users; | ||||||
| 	atomic_t		refcnt; | 	atomic_t		refcnt; | ||||||
|  |  | ||||||
|  | @ -50,12 +50,17 @@ struct ipv4_devconf { | ||||||
| 	DECLARE_BITMAP(state, IPV4_DEVCONF_MAX); | 	DECLARE_BITMAP(state, IPV4_DEVCONF_MAX); | ||||||
| }; | }; | ||||||
| 
 | 
 | ||||||
|  | #define MC_HASH_SZ_LOG 9 | ||||||
|  | 
 | ||||||
| struct in_device { | struct in_device { | ||||||
| 	struct net_device	*dev; | 	struct net_device	*dev; | ||||||
| 	atomic_t		refcnt; | 	atomic_t		refcnt; | ||||||
| 	int			dead; | 	int			dead; | ||||||
| 	struct in_ifaddr	*ifa_list;	/* IP ifaddr chain		*/ | 	struct in_ifaddr	*ifa_list;	/* IP ifaddr chain		*/ | ||||||
|  | 
 | ||||||
| 	struct ip_mc_list __rcu	*mc_list;	/* IP multicast filter chain    */ | 	struct ip_mc_list __rcu	*mc_list;	/* IP multicast filter chain    */ | ||||||
|  | 	struct ip_mc_list __rcu	* __rcu *mc_hash; | ||||||
|  | 
 | ||||||
| 	int			mc_count;	/* Number of installed mcasts	*/ | 	int			mc_count;	/* Number of installed mcasts	*/ | ||||||
| 	spinlock_t		mc_tomb_lock; | 	spinlock_t		mc_tomb_lock; | ||||||
| 	struct ip_mc_list	*mc_tomb; | 	struct ip_mc_list	*mc_tomb; | ||||||
|  |  | ||||||
|  | @ -215,6 +215,7 @@ void in_dev_finish_destroy(struct in_device *idev) | ||||||
| 
 | 
 | ||||||
| 	WARN_ON(idev->ifa_list); | 	WARN_ON(idev->ifa_list); | ||||||
| 	WARN_ON(idev->mc_list); | 	WARN_ON(idev->mc_list); | ||||||
|  | 	kfree(rcu_dereference_protected(idev->mc_hash, 1)); | ||||||
| #ifdef NET_REFCNT_DEBUG | #ifdef NET_REFCNT_DEBUG | ||||||
| 	pr_debug("%s: %p=%s\n", __func__, idev, dev ? dev->name : "NIL"); | 	pr_debug("%s: %p=%s\n", __func__, idev, dev ? dev->name : "NIL"); | ||||||
| #endif | #endif | ||||||
|  |  | ||||||
|  | @ -1217,6 +1217,57 @@ static void igmp_group_added(struct ip_mc_list *im) | ||||||
|  *	Multicast list managers |  *	Multicast list managers | ||||||
|  */ |  */ | ||||||
| 
 | 
 | ||||||
|  | static u32 ip_mc_hash(const struct ip_mc_list *im) | ||||||
|  | { | ||||||
|  | 	return hash_32((u32)im->multiaddr, MC_HASH_SZ_LOG); | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | static void ip_mc_hash_add(struct in_device *in_dev, | ||||||
|  | 			   struct ip_mc_list *im) | ||||||
|  | { | ||||||
|  | 	struct ip_mc_list __rcu **mc_hash; | ||||||
|  | 	u32 hash; | ||||||
|  | 
 | ||||||
|  | 	mc_hash = rtnl_dereference(in_dev->mc_hash); | ||||||
|  | 	if (mc_hash) { | ||||||
|  | 		hash = ip_mc_hash(im); | ||||||
|  | 		im->next_hash = rtnl_dereference(mc_hash[hash]); | ||||||
|  | 		rcu_assign_pointer(mc_hash[hash], im); | ||||||
|  | 		return; | ||||||
|  | 	} | ||||||
|  | 
 | ||||||
|  | 	/* do not use a hash table for small number of items */ | ||||||
|  | 	if (in_dev->mc_count < 4) | ||||||
|  | 		return; | ||||||
|  | 
 | ||||||
|  | 	mc_hash = kzalloc(sizeof(struct ip_mc_list *) << MC_HASH_SZ_LOG, | ||||||
|  | 			  GFP_KERNEL); | ||||||
|  | 	if (!mc_hash) | ||||||
|  | 		return; | ||||||
|  | 
 | ||||||
|  | 	for_each_pmc_rtnl(in_dev, im) { | ||||||
|  | 		hash = ip_mc_hash(im); | ||||||
|  | 		im->next_hash = rtnl_dereference(mc_hash[hash]); | ||||||
|  | 		RCU_INIT_POINTER(mc_hash[hash], im); | ||||||
|  | 	} | ||||||
|  | 
 | ||||||
|  | 	rcu_assign_pointer(in_dev->mc_hash, mc_hash); | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | static void ip_mc_hash_remove(struct in_device *in_dev, | ||||||
|  | 			      struct ip_mc_list *im) | ||||||
|  | { | ||||||
|  | 	struct ip_mc_list __rcu **mc_hash = rtnl_dereference(in_dev->mc_hash); | ||||||
|  | 	struct ip_mc_list *aux; | ||||||
|  | 
 | ||||||
|  | 	if (!mc_hash) | ||||||
|  | 		return; | ||||||
|  | 	mc_hash += ip_mc_hash(im); | ||||||
|  | 	while ((aux = rtnl_dereference(*mc_hash)) != im) | ||||||
|  | 		mc_hash = &aux->next_hash; | ||||||
|  | 	*mc_hash = im->next_hash; | ||||||
|  | } | ||||||
|  | 
 | ||||||
| 
 | 
 | ||||||
| /*
 | /*
 | ||||||
|  *	A socket has joined a multicast group on device dev. |  *	A socket has joined a multicast group on device dev. | ||||||
|  | @ -1258,6 +1309,8 @@ void ip_mc_inc_group(struct in_device *in_dev, __be32 addr) | ||||||
| 	in_dev->mc_count++; | 	in_dev->mc_count++; | ||||||
| 	rcu_assign_pointer(in_dev->mc_list, im); | 	rcu_assign_pointer(in_dev->mc_list, im); | ||||||
| 
 | 
 | ||||||
|  | 	ip_mc_hash_add(in_dev, im); | ||||||
|  | 
 | ||||||
| #ifdef CONFIG_IP_MULTICAST | #ifdef CONFIG_IP_MULTICAST | ||||||
| 	igmpv3_del_delrec(in_dev, im->multiaddr); | 	igmpv3_del_delrec(in_dev, im->multiaddr); | ||||||
| #endif | #endif | ||||||
|  | @ -1314,6 +1367,7 @@ void ip_mc_dec_group(struct in_device *in_dev, __be32 addr) | ||||||
| 	     ip = &i->next_rcu) { | 	     ip = &i->next_rcu) { | ||||||
| 		if (i->multiaddr == addr) { | 		if (i->multiaddr == addr) { | ||||||
| 			if (--i->users == 0) { | 			if (--i->users == 0) { | ||||||
|  | 				ip_mc_hash_remove(in_dev, i); | ||||||
| 				*ip = i->next_rcu; | 				*ip = i->next_rcu; | ||||||
| 				in_dev->mc_count--; | 				in_dev->mc_count--; | ||||||
| 				igmp_group_dropped(i); | 				igmp_group_dropped(i); | ||||||
|  | @ -2321,12 +2375,25 @@ void ip_mc_drop_socket(struct sock *sk) | ||||||
| int ip_check_mc_rcu(struct in_device *in_dev, __be32 mc_addr, __be32 src_addr, u16 proto) | int ip_check_mc_rcu(struct in_device *in_dev, __be32 mc_addr, __be32 src_addr, u16 proto) | ||||||
| { | { | ||||||
| 	struct ip_mc_list *im; | 	struct ip_mc_list *im; | ||||||
|  | 	struct ip_mc_list __rcu **mc_hash; | ||||||
| 	struct ip_sf_list *psf; | 	struct ip_sf_list *psf; | ||||||
| 	int rv = 0; | 	int rv = 0; | ||||||
| 
 | 
 | ||||||
| 	for_each_pmc_rcu(in_dev, im) { | 	mc_hash = rcu_dereference(in_dev->mc_hash); | ||||||
| 		if (im->multiaddr == mc_addr) | 	if (mc_hash) { | ||||||
| 			break; | 		u32 hash = hash_32((u32)mc_addr, MC_HASH_SZ_LOG); | ||||||
|  | 
 | ||||||
|  | 		for (im = rcu_dereference(mc_hash[hash]); | ||||||
|  | 		     im != NULL; | ||||||
|  | 		     im = rcu_dereference(im->next_hash)) { | ||||||
|  | 			if (im->multiaddr == mc_addr) | ||||||
|  | 				break; | ||||||
|  | 		} | ||||||
|  | 	} else { | ||||||
|  | 		for_each_pmc_rcu(in_dev, im) { | ||||||
|  | 			if (im->multiaddr == mc_addr) | ||||||
|  | 				break; | ||||||
|  | 		} | ||||||
| 	} | 	} | ||||||
| 	if (im && proto == IPPROTO_IGMP) { | 	if (im && proto == IPPROTO_IGMP) { | ||||||
| 		rv = 1; | 		rv = 1; | ||||||
|  |  | ||||||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue
	
	 Eric Dumazet
				Eric Dumazet