netfilter: conntrack: remove central spinlock nf_conntrack_lock
nf_conntrack_lock is a monolithic lock and suffers from huge contention
on current generation servers (8 or more core/threads).
Perf locking congestion is clear on base kernel:
-  72.56%  ksoftirqd/6  [kernel.kallsyms]    [k] _raw_spin_lock_bh
   - _raw_spin_lock_bh
      + 25.33% init_conntrack
      + 24.86% nf_ct_delete_from_lists
      + 24.62% __nf_conntrack_confirm
      + 24.38% destroy_conntrack
      + 0.70% tcp_packet
+   2.21%  ksoftirqd/6  [kernel.kallsyms]    [k] fib_table_lookup
+   1.15%  ksoftirqd/6  [kernel.kallsyms]    [k] __slab_free
+   0.77%  ksoftirqd/6  [kernel.kallsyms]    [k] inet_getpeer
+   0.70%  ksoftirqd/6  [nf_conntrack]       [k] nf_ct_delete
+   0.55%  ksoftirqd/6  [ip_tables]          [k] ipt_do_table
This patch change conntrack locking and provides a huge performance
improvement.  SYN-flood attack tested on a 24-core E5-2695v2(ES) with
10Gbit/s ixgbe (with tool trafgen):
 Base kernel:   810.405 new conntrack/sec
 After patch: 2.233.876 new conntrack/sec
Notice other floods attack (SYN+ACK or ACK) can easily be deflected using:
 # iptables -A INPUT -m state --state INVALID -j DROP
 # sysctl -w net/netfilter/nf_conntrack_tcp_loose=0
Use an array of hashed spinlocks to protect insertions/deletions of
conntracks into the hash table. 1024 spinlocks seem to give good
results, at minimal cost (4KB memory). Due to lockdep max depth,
1024 becomes 8 if CONFIG_LOCKDEP=y
The hash resize is a bit tricky, because we need to take all locks in
the array. A seqcount_t is used to synchronize the hash table users
with the resizing process.
Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: Jesper Dangaard Brouer <brouer@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Reviewed-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
	
	
This commit is contained in:
		
					parent
					
						
							
								ca7433df3a
							
						
					
				
			
			
				commit
				
					
						93bb0ceb75
					
				
			
		
					 5 changed files with 188 additions and 67 deletions
				
			
		| 
						 | 
				
			
			@ -60,12 +60,60 @@ int (*nfnetlink_parse_nat_setup_hook)(struct nf_conn *ct,
 | 
			
		|||
				      const struct nlattr *attr) __read_mostly;
 | 
			
		||||
EXPORT_SYMBOL_GPL(nfnetlink_parse_nat_setup_hook);
 | 
			
		||||
 | 
			
		||||
DEFINE_SPINLOCK(nf_conntrack_lock);
 | 
			
		||||
EXPORT_SYMBOL_GPL(nf_conntrack_lock);
 | 
			
		||||
__cacheline_aligned_in_smp spinlock_t nf_conntrack_locks[CONNTRACK_LOCKS];
 | 
			
		||||
EXPORT_SYMBOL_GPL(nf_conntrack_locks);
 | 
			
		||||
 | 
			
		||||
__cacheline_aligned_in_smp DEFINE_SPINLOCK(nf_conntrack_expect_lock);
 | 
			
		||||
EXPORT_SYMBOL_GPL(nf_conntrack_expect_lock);
 | 
			
		||||
 | 
			
		||||
static void nf_conntrack_double_unlock(unsigned int h1, unsigned int h2)
 | 
			
		||||
{
 | 
			
		||||
	h1 %= CONNTRACK_LOCKS;
 | 
			
		||||
	h2 %= CONNTRACK_LOCKS;
 | 
			
		||||
	spin_unlock(&nf_conntrack_locks[h1]);
 | 
			
		||||
	if (h1 != h2)
 | 
			
		||||
		spin_unlock(&nf_conntrack_locks[h2]);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/* return true if we need to recompute hashes (in case hash table was resized) */
 | 
			
		||||
static bool nf_conntrack_double_lock(struct net *net, unsigned int h1,
 | 
			
		||||
				     unsigned int h2, unsigned int sequence)
 | 
			
		||||
{
 | 
			
		||||
	h1 %= CONNTRACK_LOCKS;
 | 
			
		||||
	h2 %= CONNTRACK_LOCKS;
 | 
			
		||||
	if (h1 <= h2) {
 | 
			
		||||
		spin_lock(&nf_conntrack_locks[h1]);
 | 
			
		||||
		if (h1 != h2)
 | 
			
		||||
			spin_lock_nested(&nf_conntrack_locks[h2],
 | 
			
		||||
					 SINGLE_DEPTH_NESTING);
 | 
			
		||||
	} else {
 | 
			
		||||
		spin_lock(&nf_conntrack_locks[h2]);
 | 
			
		||||
		spin_lock_nested(&nf_conntrack_locks[h1],
 | 
			
		||||
				 SINGLE_DEPTH_NESTING);
 | 
			
		||||
	}
 | 
			
		||||
	if (read_seqcount_retry(&net->ct.generation, sequence)) {
 | 
			
		||||
		nf_conntrack_double_unlock(h1, h2);
 | 
			
		||||
		return true;
 | 
			
		||||
	}
 | 
			
		||||
	return false;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
static void nf_conntrack_all_lock(void)
 | 
			
		||||
{
 | 
			
		||||
	int i;
 | 
			
		||||
 | 
			
		||||
	for (i = 0; i < CONNTRACK_LOCKS; i++)
 | 
			
		||||
		spin_lock_nested(&nf_conntrack_locks[i], i);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
static void nf_conntrack_all_unlock(void)
 | 
			
		||||
{
 | 
			
		||||
	int i;
 | 
			
		||||
 | 
			
		||||
	for (i = 0; i < CONNTRACK_LOCKS; i++)
 | 
			
		||||
		spin_unlock(&nf_conntrack_locks[i]);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
unsigned int nf_conntrack_htable_size __read_mostly;
 | 
			
		||||
EXPORT_SYMBOL_GPL(nf_conntrack_htable_size);
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			@ -280,15 +328,28 @@ destroy_conntrack(struct nf_conntrack *nfct)
 | 
			
		|||
static void nf_ct_delete_from_lists(struct nf_conn *ct)
 | 
			
		||||
{
 | 
			
		||||
	struct net *net = nf_ct_net(ct);
 | 
			
		||||
	unsigned int hash, reply_hash;
 | 
			
		||||
	u16 zone = nf_ct_zone(ct);
 | 
			
		||||
	unsigned int sequence;
 | 
			
		||||
 | 
			
		||||
	nf_ct_helper_destroy(ct);
 | 
			
		||||
	spin_lock_bh(&nf_conntrack_lock);
 | 
			
		||||
	/* Inside lock so preempt is disabled on module removal path.
 | 
			
		||||
	 * Otherwise we can get spurious warnings. */
 | 
			
		||||
	NF_CT_STAT_INC(net, delete_list);
 | 
			
		||||
 | 
			
		||||
	local_bh_disable();
 | 
			
		||||
	do {
 | 
			
		||||
		sequence = read_seqcount_begin(&net->ct.generation);
 | 
			
		||||
		hash = hash_conntrack(net, zone,
 | 
			
		||||
				      &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
 | 
			
		||||
		reply_hash = hash_conntrack(net, zone,
 | 
			
		||||
					   &ct->tuplehash[IP_CT_DIR_REPLY].tuple);
 | 
			
		||||
	} while (nf_conntrack_double_lock(net, hash, reply_hash, sequence));
 | 
			
		||||
 | 
			
		||||
	clean_from_lists(ct);
 | 
			
		||||
	nf_conntrack_double_unlock(hash, reply_hash);
 | 
			
		||||
 | 
			
		||||
	nf_ct_add_to_dying_list(ct);
 | 
			
		||||
	spin_unlock_bh(&nf_conntrack_lock);
 | 
			
		||||
 | 
			
		||||
	NF_CT_STAT_INC(net, delete_list);
 | 
			
		||||
	local_bh_enable();
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
static void death_by_event(unsigned long ul_conntrack)
 | 
			
		||||
| 
						 | 
				
			
			@ -372,8 +433,6 @@ nf_ct_key_equal(struct nf_conntrack_tuple_hash *h,
 | 
			
		|||
 * Warning :
 | 
			
		||||
 * - Caller must take a reference on returned object
 | 
			
		||||
 *   and recheck nf_ct_tuple_equal(tuple, &h->tuple)
 | 
			
		||||
 * OR
 | 
			
		||||
 * - Caller must lock nf_conntrack_lock before calling this function
 | 
			
		||||
 */
 | 
			
		||||
static struct nf_conntrack_tuple_hash *
 | 
			
		||||
____nf_conntrack_find(struct net *net, u16 zone,
 | 
			
		||||
| 
						 | 
				
			
			@ -467,14 +526,18 @@ nf_conntrack_hash_check_insert(struct nf_conn *ct)
 | 
			
		|||
	struct nf_conntrack_tuple_hash *h;
 | 
			
		||||
	struct hlist_nulls_node *n;
 | 
			
		||||
	u16 zone;
 | 
			
		||||
	unsigned int sequence;
 | 
			
		||||
 | 
			
		||||
	zone = nf_ct_zone(ct);
 | 
			
		||||
	hash = hash_conntrack(net, zone,
 | 
			
		||||
			      &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
 | 
			
		||||
	reply_hash = hash_conntrack(net, zone,
 | 
			
		||||
				   &ct->tuplehash[IP_CT_DIR_REPLY].tuple);
 | 
			
		||||
 | 
			
		||||
	spin_lock_bh(&nf_conntrack_lock);
 | 
			
		||||
	local_bh_disable();
 | 
			
		||||
	do {
 | 
			
		||||
		sequence = read_seqcount_begin(&net->ct.generation);
 | 
			
		||||
		hash = hash_conntrack(net, zone,
 | 
			
		||||
				      &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
 | 
			
		||||
		reply_hash = hash_conntrack(net, zone,
 | 
			
		||||
					   &ct->tuplehash[IP_CT_DIR_REPLY].tuple);
 | 
			
		||||
	} while (nf_conntrack_double_lock(net, hash, reply_hash, sequence));
 | 
			
		||||
 | 
			
		||||
	/* See if there's one in the list already, including reverse */
 | 
			
		||||
	hlist_nulls_for_each_entry(h, n, &net->ct.hash[hash], hnnode)
 | 
			
		||||
| 
						 | 
				
			
			@ -493,14 +556,15 @@ nf_conntrack_hash_check_insert(struct nf_conn *ct)
 | 
			
		|||
	/* The caller holds a reference to this object */
 | 
			
		||||
	atomic_set(&ct->ct_general.use, 2);
 | 
			
		||||
	__nf_conntrack_hash_insert(ct, hash, reply_hash);
 | 
			
		||||
	nf_conntrack_double_unlock(hash, reply_hash);
 | 
			
		||||
	NF_CT_STAT_INC(net, insert);
 | 
			
		||||
	spin_unlock_bh(&nf_conntrack_lock);
 | 
			
		||||
 | 
			
		||||
	local_bh_enable();
 | 
			
		||||
	return 0;
 | 
			
		||||
 | 
			
		||||
out:
 | 
			
		||||
	nf_conntrack_double_unlock(hash, reply_hash);
 | 
			
		||||
	NF_CT_STAT_INC(net, insert_failed);
 | 
			
		||||
	spin_unlock_bh(&nf_conntrack_lock);
 | 
			
		||||
	local_bh_enable();
 | 
			
		||||
	return -EEXIST;
 | 
			
		||||
}
 | 
			
		||||
EXPORT_SYMBOL_GPL(nf_conntrack_hash_check_insert);
 | 
			
		||||
| 
						 | 
				
			
			@ -540,6 +604,7 @@ __nf_conntrack_confirm(struct sk_buff *skb)
 | 
			
		|||
	enum ip_conntrack_info ctinfo;
 | 
			
		||||
	struct net *net;
 | 
			
		||||
	u16 zone;
 | 
			
		||||
	unsigned int sequence;
 | 
			
		||||
 | 
			
		||||
	ct = nf_ct_get(skb, &ctinfo);
 | 
			
		||||
	net = nf_ct_net(ct);
 | 
			
		||||
| 
						 | 
				
			
			@ -552,31 +617,37 @@ __nf_conntrack_confirm(struct sk_buff *skb)
 | 
			
		|||
		return NF_ACCEPT;
 | 
			
		||||
 | 
			
		||||
	zone = nf_ct_zone(ct);
 | 
			
		||||
	/* reuse the hash saved before */
 | 
			
		||||
	hash = *(unsigned long *)&ct->tuplehash[IP_CT_DIR_REPLY].hnnode.pprev;
 | 
			
		||||
	hash = hash_bucket(hash, net);
 | 
			
		||||
	reply_hash = hash_conntrack(net, zone,
 | 
			
		||||
				   &ct->tuplehash[IP_CT_DIR_REPLY].tuple);
 | 
			
		||||
	local_bh_disable();
 | 
			
		||||
 | 
			
		||||
	do {
 | 
			
		||||
		sequence = read_seqcount_begin(&net->ct.generation);
 | 
			
		||||
		/* reuse the hash saved before */
 | 
			
		||||
		hash = *(unsigned long *)&ct->tuplehash[IP_CT_DIR_REPLY].hnnode.pprev;
 | 
			
		||||
		hash = hash_bucket(hash, net);
 | 
			
		||||
		reply_hash = hash_conntrack(net, zone,
 | 
			
		||||
					   &ct->tuplehash[IP_CT_DIR_REPLY].tuple);
 | 
			
		||||
 | 
			
		||||
	} while (nf_conntrack_double_lock(net, hash, reply_hash, sequence));
 | 
			
		||||
 | 
			
		||||
	/* We're not in hash table, and we refuse to set up related
 | 
			
		||||
	   connections for unconfirmed conns.  But packet copies and
 | 
			
		||||
	   REJECT will give spurious warnings here. */
 | 
			
		||||
	 * connections for unconfirmed conns.  But packet copies and
 | 
			
		||||
	 * REJECT will give spurious warnings here.
 | 
			
		||||
	 */
 | 
			
		||||
	/* NF_CT_ASSERT(atomic_read(&ct->ct_general.use) == 1); */
 | 
			
		||||
 | 
			
		||||
	/* No external references means no one else could have
 | 
			
		||||
	   confirmed us. */
 | 
			
		||||
	 * confirmed us.
 | 
			
		||||
	 */
 | 
			
		||||
	NF_CT_ASSERT(!nf_ct_is_confirmed(ct));
 | 
			
		||||
	pr_debug("Confirming conntrack %p\n", ct);
 | 
			
		||||
 | 
			
		||||
	spin_lock_bh(&nf_conntrack_lock);
 | 
			
		||||
 | 
			
		||||
	/* We have to check the DYING flag inside the lock to prevent
 | 
			
		||||
	   a race against nf_ct_get_next_corpse() possibly called from
 | 
			
		||||
	   user context, else we insert an already 'dead' hash, blocking
 | 
			
		||||
	   further use of that particular connection -JM */
 | 
			
		||||
 | 
			
		||||
	if (unlikely(nf_ct_is_dying(ct))) {
 | 
			
		||||
		spin_unlock_bh(&nf_conntrack_lock);
 | 
			
		||||
		nf_conntrack_double_unlock(hash, reply_hash);
 | 
			
		||||
		local_bh_enable();
 | 
			
		||||
		return NF_ACCEPT;
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			@ -618,8 +689,9 @@ __nf_conntrack_confirm(struct sk_buff *skb)
 | 
			
		|||
	 * stores are visible.
 | 
			
		||||
	 */
 | 
			
		||||
	__nf_conntrack_hash_insert(ct, hash, reply_hash);
 | 
			
		||||
	nf_conntrack_double_unlock(hash, reply_hash);
 | 
			
		||||
	NF_CT_STAT_INC(net, insert);
 | 
			
		||||
	spin_unlock_bh(&nf_conntrack_lock);
 | 
			
		||||
	local_bh_enable();
 | 
			
		||||
 | 
			
		||||
	help = nfct_help(ct);
 | 
			
		||||
	if (help && help->helper)
 | 
			
		||||
| 
						 | 
				
			
			@ -630,8 +702,9 @@ __nf_conntrack_confirm(struct sk_buff *skb)
 | 
			
		|||
	return NF_ACCEPT;
 | 
			
		||||
 | 
			
		||||
out:
 | 
			
		||||
	nf_conntrack_double_unlock(hash, reply_hash);
 | 
			
		||||
	NF_CT_STAT_INC(net, insert_failed);
 | 
			
		||||
	spin_unlock_bh(&nf_conntrack_lock);
 | 
			
		||||
	local_bh_enable();
 | 
			
		||||
	return NF_DROP;
 | 
			
		||||
}
 | 
			
		||||
EXPORT_SYMBOL_GPL(__nf_conntrack_confirm);
 | 
			
		||||
| 
						 | 
				
			
			@ -674,39 +747,48 @@ EXPORT_SYMBOL_GPL(nf_conntrack_tuple_taken);
 | 
			
		|||
 | 
			
		||||
/* There's a small race here where we may free a just-assured
 | 
			
		||||
   connection.  Too bad: we're in trouble anyway. */
 | 
			
		||||
static noinline int early_drop(struct net *net, unsigned int hash)
 | 
			
		||||
static noinline int early_drop(struct net *net, unsigned int _hash)
 | 
			
		||||
{
 | 
			
		||||
	/* Use oldest entry, which is roughly LRU */
 | 
			
		||||
	struct nf_conntrack_tuple_hash *h;
 | 
			
		||||
	struct nf_conn *ct = NULL, *tmp;
 | 
			
		||||
	struct hlist_nulls_node *n;
 | 
			
		||||
	unsigned int i, cnt = 0;
 | 
			
		||||
	unsigned int i = 0, cnt = 0;
 | 
			
		||||
	int dropped = 0;
 | 
			
		||||
	unsigned int hash, sequence;
 | 
			
		||||
	spinlock_t *lockp;
 | 
			
		||||
 | 
			
		||||
	rcu_read_lock();
 | 
			
		||||
	for (i = 0; i < net->ct.htable_size; i++) {
 | 
			
		||||
	local_bh_disable();
 | 
			
		||||
restart:
 | 
			
		||||
	sequence = read_seqcount_begin(&net->ct.generation);
 | 
			
		||||
	hash = hash_bucket(_hash, net);
 | 
			
		||||
	for (; i < net->ct.htable_size; i++) {
 | 
			
		||||
		lockp = &nf_conntrack_locks[hash % CONNTRACK_LOCKS];
 | 
			
		||||
		spin_lock(lockp);
 | 
			
		||||
		if (read_seqcount_retry(&net->ct.generation, sequence)) {
 | 
			
		||||
			spin_unlock(lockp);
 | 
			
		||||
			goto restart;
 | 
			
		||||
		}
 | 
			
		||||
		hlist_nulls_for_each_entry_rcu(h, n, &net->ct.hash[hash],
 | 
			
		||||
					 hnnode) {
 | 
			
		||||
			tmp = nf_ct_tuplehash_to_ctrack(h);
 | 
			
		||||
			if (!test_bit(IPS_ASSURED_BIT, &tmp->status))
 | 
			
		||||
			if (!test_bit(IPS_ASSURED_BIT, &tmp->status) &&
 | 
			
		||||
			    !nf_ct_is_dying(tmp) &&
 | 
			
		||||
			    atomic_inc_not_zero(&tmp->ct_general.use)) {
 | 
			
		||||
				ct = tmp;
 | 
			
		||||
				break;
 | 
			
		||||
			}
 | 
			
		||||
			cnt++;
 | 
			
		||||
		}
 | 
			
		||||
 | 
			
		||||
		if (ct != NULL) {
 | 
			
		||||
			if (likely(!nf_ct_is_dying(ct) &&
 | 
			
		||||
				   atomic_inc_not_zero(&ct->ct_general.use)))
 | 
			
		||||
				break;
 | 
			
		||||
			else
 | 
			
		||||
				ct = NULL;
 | 
			
		||||
		}
 | 
			
		||||
		hash = (hash + 1) % net->ct.htable_size;
 | 
			
		||||
		spin_unlock(lockp);
 | 
			
		||||
 | 
			
		||||
		if (cnt >= NF_CT_EVICTION_RANGE)
 | 
			
		||||
		if (ct || cnt >= NF_CT_EVICTION_RANGE)
 | 
			
		||||
			break;
 | 
			
		||||
 | 
			
		||||
		hash = (hash + 1) % net->ct.htable_size;
 | 
			
		||||
	}
 | 
			
		||||
	rcu_read_unlock();
 | 
			
		||||
	local_bh_enable();
 | 
			
		||||
 | 
			
		||||
	if (!ct)
 | 
			
		||||
		return dropped;
 | 
			
		||||
| 
						 | 
				
			
			@ -755,7 +837,7 @@ __nf_conntrack_alloc(struct net *net, u16 zone,
 | 
			
		|||
 | 
			
		||||
	if (nf_conntrack_max &&
 | 
			
		||||
	    unlikely(atomic_read(&net->ct.count) > nf_conntrack_max)) {
 | 
			
		||||
		if (!early_drop(net, hash_bucket(hash, net))) {
 | 
			
		||||
		if (!early_drop(net, hash)) {
 | 
			
		||||
			atomic_dec(&net->ct.count);
 | 
			
		||||
			net_warn_ratelimited("nf_conntrack: table full, dropping packet\n");
 | 
			
		||||
			return ERR_PTR(-ENOMEM);
 | 
			
		||||
| 
						 | 
				
			
			@ -1304,18 +1386,24 @@ get_next_corpse(struct net *net, int (*iter)(struct nf_conn *i, void *data),
 | 
			
		|||
	struct nf_conn *ct;
 | 
			
		||||
	struct hlist_nulls_node *n;
 | 
			
		||||
	int cpu;
 | 
			
		||||
	spinlock_t *lockp;
 | 
			
		||||
 | 
			
		||||
	spin_lock_bh(&nf_conntrack_lock);
 | 
			
		||||
	for (; *bucket < net->ct.htable_size; (*bucket)++) {
 | 
			
		||||
		hlist_nulls_for_each_entry(h, n, &net->ct.hash[*bucket], hnnode) {
 | 
			
		||||
			if (NF_CT_DIRECTION(h) != IP_CT_DIR_ORIGINAL)
 | 
			
		||||
				continue;
 | 
			
		||||
			ct = nf_ct_tuplehash_to_ctrack(h);
 | 
			
		||||
			if (iter(ct, data))
 | 
			
		||||
				goto found;
 | 
			
		||||
		lockp = &nf_conntrack_locks[*bucket % CONNTRACK_LOCKS];
 | 
			
		||||
		local_bh_disable();
 | 
			
		||||
		spin_lock(lockp);
 | 
			
		||||
		if (*bucket < net->ct.htable_size) {
 | 
			
		||||
			hlist_nulls_for_each_entry(h, n, &net->ct.hash[*bucket], hnnode) {
 | 
			
		||||
				if (NF_CT_DIRECTION(h) != IP_CT_DIR_ORIGINAL)
 | 
			
		||||
					continue;
 | 
			
		||||
				ct = nf_ct_tuplehash_to_ctrack(h);
 | 
			
		||||
				if (iter(ct, data))
 | 
			
		||||
					goto found;
 | 
			
		||||
			}
 | 
			
		||||
		}
 | 
			
		||||
		spin_unlock(lockp);
 | 
			
		||||
		local_bh_enable();
 | 
			
		||||
	}
 | 
			
		||||
	spin_unlock_bh(&nf_conntrack_lock);
 | 
			
		||||
 | 
			
		||||
	for_each_possible_cpu(cpu) {
 | 
			
		||||
		struct ct_pcpu *pcpu = per_cpu_ptr(net->ct.pcpu_lists, cpu);
 | 
			
		||||
| 
						 | 
				
			
			@ -1331,7 +1419,8 @@ get_next_corpse(struct net *net, int (*iter)(struct nf_conn *i, void *data),
 | 
			
		|||
	return NULL;
 | 
			
		||||
found:
 | 
			
		||||
	atomic_inc(&ct->ct_general.use);
 | 
			
		||||
	spin_unlock_bh(&nf_conntrack_lock);
 | 
			
		||||
	spin_unlock(lockp);
 | 
			
		||||
	local_bh_enable();
 | 
			
		||||
	return ct;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			@ -1532,12 +1621,16 @@ int nf_conntrack_set_hashsize(const char *val, struct kernel_param *kp)
 | 
			
		|||
	if (!hash)
 | 
			
		||||
		return -ENOMEM;
 | 
			
		||||
 | 
			
		||||
	local_bh_disable();
 | 
			
		||||
	nf_conntrack_all_lock();
 | 
			
		||||
	write_seqcount_begin(&init_net.ct.generation);
 | 
			
		||||
 | 
			
		||||
	/* Lookups in the old hash might happen in parallel, which means we
 | 
			
		||||
	 * might get false negatives during connection lookup. New connections
 | 
			
		||||
	 * created because of a false negative won't make it into the hash
 | 
			
		||||
	 * though since that required taking the lock.
 | 
			
		||||
	 * though since that required taking the locks.
 | 
			
		||||
	 */
 | 
			
		||||
	spin_lock_bh(&nf_conntrack_lock);
 | 
			
		||||
 | 
			
		||||
	for (i = 0; i < init_net.ct.htable_size; i++) {
 | 
			
		||||
		while (!hlist_nulls_empty(&init_net.ct.hash[i])) {
 | 
			
		||||
			h = hlist_nulls_entry(init_net.ct.hash[i].first,
 | 
			
		||||
| 
						 | 
				
			
			@ -1554,7 +1647,10 @@ int nf_conntrack_set_hashsize(const char *val, struct kernel_param *kp)
 | 
			
		|||
 | 
			
		||||
	init_net.ct.htable_size = nf_conntrack_htable_size = hashsize;
 | 
			
		||||
	init_net.ct.hash = hash;
 | 
			
		||||
	spin_unlock_bh(&nf_conntrack_lock);
 | 
			
		||||
 | 
			
		||||
	write_seqcount_end(&init_net.ct.generation);
 | 
			
		||||
	nf_conntrack_all_unlock();
 | 
			
		||||
	local_bh_enable();
 | 
			
		||||
 | 
			
		||||
	nf_ct_free_hashtable(old_hash, old_size);
 | 
			
		||||
	return 0;
 | 
			
		||||
| 
						 | 
				
			
			@ -1576,7 +1672,10 @@ EXPORT_SYMBOL_GPL(nf_ct_untracked_status_or);
 | 
			
		|||
int nf_conntrack_init_start(void)
 | 
			
		||||
{
 | 
			
		||||
	int max_factor = 8;
 | 
			
		||||
	int ret, cpu;
 | 
			
		||||
	int i, ret, cpu;
 | 
			
		||||
 | 
			
		||||
	for (i = 0; i < ARRAY_SIZE(nf_conntrack_locks); i++)
 | 
			
		||||
		spin_lock_init(&nf_conntrack_locks[i]);
 | 
			
		||||
 | 
			
		||||
	/* Idea from tcp.c: use 1/16384 of memory.  On i386: 32MB
 | 
			
		||||
	 * machine has 512 buckets. >= 1GB machines have 16384 buckets. */
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -423,12 +423,16 @@ static void __nf_conntrack_helper_unregister(struct nf_conntrack_helper *me,
 | 
			
		|||
			unhelp(h, me);
 | 
			
		||||
		spin_unlock_bh(&pcpu->lock);
 | 
			
		||||
	}
 | 
			
		||||
	spin_lock_bh(&nf_conntrack_lock);
 | 
			
		||||
	local_bh_disable();
 | 
			
		||||
	for (i = 0; i < net->ct.htable_size; i++) {
 | 
			
		||||
		hlist_nulls_for_each_entry(h, nn, &net->ct.hash[i], hnnode)
 | 
			
		||||
			unhelp(h, me);
 | 
			
		||||
		spin_lock(&nf_conntrack_locks[i % CONNTRACK_LOCKS]);
 | 
			
		||||
		if (i < net->ct.htable_size) {
 | 
			
		||||
			hlist_nulls_for_each_entry(h, nn, &net->ct.hash[i], hnnode)
 | 
			
		||||
				unhelp(h, me);
 | 
			
		||||
		}
 | 
			
		||||
		spin_unlock(&nf_conntrack_locks[i % CONNTRACK_LOCKS]);
 | 
			
		||||
	}
 | 
			
		||||
	spin_unlock_bh(&nf_conntrack_lock);
 | 
			
		||||
	local_bh_enable();
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
void nf_conntrack_helper_unregister(struct nf_conntrack_helper *me)
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -764,14 +764,23 @@ ctnetlink_dump_table(struct sk_buff *skb, struct netlink_callback *cb)
 | 
			
		|||
	struct nfgenmsg *nfmsg = nlmsg_data(cb->nlh);
 | 
			
		||||
	u_int8_t l3proto = nfmsg->nfgen_family;
 | 
			
		||||
	int res;
 | 
			
		||||
	spinlock_t *lockp;
 | 
			
		||||
 | 
			
		||||
#ifdef CONFIG_NF_CONNTRACK_MARK
 | 
			
		||||
	const struct ctnetlink_dump_filter *filter = cb->data;
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
	spin_lock_bh(&nf_conntrack_lock);
 | 
			
		||||
	last = (struct nf_conn *)cb->args[1];
 | 
			
		||||
 | 
			
		||||
	local_bh_disable();
 | 
			
		||||
	for (; cb->args[0] < net->ct.htable_size; cb->args[0]++) {
 | 
			
		||||
restart:
 | 
			
		||||
		lockp = &nf_conntrack_locks[cb->args[0] % CONNTRACK_LOCKS];
 | 
			
		||||
		spin_lock(lockp);
 | 
			
		||||
		if (cb->args[0] >= net->ct.htable_size) {
 | 
			
		||||
			spin_unlock(lockp);
 | 
			
		||||
			goto out;
 | 
			
		||||
		}
 | 
			
		||||
		hlist_nulls_for_each_entry(h, n, &net->ct.hash[cb->args[0]],
 | 
			
		||||
					 hnnode) {
 | 
			
		||||
			if (NF_CT_DIRECTION(h) != IP_CT_DIR_ORIGINAL)
 | 
			
		||||
| 
						 | 
				
			
			@ -803,16 +812,18 @@ restart:
 | 
			
		|||
			if (res < 0) {
 | 
			
		||||
				nf_conntrack_get(&ct->ct_general);
 | 
			
		||||
				cb->args[1] = (unsigned long)ct;
 | 
			
		||||
				spin_unlock(lockp);
 | 
			
		||||
				goto out;
 | 
			
		||||
			}
 | 
			
		||||
		}
 | 
			
		||||
		spin_unlock(lockp);
 | 
			
		||||
		if (cb->args[1]) {
 | 
			
		||||
			cb->args[1] = 0;
 | 
			
		||||
			goto restart;
 | 
			
		||||
		}
 | 
			
		||||
	}
 | 
			
		||||
out:
 | 
			
		||||
	spin_unlock_bh(&nf_conntrack_lock);
 | 
			
		||||
	local_bh_enable();
 | 
			
		||||
	if (last)
 | 
			
		||||
		nf_ct_put(last);
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue