| 
									
										
											  
											
												rcu: Introduce hlist_nulls variant of hlist
hlist uses NULL value to finish a chain.
The hlist_nulls variant uses the low-order bit, set to 1, to signal an end-of-list marker.
This allows many different end markers to be stored, so that some RCU lockless
algorithms (used in the TCP/UDP stack, for example) can save some memory barriers in
fast paths.
Two new files are added :
include/linux/list_nulls.h
  - mimics hlist part of include/linux/list.h, derived to hlist_nulls variant
include/linux/rculist_nulls.h
  - mimics hlist part of include/linux/rculist.h, derived to hlist_nulls variant
   Only four helpers are declared for the moment :
     hlist_nulls_del_init_rcu(), hlist_nulls_del_rcu(),
     hlist_nulls_add_head_rcu() and hlist_nulls_for_each_entry_rcu()
prefetch() calls were removed, since the end of a list is no longer a NULL value.
They could trigger useless (and possibly dangerous) memory transactions.
Example of use (extracted from __udp4_lib_lookup())
	struct sock *sk, *result;
        struct hlist_nulls_node *node;
        unsigned short hnum = ntohs(dport);
        unsigned int hash = udp_hashfn(net, hnum);
        struct udp_hslot *hslot = &udptable->hash[hash];
        int score, badness;
        rcu_read_lock();
begin:
        result = NULL;
        badness = -1;
        sk_nulls_for_each_rcu(sk, node, &hslot->head) {
                score = compute_score(sk, net, saddr, hnum, sport,
                                      daddr, dport, dif);
                if (score > badness) {
                        result = sk;
                        badness = score;
                }
        }
        /*
         * if the nulls value we got at the end of this lookup is
         * not the expected one, we must restart lookup.
         * We probably met an item that was moved to another chain.
         */
        if (get_nulls_value(node) != hash)
                goto begin;
        if (result) {
                if (unlikely(!atomic_inc_not_zero(&result->sk_refcnt)))
                        result = NULL;
                else if (unlikely(compute_score(result, net, saddr, hnum, sport,
                                  daddr, dport, dif) < badness)) {
                        sock_put(result);
                        goto begin;
                }
        }
        rcu_read_unlock();
        return result;
Signed-off-by: Eric Dumazet <dada1@cosmosbay.com>
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Signed-off-by: David S. Miller <davem@davemloft.net>
											
										 
											2008-11-16 19:37:55 -08:00
										 |  |  | #ifndef _LINUX_RCULIST_NULLS_H
 | 
					
						
							|  |  |  | #define _LINUX_RCULIST_NULLS_H
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | #ifdef __KERNEL__
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | /*
 | 
					
						
							|  |  |  |  * RCU-protected list version | 
					
						
							|  |  |  |  */ | 
					
						
							|  |  |  | #include <linux/list_nulls.h>
 | 
					
						
							|  |  |  | #include <linux/rcupdate.h>
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | /**
 | 
					
						
							|  |  |  |  * hlist_nulls_del_init_rcu - deletes entry from hash list with re-initialization | 
					
						
							|  |  |  |  * @n: the element to delete from the hash list. | 
					
						
							|  |  |  |  * | 
					
						
							|  |  |  |  * Note: hlist_nulls_unhashed() on the node return true after this. It is | 
					
						
							|  |  |  |  * useful for RCU based read lockfree traversal if the writer side | 
					
						
							|  |  |  |  * must know if the list entry is still hashed or already unhashed. | 
					
						
							|  |  |  |  * | 
					
						
							|  |  |  |  * In particular, it means that we can not poison the forward pointers | 
					
						
							|  |  |  |  * that may still be used for walking the hash list and we can only | 
					
						
							|  |  |  |  * zero the pprev pointer so list_unhashed() will return true after | 
					
						
							|  |  |  |  * this. | 
					
						
							|  |  |  |  * | 
					
						
							|  |  |  |  * The caller must take whatever precautions are necessary (such as | 
					
						
							|  |  |  |  * holding appropriate locks) to avoid racing with another | 
					
						
							|  |  |  |  * list-mutation primitive, such as hlist_nulls_add_head_rcu() or | 
					
						
							|  |  |  |  * hlist_nulls_del_rcu(), running on this same list.  However, it is | 
					
						
							|  |  |  |  * perfectly legal to run concurrently with the _rcu list-traversal | 
					
						
							|  |  |  |  * primitives, such as hlist_nulls_for_each_entry_rcu(). | 
					
						
							|  |  |  |  */ | 
					
						
							|  |  |  | static inline void hlist_nulls_del_init_rcu(struct hlist_nulls_node *n) | 
					
						
							|  |  |  | { | 
					
						
							|  |  |  | 	if (!hlist_nulls_unhashed(n)) { | 
					
						
							|  |  |  | 		__hlist_nulls_del(n); | 
					
						
							|  |  |  | 		n->pprev = NULL; | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | /**
 | 
					
						
							|  |  |  |  * hlist_nulls_del_rcu - deletes entry from hash list without re-initialization | 
					
						
							|  |  |  |  * @n: the element to delete from the hash list. | 
					
						
							|  |  |  |  * | 
					
						
							|  |  |  |  * Note: hlist_nulls_unhashed() on entry does not return true after this, | 
					
						
							|  |  |  |  * the entry is in an undefined state. It is useful for RCU based | 
					
						
							|  |  |  |  * lockfree traversal. | 
					
						
							|  |  |  |  * | 
					
						
							|  |  |  |  * In particular, it means that we can not poison the forward | 
					
						
							|  |  |  |  * pointers that may still be used for walking the hash list. | 
					
						
							|  |  |  |  * | 
					
						
							|  |  |  |  * The caller must take whatever precautions are necessary | 
					
						
							|  |  |  |  * (such as holding appropriate locks) to avoid racing | 
					
						
							|  |  |  |  * with another list-mutation primitive, such as hlist_nulls_add_head_rcu() | 
					
						
							|  |  |  |  * or hlist_nulls_del_rcu(), running on this same list. | 
					
						
							|  |  |  |  * However, it is perfectly legal to run concurrently with | 
					
						
							|  |  |  |  * the _rcu list-traversal primitives, such as | 
					
						
							|  |  |  |  * hlist_nulls_for_each_entry(). | 
					
						
							|  |  |  |  */ | 
					
						
							|  |  |  | static inline void hlist_nulls_del_rcu(struct hlist_nulls_node *n) | 
					
						
							|  |  |  | { | 
					
						
							|  |  |  | 	__hlist_nulls_del(n); | 
					
						
							|  |  |  | 	n->pprev = LIST_POISON2; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | /**
 | 
					
						
							|  |  |  |  * hlist_nulls_add_head_rcu | 
					
						
							|  |  |  |  * @n: the element to add to the hash list. | 
					
						
							|  |  |  |  * @h: the list to add to. | 
					
						
							|  |  |  |  * | 
					
						
							|  |  |  |  * Description: | 
					
						
							|  |  |  |  * Adds the specified element to the specified hlist_nulls, | 
					
						
							|  |  |  |  * while permitting racing traversals. | 
					
						
							|  |  |  |  * | 
					
						
							|  |  |  |  * The caller must take whatever precautions are necessary | 
					
						
							|  |  |  |  * (such as holding appropriate locks) to avoid racing | 
					
						
							|  |  |  |  * with another list-mutation primitive, such as hlist_nulls_add_head_rcu() | 
					
						
							|  |  |  |  * or hlist_nulls_del_rcu(), running on this same list. | 
					
						
							|  |  |  |  * However, it is perfectly legal to run concurrently with | 
					
						
							|  |  |  |  * the _rcu list-traversal primitives, such as | 
					
						
							|  |  |  |  * hlist_nulls_for_each_entry_rcu(), used to prevent memory-consistency | 
					
						
							|  |  |  |  * problems on Alpha CPUs.  Regardless of the type of CPU, the | 
					
						
							|  |  |  |  * list-traversal primitive must be guarded by rcu_read_lock(). | 
					
						
							|  |  |  |  */ | 
					
						
							|  |  |  | static inline void hlist_nulls_add_head_rcu(struct hlist_nulls_node *n, | 
					
						
							|  |  |  | 					struct hlist_nulls_head *h) | 
					
						
							|  |  |  | { | 
					
						
							|  |  |  | 	struct hlist_nulls_node *first = h->first; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	n->next = first; | 
					
						
							|  |  |  | 	n->pprev = &h->first; | 
					
						
							|  |  |  | 	rcu_assign_pointer(h->first, n); | 
					
						
							|  |  |  | 	if (!is_a_nulls(first)) | 
					
						
							|  |  |  | 		first->pprev = &n->next; | 
					
						
							|  |  |  | } | 
					
						
							/**
							 * hlist_nulls_for_each_entry_rcu - iterate over rcu list of given type
							 * @tpos:	the type * to use as a loop cursor.
							 * @pos:	the &struct hlist_nulls_node to use as a loop cursor.
							 * @head:	the head for your list.
							 * @member:	the name of the hlist_nulls_node within the struct.
							 *
							 * The walk stops at the first pointer for which is_a_nulls() is true.
							 * When the loop runs to completion, @pos is left holding that value, so
							 * the caller can inspect it afterwards.
							 *
							 * Every macro argument is parenthesised in the expansion, so a
							 * non-trivial lvalue expression (for example "*pp" for @pos) expands
							 * correctly instead of binding to the "->" operator.
							 */
							#define hlist_nulls_for_each_entry_rcu(tpos, pos, head, member)		\
								for ((pos) = rcu_dereference_raw((head)->first);		\
								     (!is_a_nulls(pos)) &&					\
								     ({ (tpos) = hlist_nulls_entry(pos, typeof(*(tpos)), member); 1; }); \
								     (pos) = rcu_dereference_raw((pos)->next))
					
						
							| 
									
										
											  
											
												rcu: Introduce hlist_nulls variant of hlist
hlist uses NULL value to finish a chain.
The hlist_nulls variant uses the low-order bit, set to 1, to signal an end-of-list marker.
This allows many different end markers to be stored, so that some RCU lockless
algorithms (used in the TCP/UDP stack, for example) can save some memory barriers in
fast paths.
Two new files are added :
include/linux/list_nulls.h
  - mimics hlist part of include/linux/list.h, derived to hlist_nulls variant
include/linux/rculist_nulls.h
  - mimics hlist part of include/linux/rculist.h, derived to hlist_nulls variant
   Only four helpers are declared for the moment :
     hlist_nulls_del_init_rcu(), hlist_nulls_del_rcu(),
     hlist_nulls_add_head_rcu() and hlist_nulls_for_each_entry_rcu()
prefetch() calls were removed, since the end of a list is no longer a NULL value.
They could trigger useless (and possibly dangerous) memory transactions.
Example of use (extracted from __udp4_lib_lookup())
	struct sock *sk, *result;
        struct hlist_nulls_node *node;
        unsigned short hnum = ntohs(dport);
        unsigned int hash = udp_hashfn(net, hnum);
        struct udp_hslot *hslot = &udptable->hash[hash];
        int score, badness;
        rcu_read_lock();
begin:
        result = NULL;
        badness = -1;
        sk_nulls_for_each_rcu(sk, node, &hslot->head) {
                score = compute_score(sk, net, saddr, hnum, sport,
                                      daddr, dport, dif);
                if (score > badness) {
                        result = sk;
                        badness = score;
                }
        }
        /*
         * if the nulls value we got at the end of this lookup is
         * not the expected one, we must restart lookup.
         * We probably met an item that was moved to another chain.
         */
        if (get_nulls_value(node) != hash)
                goto begin;
        if (result) {
                if (unlikely(!atomic_inc_not_zero(&result->sk_refcnt)))
                        result = NULL;
                else if (unlikely(compute_score(result, net, saddr, hnum, sport,
                                  daddr, dport, dif) < badness)) {
                        sock_put(result);
                        goto begin;
                }
        }
        rcu_read_unlock();
        return result;
Signed-off-by: Eric Dumazet <dada1@cosmosbay.com>
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Signed-off-by: David S. Miller <davem@davemloft.net>
											
										 
											2008-11-16 19:37:55 -08:00
										 |  |  | 
 | 
					
						
							|  |  |  | #endif
 | 
					
						
							|  |  |  | #endif
 |