 3708983452
			
		
	
	
	3708983452
	
	
	
		
			
			The only valid use of preempt_enable_no_resched() is if the very next line is schedule() or if we know preemption cannot actually be enabled by that statement due to other known preempt_count 'refs'. This busy_poll stuff looks to be completely and utterly broken: sched_clock() can return utter garbage with interrupts enabled (rare but still) and it can drift unbounded between CPUs. This means that if you get preempted/migrated and your new CPU is years behind the previous CPU we get to busy spin for a _very_ long time. There is a _REASON_ sched_clock() warns about preemptibility - papering over it with a preempt_disable()/preempt_enable_no_resched() is just terminal brain damage on so many levels. Replace sched_clock() usage with local_clock() which has a bounded drift between CPUs (<2 jiffies). There is a further problem with the entire busy wait poll thing in that the spin time is additive to the syscall timeout, not inclusive. Reviewed-by: Thomas Gleixner <tglx@linutronix.de> Signed-off-by: Peter Zijlstra <peterz@infradead.org> Cc: David S. Miller <davem@davemloft.net> Cc: rui.zhang@intel.com Cc: jacob.jun.pan@linux.intel.com Cc: Mike Galbraith <bitbucket@online.de> Cc: hpa@zytor.com Cc: Arjan van de Ven <arjan@linux.intel.com> Cc: lenb@kernel.org Cc: rjw@rjwysocki.net Cc: Eliezer Tamir <eliezer.tamir@linux.intel.com> Cc: Linus Torvalds <torvalds@linux-foundation.org> Cc: Andrew Morton <akpm@linux-foundation.org> Link: http://lkml.kernel.org/r/20131119151338.GF3694@twins.programming.kicks-ass.net Signed-off-by: Ingo Molnar <mingo@kernel.org>
		
			
				
	
	
		
			169 lines
		
	
	
	
		
			4 KiB
			
		
	
	
	
		
			C
		
	
	
	
	
	
			
		
		
	
	
			169 lines
		
	
	
	
		
			4 KiB
			
		
	
	
	
		
			C
		
	
	
	
	
	
| /*
 | |
|  * net busy poll support
 | |
|  * Copyright(c) 2013 Intel Corporation.
 | |
|  *
 | |
|  * This program is free software; you can redistribute it and/or modify it
 | |
|  * under the terms and conditions of the GNU General Public License,
 | |
|  * version 2, as published by the Free Software Foundation.
 | |
|  *
 | |
|  * This program is distributed in the hope it will be useful, but WITHOUT
 | |
|  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 | |
|  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 | |
|  * more details.
 | |
|  *
 | |
|  * You should have received a copy of the GNU General Public License along with
 | |
|  * this program; if not, write to the Free Software Foundation, Inc.,
 | |
|  * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
 | |
|  *
 | |
|  * Author: Eliezer Tamir
 | |
|  *
 | |
|  * Contact Information:
 | |
|  * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
 | |
|  */
 | |
| 
 | |
| #ifndef _LINUX_NET_BUSY_POLL_H
 | |
| #define _LINUX_NET_BUSY_POLL_H
 | |
| 
 | |
| #include <linux/netdevice.h>
 | |
| #include <net/ip.h>
 | |
| 
 | |
| #ifdef CONFIG_NET_RX_BUSY_POLL
 | |
| 
 | |
| struct napi_struct;
 | |
| extern unsigned int sysctl_net_busy_read __read_mostly;
 | |
| extern unsigned int sysctl_net_busy_poll __read_mostly;
 | |
| 
 | |
/*
 * Special negative return values from a driver's ndo_busy_poll() hook.
 * sk_busy_loop() stops polling for good when it sees LL_FLUSH_FAILED;
 * any positive return is accounted as a number of received packets.
 *
 * The values are parenthesized so the macros expand safely inside any
 * surrounding expression.
 */
#define LL_FLUSH_FAILED		(-1)
#define LL_FLUSH_BUSY		(-2)
 | |
| 
 | |
| static inline bool net_busy_loop_on(void)
 | |
| {
 | |
| 	return sysctl_net_busy_poll;
 | |
| }
 | |
| 
 | |
| static inline u64 busy_loop_us_clock(void)
 | |
| {
 | |
| 	return local_clock() >> 10;
 | |
| }
 | |
| 
 | |
| static inline unsigned long sk_busy_loop_end_time(struct sock *sk)
 | |
| {
 | |
| 	return busy_loop_us_clock() + ACCESS_ONCE(sk->sk_ll_usec);
 | |
| }
 | |
| 
 | |
| /* in poll/select we use the global sysctl_net_ll_poll value */
 | |
| static inline unsigned long busy_loop_end_time(void)
 | |
| {
 | |
| 	return busy_loop_us_clock() + ACCESS_ONCE(sysctl_net_busy_poll);
 | |
| }
 | |
| 
 | |
| static inline bool sk_can_busy_loop(struct sock *sk)
 | |
| {
 | |
| 	return sk->sk_ll_usec && sk->sk_napi_id &&
 | |
| 	       !need_resched() && !signal_pending(current);
 | |
| }
 | |
| 
 | |
| 
 | |
/*
 * Return true once the busy-poll clock has moved past @end_time.
 * The comparison is delegated to time_after().
 */
static inline bool busy_loop_timeout(unsigned long end_time)
{
	unsigned long cur_us;

	cur_us = busy_loop_us_clock();
	return time_after(cur_us, end_time);
}
 | |
| 
 | |
| /* when used in sock_poll() nonblock is known at compile time to be true
 | |
|  * so the loop and end_time will be optimized out
 | |
|  */
 | |
| static inline bool sk_busy_loop(struct sock *sk, int nonblock)
 | |
| {
 | |
| 	unsigned long end_time = !nonblock ? sk_busy_loop_end_time(sk) : 0;
 | |
| 	const struct net_device_ops *ops;
 | |
| 	struct napi_struct *napi;
 | |
| 	int rc = false;
 | |
| 
 | |
| 	/*
 | |
| 	 * rcu read lock for napi hash
 | |
| 	 * bh so we don't race with net_rx_action
 | |
| 	 */
 | |
| 	rcu_read_lock_bh();
 | |
| 
 | |
| 	napi = napi_by_id(sk->sk_napi_id);
 | |
| 	if (!napi)
 | |
| 		goto out;
 | |
| 
 | |
| 	ops = napi->dev->netdev_ops;
 | |
| 	if (!ops->ndo_busy_poll)
 | |
| 		goto out;
 | |
| 
 | |
| 	do {
 | |
| 		rc = ops->ndo_busy_poll(napi);
 | |
| 
 | |
| 		if (rc == LL_FLUSH_FAILED)
 | |
| 			break; /* permanent failure */
 | |
| 
 | |
| 		if (rc > 0)
 | |
| 			/* local bh are disabled so it is ok to use _BH */
 | |
| 			NET_ADD_STATS_BH(sock_net(sk),
 | |
| 					 LINUX_MIB_BUSYPOLLRXPACKETS, rc);
 | |
| 		cpu_relax();
 | |
| 
 | |
| 	} while (!nonblock && skb_queue_empty(&sk->sk_receive_queue) &&
 | |
| 		 !need_resched() && !busy_loop_timeout(end_time));
 | |
| 
 | |
| 	rc = !skb_queue_empty(&sk->sk_receive_queue);
 | |
| out:
 | |
| 	rcu_read_unlock_bh();
 | |
| 	return rc;
 | |
| }
 | |
| 
 | |
| /* used in the NIC receive handler to mark the skb */
 | |
| static inline void skb_mark_napi_id(struct sk_buff *skb,
 | |
| 				    struct napi_struct *napi)
 | |
| {
 | |
| 	skb->napi_id = napi->napi_id;
 | |
| }
 | |
| 
 | |
| /* used in the protocol hanlder to propagate the napi_id to the socket */
 | |
| static inline void sk_mark_napi_id(struct sock *sk, struct sk_buff *skb)
 | |
| {
 | |
| 	sk->sk_napi_id = skb->napi_id;
 | |
| }
 | |
| 
 | |
| #else /* CONFIG_NET_RX_BUSY_POLL */
 | |
/*
 * Stub for builds without CONFIG_NET_RX_BUSY_POLL: busy polling is never on.
 *
 * Returns bool so the signature matches the CONFIG_NET_RX_BUSY_POLL variant
 * of this function; callers that only test the result for truth are
 * unaffected.
 */
static inline bool net_busy_loop_on(void)
{
	return false;
}
 | |
| 
 | |
/*
 * Stub for builds without CONFIG_NET_RX_BUSY_POLL: there is no deadline to
 * compute, so the result is always zero.
 */
static inline unsigned long busy_loop_end_time(void)
{
	return 0UL;
}
 | |
| 
 | |
/*
 * Stub for builds without CONFIG_NET_RX_BUSY_POLL: no socket is ever
 * eligible for busy polling.  @sk is not examined.
 */
static inline bool sk_can_busy_loop(struct sock *sk)
{
	return false;
}
 | |
| 
 | |
/*
 * Stub for builds without CONFIG_NET_RX_BUSY_POLL: intentionally a no-op,
 * neither argument is touched.
 */
static inline void skb_mark_napi_id(struct sk_buff *skb,
				    struct napi_struct *napi)
{
}
 | |
| 
 | |
/*
 * Stub for builds without CONFIG_NET_RX_BUSY_POLL: intentionally a no-op,
 * neither argument is touched.
 */
static inline void sk_mark_napi_id(struct sock *sk, struct sk_buff *skb)
{
}
 | |
| 
 | |
/*
 * Stub for builds without CONFIG_NET_RX_BUSY_POLL: every deadline is
 * reported as already expired, whatever @end_time is.
 */
static inline bool busy_loop_timeout(unsigned long end_time)
{
	return true;
}
 | |
| 
 | |
/*
 * Stub for builds without CONFIG_NET_RX_BUSY_POLL: no polling is done and
 * false is returned for any @sk and @nonblock.
 */
static inline bool sk_busy_loop(struct sock *sk, int nonblock)
{
	return false;
}
 | |
| 
 | |
| #endif /* CONFIG_NET_RX_BUSY_POLL */
 | |
| #endif /* _LINUX_NET_BUSY_POLL_H */
 |