| 
									
										
										
										
											2005-04-16 15:20:36 -07:00
										 |  |  | /*
 | 
					
						
							|  |  |  |  * INET		An implementation of the TCP/IP protocol suite for the LINUX | 
					
						
							|  |  |  |  *		operating system.  INET is implemented using the  BSD Socket | 
					
						
							|  |  |  |  *		interface as the means of communication with the user level. | 
					
						
							|  |  |  |  * | 
					
						
							|  |  |  |  *		Pseudo-driver for the loopback interface. | 
					
						
							|  |  |  |  * | 
					
						
							|  |  |  |  * Version:	@(#)loopback.c	1.0.4b	08/16/93 | 
					
						
							|  |  |  |  * | 
					
						
							| 
									
										
										
										
											2005-05-05 16:16:16 -07:00
										 |  |  |  * Authors:	Ross Biro | 
					
						
							| 
									
										
										
										
											2005-04-16 15:20:36 -07:00
										 |  |  |  *		Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG> | 
					
						
							|  |  |  |  *		Donald Becker, <becker@scyld.com> | 
					
						
							|  |  |  |  * | 
					
						
							|  |  |  |  *		Alan Cox	:	Fixed oddments for NET3.014 | 
					
						
							|  |  |  |  *		Alan Cox	:	Rejig for NET3.029 snap #3 | 
					
						
							|  |  |  |  *		Alan Cox	: 	Fixed NET3.029 bugs and sped up | 
					
						
							|  |  |  |  *		Larry McVoy	:	Tiny tweak to double performance | 
					
						
							|  |  |  |  *		Alan Cox	:	Backed out LMV's tweak - the linux mm | 
					
						
							|  |  |  |  *					can't take it... | 
					
						
							|  |  |  |  *              Michael Griffith:       Don't bother computing the checksums | 
					
						
							|  |  |  |  *                                      on packets received on the loopback | 
					
						
							|  |  |  |  *                                      interface. | 
					
						
							|  |  |  |  *		Alexey Kuznetsov:	Potential hang under some extreme | 
					
						
							|  |  |  |  *					cases removed. | 
					
						
							|  |  |  |  * | 
					
						
							|  |  |  |  *		This program is free software; you can redistribute it and/or | 
					
						
							|  |  |  |  *		modify it under the terms of the GNU General Public License | 
					
						
							|  |  |  |  *		as published by the Free Software Foundation; either version | 
					
						
							|  |  |  |  *		2 of the License, or (at your option) any later version. | 
					
						
							|  |  |  |  */ | 
					
						
							|  |  |  | #include <linux/kernel.h>
 | 
					
						
							|  |  |  | #include <linux/jiffies.h>
 | 
					
						
							|  |  |  | #include <linux/module.h>
 | 
					
						
							|  |  |  | #include <linux/interrupt.h>
 | 
					
						
							|  |  |  | #include <linux/fs.h>
 | 
					
						
							|  |  |  | #include <linux/types.h>
 | 
					
						
							|  |  |  | #include <linux/string.h>
 | 
					
						
							|  |  |  | #include <linux/socket.h>
 | 
					
						
							|  |  |  | #include <linux/errno.h>
 | 
					
						
							|  |  |  | #include <linux/fcntl.h>
 | 
					
						
							|  |  |  | #include <linux/in.h>
 | 
					
						
							|  |  |  | #include <linux/init.h>
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | #include <asm/system.h>
 | 
					
						
							|  |  |  | #include <asm/uaccess.h>
 | 
					
						
							|  |  |  | #include <asm/io.h>
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | #include <linux/inet.h>
 | 
					
						
							|  |  |  | #include <linux/netdevice.h>
 | 
					
						
							|  |  |  | #include <linux/etherdevice.h>
 | 
					
						
							|  |  |  | #include <linux/skbuff.h>
 | 
					
						
							|  |  |  | #include <linux/ethtool.h>
 | 
					
						
							|  |  |  | #include <net/sock.h>
 | 
					
						
							|  |  |  | #include <net/checksum.h>
 | 
					
						
							|  |  |  | #include <linux/if_ether.h>	/* For the statistics structure. */
 | 
					
						
							|  |  |  | #include <linux/if_arp.h>	/* For ARPHRD_ETHER */
 | 
					
						
							|  |  |  | #include <linux/ip.h>
 | 
					
						
							|  |  |  | #include <linux/tcp.h>
 | 
					
						
							|  |  |  | #include <linux/percpu.h>
 | 
					
						
							| 
									
										
										
										
											2007-09-26 22:10:56 -07:00
										 |  |  | #include <net/net_namespace.h>
 | 
					
						
							| 
									
										
										
										
											2005-04-16 15:20:36 -07:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2006-10-18 20:51:57 -07:00
										 |  |  | struct pcpu_lstats { | 
					
						
							|  |  |  | 	unsigned long packets; | 
					
						
							|  |  |  | 	unsigned long bytes; | 
					
						
							| 
									
										
										
										
											2009-04-17 22:03:10 +00:00
										 |  |  | 	unsigned long drops; | 
					
						
							| 
									
										
										
										
											2006-10-18 20:51:57 -07:00
										 |  |  | }; | 
					
						
							| 
									
										
										
										
											2005-04-16 15:20:36 -07:00
										 |  |  | 
 | 
					
						
							|  |  |  | /*
 | 
					
						
							|  |  |  |  * The higher levels take care of making this non-reentrant (it's | 
					
						
							|  |  |  |  * called with bh's disabled). | 
					
						
							|  |  |  |  */ | 
					
						
							| 
									
										
										
										
											2009-08-31 19:50:58 +00:00
										 |  |  | static netdev_tx_t loopback_xmit(struct sk_buff *skb, | 
					
						
							|  |  |  | 				 struct net_device *dev) | 
					
						
							| 
									
										
										
										
											2005-04-16 15:20:36 -07:00
										 |  |  | { | 
					
						
							| 
									
										
										
										
											2010-02-16 15:21:08 +00:00
										 |  |  | 	struct pcpu_lstats __percpu *pcpu_lstats; | 
					
						
							|  |  |  | 	struct pcpu_lstats *lb_stats; | 
					
						
							| 
									
										
										
										
											2009-04-17 22:03:10 +00:00
										 |  |  | 	int len; | 
					
						
							| 
									
										
										
										
											2005-04-16 15:20:36 -07:00
										 |  |  | 
 | 
					
						
							|  |  |  | 	skb_orphan(skb); | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2009-04-17 22:03:10 +00:00
										 |  |  | 	skb->protocol = eth_type_trans(skb, dev); | 
					
						
							| 
									
										
										
										
											2005-04-16 15:20:36 -07:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2007-09-27 17:09:39 -07:00
										 |  |  | 	/* it's OK to use per_cpu_ptr() because BHs are off */ | 
					
						
							| 
									
										
										
										
											2010-02-16 15:21:08 +00:00
										 |  |  | 	pcpu_lstats = (void __percpu __force *)dev->ml_priv; | 
					
						
							| 
									
										
										
										
											2009-10-03 19:48:22 +09:00
										 |  |  | 	lb_stats = this_cpu_ptr(pcpu_lstats); | 
					
						
							| 
									
										
										
										
											2005-04-16 15:20:36 -07:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2009-04-17 22:03:10 +00:00
										 |  |  | 	len = skb->len; | 
					
						
							|  |  |  | 	if (likely(netif_rx(skb) == NET_RX_SUCCESS)) { | 
					
						
							|  |  |  | 		lb_stats->bytes += len; | 
					
						
							|  |  |  | 		lb_stats->packets++; | 
					
						
							|  |  |  | 	} else | 
					
						
							|  |  |  | 		lb_stats->drops++; | 
					
						
							| 
									
										
										
										
											2005-04-16 15:20:36 -07:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2009-06-23 06:03:08 +00:00
										 |  |  | 	return NETDEV_TX_OK; | 
					
						
							| 
									
										
										
										
											2005-04-16 15:20:36 -07:00
										 |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2008-11-19 21:46:18 -08:00
										 |  |  | static struct net_device_stats *loopback_get_stats(struct net_device *dev) | 
					
						
							| 
									
										
										
										
											2005-04-16 15:20:36 -07:00
										 |  |  | { | 
					
						
							| 
									
										
										
										
											2010-02-16 15:21:08 +00:00
										 |  |  | 	const struct pcpu_lstats __percpu *pcpu_lstats; | 
					
						
							| 
									
										
										
										
											2007-04-10 13:25:40 -07:00
										 |  |  | 	struct net_device_stats *stats = &dev->stats; | 
					
						
							| 
									
										
										
										
											2006-10-18 20:51:57 -07:00
										 |  |  | 	unsigned long bytes = 0; | 
					
						
							|  |  |  | 	unsigned long packets = 0; | 
					
						
							| 
									
										
										
										
											2009-04-17 22:03:10 +00:00
										 |  |  | 	unsigned long drops = 0; | 
					
						
							| 
									
										
										
										
											2005-04-16 15:20:36 -07:00
										 |  |  | 	int i; | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2010-02-16 15:21:08 +00:00
										 |  |  | 	pcpu_lstats = (void __percpu __force *)dev->ml_priv; | 
					
						
							| 
									
										
										
										
											2006-03-28 01:56:37 -08:00
										 |  |  | 	for_each_possible_cpu(i) { | 
					
						
							| 
									
										
										
										
											2006-10-18 20:51:57 -07:00
										 |  |  | 		const struct pcpu_lstats *lb_stats; | 
					
						
							| 
									
										
										
										
											2005-04-16 15:20:36 -07:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2007-09-26 22:08:12 -07:00
										 |  |  | 		lb_stats = per_cpu_ptr(pcpu_lstats, i); | 
					
						
							| 
									
										
										
										
											2006-10-18 20:51:57 -07:00
										 |  |  | 		bytes   += lb_stats->bytes; | 
					
						
							|  |  |  | 		packets += lb_stats->packets; | 
					
						
							| 
									
										
										
										
											2009-04-17 22:03:10 +00:00
										 |  |  | 		drops   += lb_stats->drops; | 
					
						
							| 
									
										
										
										
											2005-04-16 15:20:36 -07:00
										 |  |  | 	} | 
					
						
							| 
									
										
										
										
											2006-10-18 20:51:57 -07:00
										 |  |  | 	stats->rx_packets = packets; | 
					
						
							|  |  |  | 	stats->tx_packets = packets; | 
					
						
							| 
									
										
										
										
											2009-04-17 22:03:10 +00:00
										 |  |  | 	stats->rx_dropped = drops; | 
					
						
							|  |  |  | 	stats->rx_errors  = drops; | 
					
						
							|  |  |  | 	stats->rx_bytes   = bytes; | 
					
						
							|  |  |  | 	stats->tx_bytes   = bytes; | 
					
						
							| 
									
										
										
										
											2005-04-16 15:20:36 -07:00
										 |  |  | 	return stats; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2006-09-27 20:33:34 -07:00
										 |  |  | static u32 always_on(struct net_device *dev) | 
					
						
							| 
									
										
										
										
											2005-04-16 15:20:36 -07:00
										 |  |  | { | 
					
						
							|  |  |  | 	return 1; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2006-09-13 14:30:00 -04:00
										 |  |  | static const struct ethtool_ops loopback_ethtool_ops = { | 
					
						
							| 
									
										
										
										
											2006-09-27 20:33:34 -07:00
										 |  |  | 	.get_link		= always_on, | 
					
						
							| 
									
										
										
										
											2005-04-16 15:20:36 -07:00
										 |  |  | 	.set_tso		= ethtool_op_set_tso, | 
					
						
							| 
									
										
										
										
											2006-09-27 20:33:34 -07:00
										 |  |  | 	.get_tx_csum		= always_on, | 
					
						
							|  |  |  | 	.get_sg			= always_on, | 
					
						
							|  |  |  | 	.get_rx_csum		= always_on, | 
					
						
							| 
									
										
										
										
											2005-04-16 15:20:36 -07:00
										 |  |  | }; | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2007-09-26 22:08:12 -07:00
										 |  |  | static int loopback_dev_init(struct net_device *dev) | 
					
						
							|  |  |  | { | 
					
						
							| 
									
										
										
										
											2010-02-16 15:21:08 +00:00
										 |  |  | 	struct pcpu_lstats __percpu *lstats; | 
					
						
							| 
									
										
										
										
											2007-09-26 22:08:12 -07:00
										 |  |  | 
 | 
					
						
							|  |  |  | 	lstats = alloc_percpu(struct pcpu_lstats); | 
					
						
							|  |  |  | 	if (!lstats) | 
					
						
							|  |  |  | 		return -ENOMEM; | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2010-02-16 15:21:08 +00:00
										 |  |  | 	dev->ml_priv = (void __force *)lstats; | 
					
						
							| 
									
										
										
										
											2007-09-26 22:08:12 -07:00
										 |  |  | 	return 0; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | static void loopback_dev_free(struct net_device *dev) | 
					
						
							|  |  |  | { | 
					
						
							| 
									
										
										
										
											2010-02-16 15:21:08 +00:00
										 |  |  | 	struct pcpu_lstats __percpu *lstats = | 
					
						
							|  |  |  | 		(void __percpu __force *)dev->ml_priv; | 
					
						
							| 
									
										
										
										
											2007-09-26 22:08:12 -07:00
										 |  |  | 
 | 
					
						
							|  |  |  | 	free_percpu(lstats); | 
					
						
							|  |  |  | 	free_netdev(dev); | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2008-11-19 21:46:18 -08:00
										 |  |  | static const struct net_device_ops loopback_ops = { | 
					
						
							|  |  |  | 	.ndo_init      = loopback_dev_init, | 
					
						
							| 
									
										
										
										
											2008-11-20 20:14:53 -08:00
										 |  |  | 	.ndo_start_xmit= loopback_xmit, | 
					
						
							| 
									
										
										
										
											2008-11-19 21:46:18 -08:00
										 |  |  | 	.ndo_get_stats = loopback_get_stats, | 
					
						
							|  |  |  | }; | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2006-09-27 20:33:34 -07:00
										 |  |  | /*
 | 
					
						
							| 
									
										
										
										
											2007-09-27 17:09:39 -07:00
										 |  |  |  * The loopback device is special. There is only one instance | 
					
						
							|  |  |  |  * per network namespace. | 
					
						
							| 
									
										
										
										
											2006-09-27 20:33:34 -07:00
										 |  |  |  */ | 
					
						
							| 
									
										
										
										
											2007-09-25 19:18:04 -07:00
										 |  |  | static void loopback_setup(struct net_device *dev) | 
					
						
							|  |  |  | { | 
					
						
							|  |  |  | 	dev->mtu		= (16 * 1024) + 20 + 20 + 12; | 
					
						
							|  |  |  | 	dev->hard_header_len	= ETH_HLEN;	/* 14	*/ | 
					
						
							|  |  |  | 	dev->addr_len		= ETH_ALEN;	/* 6	*/ | 
					
						
							|  |  |  | 	dev->tx_queue_len	= 0; | 
					
						
							|  |  |  | 	dev->type		= ARPHRD_LOOPBACK;	/* 0x0001*/ | 
					
						
							|  |  |  | 	dev->flags		= IFF_LOOPBACK; | 
					
						
							| 
									
										
											  
											
												net: release dst entry in dev_hard_start_xmit()
One point of contention in high network loads is the dst_release() performed
when a transmited skb is freed. This is because NIC tx completion calls
dev_kree_skb() long after original call to dev_queue_xmit(skb).
CPU cache is cold and the atomic op in dst_release() stalls. On SMP, this is
quite visible if one CPU is 100% handling softirqs for a network device,
since dst_clone() is done by other cpus, involving cache line ping pongs.
It seems right place to release dst is in dev_hard_start_xmit(), for most
devices but ones that are virtual, and some exceptions.
David Miller suggested to define a new device flag, set in alloc_netdev_mq()
(so that most devices set it at init time), and carefuly unset in devices
which dont want a NULL skb->dst in their ndo_start_xmit().
List of devices that must clear this flag is :
- loopback device, because it calls netif_rx() and quoting Patrick :
    "ip_route_input() doesn't accept loopback addresses, so loopback packets
     already need to have a dst_entry attached."
- appletalk/ipddp.c : needs skb->dst in its xmit function
- And all devices that call again dev_queue_xmit() from their xmit function
(as some classifiers need skb->dst) : bonding, vlan, macvlan, eql, ifb, hdlc_fr
Signed-off-by: Eric Dumazet <dada1@cosmosbay.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
											
										 
											2009-05-18 22:19:19 -07:00
										 |  |  | 	dev->priv_flags	       &= ~IFF_XMIT_DST_RELEASE; | 
					
						
							| 
									
										
										
										
											2007-09-25 19:18:04 -07:00
										 |  |  | 	dev->features 		= NETIF_F_SG | NETIF_F_FRAGLIST | 
					
						
							|  |  |  | 		| NETIF_F_TSO | 
					
						
							|  |  |  | 		| NETIF_F_NO_CSUM | 
					
						
							|  |  |  | 		| NETIF_F_HIGHDMA | 
					
						
							|  |  |  | 		| NETIF_F_LLTX | 
					
						
							| 
									
										
										
										
											2007-12-27 08:17:22 -06:00
										 |  |  | 		| NETIF_F_NETNS_LOCAL; | 
					
						
							| 
									
										
										
										
											2007-09-25 19:18:04 -07:00
										 |  |  | 	dev->ethtool_ops	= &loopback_ethtool_ops; | 
					
						
							| 
									
										
										
										
											2007-10-09 01:40:57 -07:00
										 |  |  | 	dev->header_ops		= ð_header_ops; | 
					
						
							| 
									
										
										
										
											2008-11-19 21:46:18 -08:00
										 |  |  | 	dev->netdev_ops		= &loopback_ops; | 
					
						
							|  |  |  | 	dev->destructor		= loopback_dev_free; | 
					
						
							| 
									
										
										
										
											2007-09-25 19:18:04 -07:00
										 |  |  | } | 
					
						
							| 
									
										
										
										
											2007-09-25 19:16:28 -07:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2005-08-18 14:05:18 -07:00
										 |  |  | /* Setup and register the loopback device. */ | 
					
						
							| 
									
										
										
										
											2007-10-08 20:38:39 -07:00
										 |  |  | static __net_init int loopback_net_init(struct net *net) | 
					
						
							| 
									
										
										
										
											2005-04-16 15:20:36 -07:00
										 |  |  | { | 
					
						
							| 
									
										
										
										
											2007-09-25 19:18:04 -07:00
										 |  |  | 	struct net_device *dev; | 
					
						
							|  |  |  | 	int err; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	err = -ENOMEM; | 
					
						
							|  |  |  | 	dev = alloc_netdev(0, "lo", loopback_setup); | 
					
						
							|  |  |  | 	if (!dev) | 
					
						
							|  |  |  | 		goto out; | 
					
						
							| 
									
										
										
										
											2007-07-30 16:37:19 -07:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2008-03-25 21:47:49 +09:00
										 |  |  | 	dev_net_set(dev, net); | 
					
						
							| 
									
										
										
										
											2007-09-25 19:18:04 -07:00
										 |  |  | 	err = register_netdev(dev); | 
					
						
							| 
									
										
										
										
											2007-07-30 16:37:19 -07:00
										 |  |  | 	if (err) | 
					
						
							| 
									
										
										
										
											2007-09-25 19:18:04 -07:00
										 |  |  | 		goto out_free_netdev; | 
					
						
							| 
									
										
										
										
											2007-07-30 16:37:19 -07:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2007-09-26 22:10:56 -07:00
										 |  |  | 	net->loopback_dev = dev; | 
					
						
							| 
									
										
										
										
											2007-10-15 12:55:33 -07:00
										 |  |  | 	return 0; | 
					
						
							| 
									
										
										
										
											2007-09-25 19:18:04 -07:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2005-04-16 15:20:36 -07:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2007-09-25 19:18:04 -07:00
										 |  |  | out_free_netdev: | 
					
						
							|  |  |  | 	free_netdev(dev); | 
					
						
							| 
									
										
										
										
											2007-10-15 12:55:33 -07:00
										 |  |  | out: | 
					
						
							| 
									
										
										
										
											2009-11-25 15:14:13 -08:00
										 |  |  | 	if (net_eq(net, &init_net)) | 
					
						
							| 
									
										
										
										
											2007-10-15 12:55:33 -07:00
										 |  |  | 		panic("loopback: Failed to register netdevice: %d\n", err); | 
					
						
							|  |  |  | 	return err; | 
					
						
							| 
									
										
										
										
											2007-09-25 19:18:04 -07:00
										 |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2008-11-07 22:54:20 -08:00
										 |  |  | /* Registered in net/core/dev.c */ | 
					
						
							|  |  |  | struct pernet_operations __net_initdata loopback_net_ops = { | 
					
						
							| 
									
										
										
										
											2007-09-26 22:10:56 -07:00
										 |  |  |        .init = loopback_net_init, | 
					
						
							|  |  |  | }; |