 b07c26511e
			
		
	
	
	b07c26511e
	
	
	
		
			
			The combination of two commits: commit8e4e1713e4("openvswitch: Simplify datapath locking.") commit2537b4dd0a("openvswitch:: link upper device for port devices") introduced a bug where upper_dev wasn't unlinked upon netdev_unregister notification The following steps: modprobe openvswitch ovs-dpctl add-dp test ip tuntap add dev tap1 mode tap ovs-dpctl add-if test tap1 ip tuntap del dev tap1 mode tap are causing multiple warnings: [ 62.747557] gre: GRE over IPv4 demultiplexor driver [ 62.749579] openvswitch: Open vSwitch switching datapath [ 62.755087] device test entered promiscuous mode [ 62.765911] device tap1 entered promiscuous mode [ 62.766033] IPv6: ADDRCONF(NETDEV_UP): tap1: link is not ready [ 62.769017] ------------[ cut here ]------------ [ 62.769022] WARNING: CPU: 1 PID: 3267 at net/core/dev.c:5501 rollback_registered_many+0x20f/0x240() [ 62.769023] Modules linked in: openvswitch gre vxlan ip_tunnel libcrc32c ip6table_filter ip6_tables ebtable_nat ebtables nf_conntrack_ipv4 nf_defrag_ipv4 xt_state nf_conntrack xt_CHECKSUM iptable_mangle ipt_REJECT xt_tcpudp iptable_filter ip_tables x_tables bridge stp llc vhost_net macvtap macvlan vhost kvm_intel kvm dm_crypt iscsi_tcp libiscsi_tcp libiscsi scsi_transport_iscsi hid_generic mxm_wmi eeepc_wmi asus_wmi sparse_keymap dm_multipath psmouse serio_raw usbhid hid parport_pc ppdev firewire_ohci lpc_ich firewire_core e1000e crc_itu_t binfmt_misc igb dca ptp pps_core mac_hid wmi lp parport i2o_config i2o_block video [ 62.769051] CPU: 1 PID: 3267 Comm: ip Not tainted 3.12.0-rc3+ #60 [ 62.769052] Hardware name: System manufacturer System Product Name/P8Z77 WS, BIOS 3007 07/26/2012 [ 62.769053] 0000000000000009 ffff8807f25cbd28 ffffffff8175e575 0000000000000006 [ 62.769055] 0000000000000000 ffff8807f25cbd68 ffffffff8105314c ffff8807f25cbd58 [ 62.769057] ffff8807f2634000 ffff8807f25cbdc8 ffff8807f25cbd88 ffff8807f25cbdc8 [ 62.769059] Call Trace: [ 62.769062] [<ffffffff8175e575>] dump_stack+0x55/0x76 [ 62.769065] [<ffffffff8105314c>] warn_slowpath_common+0x8c/0xc0 [ 62.769067] [<ffffffff8105319a>] warn_slowpath_null+0x1a/0x20 [ 62.769069] [<ffffffff8162a04f>] rollback_registered_many+0x20f/0x240 [ 62.769071] [<ffffffff8162a101>] rollback_registered+0x31/0x40 [ 62.769073] [<ffffffff8162a488>] unregister_netdevice_queue+0x58/0x90 [ 62.769075] [<ffffffff8154f900>] __tun_detach+0x140/0x340 [ 62.769077] [<ffffffff8154fb36>] tun_chr_close+0x36/0x60 [ 62.769080] [<ffffffff811bddaf>] __fput+0xff/0x260 [ 62.769082] [<ffffffff811bdf5e>] ____fput+0xe/0x10 [ 62.769084] [<ffffffff8107b515>] task_work_run+0xb5/0xe0 [ 62.769087] [<ffffffff810029b9>] do_notify_resume+0x59/0x80 [ 62.769089] [<ffffffff813a41fe>] ? trace_hardirqs_on_thunk+0x3a/0x3f [ 62.769091] [<ffffffff81770f5a>] int_signal+0x12/0x17 [ 62.769093] ---[ end trace 838756c62e156ffb ]--- [ 62.769481] ------------[ cut here ]------------ [ 62.769485] WARNING: CPU: 1 PID: 92 at fs/sysfs/inode.c:325 sysfs_hash_and_remove+0xa9/0xb0() [ 62.769486] sysfs: can not remove 'master', no directory [ 62.769486] Modules linked in: openvswitch gre vxlan ip_tunnel libcrc32c ip6table_filter ip6_tables ebtable_nat ebtables nf_conntrack_ipv4 nf_defrag_ipv4 xt_state nf_conntrack xt_CHECKSUM iptable_mangle ipt_REJECT xt_tcpudp iptable_filter ip_tables x_tables bridge stp llc vhost_net macvtap macvlan vhost kvm_intel kvm dm_crypt iscsi_tcp libiscsi_tcp libiscsi scsi_transport_iscsi hid_generic mxm_wmi eeepc_wmi asus_wmi sparse_keymap dm_multipath psmouse serio_raw usbhid hid parport_pc ppdev firewire_ohci lpc_ich firewire_core e1000e crc_itu_t binfmt_misc igb dca ptp pps_core mac_hid wmi lp parport i2o_config i2o_block video [ 62.769514] CPU: 1 PID: 92 Comm: kworker/1:2 Tainted: G W 3.12.0-rc3+ #60 [ 62.769515] Hardware name: System manufacturer System Product Name/P8Z77 WS, BIOS 3007 07/26/2012 [ 62.769518] Workqueue: events ovs_dp_notify_wq [openvswitch] [ 62.769519] 0000000000000009 ffff880807ad3ac8 ffffffff8175e575 0000000000000006 [ 62.769521] ffff880807ad3b18 ffff880807ad3b08 ffffffff8105314c ffff880807ad3b28 [ 62.769523] 0000000000000000 ffffffff81a87a1f ffff8807f2634000 ffff880037038500 [ 62.769525] Call Trace: [ 62.769528] [<ffffffff8175e575>] dump_stack+0x55/0x76 [ 62.769529] [<ffffffff8105314c>] warn_slowpath_common+0x8c/0xc0 [ 62.769531] [<ffffffff81053236>] warn_slowpath_fmt+0x46/0x50 [ 62.769533] [<ffffffff8123e7e9>] sysfs_hash_and_remove+0xa9/0xb0 [ 62.769535] [<ffffffff81240e96>] sysfs_remove_link+0x26/0x30 [ 62.769538] [<ffffffff81631ef7>] __netdev_adjacent_dev_remove+0xf7/0x150 [ 62.769540] [<ffffffff81632037>] __netdev_adjacent_dev_unlink_lists+0x27/0x50 [ 62.769542] [<ffffffff8163213a>] __netdev_adjacent_dev_unlink_neighbour+0x3a/0x50 [ 62.769544] [<ffffffff8163218d>] netdev_upper_dev_unlink+0x3d/0x140 [ 62.769548] [<ffffffffa033c2db>] netdev_destroy+0x4b/0x80 [openvswitch] [ 62.769550] [<ffffffffa033b696>] ovs_vport_del+0x46/0x60 [openvswitch] [ 62.769552] [<ffffffffa0335314>] ovs_dp_detach_port+0x44/0x60 [openvswitch] [ 62.769555] [<ffffffffa0336574>] ovs_dp_notify_wq+0xb4/0x150 [openvswitch] [ 62.769557] [<ffffffff81075c28>] process_one_work+0x1d8/0x6a0 [ 62.769559] [<ffffffff81075bc8>] ? process_one_work+0x178/0x6a0 [ 62.769562] [<ffffffff8107659b>] worker_thread+0x11b/0x370 [ 62.769564] [<ffffffff81076480>] ? rescuer_thread+0x350/0x350 [ 62.769566] [<ffffffff8107f44a>] kthread+0xea/0xf0 [ 62.769568] [<ffffffff8107f360>] ? flush_kthread_worker+0x150/0x150 [ 62.769570] [<ffffffff81770bac>] ret_from_fork+0x7c/0xb0 [ 62.769572] [<ffffffff8107f360>] ? flush_kthread_worker+0x150/0x150 [ 62.769573] ---[ end trace 838756c62e156ffc ]--- [ 62.769574] ------------[ cut here ]------------ [ 62.769576] WARNING: CPU: 1 PID: 92 at fs/sysfs/inode.c:325 sysfs_hash_and_remove+0xa9/0xb0() [ 62.769577] sysfs: can not remove 'upper_test', no directory [ 62.769577] Modules linked in: openvswitch gre vxlan ip_tunnel libcrc32c ip6table_filter ip6_tables ebtable_nat ebtables nf_conntrack_ipv4 nf_defrag_ipv4 xt_state nf_conntrack xt_CHECKSUM iptable_mangle ipt_REJECT xt_tcpudp iptable_filter ip_tables x_tables bridge stp llc vhost_net macvtap macvlan vhost kvm_intel kvm dm_crypt iscsi_tcp libiscsi_tcp libiscsi scsi_transport_iscsi hid_generic mxm_wmi eeepc_wmi asus_wmi sparse_keymap dm_multipath psmouse serio_raw usbhid hid parport_pc ppdev firewire_ohci lpc_ich firewire_core e1000e crc_itu_t binfmt_misc igb dca ptp pps_core mac_hid wmi lp parport i2o_config i2o_block video [ 62.769603] CPU: 1 PID: 92 Comm: kworker/1:2 Tainted: G W 3.12.0-rc3+ #60 [ 62.769604] Hardware name: System manufacturer System Product Name/P8Z77 WS, BIOS 3007 07/26/2012 [ 62.769606] Workqueue: events ovs_dp_notify_wq [openvswitch] [ 62.769607] 0000000000000009 ffff880807ad3ac8 ffffffff8175e575 0000000000000006 [ 62.769609] ffff880807ad3b18 ffff880807ad3b08 ffffffff8105314c ffff880807ad3b58 [ 62.769611] 0000000000000000 ffff880807ad3bd9 ffff8807f2634000 ffff880037038500 [ 62.769613] Call Trace: [ 62.769615] [<ffffffff8175e575>] dump_stack+0x55/0x76 [ 62.769617] [<ffffffff8105314c>] warn_slowpath_common+0x8c/0xc0 [ 62.769619] [<ffffffff81053236>] warn_slowpath_fmt+0x46/0x50 [ 62.769621] [<ffffffff8123e7e9>] sysfs_hash_and_remove+0xa9/0xb0 [ 62.769622] [<ffffffff81240e96>] sysfs_remove_link+0x26/0x30 [ 62.769624] [<ffffffff81631f22>] __netdev_adjacent_dev_remove+0x122/0x150 [ 62.769627] [<ffffffff81632037>] __netdev_adjacent_dev_unlink_lists+0x27/0x50 [ 62.769629] [<ffffffff8163213a>] __netdev_adjacent_dev_unlink_neighbour+0x3a/0x50 [ 62.769631] [<ffffffff8163218d>] netdev_upper_dev_unlink+0x3d/0x140 [ 62.769633] [<ffffffffa033c2db>] netdev_destroy+0x4b/0x80 [openvswitch] [ 62.769636] [<ffffffffa033b696>] ovs_vport_del+0x46/0x60 [openvswitch] [ 62.769638] [<ffffffffa0335314>] ovs_dp_detach_port+0x44/0x60 [openvswitch] [ 62.769640] [<ffffffffa0336574>] ovs_dp_notify_wq+0xb4/0x150 [openvswitch] [ 62.769642] [<ffffffff81075c28>] process_one_work+0x1d8/0x6a0 [ 62.769644] [<ffffffff81075bc8>] ? process_one_work+0x178/0x6a0 [ 62.769646] [<ffffffff8107659b>] worker_thread+0x11b/0x370 [ 62.769648] [<ffffffff81076480>] ? rescuer_thread+0x350/0x350 [ 62.769650] [<ffffffff8107f44a>] kthread+0xea/0xf0 [ 62.769652] [<ffffffff8107f360>] ? flush_kthread_worker+0x150/0x150 [ 62.769654] [<ffffffff81770bac>] ret_from_fork+0x7c/0xb0 [ 62.769656] [<ffffffff8107f360>] ? flush_kthread_worker+0x150/0x150 [ 62.769657] ---[ end trace 838756c62e156ffd ]--- [ 62.769724] device tap1 left promiscuous mode This patch also affects moving devices between net namespaces. OVS used to ignore netns move notifications which caused problems. Like: ovs-dpctl add-if test tap1 ip link set tap1 netns 3512 and then removing tap1 inside the namespace will cause hang on missing dev_put. With this patch OVS will detach dev upon receiving netns move event. Signed-off-by: Alexei Starovoitov <ast@plumgrid.com> Signed-off-by: Jesse Gross <jesse@nicira.com>
		
			
				
	
	
		
			233 lines
		
	
	
	
		
			5.6 KiB
			
		
	
	
	
		
			C
		
	
	
	
	
	
			
		
		
	
	
			233 lines
		
	
	
	
		
			5.6 KiB
			
		
	
	
	
		
			C
		
	
	
	
	
	
| /*
 | |
|  * Copyright (c) 2007-2012 Nicira, Inc.
 | |
|  *
 | |
|  * This program is free software; you can redistribute it and/or
 | |
|  * modify it under the terms of version 2 of the GNU General Public
 | |
|  * License as published by the Free Software Foundation.
 | |
|  *
 | |
|  * This program is distributed in the hope that it will be useful, but
 | |
|  * WITHOUT ANY WARRANTY; without even the implied warranty of
 | |
|  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 | |
|  * General Public License for more details.
 | |
|  *
 | |
|  * You should have received a copy of the GNU General Public License
 | |
|  * along with this program; if not, write to the Free Software
 | |
|  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
 | |
|  * 02110-1301, USA
 | |
|  */
 | |
| 
 | |
| #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 | |
| 
 | |
| #include <linux/if_arp.h>
 | |
| #include <linux/if_bridge.h>
 | |
| #include <linux/if_vlan.h>
 | |
| #include <linux/kernel.h>
 | |
| #include <linux/llc.h>
 | |
| #include <linux/rtnetlink.h>
 | |
| #include <linux/skbuff.h>
 | |
| #include <linux/openvswitch.h>
 | |
| 
 | |
| #include <net/llc.h>
 | |
| 
 | |
| #include "datapath.h"
 | |
| #include "vport-internal_dev.h"
 | |
| #include "vport-netdev.h"
 | |
| 
 | |
| /* Must be called with rcu_read_lock. */
 | |
| static void netdev_port_receive(struct vport *vport, struct sk_buff *skb)
 | |
| {
 | |
| 	if (unlikely(!vport))
 | |
| 		goto error;
 | |
| 
 | |
| 	if (unlikely(skb_warn_if_lro(skb)))
 | |
| 		goto error;
 | |
| 
 | |
| 	/* Make our own copy of the packet.  Otherwise we will mangle the
 | |
| 	 * packet for anyone who came before us (e.g. tcpdump via AF_PACKET).
 | |
| 	 */
 | |
| 	skb = skb_share_check(skb, GFP_ATOMIC);
 | |
| 	if (unlikely(!skb))
 | |
| 		return;
 | |
| 
 | |
| 	skb_push(skb, ETH_HLEN);
 | |
| 	ovs_skb_postpush_rcsum(skb, skb->data, ETH_HLEN);
 | |
| 
 | |
| 	ovs_vport_receive(vport, skb, NULL);
 | |
| 	return;
 | |
| 
 | |
| error:
 | |
| 	kfree_skb(skb);
 | |
| }
 | |
| 
 | |
| /* Called with rcu_read_lock and bottom-halves disabled. */
 | |
| static rx_handler_result_t netdev_frame_hook(struct sk_buff **pskb)
 | |
| {
 | |
| 	struct sk_buff *skb = *pskb;
 | |
| 	struct vport *vport;
 | |
| 
 | |
| 	if (unlikely(skb->pkt_type == PACKET_LOOPBACK))
 | |
| 		return RX_HANDLER_PASS;
 | |
| 
 | |
| 	vport = ovs_netdev_get_vport(skb->dev);
 | |
| 
 | |
| 	netdev_port_receive(vport, skb);
 | |
| 
 | |
| 	return RX_HANDLER_CONSUMED;
 | |
| }
 | |
| 
 | |
| static struct net_device *get_dpdev(struct datapath *dp)
 | |
| {
 | |
| 	struct vport *local;
 | |
| 
 | |
| 	local = ovs_vport_ovsl(dp, OVSP_LOCAL);
 | |
| 	BUG_ON(!local);
 | |
| 	return netdev_vport_priv(local)->dev;
 | |
| }
 | |
| 
 | |
| static struct vport *netdev_create(const struct vport_parms *parms)
 | |
| {
 | |
| 	struct vport *vport;
 | |
| 	struct netdev_vport *netdev_vport;
 | |
| 	int err;
 | |
| 
 | |
| 	vport = ovs_vport_alloc(sizeof(struct netdev_vport),
 | |
| 				&ovs_netdev_vport_ops, parms);
 | |
| 	if (IS_ERR(vport)) {
 | |
| 		err = PTR_ERR(vport);
 | |
| 		goto error;
 | |
| 	}
 | |
| 
 | |
| 	netdev_vport = netdev_vport_priv(vport);
 | |
| 
 | |
| 	netdev_vport->dev = dev_get_by_name(ovs_dp_get_net(vport->dp), parms->name);
 | |
| 	if (!netdev_vport->dev) {
 | |
| 		err = -ENODEV;
 | |
| 		goto error_free_vport;
 | |
| 	}
 | |
| 
 | |
| 	if (netdev_vport->dev->flags & IFF_LOOPBACK ||
 | |
| 	    netdev_vport->dev->type != ARPHRD_ETHER ||
 | |
| 	    ovs_is_internal_dev(netdev_vport->dev)) {
 | |
| 		err = -EINVAL;
 | |
| 		goto error_put;
 | |
| 	}
 | |
| 
 | |
| 	rtnl_lock();
 | |
| 	err = netdev_master_upper_dev_link(netdev_vport->dev,
 | |
| 					   get_dpdev(vport->dp));
 | |
| 	if (err)
 | |
| 		goto error_unlock;
 | |
| 
 | |
| 	err = netdev_rx_handler_register(netdev_vport->dev, netdev_frame_hook,
 | |
| 					 vport);
 | |
| 	if (err)
 | |
| 		goto error_master_upper_dev_unlink;
 | |
| 
 | |
| 	dev_set_promiscuity(netdev_vport->dev, 1);
 | |
| 	netdev_vport->dev->priv_flags |= IFF_OVS_DATAPATH;
 | |
| 	rtnl_unlock();
 | |
| 
 | |
| 	return vport;
 | |
| 
 | |
| error_master_upper_dev_unlink:
 | |
| 	netdev_upper_dev_unlink(netdev_vport->dev, get_dpdev(vport->dp));
 | |
| error_unlock:
 | |
| 	rtnl_unlock();
 | |
| error_put:
 | |
| 	dev_put(netdev_vport->dev);
 | |
| error_free_vport:
 | |
| 	ovs_vport_free(vport);
 | |
| error:
 | |
| 	return ERR_PTR(err);
 | |
| }
 | |
| 
 | |
| static void free_port_rcu(struct rcu_head *rcu)
 | |
| {
 | |
| 	struct netdev_vport *netdev_vport = container_of(rcu,
 | |
| 					struct netdev_vport, rcu);
 | |
| 
 | |
| 	dev_put(netdev_vport->dev);
 | |
| 	ovs_vport_free(vport_from_priv(netdev_vport));
 | |
| }
 | |
| 
 | |
| void ovs_netdev_detach_dev(struct vport *vport)
 | |
| {
 | |
| 	struct netdev_vport *netdev_vport = netdev_vport_priv(vport);
 | |
| 
 | |
| 	ASSERT_RTNL();
 | |
| 	netdev_vport->dev->priv_flags &= ~IFF_OVS_DATAPATH;
 | |
| 	netdev_rx_handler_unregister(netdev_vport->dev);
 | |
| 	netdev_upper_dev_unlink(netdev_vport->dev,
 | |
| 				netdev_master_upper_dev_get(netdev_vport->dev));
 | |
| 	dev_set_promiscuity(netdev_vport->dev, -1);
 | |
| }
 | |
| 
 | |
| static void netdev_destroy(struct vport *vport)
 | |
| {
 | |
| 	struct netdev_vport *netdev_vport = netdev_vport_priv(vport);
 | |
| 
 | |
| 	rtnl_lock();
 | |
| 	if (netdev_vport->dev->priv_flags & IFF_OVS_DATAPATH)
 | |
| 		ovs_netdev_detach_dev(vport);
 | |
| 	rtnl_unlock();
 | |
| 
 | |
| 	call_rcu(&netdev_vport->rcu, free_port_rcu);
 | |
| }
 | |
| 
 | |
| const char *ovs_netdev_get_name(const struct vport *vport)
 | |
| {
 | |
| 	const struct netdev_vport *netdev_vport = netdev_vport_priv(vport);
 | |
| 	return netdev_vport->dev->name;
 | |
| }
 | |
| 
 | |
| static unsigned int packet_length(const struct sk_buff *skb)
 | |
| {
 | |
| 	unsigned int length = skb->len - ETH_HLEN;
 | |
| 
 | |
| 	if (skb->protocol == htons(ETH_P_8021Q))
 | |
| 		length -= VLAN_HLEN;
 | |
| 
 | |
| 	return length;
 | |
| }
 | |
| 
 | |
| static int netdev_send(struct vport *vport, struct sk_buff *skb)
 | |
| {
 | |
| 	struct netdev_vport *netdev_vport = netdev_vport_priv(vport);
 | |
| 	int mtu = netdev_vport->dev->mtu;
 | |
| 	int len;
 | |
| 
 | |
| 	if (unlikely(packet_length(skb) > mtu && !skb_is_gso(skb))) {
 | |
| 		net_warn_ratelimited("%s: dropped over-mtu packet: %d > %d\n",
 | |
| 				     netdev_vport->dev->name,
 | |
| 				     packet_length(skb), mtu);
 | |
| 		goto drop;
 | |
| 	}
 | |
| 
 | |
| 	skb->dev = netdev_vport->dev;
 | |
| 	len = skb->len;
 | |
| 	dev_queue_xmit(skb);
 | |
| 
 | |
| 	return len;
 | |
| 
 | |
| drop:
 | |
| 	kfree_skb(skb);
 | |
| 	return 0;
 | |
| }
 | |
| 
 | |
| /* Returns null if this device is not attached to a datapath. */
 | |
| struct vport *ovs_netdev_get_vport(struct net_device *dev)
 | |
| {
 | |
| 	if (likely(dev->priv_flags & IFF_OVS_DATAPATH))
 | |
| 		return (struct vport *)
 | |
| 			rcu_dereference_rtnl(dev->rx_handler_data);
 | |
| 	else
 | |
| 		return NULL;
 | |
| }
 | |
| 
 | |
| const struct vport_ops ovs_netdev_vport_ops = {
 | |
| 	.type		= OVS_VPORT_TYPE_NETDEV,
 | |
| 	.create		= netdev_create,
 | |
| 	.destroy	= netdev_destroy,
 | |
| 	.get_name	= ovs_netdev_get_name,
 | |
| 	.send		= netdev_send,
 | |
| };
 |