IPoIB/cm: Fix SRQ WR leak
SRQ WR leakage has been observed with IPoIB/CM: e.g. flipping ports on and off will, with time, leak out all WRs and then all connections will start getting RNR NAKs. Fix this in the way suggested by spec: move the QP being destroyed to the error state, wait for "Last WQE Reached" event and then post WR on a "drain QP" connected to the same CQ. Once we observe a completion on the drain QP, it's safe to call ib_destroy_qp. Signed-off-by: Michael S. Tsirkin <mst@dev.mellanox.co.il> Signed-off-by: Roland Dreier <rolandd@cisco.com>
This commit is contained in:
		
					parent
					
						
							
								24bd1e4e32
							
						
					
				
			
			
				commit
				
					
						518b1646f8
					
				
			
		
					 3 changed files with 211 additions and 36 deletions
				
			
		| 
						 | 
					@ -132,12 +132,46 @@ struct ipoib_cm_data {
 | 
				
			||||||
	__be32 mtu;
 | 
						__be32 mtu;
 | 
				
			||||||
};
 | 
					};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/*
 | 
				
			||||||
 | 
					 * Quoting 10.3.1 Queue Pair and EE Context States:
 | 
				
			||||||
 | 
					 *
 | 
				
			||||||
 | 
					 * Note, for QPs that are associated with an SRQ, the Consumer should take the
 | 
				
			||||||
 | 
					 * QP through the Error State before invoking a Destroy QP or a Modify QP to the
 | 
				
			||||||
 | 
					 * Reset State.  The Consumer may invoke the Destroy QP without first performing
 | 
				
			||||||
 | 
					 * a Modify QP to the Error State and waiting for the Affiliated Asynchronous
 | 
				
			||||||
 | 
					 * Last WQE Reached Event. However, if the Consumer does not wait for the
 | 
				
			||||||
 | 
					 * Affiliated Asynchronous Last WQE Reached Event, then WQE and Data Segment
 | 
				
			||||||
 | 
					 * leakage may occur. Therefore, it is good programming practice to tear down a
 | 
				
			||||||
 | 
					 * QP that is associated with an SRQ by using the following process:
 | 
				
			||||||
 | 
					 *
 | 
				
			||||||
 | 
					 * - Put the QP in the Error State
 | 
				
			||||||
 | 
					 * - Wait for the Affiliated Asynchronous Last WQE Reached Event;
 | 
				
			||||||
 | 
					 * - either:
 | 
				
			||||||
 | 
					 *       drain the CQ by invoking the Poll CQ verb and either wait for CQ
 | 
				
			||||||
 | 
					 *       to be empty or the number of Poll CQ operations has exceeded
 | 
				
			||||||
 | 
					 *       CQ capacity size;
 | 
				
			||||||
 | 
					 * - or
 | 
				
			||||||
 | 
					 *       post another WR that completes on the same CQ and wait for this
 | 
				
			||||||
 | 
					 *       WR to return as a WC;
 | 
				
			||||||
 | 
					 * - and then invoke a Destroy QP or Reset QP.
 | 
				
			||||||
 | 
					 *
 | 
				
			||||||
 | 
					 * We use the second option and wait for a completion on the
 | 
				
			||||||
 | 
					 * rx_drain_qp before destroying QPs attached to our SRQ.
 | 
				
			||||||
 | 
					 */
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/* Teardown state of a passive (RX) connection; stored in ipoib_cm_rx.state. */
					enum ipoib_cm_state {
 | 
				
			||||||
 | 
						IPOIB_CM_RX_LIVE, /* Set when the connection request is handled */
 | 
				
			||||||
 | 
						IPOIB_CM_RX_ERROR, /* Ignored by stale task */
 | 
				
			||||||
 | 
						IPOIB_CM_RX_FLUSH  /* Last WQE Reached event observed */
 | 
				
			||||||
 | 
					};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
struct ipoib_cm_rx {
 | 
					struct ipoib_cm_rx {
 | 
				
			||||||
	struct ib_cm_id     *id;
 | 
						struct ib_cm_id     *id;
 | 
				
			||||||
	struct ib_qp        *qp;
 | 
						struct ib_qp        *qp;
 | 
				
			||||||
	struct list_head     list;
 | 
						struct list_head     list;
 | 
				
			||||||
	struct net_device   *dev;
 | 
						struct net_device   *dev;
 | 
				
			||||||
	unsigned long        jiffies;
 | 
						unsigned long        jiffies;
 | 
				
			||||||
 | 
						enum ipoib_cm_state  state;
 | 
				
			||||||
};
 | 
					};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
struct ipoib_cm_tx {
 | 
					struct ipoib_cm_tx {
 | 
				
			||||||
| 
						 | 
					@ -165,10 +199,16 @@ struct ipoib_cm_dev_priv {
 | 
				
			||||||
	struct ib_srq  	       *srq;
 | 
						struct ib_srq  	       *srq;
 | 
				
			||||||
	struct ipoib_cm_rx_buf *srq_ring;
 | 
						struct ipoib_cm_rx_buf *srq_ring;
 | 
				
			||||||
	struct ib_cm_id        *id;
 | 
						struct ib_cm_id        *id;
 | 
				
			||||||
	struct list_head        passive_ids;
 | 
						struct ib_qp           *rx_drain_qp;   /* generates WR described in 10.3.1 */
 | 
				
			||||||
 | 
						struct list_head        passive_ids;   /* state: LIVE */
 | 
				
			||||||
 | 
						struct list_head        rx_error_list; /* state: ERROR */
 | 
				
			||||||
 | 
						struct list_head        rx_flush_list; /* state: FLUSH, drain not started */
 | 
				
			||||||
 | 
						struct list_head        rx_drain_list; /* state: FLUSH, drain started */
 | 
				
			||||||
 | 
						struct list_head        rx_reap_list;  /* state: FLUSH, drain done */
 | 
				
			||||||
	struct work_struct      start_task;
 | 
						struct work_struct      start_task;
 | 
				
			||||||
	struct work_struct      reap_task;
 | 
						struct work_struct      reap_task;
 | 
				
			||||||
	struct work_struct      skb_task;
 | 
						struct work_struct      skb_task;
 | 
				
			||||||
 | 
						struct work_struct      rx_reap_task;
 | 
				
			||||||
	struct delayed_work     stale_task;
 | 
						struct delayed_work     stale_task;
 | 
				
			||||||
	struct sk_buff_head     skb_queue;
 | 
						struct sk_buff_head     skb_queue;
 | 
				
			||||||
	struct list_head        start_list;
 | 
						struct list_head        start_list;
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -37,6 +37,7 @@
 | 
				
			||||||
#include <net/dst.h>
 | 
					#include <net/dst.h>
 | 
				
			||||||
#include <net/icmp.h>
 | 
					#include <net/icmp.h>
 | 
				
			||||||
#include <linux/icmpv6.h>
 | 
					#include <linux/icmpv6.h>
 | 
				
			||||||
 | 
					#include <linux/delay.h>
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#ifdef CONFIG_INFINIBAND_IPOIB_DEBUG_DATA
 | 
					#ifdef CONFIG_INFINIBAND_IPOIB_DEBUG_DATA
 | 
				
			||||||
static int data_debug_level;
 | 
					static int data_debug_level;
 | 
				
			||||||
| 
						 | 
					@ -62,6 +63,16 @@ struct ipoib_cm_id {
 | 
				
			||||||
	u32 remote_mtu;
 | 
						u32 remote_mtu;
 | 
				
			||||||
};
 | 
					};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/* Shared attribute block passed to ib_modify_qp() with IB_QP_STATE to put a QP into the error state. */
					static struct ib_qp_attr ipoib_cm_err_attr = {
 | 
				
			||||||
 | 
						.qp_state = IB_QPS_ERR
 | 
				
			||||||
 | 
					};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/* wr_id carried by the drain WR; ipoib_cm_handle_rx_wc() compares completions against it. */
					#define IPOIB_CM_RX_DRAIN_WRID 0x7fffffff
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/* The work request posted on rx_drain_qp by ipoib_cm_start_rx_drain(). */
					static struct ib_recv_wr ipoib_cm_rx_drain_wr = {
 | 
				
			||||||
 | 
						.wr_id = IPOIB_CM_RX_DRAIN_WRID
 | 
				
			||||||
 | 
					};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
static int ipoib_cm_tx_handler(struct ib_cm_id *cm_id,
 | 
					static int ipoib_cm_tx_handler(struct ib_cm_id *cm_id,
 | 
				
			||||||
			       struct ib_cm_event *event);
 | 
								       struct ib_cm_event *event);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					@ -150,11 +161,44 @@ partial_error:
 | 
				
			||||||
	return NULL;
 | 
						return NULL;
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					static void ipoib_cm_start_rx_drain(struct ipoib_dev_priv* priv)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
						struct ib_recv_wr *bad_wr;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						/* rx_drain_qp send queue depth is 1, so
 | 
				
			||||||
 | 
						 * make sure we have at most 1 outstanding WR. */
 | 
				
			||||||
 | 
						if (list_empty(&priv->cm.rx_flush_list) ||
 | 
				
			||||||
 | 
						    !list_empty(&priv->cm.rx_drain_list))
 | 
				
			||||||
 | 
							return;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						if (ib_post_recv(priv->cm.rx_drain_qp, &ipoib_cm_rx_drain_wr, &bad_wr))
 | 
				
			||||||
 | 
							ipoib_warn(priv, "failed to post rx_drain wr\n");
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						list_splice_init(&priv->cm.rx_flush_list, &priv->cm.rx_drain_list);
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					static void ipoib_cm_rx_event_handler(struct ib_event *event, void *ctx)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
						struct ipoib_cm_rx *p = ctx;
 | 
				
			||||||
 | 
						struct ipoib_dev_priv *priv = netdev_priv(p->dev);
 | 
				
			||||||
 | 
						unsigned long flags;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						if (event->event != IB_EVENT_QP_LAST_WQE_REACHED)
 | 
				
			||||||
 | 
							return;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						spin_lock_irqsave(&priv->lock, flags);
 | 
				
			||||||
 | 
						list_move(&p->list, &priv->cm.rx_flush_list);
 | 
				
			||||||
 | 
						p->state = IPOIB_CM_RX_FLUSH;
 | 
				
			||||||
 | 
						ipoib_cm_start_rx_drain(priv);
 | 
				
			||||||
 | 
						spin_unlock_irqrestore(&priv->lock, flags);
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
static struct ib_qp *ipoib_cm_create_rx_qp(struct net_device *dev,
 | 
					static struct ib_qp *ipoib_cm_create_rx_qp(struct net_device *dev,
 | 
				
			||||||
					   struct ipoib_cm_rx *p)
 | 
										   struct ipoib_cm_rx *p)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
	struct ipoib_dev_priv *priv = netdev_priv(dev);
 | 
						struct ipoib_dev_priv *priv = netdev_priv(dev);
 | 
				
			||||||
	struct ib_qp_init_attr attr = {
 | 
						struct ib_qp_init_attr attr = {
 | 
				
			||||||
 | 
							.event_handler = ipoib_cm_rx_event_handler,
 | 
				
			||||||
		.send_cq = priv->cq, /* does not matter, we never send anything */
 | 
							.send_cq = priv->cq, /* does not matter, we never send anything */
 | 
				
			||||||
		.recv_cq = priv->cq,
 | 
							.recv_cq = priv->cq,
 | 
				
			||||||
		.srq = priv->cm.srq,
 | 
							.srq = priv->cm.srq,
 | 
				
			||||||
| 
						 | 
					@ -256,6 +300,7 @@ static int ipoib_cm_req_handler(struct ib_cm_id *cm_id, struct ib_cm_event *even
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	cm_id->context = p;
 | 
						cm_id->context = p;
 | 
				
			||||||
	p->jiffies = jiffies;
 | 
						p->jiffies = jiffies;
 | 
				
			||||||
 | 
						p->state = IPOIB_CM_RX_LIVE;
 | 
				
			||||||
	spin_lock_irq(&priv->lock);
 | 
						spin_lock_irq(&priv->lock);
 | 
				
			||||||
	if (list_empty(&priv->cm.passive_ids))
 | 
						if (list_empty(&priv->cm.passive_ids))
 | 
				
			||||||
		queue_delayed_work(ipoib_workqueue,
 | 
							queue_delayed_work(ipoib_workqueue,
 | 
				
			||||||
| 
						 | 
					@ -277,7 +322,6 @@ static int ipoib_cm_rx_handler(struct ib_cm_id *cm_id,
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
	struct ipoib_cm_rx *p;
 | 
						struct ipoib_cm_rx *p;
 | 
				
			||||||
	struct ipoib_dev_priv *priv;
 | 
						struct ipoib_dev_priv *priv;
 | 
				
			||||||
	int ret;
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
	switch (event->event) {
 | 
						switch (event->event) {
 | 
				
			||||||
	case IB_CM_REQ_RECEIVED:
 | 
						case IB_CM_REQ_RECEIVED:
 | 
				
			||||||
| 
						 | 
					@ -289,20 +333,9 @@ static int ipoib_cm_rx_handler(struct ib_cm_id *cm_id,
 | 
				
			||||||
	case IB_CM_REJ_RECEIVED:
 | 
						case IB_CM_REJ_RECEIVED:
 | 
				
			||||||
		p = cm_id->context;
 | 
							p = cm_id->context;
 | 
				
			||||||
		priv = netdev_priv(p->dev);
 | 
							priv = netdev_priv(p->dev);
 | 
				
			||||||
		spin_lock_irq(&priv->lock);
 | 
							if (ib_modify_qp(p->qp, &ipoib_cm_err_attr, IB_QP_STATE))
 | 
				
			||||||
		if (list_empty(&p->list))
 | 
								ipoib_warn(priv, "unable to move qp to error state\n");
 | 
				
			||||||
			ret = 0; /* Connection is going away already. */
 | 
							/* Fall through */
 | 
				
			||||||
		else {
 | 
					 | 
				
			||||||
			list_del_init(&p->list);
 | 
					 | 
				
			||||||
			ret = -ECONNRESET;
 | 
					 | 
				
			||||||
		}
 | 
					 | 
				
			||||||
		spin_unlock_irq(&priv->lock);
 | 
					 | 
				
			||||||
		if (ret) {
 | 
					 | 
				
			||||||
			ib_destroy_qp(p->qp);
 | 
					 | 
				
			||||||
			kfree(p);
 | 
					 | 
				
			||||||
			return ret;
 | 
					 | 
				
			||||||
		}
 | 
					 | 
				
			||||||
		return 0;
 | 
					 | 
				
			||||||
	default:
 | 
						default:
 | 
				
			||||||
		return 0;
 | 
							return 0;
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
| 
						 | 
					@ -354,8 +387,15 @@ void ipoib_cm_handle_rx_wc(struct net_device *dev, struct ib_wc *wc)
 | 
				
			||||||
		       wr_id, wc->status);
 | 
							       wr_id, wc->status);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	if (unlikely(wr_id >= ipoib_recvq_size)) {
 | 
						if (unlikely(wr_id >= ipoib_recvq_size)) {
 | 
				
			||||||
		ipoib_warn(priv, "cm recv completion event with wrid %d (> %d)\n",
 | 
							if (wr_id == (IPOIB_CM_RX_DRAIN_WRID & ~IPOIB_CM_OP_SRQ)) {
 | 
				
			||||||
			   wr_id, ipoib_recvq_size);
 | 
								spin_lock_irqsave(&priv->lock, flags);
 | 
				
			||||||
 | 
								list_splice_init(&priv->cm.rx_drain_list, &priv->cm.rx_reap_list);
 | 
				
			||||||
 | 
								ipoib_cm_start_rx_drain(priv);
 | 
				
			||||||
 | 
								queue_work(ipoib_workqueue, &priv->cm.rx_reap_task);
 | 
				
			||||||
 | 
								spin_unlock_irqrestore(&priv->lock, flags);
 | 
				
			||||||
 | 
							} else
 | 
				
			||||||
 | 
								ipoib_warn(priv, "cm recv completion event with wrid %d (> %d)\n",
 | 
				
			||||||
 | 
									   wr_id, ipoib_recvq_size);
 | 
				
			||||||
		return;
 | 
							return;
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					@ -374,9 +414,9 @@ void ipoib_cm_handle_rx_wc(struct net_device *dev, struct ib_wc *wc)
 | 
				
			||||||
		if (p && time_after_eq(jiffies, p->jiffies + IPOIB_CM_RX_UPDATE_TIME)) {
 | 
							if (p && time_after_eq(jiffies, p->jiffies + IPOIB_CM_RX_UPDATE_TIME)) {
 | 
				
			||||||
			spin_lock_irqsave(&priv->lock, flags);
 | 
								spin_lock_irqsave(&priv->lock, flags);
 | 
				
			||||||
			p->jiffies = jiffies;
 | 
								p->jiffies = jiffies;
 | 
				
			||||||
			/* Move this entry to list head, but do
 | 
								/* Move this entry to list head, but do not re-add it
 | 
				
			||||||
			 * not re-add it if it has been removed. */
 | 
								 * if it has been moved out of list. */
 | 
				
			||||||
			if (!list_empty(&p->list))
 | 
								if (p->state == IPOIB_CM_RX_LIVE)
 | 
				
			||||||
				list_move(&p->list, &priv->cm.passive_ids);
 | 
									list_move(&p->list, &priv->cm.passive_ids);
 | 
				
			||||||
			spin_unlock_irqrestore(&priv->lock, flags);
 | 
								spin_unlock_irqrestore(&priv->lock, flags);
 | 
				
			||||||
		}
 | 
							}
 | 
				
			||||||
| 
						 | 
					@ -583,17 +623,43 @@ static void ipoib_cm_tx_completion(struct ib_cq *cq, void *tx_ptr)
 | 
				
			||||||
int ipoib_cm_dev_open(struct net_device *dev)
 | 
					int ipoib_cm_dev_open(struct net_device *dev)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
	struct ipoib_dev_priv *priv = netdev_priv(dev);
 | 
						struct ipoib_dev_priv *priv = netdev_priv(dev);
 | 
				
			||||||
 | 
						struct ib_qp_init_attr qp_init_attr = {
 | 
				
			||||||
 | 
							.send_cq = priv->cq,   /* does not matter, we never send anything */
 | 
				
			||||||
 | 
							.recv_cq = priv->cq,
 | 
				
			||||||
 | 
							.cap.max_send_wr = 1,  /* FIXME: 0 Seems not to work */
 | 
				
			||||||
 | 
							.cap.max_send_sge = 1, /* FIXME: 0 Seems not to work */
 | 
				
			||||||
 | 
							.cap.max_recv_wr = 1,
 | 
				
			||||||
 | 
							.cap.max_recv_sge = 1, /* FIXME: 0 Seems not to work */
 | 
				
			||||||
 | 
							.sq_sig_type = IB_SIGNAL_ALL_WR,
 | 
				
			||||||
 | 
							.qp_type = IB_QPT_UC,
 | 
				
			||||||
 | 
						};
 | 
				
			||||||
	int ret;
 | 
						int ret;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	if (!IPOIB_CM_SUPPORTED(dev->dev_addr))
 | 
						if (!IPOIB_CM_SUPPORTED(dev->dev_addr))
 | 
				
			||||||
		return 0;
 | 
							return 0;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						priv->cm.rx_drain_qp = ib_create_qp(priv->pd, &qp_init_attr);
 | 
				
			||||||
 | 
						if (IS_ERR(priv->cm.rx_drain_qp)) {
 | 
				
			||||||
 | 
							printk(KERN_WARNING "%s: failed to create CM ID\n", priv->ca->name);
 | 
				
			||||||
 | 
							ret = PTR_ERR(priv->cm.rx_drain_qp);
 | 
				
			||||||
 | 
							return ret;
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						/*
 | 
				
			||||||
 | 
						 * We put the QP in error state directly.  This way, a "flush
 | 
				
			||||||
 | 
						 * error" WC will be immediately generated for each WR we post.
 | 
				
			||||||
 | 
						 */
 | 
				
			||||||
 | 
						ret = ib_modify_qp(priv->cm.rx_drain_qp, &ipoib_cm_err_attr, IB_QP_STATE);
 | 
				
			||||||
 | 
						if (ret) {
 | 
				
			||||||
 | 
							ipoib_warn(priv, "failed to modify drain QP to error: %d\n", ret);
 | 
				
			||||||
 | 
							goto err_qp;
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	priv->cm.id = ib_create_cm_id(priv->ca, ipoib_cm_rx_handler, dev);
 | 
						priv->cm.id = ib_create_cm_id(priv->ca, ipoib_cm_rx_handler, dev);
 | 
				
			||||||
	if (IS_ERR(priv->cm.id)) {
 | 
						if (IS_ERR(priv->cm.id)) {
 | 
				
			||||||
		printk(KERN_WARNING "%s: failed to create CM ID\n", priv->ca->name);
 | 
							printk(KERN_WARNING "%s: failed to create CM ID\n", priv->ca->name);
 | 
				
			||||||
		ret = PTR_ERR(priv->cm.id);
 | 
							ret = PTR_ERR(priv->cm.id);
 | 
				
			||||||
		priv->cm.id = NULL;
 | 
							goto err_cm;
 | 
				
			||||||
		return ret;
 | 
					 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	ret = ib_cm_listen(priv->cm.id, cpu_to_be64(IPOIB_CM_IETF_ID | priv->qp->qp_num),
 | 
						ret = ib_cm_listen(priv->cm.id, cpu_to_be64(IPOIB_CM_IETF_ID | priv->qp->qp_num),
 | 
				
			||||||
| 
						 | 
					@ -601,35 +667,79 @@ int ipoib_cm_dev_open(struct net_device *dev)
 | 
				
			||||||
	if (ret) {
 | 
						if (ret) {
 | 
				
			||||||
		printk(KERN_WARNING "%s: failed to listen on ID 0x%llx\n", priv->ca->name,
 | 
							printk(KERN_WARNING "%s: failed to listen on ID 0x%llx\n", priv->ca->name,
 | 
				
			||||||
		       IPOIB_CM_IETF_ID | priv->qp->qp_num);
 | 
							       IPOIB_CM_IETF_ID | priv->qp->qp_num);
 | 
				
			||||||
		ib_destroy_cm_id(priv->cm.id);
 | 
							goto err_listen;
 | 
				
			||||||
		priv->cm.id = NULL;
 | 
					 | 
				
			||||||
		return ret;
 | 
					 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	return 0;
 | 
						return 0;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					err_listen:
 | 
				
			||||||
 | 
						ib_destroy_cm_id(priv->cm.id);
 | 
				
			||||||
 | 
					err_cm:
 | 
				
			||||||
 | 
						priv->cm.id = NULL;
 | 
				
			||||||
 | 
					err_qp:
 | 
				
			||||||
 | 
						ib_destroy_qp(priv->cm.rx_drain_qp);
 | 
				
			||||||
 | 
						return ret;
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
void ipoib_cm_dev_stop(struct net_device *dev)
 | 
					void ipoib_cm_dev_stop(struct net_device *dev)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
	struct ipoib_dev_priv *priv = netdev_priv(dev);
 | 
						struct ipoib_dev_priv *priv = netdev_priv(dev);
 | 
				
			||||||
	struct ipoib_cm_rx *p;
 | 
						struct ipoib_cm_rx *p, *n;
 | 
				
			||||||
 | 
						unsigned long begin;
 | 
				
			||||||
 | 
						LIST_HEAD(list);
 | 
				
			||||||
 | 
						int ret;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	if (!IPOIB_CM_SUPPORTED(dev->dev_addr) || !priv->cm.id)
 | 
						if (!IPOIB_CM_SUPPORTED(dev->dev_addr) || !priv->cm.id)
 | 
				
			||||||
		return;
 | 
							return;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	ib_destroy_cm_id(priv->cm.id);
 | 
						ib_destroy_cm_id(priv->cm.id);
 | 
				
			||||||
	priv->cm.id = NULL;
 | 
						priv->cm.id = NULL;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	spin_lock_irq(&priv->lock);
 | 
						spin_lock_irq(&priv->lock);
 | 
				
			||||||
	while (!list_empty(&priv->cm.passive_ids)) {
 | 
						while (!list_empty(&priv->cm.passive_ids)) {
 | 
				
			||||||
		p = list_entry(priv->cm.passive_ids.next, typeof(*p), list);
 | 
							p = list_entry(priv->cm.passive_ids.next, typeof(*p), list);
 | 
				
			||||||
		list_del_init(&p->list);
 | 
							list_move(&p->list, &priv->cm.rx_error_list);
 | 
				
			||||||
 | 
							p->state = IPOIB_CM_RX_ERROR;
 | 
				
			||||||
		spin_unlock_irq(&priv->lock);
 | 
							spin_unlock_irq(&priv->lock);
 | 
				
			||||||
 | 
							ret = ib_modify_qp(p->qp, &ipoib_cm_err_attr, IB_QP_STATE);
 | 
				
			||||||
 | 
							if (ret)
 | 
				
			||||||
 | 
								ipoib_warn(priv, "unable to move qp to error state: %d\n", ret);
 | 
				
			||||||
 | 
							spin_lock_irq(&priv->lock);
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						/* Wait for all RX to be drained */
 | 
				
			||||||
 | 
						begin = jiffies;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						while (!list_empty(&priv->cm.rx_error_list) ||
 | 
				
			||||||
 | 
						       !list_empty(&priv->cm.rx_flush_list) ||
 | 
				
			||||||
 | 
						       !list_empty(&priv->cm.rx_drain_list)) {
 | 
				
			||||||
 | 
							if (!time_after(jiffies, begin + 5 * HZ)) {
 | 
				
			||||||
 | 
								ipoib_warn(priv, "RX drain timing out\n");
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
								/*
 | 
				
			||||||
 | 
								 * assume the HW is wedged and just free up everything.
 | 
				
			||||||
 | 
								 */
 | 
				
			||||||
 | 
								list_splice_init(&priv->cm.rx_flush_list, &list);
 | 
				
			||||||
 | 
								list_splice_init(&priv->cm.rx_error_list, &list);
 | 
				
			||||||
 | 
								list_splice_init(&priv->cm.rx_drain_list, &list);
 | 
				
			||||||
 | 
								break;
 | 
				
			||||||
 | 
							}
 | 
				
			||||||
 | 
							spin_unlock_irq(&priv->lock);
 | 
				
			||||||
 | 
							msleep(1);
 | 
				
			||||||
 | 
							spin_lock_irq(&priv->lock);
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						list_splice_init(&priv->cm.rx_reap_list, &list);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						spin_unlock_irq(&priv->lock);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						list_for_each_entry_safe(p, n, &list, list) {
 | 
				
			||||||
		ib_destroy_cm_id(p->id);
 | 
							ib_destroy_cm_id(p->id);
 | 
				
			||||||
		ib_destroy_qp(p->qp);
 | 
							ib_destroy_qp(p->qp);
 | 
				
			||||||
		kfree(p);
 | 
							kfree(p);
 | 
				
			||||||
		spin_lock_irq(&priv->lock);
 | 
					 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
	spin_unlock_irq(&priv->lock);
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						ib_destroy_qp(priv->cm.rx_drain_qp);
 | 
				
			||||||
	cancel_delayed_work(&priv->cm.stale_task);
 | 
						cancel_delayed_work(&priv->cm.stale_task);
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					@ -1079,24 +1189,44 @@ void ipoib_cm_skb_too_long(struct net_device* dev, struct sk_buff *skb,
 | 
				
			||||||
		queue_work(ipoib_workqueue, &priv->cm.skb_task);
 | 
							queue_work(ipoib_workqueue, &priv->cm.skb_task);
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					static void ipoib_cm_rx_reap(struct work_struct *work)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
						struct ipoib_dev_priv *priv = container_of(work, struct ipoib_dev_priv,
 | 
				
			||||||
 | 
											   cm.rx_reap_task);
 | 
				
			||||||
 | 
						struct ipoib_cm_rx *p, *n;
 | 
				
			||||||
 | 
						LIST_HEAD(list);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						spin_lock_irq(&priv->lock);
 | 
				
			||||||
 | 
						list_splice_init(&priv->cm.rx_reap_list, &list);
 | 
				
			||||||
 | 
						spin_unlock_irq(&priv->lock);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						list_for_each_entry_safe(p, n, &list, list) {
 | 
				
			||||||
 | 
							ib_destroy_cm_id(p->id);
 | 
				
			||||||
 | 
							ib_destroy_qp(p->qp);
 | 
				
			||||||
 | 
							kfree(p);
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
static void ipoib_cm_stale_task(struct work_struct *work)
 | 
					static void ipoib_cm_stale_task(struct work_struct *work)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
	struct ipoib_dev_priv *priv = container_of(work, struct ipoib_dev_priv,
 | 
						struct ipoib_dev_priv *priv = container_of(work, struct ipoib_dev_priv,
 | 
				
			||||||
						   cm.stale_task.work);
 | 
											   cm.stale_task.work);
 | 
				
			||||||
	struct ipoib_cm_rx *p;
 | 
						struct ipoib_cm_rx *p;
 | 
				
			||||||
 | 
						int ret;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	spin_lock_irq(&priv->lock);
 | 
						spin_lock_irq(&priv->lock);
 | 
				
			||||||
	while (!list_empty(&priv->cm.passive_ids)) {
 | 
						while (!list_empty(&priv->cm.passive_ids)) {
 | 
				
			||||||
		/* List if sorted by LRU, start from tail,
 | 
							/* List is sorted by LRU, start from tail,
 | 
				
			||||||
		 * stop when we see a recently used entry */
 | 
							 * stop when we see a recently used entry */
 | 
				
			||||||
		p = list_entry(priv->cm.passive_ids.prev, typeof(*p), list);
 | 
							p = list_entry(priv->cm.passive_ids.prev, typeof(*p), list);
 | 
				
			||||||
		if (time_before_eq(jiffies, p->jiffies + IPOIB_CM_RX_TIMEOUT))
 | 
							if (time_before_eq(jiffies, p->jiffies + IPOIB_CM_RX_TIMEOUT))
 | 
				
			||||||
			break;
 | 
								break;
 | 
				
			||||||
		list_del_init(&p->list);
 | 
							list_move(&p->list, &priv->cm.rx_error_list);
 | 
				
			||||||
 | 
							p->state = IPOIB_CM_RX_ERROR;
 | 
				
			||||||
		spin_unlock_irq(&priv->lock);
 | 
							spin_unlock_irq(&priv->lock);
 | 
				
			||||||
		ib_destroy_cm_id(p->id);
 | 
							ret = ib_modify_qp(p->qp, &ipoib_cm_err_attr, IB_QP_STATE);
 | 
				
			||||||
		ib_destroy_qp(p->qp);
 | 
							if (ret)
 | 
				
			||||||
		kfree(p);
 | 
								ipoib_warn(priv, "unable to move qp to error state: %d\n", ret);
 | 
				
			||||||
		spin_lock_irq(&priv->lock);
 | 
							spin_lock_irq(&priv->lock);
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					@ -1164,9 +1294,14 @@ int ipoib_cm_dev_init(struct net_device *dev)
 | 
				
			||||||
	INIT_LIST_HEAD(&priv->cm.passive_ids);
 | 
						INIT_LIST_HEAD(&priv->cm.passive_ids);
 | 
				
			||||||
	INIT_LIST_HEAD(&priv->cm.reap_list);
 | 
						INIT_LIST_HEAD(&priv->cm.reap_list);
 | 
				
			||||||
	INIT_LIST_HEAD(&priv->cm.start_list);
 | 
						INIT_LIST_HEAD(&priv->cm.start_list);
 | 
				
			||||||
 | 
						INIT_LIST_HEAD(&priv->cm.rx_error_list);
 | 
				
			||||||
 | 
						INIT_LIST_HEAD(&priv->cm.rx_flush_list);
 | 
				
			||||||
 | 
						INIT_LIST_HEAD(&priv->cm.rx_drain_list);
 | 
				
			||||||
 | 
						INIT_LIST_HEAD(&priv->cm.rx_reap_list);
 | 
				
			||||||
	INIT_WORK(&priv->cm.start_task, ipoib_cm_tx_start);
 | 
						INIT_WORK(&priv->cm.start_task, ipoib_cm_tx_start);
 | 
				
			||||||
	INIT_WORK(&priv->cm.reap_task, ipoib_cm_tx_reap);
 | 
						INIT_WORK(&priv->cm.reap_task, ipoib_cm_tx_reap);
 | 
				
			||||||
	INIT_WORK(&priv->cm.skb_task, ipoib_cm_skb_reap);
 | 
						INIT_WORK(&priv->cm.skb_task, ipoib_cm_skb_reap);
 | 
				
			||||||
 | 
						INIT_WORK(&priv->cm.rx_reap_task, ipoib_cm_rx_reap);
 | 
				
			||||||
	INIT_DELAYED_WORK(&priv->cm.stale_task, ipoib_cm_stale_task);
 | 
						INIT_DELAYED_WORK(&priv->cm.stale_task, ipoib_cm_stale_task);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	skb_queue_head_init(&priv->cm.skb_queue);
 | 
						skb_queue_head_init(&priv->cm.skb_queue);
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -173,7 +173,7 @@ int ipoib_transport_dev_init(struct net_device *dev, struct ib_device *ca)
 | 
				
			||||||
	size = ipoib_sendq_size + ipoib_recvq_size + 1;
 | 
						size = ipoib_sendq_size + ipoib_recvq_size + 1;
 | 
				
			||||||
	ret = ipoib_cm_dev_init(dev);
 | 
						ret = ipoib_cm_dev_init(dev);
 | 
				
			||||||
	if (!ret)
 | 
						if (!ret)
 | 
				
			||||||
		size += ipoib_recvq_size;
 | 
							size += ipoib_recvq_size + 1 /* 1 extra for rx_drain_qp */;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	priv->cq = ib_create_cq(priv->ca, ipoib_ib_completion, NULL, dev, size, 0);
 | 
						priv->cq = ib_create_cq(priv->ca, ipoib_ib_completion, NULL, dev, size, 0);
 | 
				
			||||||
	if (IS_ERR(priv->cq)) {
 | 
						if (IS_ERR(priv->cq)) {
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue