IB/qib: RCU locking for MR validation
Profiling indicates that MR validation locking is expensive. The MR table is
largely read-only and is a suitable candidate for RCU locking.

The patch uses RCU locking during validation to eliminate one lock/unlock
during that validation.

Reviewed-by: Mike Heinz <michael.william.heinz@intel.com>
Signed-off-by: Mike Marciniszyn <mike.marciniszyn@intel.com>
Signed-off-by: Roland Dreier <roland@purestorage.com>
parent 6a82649f21
commit 8aac4cc3a9

4 changed files with 66 additions and 50 deletions
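For orientation before the diff: the sketch below is not driver code; it is a minimal, self-contained illustration (hypothetical names tbl, entry, lookup_entry) of the read-side pattern that qib_lkey_ok() and qib_rkey_ok() switch to, i.e. an RCU-protected table lookup plus atomic_inc_not_zero() in place of a spinlocked lookup and qib_get_mr().

/*
 * Illustrative sketch only -- not driver code.  Readers take no spinlock:
 * rcu_read_lock() guarantees the entry is not freed under them, and
 * atomic_inc_not_zero() skips entries that are already being torn down.
 */
#include <linux/rcupdate.h>
#include <linux/atomic.h>
#include <linux/spinlock.h>
#include <linux/types.h>

struct entry {
	atomic_t refcount;
	u32 key;
};

struct tbl {
	spinlock_t lock;			/* writers only */
	struct entry __rcu *slots[256];
};

static struct entry *lookup_entry(struct tbl *t, u32 key)
{
	struct entry *e;

	rcu_read_lock();
	e = rcu_dereference(t->slots[key & 255]);
	if (!e || e->key != key)
		goto bail;
	/* entry may be unpublished concurrently; only use it if still live */
	if (unlikely(!atomic_inc_not_zero(&e->refcount)))
		goto bail;
	rcu_read_unlock();
	return e;		/* caller drops the reference when done */
bail:
	rcu_read_unlock();
	return NULL;
}

In the hunks below, the MR lkey table plays the role of slots[] and the qib_mregion refcount plays the role of entry->refcount.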
drivers/infiniband/hw/qib/qib_keys.c

@@ -40,8 +40,7 @@
  *
  * Returns 0 if successful, otherwise returns -errno.
  *
- * Increments mr reference count and sets published
- * as required.
+ * Increments mr reference count as required.
  *
  * Sets the lkey field mr for non-dma regions.
  *
@@ -60,10 +59,12 @@ int qib_alloc_lkey(struct qib_mregion *mr, int dma_region)
 
 	/* special case for dma_mr lkey == 0 */
 	if (dma_region) {
-		/* should the dma_mr be relative to the pd? */
-		if (!dev->dma_mr) {
+		struct qib_mregion *tmr;
+
+		tmr = rcu_dereference(dev->dma_mr);
+		if (!tmr) {
 			qib_get_mr(mr);
-			dev->dma_mr = mr;
+			rcu_assign_pointer(dev->dma_mr, mr);
 			mr->lkey_published = 1;
 		}
 		goto success;
@@ -93,7 +94,7 @@ int qib_alloc_lkey(struct qib_mregion *mr, int dma_region)
 		rkt->gen++;
 	}
 	qib_get_mr(mr);
-	rkt->table[r] = mr;
+	rcu_assign_pointer(rkt->table[r], mr);
 	mr->lkey_published = 1;
 success:
 	spin_unlock_irqrestore(&rkt->lock, flags);
@@ -120,33 +121,30 @@ void qib_free_lkey(struct qib_mregion *mr)
 	spin_lock_irqsave(&rkt->lock, flags);
 	if (!mr->lkey_published)
 		goto out;
-	mr->lkey_published = 0;
-
-
-	spin_lock_irqsave(&dev->lk_table.lock, flags);
-	if (lkey == 0) {
-		if (dev->dma_mr && dev->dma_mr == mr) {
-			qib_put_mr(dev->dma_mr);
-			dev->dma_mr = NULL;
-		}
-	} else {
+	if (lkey == 0)
+		rcu_assign_pointer(dev->dma_mr, NULL);
+	else {
 		r = lkey >> (32 - ib_qib_lkey_table_size);
-		qib_put_mr(dev->dma_mr);
-		rkt->table[r] = NULL;
+		rcu_assign_pointer(rkt->table[r], NULL);
 	}
+	qib_put_mr(mr);
+	mr->lkey_published = 0;
 out:
-	spin_unlock_irqrestore(&dev->lk_table.lock, flags);
+	spin_unlock_irqrestore(&rkt->lock, flags);
 }
 
 /**
  * qib_lkey_ok - check IB SGE for validity and initialize
  * @rkt: table containing lkey to check SGE against
+ * @pd: protection domain
  * @isge: outgoing internal SGE
  * @sge: SGE to check
  * @acc: access flags
  *
  * Return 1 if valid and successful, otherwise returns 0.
  *
+ * increments the reference count upon success
+ *
  * Check the IB SGE for validity and initialize our internal version
  * of it.
  */
@@ -156,24 +154,25 @@ int qib_lkey_ok(struct qib_lkey_table *rkt, struct qib_pd *pd,
 	struct qib_mregion *mr;
 	unsigned n, m;
 	size_t off;
-	unsigned long flags;
 
 	/*
 	 * We use LKEY == zero for kernel virtual addresses
 	 * (see qib_get_dma_mr and qib_dma.c).
 	 */
-	spin_lock_irqsave(&rkt->lock, flags);
+	rcu_read_lock();
 	if (sge->lkey == 0) {
 		struct qib_ibdev *dev = to_idev(pd->ibpd.device);
 
 		if (pd->user)
 			goto bail;
-		if (!dev->dma_mr)
+		mr = rcu_dereference(dev->dma_mr);
+		if (!mr)
 			goto bail;
-		qib_get_mr(dev->dma_mr);
-		spin_unlock_irqrestore(&rkt->lock, flags);
+		if (unlikely(!atomic_inc_not_zero(&mr->refcount)))
+			goto bail;
+		rcu_read_unlock();
 
-		isge->mr = dev->dma_mr;
+		isge->mr = mr;
 		isge->vaddr = (void *) sge->addr;
 		isge->length = sge->length;
 		isge->sge_length = sge->length;
@@ -181,18 +180,18 @@ int qib_lkey_ok(struct qib_lkey_table *rkt, struct qib_pd *pd,
 		isge->n = 0;
 		goto ok;
 	}
-	mr = rkt->table[(sge->lkey >> (32 - ib_qib_lkey_table_size))];
-	if (unlikely(mr == NULL || mr->lkey != sge->lkey ||
-		     mr->pd != &pd->ibpd))
+	mr = rcu_dereference(
+		rkt->table[(sge->lkey >> (32 - ib_qib_lkey_table_size))]);
+	if (unlikely(!mr || mr->lkey != sge->lkey || mr->pd != &pd->ibpd))
 		goto bail;
 
 	off = sge->addr - mr->user_base;
-	if (unlikely(sge->addr < mr->user_base ||
-		     off + sge->length > mr->length ||
-		     (mr->access_flags & acc) != acc))
+	if (unlikely(sge->addr < mr->iova || off + sge->length > mr->length ||
+		     (mr->access_flags & acc) == 0))
 		goto bail;
-	qib_get_mr(mr);
-	spin_unlock_irqrestore(&rkt->lock, flags);
+	if (unlikely(!atomic_inc_not_zero(&mr->refcount)))
+		goto bail;
+	rcu_read_unlock();
 
 	off += mr->offset;
 	if (mr->page_shift) {
@@ -228,20 +227,22 @@ int qib_lkey_ok(struct qib_lkey_table *rkt, struct qib_pd *pd,
 ok:
 	return 1;
 bail:
-	spin_unlock_irqrestore(&rkt->lock, flags);
+	rcu_read_unlock();
 	return 0;
 }
 
 /**
  * qib_rkey_ok - check the IB virtual address, length, and RKEY
- * @dev: infiniband device
- * @ss: SGE state
+ * @qp: qp for validation
+ * @sge: SGE state
  * @len: length of data
 * @vaddr: virtual address to place data
 * @rkey: rkey to check
 * @acc: access flags
 *
 * Return 1 if successful, otherwise 0.
+ *
+ * increments the reference count upon success
 */
 int qib_rkey_ok(struct qib_qp *qp, struct qib_sge *sge,
 		u32 len, u64 vaddr, u32 rkey, int acc)
@@ -250,25 +251,26 @@ int qib_rkey_ok(struct qib_qp *qp, struct qib_sge *sge,
 	struct qib_mregion *mr;
 	unsigned n, m;
 	size_t off;
-	unsigned long flags;
 
 	/*
 	 * We use RKEY == zero for kernel virtual addresses
 	 * (see qib_get_dma_mr and qib_dma.c).
 	 */
-	spin_lock_irqsave(&rkt->lock, flags);
+	rcu_read_lock();
 	if (rkey == 0) {
 		struct qib_pd *pd = to_ipd(qp->ibqp.pd);
 		struct qib_ibdev *dev = to_idev(pd->ibpd.device);
 
 		if (pd->user)
 			goto bail;
-		if (!dev->dma_mr)
+		mr = rcu_dereference(dev->dma_mr);
+		if (!mr)
 			goto bail;
-		qib_get_mr(dev->dma_mr);
-		spin_unlock_irqrestore(&rkt->lock, flags);
+		if (unlikely(!atomic_inc_not_zero(&mr->refcount)))
+			goto bail;
+		rcu_read_unlock();
 
-		sge->mr = dev->dma_mr;
+		sge->mr = mr;
 		sge->vaddr = (void *) vaddr;
 		sge->length = len;
 		sge->sge_length = len;
@@ -277,16 +279,18 @@ int qib_rkey_ok(struct qib_qp *qp, struct qib_sge *sge,
 		goto ok;
 	}
 
-	mr = rkt->table[(rkey >> (32 - ib_qib_lkey_table_size))];
-	if (unlikely(mr == NULL || mr->lkey != rkey || qp->ibqp.pd != mr->pd))
+	mr = rcu_dereference(
+		rkt->table[(rkey >> (32 - ib_qib_lkey_table_size))]);
+	if (unlikely(!mr || mr->lkey != rkey || qp->ibqp.pd != mr->pd))
 		goto bail;
 
 	off = vaddr - mr->iova;
 	if (unlikely(vaddr < mr->iova || off + len > mr->length ||
 		     (mr->access_flags & acc) == 0))
 		goto bail;
-	qib_get_mr(mr);
-	spin_unlock_irqrestore(&rkt->lock, flags);
+	if (unlikely(!atomic_inc_not_zero(&mr->refcount)))
+		goto bail;
+	rcu_read_unlock();
 
 	off += mr->offset;
 	if (mr->page_shift) {
@@ -322,7 +326,7 @@ int qib_rkey_ok(struct qib_qp *qp, struct qib_sge *sge,
 ok:
 	return 1;
 bail:
-	spin_unlock_irqrestore(&rkt->lock, flags);
+	rcu_read_unlock();
 	return 0;
 }
 
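The update side keeps the table spinlock but must now retire entries carefully: the RCU pointer is cleared with rcu_assign_pointer() and the memory is reclaimed only after a grace period, which is why a reader that loses the atomic_inc_not_zero() race above never touches freed memory. A self-contained sketch of that pattern follows, again with hypothetical names; note the driver's own callback, mr_rcu_callback() in the next hunk, defers a completion rather than calling kfree() directly.

/*
 * Illustrative sketch only -- update side of the pattern used by
 * qib_alloc_lkey()/qib_free_lkey().  Writers still serialize on the
 * spinlock; freeing is deferred past an RCU grace period.
 */
#include <linux/rcupdate.h>
#include <linux/atomic.h>
#include <linux/spinlock.h>
#include <linux/slab.h>
#include <linux/types.h>

struct entry {
	atomic_t refcount;
	struct rcu_head rcu;
	u32 key;
};

struct tbl {
	spinlock_t lock;
	struct entry __rcu *slots[256];
};

static void entry_free_rcu(struct rcu_head *head)
{
	kfree(container_of(head, struct entry, rcu));
}

static void entry_put(struct entry *e)
{
	/* last reference: free only after all current RCU readers finish */
	if (atomic_dec_and_test(&e->refcount))
		call_rcu(&e->rcu, entry_free_rcu);
}

static void unpublish_entry(struct tbl *t, struct entry *e)
{
	unsigned long flags;

	spin_lock_irqsave(&t->lock, flags);	/* writers keep the lock */
	rcu_assign_pointer(t->slots[e->key & 255], NULL);
	spin_unlock_irqrestore(&t->lock, flags);

	entry_put(e);				/* drop the table's reference */
}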
drivers/infiniband/hw/qib/qib_mr.c

@@ -527,3 +527,10 @@ int qib_dealloc_fmr(struct ib_fmr *ibfmr)
 out:
 	return ret;
 }
+
+void mr_rcu_callback(struct rcu_head *list)
+{
+	struct qib_mregion *mr = container_of(list, struct qib_mregion, list);
+
+	complete(&mr->comp);
+}
drivers/infiniband/hw/qib/qib_verbs.c

@@ -2066,7 +2066,9 @@ int qib_register_ib_device(struct qib_devdata *dd)
 		ret = -ENOMEM;
 		goto err_lk;
 	}
-	memset(dev->lk_table.table, 0, lk_tab_size);
+	RCU_INIT_POINTER(dev->dma_mr, NULL);
+	for (i = 0; i < dev->lk_table.max; i++)
+		RCU_INIT_POINTER(dev->lk_table.table[i], NULL);
 	INIT_LIST_HEAD(&dev->pending_mmaps);
 	spin_lock_init(&dev->pending_lock);
 	dev->mmap_offset = PAGE_SIZE;
drivers/infiniband/hw/qib/qib_verbs.h

@@ -305,6 +305,7 @@ struct qib_mregion {
 	u8  page_shift;         /* 0 - non unform/non powerof2 sizes */
 	u8  lkey_published;     /* in global table */
 	struct completion comp; /* complete when refcount goes to zero */
+	struct rcu_head list;
 	atomic_t refcount;
 	struct qib_segarray *map[0];    /* the segments */
 };
@@ -1022,10 +1023,12 @@ static inline void qib_get_mr(struct qib_mregion *mr)
 	atomic_inc(&mr->refcount);
 }
 
+void mr_rcu_callback(struct rcu_head *list);
+
 static inline void qib_put_mr(struct qib_mregion *mr)
 {
 	if (unlikely(atomic_dec_and_test(&mr->refcount)))
-		call_rcu(&mr->list, mr_rcu_callback);
+		call_rcu(&mr->list, mr_rcu_callback);
 }
 
 static inline void qib_put_ss(struct qib_sge_state *ss)
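With qib_put_mr() now deferring the completion through call_rcu(), a deregistration path that waits on mr->comp proceeds only after both the last reference has been dropped and a grace period has elapsed, so any reader still inside its RCU read-side critical section has finished with the MR. The wait itself lives in the MR deregister code introduced by the parent commit (6a82649f21); the helper below is only a hypothetical sketch of that usage, not driver code.

#include <linux/completion.h>
#include "qib_verbs.h"		/* struct qib_mregion, qib_put_mr() */

/* Hypothetical helper: block until every holder of the MR is done. */
static void wait_for_mr_users(struct qib_mregion *mr)
{
	qib_put_mr(mr);				/* drop the caller's reference      */
	wait_for_completion(&mr->comp);		/* fires from mr_rcu_callback()     */
}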