mirror of
https://github.com/FEX-Emu/linux.git
synced 2024-12-16 14:02:10 +00:00
IB/mthca: Fix potential AB-BA deadlock with CQ locks
When destroying a QP, mthca locks both the QP's send CQ and receive CQ. However, the following scenario is perfectly valid: QP_a: send_cq == CQ_x, recv_cq == CQ_y QP_b: send_cq == CQ_y, recv_cq == CQ_x The old mthca code simply locked send_cq and then recv_cq, which in this case could lead to an AB-BA deadlock if QP_a and QP_b were destroyed simultaneously. We can fix this by changing the locking code to lock the CQ with the lower CQ number first, which will create a consistent lock ordering. Also, the second CQ is locked with spin_lock_nested() to tell lockdep that we know what we're doing with the lock nesting. This bug was found by lockdep. Signed-off-by: Roland Dreier <rolandd@cisco.com>
This commit is contained in:
parent
e54b82d739
commit
a19aa5c5fd
@ -136,8 +136,8 @@ struct mthca_ah {
|
||||
* We have one global lock that protects dev->cq/qp_table. Each
|
||||
* struct mthca_cq/qp also has its own lock. An individual qp lock
|
||||
* may be taken inside of an individual cq lock. Both cqs attached to
|
||||
* a qp may be locked, with the send cq locked first. No other
|
||||
* nesting should be done.
|
||||
* a qp may be locked, with the cq with the lower cqn locked first.
|
||||
* No other nesting should be done.
|
||||
*
|
||||
* Each struct mthca_cq/qp also has an ref count, protected by the
|
||||
* corresponding table lock. The pointer from the cq/qp_table to the
|
||||
|
@ -1263,6 +1263,32 @@ int mthca_alloc_qp(struct mthca_dev *dev,
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void mthca_lock_cqs(struct mthca_cq *send_cq, struct mthca_cq *recv_cq)
|
||||
{
|
||||
if (send_cq == recv_cq)
|
||||
spin_lock_irq(&send_cq->lock);
|
||||
else if (send_cq->cqn < recv_cq->cqn) {
|
||||
spin_lock_irq(&send_cq->lock);
|
||||
spin_lock_nested(&recv_cq->lock, SINGLE_DEPTH_NESTING);
|
||||
} else {
|
||||
spin_lock_irq(&recv_cq->lock);
|
||||
spin_lock_nested(&send_cq->lock, SINGLE_DEPTH_NESTING);
|
||||
}
|
||||
}
|
||||
|
||||
static void mthca_unlock_cqs(struct mthca_cq *send_cq, struct mthca_cq *recv_cq)
|
||||
{
|
||||
if (send_cq == recv_cq)
|
||||
spin_unlock_irq(&send_cq->lock);
|
||||
else if (send_cq->cqn < recv_cq->cqn) {
|
||||
spin_unlock(&recv_cq->lock);
|
||||
spin_unlock_irq(&send_cq->lock);
|
||||
} else {
|
||||
spin_unlock(&send_cq->lock);
|
||||
spin_unlock_irq(&recv_cq->lock);
|
||||
}
|
||||
}
|
||||
|
||||
int mthca_alloc_sqp(struct mthca_dev *dev,
|
||||
struct mthca_pd *pd,
|
||||
struct mthca_cq *send_cq,
|
||||
@ -1315,17 +1341,13 @@ int mthca_alloc_sqp(struct mthca_dev *dev,
|
||||
* Lock CQs here, so that CQ polling code can do QP lookup
|
||||
* without taking a lock.
|
||||
*/
|
||||
spin_lock_irq(&send_cq->lock);
|
||||
if (send_cq != recv_cq)
|
||||
spin_lock(&recv_cq->lock);
|
||||
mthca_lock_cqs(send_cq, recv_cq);
|
||||
|
||||
spin_lock(&dev->qp_table.lock);
|
||||
mthca_array_clear(&dev->qp_table.qp, mqpn);
|
||||
spin_unlock(&dev->qp_table.lock);
|
||||
|
||||
if (send_cq != recv_cq)
|
||||
spin_unlock(&recv_cq->lock);
|
||||
spin_unlock_irq(&send_cq->lock);
|
||||
mthca_unlock_cqs(send_cq, recv_cq);
|
||||
|
||||
err_out:
|
||||
dma_free_coherent(&dev->pdev->dev, sqp->header_buf_size,
|
||||
@ -1359,9 +1381,7 @@ void mthca_free_qp(struct mthca_dev *dev,
|
||||
* Lock CQs here, so that CQ polling code can do QP lookup
|
||||
* without taking a lock.
|
||||
*/
|
||||
spin_lock_irq(&send_cq->lock);
|
||||
if (send_cq != recv_cq)
|
||||
spin_lock(&recv_cq->lock);
|
||||
mthca_lock_cqs(send_cq, recv_cq);
|
||||
|
||||
spin_lock(&dev->qp_table.lock);
|
||||
mthca_array_clear(&dev->qp_table.qp,
|
||||
@ -1369,9 +1389,7 @@ void mthca_free_qp(struct mthca_dev *dev,
|
||||
--qp->refcount;
|
||||
spin_unlock(&dev->qp_table.lock);
|
||||
|
||||
if (send_cq != recv_cq)
|
||||
spin_unlock(&recv_cq->lock);
|
||||
spin_unlock_irq(&send_cq->lock);
|
||||
mthca_unlock_cqs(send_cq, recv_cq);
|
||||
|
||||
wait_event(qp->wait, !get_qp_refcount(dev, qp));
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user