drbd: Ignore the exit code of a fence-peer handler if it returns too late

In case the connection was established and lost again before
a fence-peer handler returns, ignore the exit code of this
instance (and use the exit code of the instance started later).

Signed-off-by: Andreas Gruenbacher <agruen@linbit.com>
Signed-off-by: Philipp Reisner <philipp.reisner@linbit.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
This commit is contained in:
Philipp Reisner 2013-06-25 16:50:06 +02:00 committed by Jens Axboe
parent f9eb7bf424
commit 28e448bb30
3 changed files with 17 additions and 3 deletions

View File

@ -832,6 +832,7 @@ struct drbd_tconn { /* is a resource from the config file */
unsigned susp_nod:1; /* IO suspended because no data */ unsigned susp_nod:1; /* IO suspended because no data */
unsigned susp_fen:1; /* IO suspended because fence peer handler runs */ unsigned susp_fen:1; /* IO suspended because fence peer handler runs */
struct mutex cstate_mutex; /* Protects graceful disconnects */ struct mutex cstate_mutex; /* Protects graceful disconnects */
unsigned int connect_cnt; /* Inc each time a connection is established */
unsigned long flags; unsigned long flags;
struct net_conf *net_conf; /* content protected by rcu */ struct net_conf *net_conf; /* content protected by rcu */

View File

@ -417,6 +417,7 @@ static enum drbd_fencing_p highest_fencing_policy(struct drbd_tconn *tconn)
bool conn_try_outdate_peer(struct drbd_tconn *tconn) bool conn_try_outdate_peer(struct drbd_tconn *tconn)
{ {
unsigned int connect_cnt;
union drbd_state mask = { }; union drbd_state mask = { };
union drbd_state val = { }; union drbd_state val = { };
enum drbd_fencing_p fp; enum drbd_fencing_p fp;
@ -428,6 +429,10 @@ bool conn_try_outdate_peer(struct drbd_tconn *tconn)
return false; return false;
} }
spin_lock_irq(&tconn->req_lock);
connect_cnt = tconn->connect_cnt;
spin_unlock_irq(&tconn->req_lock);
fp = highest_fencing_policy(tconn); fp = highest_fencing_policy(tconn);
switch (fp) { switch (fp) {
case FP_NOT_AVAIL: case FP_NOT_AVAIL:
@ -492,8 +497,14 @@ bool conn_try_outdate_peer(struct drbd_tconn *tconn)
here, because we might were able to re-establish the connection in the here, because we might were able to re-establish the connection in the
meantime. */ meantime. */
spin_lock_irq(&tconn->req_lock); spin_lock_irq(&tconn->req_lock);
if (tconn->cstate < C_WF_REPORT_PARAMS && !test_bit(STATE_SENT, &tconn->flags)) if (tconn->cstate < C_WF_REPORT_PARAMS && !test_bit(STATE_SENT, &tconn->flags)) {
_conn_request_state(tconn, mask, val, CS_VERBOSE); if (tconn->connect_cnt != connect_cnt)
/* In case the connection was established and dropped
while the fence-peer handler was running, ignore it */
conn_info(tconn, "Ignoring fence-peer exit code\n");
else
_conn_request_state(tconn, mask, val, CS_VERBOSE);
}
spin_unlock_irq(&tconn->req_lock); spin_unlock_irq(&tconn->req_lock);
return conn_highest_pdsk(tconn) <= D_OUTDATED; return conn_highest_pdsk(tconn) <= D_OUTDATED;

View File

@ -1115,8 +1115,10 @@ __drbd_set_state(struct drbd_conf *mdev, union drbd_state ns,
drbd_thread_restart_nowait(&mdev->tconn->receiver); drbd_thread_restart_nowait(&mdev->tconn->receiver);
/* Resume AL writing if we get a connection */ /* Resume AL writing if we get a connection */
if (os.conn < C_CONNECTED && ns.conn >= C_CONNECTED) if (os.conn < C_CONNECTED && ns.conn >= C_CONNECTED) {
drbd_resume_al(mdev); drbd_resume_al(mdev);
mdev->tconn->connect_cnt++;
}
/* remember last attach time so request_timer_fn() won't /* remember last attach time so request_timer_fn() won't
* kill newly established sessions while we are still trying to thaw * kill newly established sessions while we are still trying to thaw