mirror of
https://github.com/FEX-Emu/linux.git
synced 2024-12-14 12:49:08 +00:00
[PATCH] OCFS2: The Second Oracle Cluster Filesystem
A distributed lock manager built with the cluster file system use case in mind. The OCFS2 dlm exposes a VMS style API, though things have been simplified internally. The only lock levels implemented currently are NLMODE, PRMODE and EXMODE. Signed-off-by: Mark Fasheh <mark.fasheh@oracle.com> Signed-off-by: Kurt Hackel <kurt.hackel@oracle.com>
This commit is contained in:
parent
98211489d4
commit
6714d8e86b
6
fs/ocfs2/dlm/Makefile
Normal file
6
fs/ocfs2/dlm/Makefile
Normal file
@ -0,0 +1,6 @@
|
||||
# Let the dlm sources include headers relative to fs/ocfs2
# (for example "cluster/heartbeat.h").
EXTRA_CFLAGS += -Ifs/ocfs2

# The dlm module is built whenever CONFIG_OCFS2_FS is set.
obj-$(CONFIG_OCFS2_FS) += ocfs2_dlm.o

# Objects that make up ocfs2_dlm.o
ocfs2_dlm-objs := \
	dlmdomain.o dlmdebug.o dlmthread.o dlmrecovery.o dlmmaster.o \
	dlmast.o dlmconvert.o dlmlock.o dlmunlock.o dlmver.o
|
214
fs/ocfs2/dlm/dlmapi.h
Normal file
214
fs/ocfs2/dlm/dlmapi.h
Normal file
@ -0,0 +1,214 @@
|
||||
/* -*- mode: c; c-basic-offset: 8; -*-
|
||||
* vim: noexpandtab sw=8 ts=8 sts=0:
|
||||
*
|
||||
* dlmapi.h
|
||||
*
|
||||
* externally exported dlm interfaces
|
||||
*
|
||||
* Copyright (C) 2004 Oracle. All rights reserved.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2 of the License, or (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public
|
||||
* License along with this program; if not, write to the
|
||||
* Free Software Foundation, Inc., 59 Temple Place - Suite 330,
|
||||
* Boston, MA 02111-1307, USA.
|
||||
*
|
||||
*/
|
||||
|
||||
#ifndef DLMAPI_H
|
||||
#define DLMAPI_H
|
||||
|
||||
struct dlm_lock;
|
||||
struct dlm_ctxt;
|
||||
|
||||
/*
 * Status codes returned by the dlm.
 *
 * NOTE: changes made to this enum should be reflected in dlmdebug.c.
 * The numeric values are spelled out explicitly so that they cannot
 * shift by accident when an entry is inserted.
 */
enum dlm_status {
	DLM_NORMAL               = 0,	/* request in progress */
	DLM_GRANTED              = 1,	/* request granted */
	DLM_DENIED               = 2,	/* request denied */
	DLM_DENIED_NOLOCKS       = 3,	/* request denied, out of system resources */
	DLM_WORKING              = 4,	/* async request in progress */
	DLM_BLOCKED              = 5,	/* lock request blocked */
	DLM_BLOCKED_ORPHAN       = 6,	/* lock request blocked by an orphan lock */
	DLM_DENIED_GRACE_PERIOD  = 7,	/* topological change in progress */
	DLM_SYSERR               = 8,	/* system error */
	DLM_NOSUPPORT            = 9,	/* unsupported */
	DLM_CANCELGRANT          = 10,	/* can't cancel convert: already granted */
	DLM_IVLOCKID             = 11,	/* bad lockid */
	DLM_SYNC                 = 12,	/* synchronous request granted */
	DLM_BADTYPE              = 13,	/* bad resource type */
	DLM_BADRESOURCE          = 14,	/* bad resource handle */
	DLM_MAXHANDLES           = 15,	/* no more resource handles */
	DLM_NOCLINFO             = 16,	/* can't contact cluster manager */
	DLM_NOLOCKMGR            = 17,	/* can't contact lock manager */
	DLM_NOPURGED             = 18,	/* can't contact purge daemon */
	DLM_BADARGS              = 19,	/* bad api args */
	DLM_VOID                 = 20,	/* no status */
	DLM_NOTQUEUED            = 21,	/* NOQUEUE was specified and request failed */
	DLM_IVBUFLEN             = 22,	/* invalid resource name length */
	DLM_CVTUNGRANT           = 23,	/* attempted to convert ungranted lock */
	DLM_BADPARAM             = 24,	/* invalid lock mode specified */
	DLM_VALNOTVALID          = 25,	/* value block has been invalidated */
	DLM_REJECTED             = 26,	/* request rejected, unrecognized client */
	DLM_ABORT                = 27,	/* blocked lock request cancelled */
	DLM_CANCEL               = 28,	/* conversion request cancelled */
	DLM_IVRESHANDLE          = 29,	/* invalid resource handle */
	DLM_DEADLOCK             = 30,	/* deadlock recovery refused this request */
	DLM_DENIED_NOASTS        = 31,	/* failed to allocate AST */
	DLM_FORWARD              = 32,	/* request must wait for primary's response */
	DLM_TIMEOUT              = 33,	/* timeout value for lock has expired */
	DLM_IVGROUPID            = 34,	/* invalid group specification */
	DLM_VERS_CONFLICT        = 35,	/* version conflicts prevent request handling */
	DLM_BAD_DEVICE_PATH      = 36,	/* locks device does not exist or path wrong */
	DLM_NO_DEVICE_PERMISSION = 37,	/* client has insufficient perms for device */
	DLM_NO_CONTROL_DEVICE    = 38,	/* cannot set options on opened device */

	/* extensions: allow the caller to fail a lock request while the
	 * lock is being recovered or migrated */
	DLM_RECOVERING           = 39,
	DLM_MIGRATING            = 40,

	DLM_MAXSTATS             = 41,	/* upper limit for return code validation */
};
|
||||
|
||||
/* Helpers for pretty-printing a dlm_status value: dlm_errname() yields
 * the error name, dlm_errmsg() the error message. */
const char *dlm_errname(enum dlm_status err);
const char *dlm_errmsg(enum dlm_status err);
|
||||
|
||||
/* Eventually the DLM will use standard errno values, but in the
 * meantime this lets us track dlm errors as they bubble up.  When we
 * bring its error reporting into line with the rest of the stack,
 * these can just be replaced with calls to mlog_errno.
 *
 * DLM_RECOVERING, DLM_MIGRATING and DLM_FORWARD are not logged.
 * The argument is evaluated exactly once, so passing an expression
 * with side effects (a function call, for instance) is safe. */
#define dlm_error(st) do {						\
	enum dlm_status dlm_error_st_ = (st);				\
	if (dlm_error_st_ != DLM_RECOVERING &&				\
	    dlm_error_st_ != DLM_MIGRATING &&				\
	    dlm_error_st_ != DLM_FORWARD)				\
		mlog(ML_ERROR, "dlm status = %s\n",			\
		     dlm_errname(dlm_error_st_));			\
} while (0)
|
||||
|
||||
/* Bits kept in dlm_lockstatus.flags.  Only the two LVB bits are named
 * for a purpose; the remaining bits are placeholders. */
#define DLM_LKSB_PUT_LVB	0x02
#define DLM_LKSB_GET_LVB	0x04

#define DLM_LKSB_UNUSED1	0x01
#define DLM_LKSB_UNUSED2	0x08
#define DLM_LKSB_UNUSED3	0x10
#define DLM_LKSB_UNUSED4	0x20
#define DLM_LKSB_UNUSED5	0x40
#define DLM_LKSB_UNUSED6	0x80

/* size in bytes of a lock value block */
#define DLM_LVB_LEN		64
|
||||
|
||||
/* Callers are only allowed access to the lvb and status members of
|
||||
* this struct. */
|
||||
struct dlm_lockstatus {
|
||||
enum dlm_status status;
|
||||
u32 flags;
|
||||
struct dlm_lock *lockid;
|
||||
char lvb[DLM_LVB_LEN];
|
||||
};
|
||||
|
||||
/* Valid lock modes.  Modes marked "unsupported" are defined for
 * numbering purposes only. */
#define LKM_IVMODE	(-1)		/* invalid mode */
#define LKM_NLMODE	0		/* null lock */
#define LKM_CRMODE	1		/* concurrent read (unsupported) */
#define LKM_CWMODE	2		/* concurrent write (unsupported) */
#define LKM_PRMODE	3		/* protected read */
#define LKM_PWMODE	4		/* protected write (unsupported) */
#define LKM_EXMODE	5		/* exclusive */
#define LKM_MAXMODE	5
#define LKM_MODEMASK	0xff
|
||||
|
||||
/* Flags passed to dlmlock and dlmunlock, listed in bit order.
 * Most are reserved because the "real" dlm uses them; only a few are
 * supported by this dlm.
 *     (U) = unsupported by ocfs2 dlm */
#define LKM_UNUSED1      0x00000001  /* unused */
#define LKM_UNUSED2      0x00000002  /* unused */
#define LKM_UNUSED3      0x00000004  /* unused */
#define LKM_UNUSED4      0x00000008  /* unused */
#define LKM_ORPHAN       0x00000010  /* this lock is orphanable (U) */
#define LKM_PARENTABLE   0x00000020  /* this lock was orphaned (U) */
#define LKM_BLOCK        0x00000040  /* blocking lock request (U) */
#define LKM_LOCAL        0x00000080  /* local lock request */
#define LKM_VALBLK       0x00000100  /* lock value block request */
#define LKM_NOQUEUE      0x00000200  /* non blocking request */
#define LKM_CONVERT      0x00000400  /* conversion request */
#define LKM_NODLCKWT     0x00000800  /* this lock wont deadlock (U) */
#define LKM_UNLOCK       0x00001000  /* deallocate this lock */
#define LKM_CANCEL       0x00002000  /* cancel conversion request */
#define LKM_DEQALL       0x00004000  /* remove all locks held by proc (U) */
#define LKM_INVVALBLK    0x00008000  /* invalidate lock value block */
#define LKM_SYNCSTS      0x00010000  /* return synchronous status if poss (U) */
#define LKM_TIMEOUT      0x00020000  /* lock request contains timeout (U) */
#define LKM_SNGLDLCK     0x00040000  /* request can self-deadlock (U) */
#define LKM_FINDLOCAL    0x00080000  /* find local lock request (U) */
#define LKM_PROC_OWNED   0x00100000  /* owned by process, not group (U) */
#define LKM_XID          0x00200000  /* use transaction id for deadlock (U) */
#define LKM_XID_CONFLICT 0x00400000  /* do not allow lock inheritance (U) */
#define LKM_FORCE        0x00800000  /* force unlock flag */
#define LKM_REVVALBLK    0x01000000  /* temporary solution: re-validate
					lock value block (U) */
#define LKM_UNUSED5      0x02000000  /* unused */
#define LKM_UNUSED6      0x04000000  /* unused */
#define LKM_UNUSED7      0x08000000  /* unused */

/* ocfs2 extensions: internal only,
 * should never be used by caller */
#define LKM_MIGRATION    0x10000000  /* extension: lockres is to be migrated
					to another node */
#define LKM_PUT_LVB      0x20000000  /* extension: lvb is being passed and
					should be applied to lockres */
#define LKM_GET_LVB      0x40000000  /* extension: lvb should be copied
					from lockres when lock is granted */
#define LKM_RECOVERY     0x80000000  /* extension: flag for recovery lock,
					used to avoid recovery rwsem */
|
||||
|
||||
|
||||
typedef void (dlm_astlockfunc_t)(void *);
|
||||
typedef void (dlm_bastlockfunc_t)(void *, int);
|
||||
typedef void (dlm_astunlockfunc_t)(void *, enum dlm_status);
|
||||
|
||||
enum dlm_status dlmlock(struct dlm_ctxt *dlm,
|
||||
int mode,
|
||||
struct dlm_lockstatus *lksb,
|
||||
int flags,
|
||||
const char *name,
|
||||
dlm_astlockfunc_t *ast,
|
||||
void *data,
|
||||
dlm_bastlockfunc_t *bast);
|
||||
|
||||
enum dlm_status dlmunlock(struct dlm_ctxt *dlm,
|
||||
struct dlm_lockstatus *lksb,
|
||||
int flags,
|
||||
dlm_astunlockfunc_t *unlockast,
|
||||
void *data);
|
||||
|
||||
struct dlm_ctxt * dlm_register_domain(const char *domain, u32 key);
|
||||
|
||||
void dlm_unregister_domain(struct dlm_ctxt *dlm);
|
||||
|
||||
void dlm_print_one_lock(struct dlm_lock *lockid);
|
||||
|
||||
typedef void (dlm_eviction_func)(int, void *);
|
||||
struct dlm_eviction_cb {
|
||||
struct list_head ec_item;
|
||||
dlm_eviction_func *ec_func;
|
||||
void *ec_data;
|
||||
};
|
||||
void dlm_setup_eviction_cb(struct dlm_eviction_cb *cb,
|
||||
dlm_eviction_func *f,
|
||||
void *data);
|
||||
void dlm_register_eviction_cb(struct dlm_ctxt *dlm,
|
||||
struct dlm_eviction_cb *cb);
|
||||
void dlm_unregister_eviction_cb(struct dlm_eviction_cb *cb);
|
||||
|
||||
#endif /* DLMAPI_H */
|
466
fs/ocfs2/dlm/dlmast.c
Normal file
466
fs/ocfs2/dlm/dlmast.c
Normal file
@ -0,0 +1,466 @@
|
||||
/* -*- mode: c; c-basic-offset: 8; -*-
|
||||
* vim: noexpandtab sw=8 ts=8 sts=0:
|
||||
*
|
||||
* dlmast.c
|
||||
*
|
||||
* AST and BAST functionality for local and remote nodes
|
||||
*
|
||||
* Copyright (C) 2004 Oracle. All rights reserved.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2 of the License, or (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public
|
||||
* License along with this program; if not, write to the
|
||||
* Free Software Foundation, Inc., 59 Temple Place - Suite 330,
|
||||
* Boston, MA 02111-1307, USA.
|
||||
*
|
||||
*/
|
||||
|
||||
|
||||
#include <linux/module.h>
|
||||
#include <linux/fs.h>
|
||||
#include <linux/types.h>
|
||||
#include <linux/slab.h>
|
||||
#include <linux/highmem.h>
|
||||
#include <linux/utsname.h>
|
||||
#include <linux/init.h>
|
||||
#include <linux/sysctl.h>
|
||||
#include <linux/random.h>
|
||||
#include <linux/blkdev.h>
|
||||
#include <linux/socket.h>
|
||||
#include <linux/inet.h>
|
||||
#include <linux/spinlock.h>
|
||||
|
||||
|
||||
#include "cluster/heartbeat.h"
|
||||
#include "cluster/nodemanager.h"
|
||||
#include "cluster/tcp.h"
|
||||
#include "cluster/endian.h"
|
||||
|
||||
#include "dlmapi.h"
|
||||
#include "dlmcommon.h"
|
||||
|
||||
#define MLOG_MASK_PREFIX ML_DLM
|
||||
#include "cluster/masklog.h"
|
||||
|
||||
/* forward declarations for the file-local helpers defined below */
static int dlm_should_cancel_bast(struct dlm_ctxt *dlm, struct dlm_lock *lock);
static void dlm_update_lvb(struct dlm_ctxt *dlm,
			   struct dlm_lock_resource *res,
			   struct dlm_lock *lock);
|
||||
|
||||
/* Should be called as an ast gets queued to see if the new
|
||||
* lock level will obsolete a pending bast.
|
||||
* For example, if dlm_thread queued a bast for an EX lock that
|
||||
* was blocking another EX, but before sending the bast the
|
||||
* lock owner downconverted to NL, the bast is now obsolete.
|
||||
* Only the ast should be sent.
|
||||
* This is needed because the lock and convert paths can queue
|
||||
* asts out-of-band (not waiting for dlm_thread) in order to
|
||||
* allow for LKM_NOQUEUE to get immediate responses. */
|
||||
static int dlm_should_cancel_bast(struct dlm_ctxt *dlm, struct dlm_lock *lock)
|
||||
{
|
||||
assert_spin_locked(&dlm->ast_lock);
|
||||
assert_spin_locked(&lock->spinlock);
|
||||
|
||||
if (lock->ml.highest_blocked == LKM_IVMODE)
|
||||
return 0;
|
||||
BUG_ON(lock->ml.highest_blocked == LKM_NLMODE);
|
||||
|
||||
if (lock->bast_pending &&
|
||||
list_empty(&lock->bast_list))
|
||||
/* old bast already sent, ok */
|
||||
return 0;
|
||||
|
||||
if (lock->ml.type == LKM_EXMODE)
|
||||
/* EX blocks anything left, any bast still valid */
|
||||
return 0;
|
||||
else if (lock->ml.type == LKM_NLMODE)
|
||||
/* NL blocks nothing, no reason to send any bast, cancel it */
|
||||
return 1;
|
||||
else if (lock->ml.highest_blocked != LKM_EXMODE)
|
||||
/* PR only blocks EX */
|
||||
return 1;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void __dlm_queue_ast(struct dlm_ctxt *dlm, struct dlm_lock *lock)
|
||||
{
|
||||
mlog_entry_void();
|
||||
|
||||
BUG_ON(!dlm);
|
||||
BUG_ON(!lock);
|
||||
|
||||
assert_spin_locked(&dlm->ast_lock);
|
||||
if (!list_empty(&lock->ast_list)) {
|
||||
mlog(ML_ERROR, "ast list not empty!! pending=%d, newlevel=%d\n",
|
||||
lock->ast_pending, lock->ml.type);
|
||||
BUG();
|
||||
}
|
||||
BUG_ON(!list_empty(&lock->ast_list));
|
||||
if (lock->ast_pending)
|
||||
mlog(0, "lock has an ast getting flushed right now\n");
|
||||
|
||||
/* putting lock on list, add a ref */
|
||||
dlm_lock_get(lock);
|
||||
spin_lock(&lock->spinlock);
|
||||
|
||||
/* check to see if this ast obsoletes the bast */
|
||||
if (dlm_should_cancel_bast(dlm, lock)) {
|
||||
struct dlm_lock_resource *res = lock->lockres;
|
||||
mlog(0, "%s: cancelling bast for %.*s\n",
|
||||
dlm->name, res->lockname.len, res->lockname.name);
|
||||
lock->bast_pending = 0;
|
||||
list_del_init(&lock->bast_list);
|
||||
lock->ml.highest_blocked = LKM_IVMODE;
|
||||
/* removing lock from list, remove a ref. guaranteed
|
||||
* this won't be the last ref because of the get above,
|
||||
* so res->spinlock will not be taken here */
|
||||
dlm_lock_put(lock);
|
||||
/* free up the reserved bast that we are cancelling.
|
||||
* guaranteed that this will not be the last reserved
|
||||
* ast because *both* an ast and a bast were reserved
|
||||
* to get to this point. the res->spinlock will not be
|
||||
* taken here */
|
||||
dlm_lockres_release_ast(dlm, res);
|
||||
}
|
||||
list_add_tail(&lock->ast_list, &dlm->pending_asts);
|
||||
lock->ast_pending = 1;
|
||||
spin_unlock(&lock->spinlock);
|
||||
}
|
||||
|
||||
void dlm_queue_ast(struct dlm_ctxt *dlm, struct dlm_lock *lock)
|
||||
{
|
||||
mlog_entry_void();
|
||||
|
||||
BUG_ON(!dlm);
|
||||
BUG_ON(!lock);
|
||||
|
||||
spin_lock(&dlm->ast_lock);
|
||||
__dlm_queue_ast(dlm, lock);
|
||||
spin_unlock(&dlm->ast_lock);
|
||||
}
|
||||
|
||||
|
||||
static void __dlm_queue_bast(struct dlm_ctxt *dlm, struct dlm_lock *lock)
|
||||
{
|
||||
mlog_entry_void();
|
||||
|
||||
BUG_ON(!dlm);
|
||||
BUG_ON(!lock);
|
||||
assert_spin_locked(&dlm->ast_lock);
|
||||
|
||||
BUG_ON(!list_empty(&lock->bast_list));
|
||||
if (lock->bast_pending)
|
||||
mlog(0, "lock has a bast getting flushed right now\n");
|
||||
|
||||
/* putting lock on list, add a ref */
|
||||
dlm_lock_get(lock);
|
||||
spin_lock(&lock->spinlock);
|
||||
list_add_tail(&lock->bast_list, &dlm->pending_basts);
|
||||
lock->bast_pending = 1;
|
||||
spin_unlock(&lock->spinlock);
|
||||
}
|
||||
|
||||
void dlm_queue_bast(struct dlm_ctxt *dlm, struct dlm_lock *lock)
|
||||
{
|
||||
mlog_entry_void();
|
||||
|
||||
BUG_ON(!dlm);
|
||||
BUG_ON(!lock);
|
||||
|
||||
spin_lock(&dlm->ast_lock);
|
||||
__dlm_queue_bast(dlm, lock);
|
||||
spin_unlock(&dlm->ast_lock);
|
||||
}
|
||||
|
||||
static void dlm_update_lvb(struct dlm_ctxt *dlm, struct dlm_lock_resource *res,
|
||||
struct dlm_lock *lock)
|
||||
{
|
||||
struct dlm_lockstatus *lksb = lock->lksb;
|
||||
BUG_ON(!lksb);
|
||||
|
||||
/* only updates if this node masters the lockres */
|
||||
if (res->owner == dlm->node_num) {
|
||||
|
||||
spin_lock(&res->spinlock);
|
||||
/* check the lksb flags for the direction */
|
||||
if (lksb->flags & DLM_LKSB_GET_LVB) {
|
||||
mlog(0, "getting lvb from lockres for %s node\n",
|
||||
lock->ml.node == dlm->node_num ? "master" :
|
||||
"remote");
|
||||
memcpy(lksb->lvb, res->lvb, DLM_LVB_LEN);
|
||||
} else if (lksb->flags & DLM_LKSB_PUT_LVB) {
|
||||
mlog(0, "setting lvb from lockres for %s node\n",
|
||||
lock->ml.node == dlm->node_num ? "master" :
|
||||
"remote");
|
||||
memcpy(res->lvb, lksb->lvb, DLM_LVB_LEN);
|
||||
}
|
||||
spin_unlock(&res->spinlock);
|
||||
}
|
||||
|
||||
/* reset any lvb flags on the lksb */
|
||||
lksb->flags &= ~(DLM_LKSB_PUT_LVB|DLM_LKSB_GET_LVB);
|
||||
}
|
||||
|
||||
void dlm_do_local_ast(struct dlm_ctxt *dlm, struct dlm_lock_resource *res,
|
||||
struct dlm_lock *lock)
|
||||
{
|
||||
dlm_astlockfunc_t *fn;
|
||||
struct dlm_lockstatus *lksb;
|
||||
|
||||
mlog_entry_void();
|
||||
|
||||
lksb = lock->lksb;
|
||||
fn = lock->ast;
|
||||
BUG_ON(lock->ml.node != dlm->node_num);
|
||||
|
||||
dlm_update_lvb(dlm, res, lock);
|
||||
(*fn)(lock->astdata);
|
||||
}
|
||||
|
||||
|
||||
int dlm_do_remote_ast(struct dlm_ctxt *dlm, struct dlm_lock_resource *res,
|
||||
struct dlm_lock *lock)
|
||||
{
|
||||
int ret;
|
||||
struct dlm_lockstatus *lksb;
|
||||
int lksbflags;
|
||||
|
||||
mlog_entry_void();
|
||||
|
||||
lksb = lock->lksb;
|
||||
BUG_ON(lock->ml.node == dlm->node_num);
|
||||
|
||||
lksbflags = lksb->flags;
|
||||
dlm_update_lvb(dlm, res, lock);
|
||||
|
||||
/* lock request came from another node
|
||||
* go do the ast over there */
|
||||
ret = dlm_send_proxy_ast(dlm, res, lock, lksbflags);
|
||||
return ret;
|
||||
}
|
||||
|
||||
void dlm_do_local_bast(struct dlm_ctxt *dlm, struct dlm_lock_resource *res,
|
||||
struct dlm_lock *lock, int blocked_type)
|
||||
{
|
||||
dlm_bastlockfunc_t *fn = lock->bast;
|
||||
|
||||
mlog_entry_void();
|
||||
BUG_ON(lock->ml.node != dlm->node_num);
|
||||
|
||||
(*fn)(lock->astdata, blocked_type);
|
||||
}
|
||||
|
||||
|
||||
|
||||
int dlm_proxy_ast_handler(struct o2net_msg *msg, u32 len, void *data)
|
||||
{
|
||||
int ret;
|
||||
unsigned int locklen;
|
||||
struct dlm_ctxt *dlm = data;
|
||||
struct dlm_lock_resource *res = NULL;
|
||||
struct dlm_lock *lock = NULL;
|
||||
struct dlm_proxy_ast *past = (struct dlm_proxy_ast *) msg->buf;
|
||||
char *name;
|
||||
struct list_head *iter, *head=NULL;
|
||||
u64 cookie;
|
||||
u32 flags;
|
||||
|
||||
if (!dlm_grab(dlm)) {
|
||||
dlm_error(DLM_REJECTED);
|
||||
return DLM_REJECTED;
|
||||
}
|
||||
|
||||
mlog_bug_on_msg(!dlm_domain_fully_joined(dlm),
|
||||
"Domain %s not fully joined!\n", dlm->name);
|
||||
|
||||
name = past->name;
|
||||
locklen = past->namelen;
|
||||
cookie = be64_to_cpu(past->cookie);
|
||||
flags = be32_to_cpu(past->flags);
|
||||
|
||||
if (locklen > DLM_LOCKID_NAME_MAX) {
|
||||
ret = DLM_IVBUFLEN;
|
||||
mlog(ML_ERROR, "Invalid name length in proxy ast handler!\n");
|
||||
goto leave;
|
||||
}
|
||||
|
||||
if ((flags & (LKM_PUT_LVB|LKM_GET_LVB)) ==
|
||||
(LKM_PUT_LVB|LKM_GET_LVB)) {
|
||||
mlog(ML_ERROR, "both PUT and GET lvb specified\n");
|
||||
ret = DLM_BADARGS;
|
||||
goto leave;
|
||||
}
|
||||
|
||||
mlog(0, "lvb: %s\n", flags & LKM_PUT_LVB ? "put lvb" :
|
||||
(flags & LKM_GET_LVB ? "get lvb" : "none"));
|
||||
|
||||
mlog(0, "type=%d, blocked_type=%d\n", past->type, past->blocked_type);
|
||||
|
||||
if (past->type != DLM_AST &&
|
||||
past->type != DLM_BAST) {
|
||||
mlog(ML_ERROR, "Unknown ast type! %d, cookie=%"MLFu64", "
|
||||
"name=%.*s\n", past->type, cookie, locklen, name);
|
||||
ret = DLM_IVLOCKID;
|
||||
goto leave;
|
||||
}
|
||||
|
||||
res = dlm_lookup_lockres(dlm, name, locklen);
|
||||
if (!res) {
|
||||
mlog(ML_ERROR, "got %sast for unknown lockres! "
|
||||
"cookie=%"MLFu64", name=%.*s, namelen=%u\n",
|
||||
past->type == DLM_AST ? "" : "b",
|
||||
cookie, locklen, name, locklen);
|
||||
ret = DLM_IVLOCKID;
|
||||
goto leave;
|
||||
}
|
||||
|
||||
/* cannot get a proxy ast message if this node owns it */
|
||||
BUG_ON(res->owner == dlm->node_num);
|
||||
|
||||
mlog(0, "lockres %.*s\n", res->lockname.len, res->lockname.name);
|
||||
|
||||
spin_lock(&res->spinlock);
|
||||
if (res->state & DLM_LOCK_RES_RECOVERING) {
|
||||
mlog(0, "responding with DLM_RECOVERING!\n");
|
||||
ret = DLM_RECOVERING;
|
||||
goto unlock_out;
|
||||
}
|
||||
if (res->state & DLM_LOCK_RES_MIGRATING) {
|
||||
mlog(0, "responding with DLM_MIGRATING!\n");
|
||||
ret = DLM_MIGRATING;
|
||||
goto unlock_out;
|
||||
}
|
||||
/* try convert queue for both ast/bast */
|
||||
head = &res->converting;
|
||||
lock = NULL;
|
||||
list_for_each(iter, head) {
|
||||
lock = list_entry (iter, struct dlm_lock, list);
|
||||
if (be64_to_cpu(lock->ml.cookie) == cookie)
|
||||
goto do_ast;
|
||||
}
|
||||
|
||||
/* if not on convert, try blocked for ast, granted for bast */
|
||||
if (past->type == DLM_AST)
|
||||
head = &res->blocked;
|
||||
else
|
||||
head = &res->granted;
|
||||
|
||||
list_for_each(iter, head) {
|
||||
lock = list_entry (iter, struct dlm_lock, list);
|
||||
if (be64_to_cpu(lock->ml.cookie) == cookie)
|
||||
goto do_ast;
|
||||
}
|
||||
|
||||
mlog(ML_ERROR, "got %sast for unknown lock! cookie=%"MLFu64", "
|
||||
"name=%.*s, namelen=%u\n",
|
||||
past->type == DLM_AST ? "" : "b", cookie, locklen, name, locklen);
|
||||
|
||||
ret = DLM_NORMAL;
|
||||
unlock_out:
|
||||
spin_unlock(&res->spinlock);
|
||||
goto leave;
|
||||
|
||||
do_ast:
|
||||
ret = DLM_NORMAL;
|
||||
if (past->type == DLM_AST) {
|
||||
/* do not alter lock refcount. switching lists. */
|
||||
list_del_init(&lock->list);
|
||||
list_add_tail(&lock->list, &res->granted);
|
||||
mlog(0, "ast: adding to granted list... type=%d, "
|
||||
"convert_type=%d\n", lock->ml.type, lock->ml.convert_type);
|
||||
if (lock->ml.convert_type != LKM_IVMODE) {
|
||||
lock->ml.type = lock->ml.convert_type;
|
||||
lock->ml.convert_type = LKM_IVMODE;
|
||||
} else {
|
||||
// should already be there....
|
||||
}
|
||||
|
||||
lock->lksb->status = DLM_NORMAL;
|
||||
|
||||
/* if we requested the lvb, fetch it into our lksb now */
|
||||
if (flags & LKM_GET_LVB) {
|
||||
BUG_ON(!(lock->lksb->flags & DLM_LKSB_GET_LVB));
|
||||
memcpy(lock->lksb->lvb, past->lvb, DLM_LVB_LEN);
|
||||
}
|
||||
}
|
||||
spin_unlock(&res->spinlock);
|
||||
|
||||
if (past->type == DLM_AST)
|
||||
dlm_do_local_ast(dlm, res, lock);
|
||||
else
|
||||
dlm_do_local_bast(dlm, res, lock, past->blocked_type);
|
||||
|
||||
leave:
|
||||
|
||||
if (res)
|
||||
dlm_lockres_put(res);
|
||||
|
||||
dlm_put(dlm);
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
||||
|
||||
int dlm_send_proxy_ast_msg(struct dlm_ctxt *dlm, struct dlm_lock_resource *res,
|
||||
struct dlm_lock *lock, int msg_type,
|
||||
int blocked_type, int flags)
|
||||
{
|
||||
int ret = 0;
|
||||
struct dlm_proxy_ast past;
|
||||
struct kvec vec[2];
|
||||
size_t veclen = 1;
|
||||
int status;
|
||||
|
||||
mlog_entry("res %.*s, to=%u, type=%d, blocked_type=%d\n",
|
||||
res->lockname.len, res->lockname.name, lock->ml.node,
|
||||
msg_type, blocked_type);
|
||||
|
||||
memset(&past, 0, sizeof(struct dlm_proxy_ast));
|
||||
past.node_idx = dlm->node_num;
|
||||
past.type = msg_type;
|
||||
past.blocked_type = blocked_type;
|
||||
past.namelen = res->lockname.len;
|
||||
memcpy(past.name, res->lockname.name, past.namelen);
|
||||
past.cookie = lock->ml.cookie;
|
||||
|
||||
vec[0].iov_len = sizeof(struct dlm_proxy_ast);
|
||||
vec[0].iov_base = &past;
|
||||
if (flags & DLM_LKSB_GET_LVB) {
|
||||
mlog(0, "returning requested LVB data\n");
|
||||
be32_add_cpu(&past.flags, LKM_GET_LVB);
|
||||
vec[1].iov_len = DLM_LVB_LEN;
|
||||
vec[1].iov_base = lock->lksb->lvb;
|
||||
veclen++;
|
||||
}
|
||||
|
||||
ret = o2net_send_message_vec(DLM_PROXY_AST_MSG, dlm->key, vec, veclen,
|
||||
lock->ml.node, &status);
|
||||
if (ret < 0)
|
||||
mlog_errno(ret);
|
||||
else {
|
||||
if (status == DLM_RECOVERING) {
|
||||
mlog(ML_ERROR, "sent AST to node %u, it thinks this "
|
||||
"node is dead!\n", lock->ml.node);
|
||||
BUG();
|
||||
} else if (status == DLM_MIGRATING) {
|
||||
mlog(ML_ERROR, "sent AST to node %u, it returned "
|
||||
"DLM_MIGRATING!\n", lock->ml.node);
|
||||
BUG();
|
||||
} else if (status != DLM_NORMAL) {
|
||||
mlog(ML_ERROR, "AST to node %u returned %d!\n",
|
||||
lock->ml.node, status);
|
||||
/* ignore it */
|
||||
}
|
||||
ret = 0;
|
||||
}
|
||||
return ret;
|
||||
}
|
884
fs/ocfs2/dlm/dlmcommon.h
Normal file
884
fs/ocfs2/dlm/dlmcommon.h
Normal file
@ -0,0 +1,884 @@
|
||||
/* -*- mode: c; c-basic-offset: 8; -*-
|
||||
* vim: noexpandtab sw=8 ts=8 sts=0:
|
||||
*
|
||||
* dlmcommon.h
|
||||
*
|
||||
* Copyright (C) 2004 Oracle. All rights reserved.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2 of the License, or (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public
|
||||
* License along with this program; if not, write to the
|
||||
* Free Software Foundation, Inc., 59 Temple Place - Suite 330,
|
||||
* Boston, MA 02111-1307, USA.
|
||||
*
|
||||
*/
|
||||
|
||||
#ifndef DLMCOMMON_H
|
||||
#define DLMCOMMON_H
|
||||
|
||||
#include <linux/kref.h>
|
||||
|
||||
/* heartbeat callback priorities */
#define DLM_HB_NODE_DOWN_PRI		(0xf000000)
#define DLM_HB_NODE_UP_PRI		(0x8000000)

/* name length limits */
#define DLM_LOCKID_NAME_MAX		32
#define DLM_DOMAIN_NAME_MAX_LEN		255

/* owner value meaning "owner not known" */
#define DLM_LOCK_RES_OWNER_UNKNOWN	O2NM_MAX_NODES

/* dlm thread pacing */
#define DLM_THREAD_SHUFFLE_INTERVAL	5   /* flush everything every 5 passes */
#define DLM_THREAD_MS			200 /* flush at least every 200 ms */

/* hash sizing: size and mask are derived from the bit count */
#define DLM_HASH_BITS			7
#define DLM_HASH_SIZE			(1 << DLM_HASH_BITS)
#define DLM_HASH_MASK			(DLM_HASH_SIZE - 1)
|
||||
|
||||
/* Kind of asynchronous notification.  dlmast.c carries DLM_AST and
 * DLM_BAST in the type field of proxy ast messages. */
enum dlm_ast_type {
	DLM_AST       = 0,
	DLM_BAST      = 1,
	DLM_ASTUNLOCK = 2
};
|
||||
|
||||
|
||||
/* union of the LKM_* flags treated as valid */
#define LKM_VALID_FLAGS (LKM_LOCAL | LKM_VALBLK | LKM_NOQUEUE |	\
			 LKM_CONVERT | LKM_UNLOCK | LKM_CANCEL |	\
			 LKM_INVVALBLK | LKM_FORCE | LKM_RECOVERY)
|
||||
|
||||
/* name of the special recovery lock and its length (no terminator) */
#define DLM_RECOVERY_LOCK_NAME       "$RECOVERY"
#define DLM_RECOVERY_LOCK_NAME_LEN   9

/* Returns 1 if the name_len bytes at lock_name are exactly the
 * recovery lock name, 0 otherwise.  lock_name does not need to be
 * NUL-terminated; nothing is read unless the length matches. */
static inline int dlm_is_recovery_lock(const char *lock_name, int name_len)
{
	return name_len == DLM_RECOVERY_LOCK_NAME_LEN &&
	       memcmp(lock_name, DLM_RECOVERY_LOCK_NAME, name_len) == 0;
}
|
||||
|
||||
#define DLM_RECO_STATE_ACTIVE 0x0001
|
||||
|
||||
struct dlm_recovery_ctxt
|
||||
{
|
||||
struct list_head resources;
|
||||
struct list_head received;
|
||||
struct list_head node_data;
|
||||
u8 new_master;
|
||||
u8 dead_node;
|
||||
u16 state;
|
||||
unsigned long node_map[BITS_TO_LONGS(O2NM_MAX_NODES)];
|
||||
wait_queue_head_t event;
|
||||
};
|
||||
|
||||
/* Values for dlm_ctxt.dlm_state. */
enum dlm_ctxt_state {
	DLM_CTXT_NEW         = 0,
	DLM_CTXT_JOINED      = 1,
	DLM_CTXT_IN_SHUTDOWN = 2,
	DLM_CTXT_LEAVING     = 3,
};
|
||||
|
||||
struct dlm_ctxt
|
||||
{
|
||||
struct list_head list;
|
||||
struct list_head *resources;
|
||||
struct list_head dirty_list;
|
||||
struct list_head purge_list;
|
||||
struct list_head pending_asts;
|
||||
struct list_head pending_basts;
|
||||
unsigned int purge_count;
|
||||
spinlock_t spinlock;
|
||||
spinlock_t ast_lock;
|
||||
char *name;
|
||||
u8 node_num;
|
||||
u32 key;
|
||||
u8 joining_node;
|
||||
wait_queue_head_t dlm_join_events;
|
||||
unsigned long live_nodes_map[BITS_TO_LONGS(O2NM_MAX_NODES)];
|
||||
unsigned long domain_map[BITS_TO_LONGS(O2NM_MAX_NODES)];
|
||||
unsigned long recovery_map[BITS_TO_LONGS(O2NM_MAX_NODES)];
|
||||
struct dlm_recovery_ctxt reco;
|
||||
spinlock_t master_lock;
|
||||
struct list_head master_list;
|
||||
struct list_head mle_hb_events;
|
||||
|
||||
/* these give a really vague idea of the system load */
|
||||
atomic_t local_resources;
|
||||
atomic_t remote_resources;
|
||||
atomic_t unknown_resources;
|
||||
|
||||
/* NOTE: Next three are protected by dlm_domain_lock */
|
||||
struct kref dlm_refs;
|
||||
enum dlm_ctxt_state dlm_state;
|
||||
unsigned int num_joins;
|
||||
|
||||
struct o2hb_callback_func dlm_hb_up;
|
||||
struct o2hb_callback_func dlm_hb_down;
|
||||
struct task_struct *dlm_thread_task;
|
||||
struct task_struct *dlm_reco_thread_task;
|
||||
wait_queue_head_t dlm_thread_wq;
|
||||
wait_queue_head_t dlm_reco_thread_wq;
|
||||
wait_queue_head_t ast_wq;
|
||||
wait_queue_head_t migration_wq;
|
||||
|
||||
struct work_struct dispatched_work;
|
||||
struct list_head work_list;
|
||||
spinlock_t work_lock;
|
||||
struct list_head dlm_domain_handlers;
|
||||
struct list_head dlm_eviction_callbacks;
|
||||
};
|
||||
|
||||
struct dlm_lock_resource;
struct dlm_work_item;

/* signature of a deferred work function */
typedef void (dlm_workfunc_t)(struct dlm_work_item *, void *);

/* keventd work queue items exist for less-frequently called functions
 * that cannot be called directly from the net message handlers for
 * some reason, usually because they need to send net messages of
 * their own. */
void dlm_dispatch_work(void *data);
|
||||
|
||||
struct dlm_request_all_locks_priv
|
||||
{
|
||||
u8 reco_master;
|
||||
u8 dead_node;
|
||||
};
|
||||
|
||||
struct dlm_mig_lockres_priv
|
||||
{
|
||||
struct dlm_lock_resource *lockres;
|
||||
u8 real_master;
|
||||
};
|
||||
|
||||
struct dlm_assert_master_priv
|
||||
{
|
||||
struct dlm_lock_resource *lockres;
|
||||
u8 request_from;
|
||||
u32 flags;
|
||||
unsigned ignore_higher:1;
|
||||
};
|
||||
|
||||
|
||||
struct dlm_work_item
|
||||
{
|
||||
struct list_head list;
|
||||
dlm_workfunc_t *func;
|
||||
struct dlm_ctxt *dlm;
|
||||
void *data;
|
||||
union {
|
||||
struct dlm_request_all_locks_priv ral;
|
||||
struct dlm_mig_lockres_priv ml;
|
||||
struct dlm_assert_master_priv am;
|
||||
} u;
|
||||
};
|
||||
|
||||
static inline void dlm_init_work_item(struct dlm_ctxt *dlm,
|
||||
struct dlm_work_item *i,
|
||||
dlm_workfunc_t *f, void *data)
|
||||
{
|
||||
memset(i, 0, sizeof(*i));
|
||||
i->func = f;
|
||||
INIT_LIST_HEAD(&i->list);
|
||||
i->data = data;
|
||||
i->dlm = dlm; /* must have already done a dlm_grab on this! */
|
||||
}
|
||||
|
||||
|
||||
|
||||
static inline void __dlm_set_joining_node(struct dlm_ctxt *dlm,
|
||||
u8 node)
|
||||
{
|
||||
assert_spin_locked(&dlm->spinlock);
|
||||
|
||||
dlm->joining_node = node;
|
||||
wake_up(&dlm->dlm_join_events);
|
||||
}
|
||||
|
||||
#define DLM_LOCK_RES_UNINITED 0x00000001
|
||||
#define DLM_LOCK_RES_RECOVERING 0x00000002
|
||||
#define DLM_LOCK_RES_READY 0x00000004
|
||||
#define DLM_LOCK_RES_DIRTY 0x00000008
|
||||
#define DLM_LOCK_RES_IN_PROGRESS 0x00000010
|
||||
#define DLM_LOCK_RES_MIGRATING 0x00000020
|
||||
|
||||
#define DLM_PURGE_INTERVAL_MS (8 * 1000)
|
||||
|
||||
struct dlm_lock_resource
|
||||
{
|
||||
/* WARNING: Please see the comment in dlm_init_lockres before
|
||||
* adding fields here. */
|
||||
struct list_head list;
|
||||
struct kref refs;
|
||||
|
||||
/* please keep these next 3 in this order
|
||||
* some funcs want to iterate over all lists */
|
||||
struct list_head granted;
|
||||
struct list_head converting;
|
||||
struct list_head blocked;
|
||||
|
||||
struct list_head dirty;
|
||||
struct list_head recovering; // dlm_recovery_ctxt.resources list
|
||||
|
||||
/* unused lock resources have their last_used stamped and are
|
||||
* put on a list for the dlm thread to run. */
|
||||
struct list_head purge;
|
||||
unsigned long last_used;
|
||||
|
||||
unsigned migration_pending:1;
|
||||
atomic_t asts_reserved;
|
||||
spinlock_t spinlock;
|
||||
wait_queue_head_t wq;
|
||||
u8 owner; //node which owns the lock resource, or unknown
|
||||
u16 state;
|
||||
struct qstr lockname;
|
||||
char lvb[DLM_LVB_LEN];
|
||||
};
|
||||
|
||||
struct dlm_migratable_lock
|
||||
{
|
||||
__be64 cookie;
|
||||
|
||||
/* these 3 are just padding for the in-memory structure, but
|
||||
* list and flags are actually used when sent over the wire */
|
||||
__be16 pad1;
|
||||
u8 list; // 0=granted, 1=converting, 2=blocked
|
||||
u8 flags;
|
||||
|
||||
s8 type;
|
||||
s8 convert_type;
|
||||
s8 highest_blocked;
|
||||
u8 node;
|
||||
}; // 16 bytes
|
||||
|
||||
struct dlm_lock
|
||||
{
|
||||
struct dlm_migratable_lock ml;
|
||||
|
||||
struct list_head list;
|
||||
struct list_head ast_list;
|
||||
struct list_head bast_list;
|
||||
struct dlm_lock_resource *lockres;
|
||||
spinlock_t spinlock;
|
||||
struct kref lock_refs;
|
||||
|
||||
// ast and bast must be callable while holding a spinlock!
|
||||
dlm_astlockfunc_t *ast;
|
||||
dlm_bastlockfunc_t *bast;
|
||||
void *astdata;
|
||||
struct dlm_lockstatus *lksb;
|
||||
unsigned ast_pending:1,
|
||||
bast_pending:1,
|
||||
convert_pending:1,
|
||||
lock_pending:1,
|
||||
cancel_pending:1,
|
||||
unlock_pending:1,
|
||||
lksb_kernel_allocated:1;
|
||||
};
|
||||
|
||||
|
||||
#define DLM_LKSB_UNUSED1 0x01
|
||||
#define DLM_LKSB_PUT_LVB 0x02
|
||||
#define DLM_LKSB_GET_LVB 0x04
|
||||
#define DLM_LKSB_UNUSED2 0x08
|
||||
#define DLM_LKSB_UNUSED3 0x10
|
||||
#define DLM_LKSB_UNUSED4 0x20
|
||||
#define DLM_LKSB_UNUSED5 0x40
|
||||
#define DLM_LKSB_UNUSED6 0x80
|
||||
|
||||
|
||||
/* Index of each per-lockres lock queue; see dlm_list_idx_to_ptr(). */
enum dlm_lockres_list {
	DLM_GRANTED_LIST	= 0,
	DLM_CONVERTING_LIST	= 1,
	DLM_BLOCKED_LIST	= 2
};
|
||||
|
||||
static inline struct list_head *
|
||||
dlm_list_idx_to_ptr(struct dlm_lock_resource *res, enum dlm_lockres_list idx)
|
||||
{
|
||||
struct list_head *ret = NULL;
|
||||
if (idx == DLM_GRANTED_LIST)
|
||||
ret = &res->granted;
|
||||
else if (idx == DLM_CONVERTING_LIST)
|
||||
ret = &res->converting;
|
||||
else if (idx == DLM_BLOCKED_LIST)
|
||||
ret = &res->blocked;
|
||||
else
|
||||
BUG();
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
struct dlm_node_iter
|
||||
{
|
||||
unsigned long node_map[BITS_TO_LONGS(O2NM_MAX_NODES)];
|
||||
int curnode;
|
||||
};
|
||||
|
||||
|
||||
/* DLM network message type numbers (500..518, spelled out explicitly). */
enum {
	DLM_MASTER_REQUEST_MSG	= 500,
	DLM_UNUSED_MSG1		= 501,
	DLM_ASSERT_MASTER_MSG	= 502,
	DLM_CREATE_LOCK_MSG	= 503,
	DLM_CONVERT_LOCK_MSG	= 504,
	DLM_PROXY_AST_MSG	= 505,
	DLM_UNLOCK_LOCK_MSG	= 506,
	DLM_UNUSED_MSG2		= 507,
	DLM_MIGRATE_REQUEST_MSG	= 508,
	DLM_MIG_LOCKRES_MSG	= 509,
	DLM_QUERY_JOIN_MSG	= 510,
	DLM_ASSERT_JOINED_MSG	= 511,
	DLM_CANCEL_JOIN_MSG	= 512,
	DLM_EXIT_DOMAIN_MSG	= 513,
	DLM_MASTER_REQUERY_MSG	= 514,
	DLM_LOCK_REQUEST_MSG	= 515,
	DLM_RECO_DATA_DONE_MSG	= 516,
	DLM_BEGIN_RECO_MSG	= 517,
	DLM_FINALIZE_RECO_MSG	= 518
};
|
||||
|
||||
struct dlm_reco_node_data
|
||||
{
|
||||
int state;
|
||||
u8 node_num;
|
||||
struct list_head list;
|
||||
};
|
||||
|
||||
/* DLM_RECO_NODE_DATA_* state values, -1 through 5. */
enum {
	DLM_RECO_NODE_DATA_DEAD			= -1,
	DLM_RECO_NODE_DATA_INIT			= 0,
	DLM_RECO_NODE_DATA_REQUESTING		= 1,
	DLM_RECO_NODE_DATA_REQUESTED		= 2,
	DLM_RECO_NODE_DATA_RECEIVING		= 3,
	DLM_RECO_NODE_DATA_DONE			= 4,
	DLM_RECO_NODE_DATA_FINALIZE_SENT	= 5,
};
|
||||
|
||||
|
||||
/* DLM_MASTER_RESP_* response codes. */
enum {
	DLM_MASTER_RESP_NO	= 0,
	DLM_MASTER_RESP_YES	= 1,
	DLM_MASTER_RESP_MAYBE	= 2,
	DLM_MASTER_RESP_ERROR	= 3
};
|
||||
|
||||
|
||||
struct dlm_master_request
|
||||
{
|
||||
u8 node_idx;
|
||||
u8 namelen;
|
||||
__be16 pad1;
|
||||
__be32 flags;
|
||||
|
||||
u8 name[O2NM_MAX_NAME_LEN];
|
||||
};
|
||||
|
||||
#define DLM_ASSERT_MASTER_MLE_CLEANUP 0x00000001
|
||||
#define DLM_ASSERT_MASTER_REQUERY 0x00000002
|
||||
#define DLM_ASSERT_MASTER_FINISH_MIGRATION 0x00000004
|
||||
struct dlm_assert_master
|
||||
{
|
||||
u8 node_idx;
|
||||
u8 namelen;
|
||||
__be16 pad1;
|
||||
__be32 flags;
|
||||
|
||||
u8 name[O2NM_MAX_NAME_LEN];
|
||||
};
|
||||
|
||||
struct dlm_migrate_request
|
||||
{
|
||||
u8 master;
|
||||
u8 new_master;
|
||||
u8 namelen;
|
||||
u8 pad1;
|
||||
__be32 pad2;
|
||||
u8 name[O2NM_MAX_NAME_LEN];
|
||||
};
|
||||
|
||||
struct dlm_master_requery
|
||||
{
|
||||
u8 pad1;
|
||||
u8 pad2;
|
||||
u8 node_idx;
|
||||
u8 namelen;
|
||||
__be32 pad3;
|
||||
u8 name[O2NM_MAX_NAME_LEN];
|
||||
};
|
||||
|
||||
#define DLM_MRES_RECOVERY 0x01
|
||||
#define DLM_MRES_MIGRATION 0x02
|
||||
#define DLM_MRES_ALL_DONE 0x04
|
||||
|
||||
/*
|
||||
* We would like to get one whole lockres into a single network
|
||||
* message whenever possible. Generally speaking, there will be
|
||||
* at most one dlm_lock on a lockres for each node in the cluster,
|
||||
* plus (infrequently) any additional locks coming in from userdlm.
|
||||
*
|
||||
* struct _dlm_lockres_page
|
||||
* {
|
||||
* dlm_migratable_lockres mres;
|
||||
* dlm_migratable_lock ml[DLM_MAX_MIGRATABLE_LOCKS];
|
||||
* u8 pad[DLM_MIG_LOCKRES_RESERVED];
|
||||
* };
|
||||
*
|
||||
* from ../cluster/tcp.h
|
||||
* NET_MAX_PAYLOAD_BYTES (4096 - sizeof(net_msg))
|
||||
* (roughly 4080 bytes)
|
||||
* and sizeof(dlm_migratable_lockres) = 112 bytes
|
||||
* and sizeof(dlm_migratable_lock) = 16 bytes
|
||||
*
|
||||
* Choosing DLM_MAX_MIGRATABLE_LOCKS=240 and
|
||||
* DLM_MIG_LOCKRES_RESERVED=128 means we have this:
|
||||
*
|
||||
* (DLM_MAX_MIGRATABLE_LOCKS * sizeof(dlm_migratable_lock)) +
|
||||
* sizeof(dlm_migratable_lockres) + DLM_MIG_LOCKRES_RESERVED =
|
||||
* NET_MAX_PAYLOAD_BYTES
|
||||
* (240 * 16) + 112 + 128 = 4080
|
||||
*
|
||||
* So a lockres would need more than 240 locks before it would
|
||||
* use more than one network packet to recover. Not too bad.
|
||||
*/
|
||||
#define DLM_MAX_MIGRATABLE_LOCKS 240
|
||||
|
||||
struct dlm_migratable_lockres
|
||||
{
|
||||
u8 master;
|
||||
u8 lockname_len;
|
||||
u8 num_locks; // locks sent in this structure
|
||||
u8 flags;
|
||||
__be32 total_locks; // locks to be sent for this migration cookie
|
||||
__be64 mig_cookie; // cookie for this lockres migration
|
||||
// or zero if not needed
|
||||
// 16 bytes
|
||||
u8 lockname[DLM_LOCKID_NAME_MAX];
|
||||
// 48 bytes
|
||||
u8 lvb[DLM_LVB_LEN];
|
||||
// 112 bytes
|
||||
struct dlm_migratable_lock ml[0]; // 16 bytes each, begins at byte 112
|
||||
};
|
||||
#define DLM_MIG_LOCKRES_MAX_LEN \
|
||||
(sizeof(struct dlm_migratable_lockres) + \
|
||||
(sizeof(struct dlm_migratable_lock) * \
|
||||
DLM_MAX_MIGRATABLE_LOCKS) )
|
||||
|
||||
/* from above, 128 bytes
|
||||
* for some undetermined future use */
|
||||
#define DLM_MIG_LOCKRES_RESERVED (NET_MAX_PAYLOAD_BYTES - \
|
||||
DLM_MIG_LOCKRES_MAX_LEN)
|
||||
|
||||
struct dlm_create_lock
|
||||
{
|
||||
__be64 cookie;
|
||||
|
||||
__be32 flags;
|
||||
u8 pad1;
|
||||
u8 node_idx;
|
||||
s8 requested_type;
|
||||
u8 namelen;
|
||||
|
||||
u8 name[O2NM_MAX_NAME_LEN];
|
||||
};
|
||||
|
||||
struct dlm_convert_lock
|
||||
{
|
||||
__be64 cookie;
|
||||
|
||||
__be32 flags;
|
||||
u8 pad1;
|
||||
u8 node_idx;
|
||||
s8 requested_type;
|
||||
u8 namelen;
|
||||
|
||||
u8 name[O2NM_MAX_NAME_LEN];
|
||||
|
||||
s8 lvb[0];
|
||||
};
|
||||
#define DLM_CONVERT_LOCK_MAX_LEN (sizeof(struct dlm_convert_lock)+DLM_LVB_LEN)
|
||||
|
||||
struct dlm_unlock_lock
|
||||
{
|
||||
__be64 cookie;
|
||||
|
||||
__be32 flags;
|
||||
__be16 pad1;
|
||||
u8 node_idx;
|
||||
u8 namelen;
|
||||
|
||||
u8 name[O2NM_MAX_NAME_LEN];
|
||||
|
||||
s8 lvb[0];
|
||||
};
|
||||
#define DLM_UNLOCK_LOCK_MAX_LEN (sizeof(struct dlm_unlock_lock)+DLM_LVB_LEN)
|
||||
|
||||
struct dlm_proxy_ast
|
||||
{
|
||||
__be64 cookie;
|
||||
|
||||
__be32 flags;
|
||||
u8 node_idx;
|
||||
u8 type;
|
||||
u8 blocked_type;
|
||||
u8 namelen;
|
||||
|
||||
u8 name[O2NM_MAX_NAME_LEN];
|
||||
|
||||
s8 lvb[0];
|
||||
};
|
||||
#define DLM_PROXY_AST_MAX_LEN (sizeof(struct dlm_proxy_ast)+DLM_LVB_LEN)
|
||||
|
||||
#define DLM_MOD_KEY (0x666c6172)
|
||||
/* Possible answers to a join query. */
enum dlm_query_join_response {
	JOIN_DISALLOW	= 0,
	JOIN_OK		= 1,
	JOIN_OK_NO_MAP	= 2,
};
|
||||
|
||||
struct dlm_lock_request
|
||||
{
|
||||
u8 node_idx;
|
||||
u8 dead_node;
|
||||
__be16 pad1;
|
||||
__be32 pad2;
|
||||
};
|
||||
|
||||
struct dlm_reco_data_done
|
||||
{
|
||||
u8 node_idx;
|
||||
u8 dead_node;
|
||||
__be16 pad1;
|
||||
__be32 pad2;
|
||||
|
||||
/* unused for now */
|
||||
/* eventually we can use this to attempt
|
||||
* lvb recovery based on each node's info */
|
||||
u8 reco_lvb[DLM_LVB_LEN];
|
||||
};
|
||||
|
||||
struct dlm_begin_reco
|
||||
{
|
||||
u8 node_idx;
|
||||
u8 dead_node;
|
||||
__be16 pad1;
|
||||
__be32 pad2;
|
||||
};
|
||||
|
||||
|
||||
struct dlm_query_join_request
|
||||
{
|
||||
u8 node_idx;
|
||||
u8 pad1[2];
|
||||
u8 name_len;
|
||||
u8 domain[O2NM_MAX_NAME_LEN];
|
||||
};
|
||||
|
||||
struct dlm_assert_joined
|
||||
{
|
||||
u8 node_idx;
|
||||
u8 pad1[2];
|
||||
u8 name_len;
|
||||
u8 domain[O2NM_MAX_NAME_LEN];
|
||||
};
|
||||
|
||||
struct dlm_cancel_join
|
||||
{
|
||||
u8 node_idx;
|
||||
u8 pad1[2];
|
||||
u8 name_len;
|
||||
u8 domain[O2NM_MAX_NAME_LEN];
|
||||
};
|
||||
|
||||
struct dlm_exit_domain
|
||||
{
|
||||
u8 node_idx;
|
||||
u8 pad1[3];
|
||||
};
|
||||
|
||||
struct dlm_finalize_reco
|
||||
{
|
||||
u8 node_idx;
|
||||
u8 dead_node;
|
||||
__be16 pad1;
|
||||
__be32 pad2;
|
||||
};
|
||||
|
||||
static inline enum dlm_status
|
||||
__dlm_lockres_state_to_status(struct dlm_lock_resource *res)
|
||||
{
|
||||
enum dlm_status status = DLM_NORMAL;
|
||||
|
||||
assert_spin_locked(&res->spinlock);
|
||||
|
||||
if (res->state & DLM_LOCK_RES_RECOVERING)
|
||||
status = DLM_RECOVERING;
|
||||
else if (res->state & DLM_LOCK_RES_MIGRATING)
|
||||
status = DLM_MIGRATING;
|
||||
else if (res->state & DLM_LOCK_RES_IN_PROGRESS)
|
||||
status = DLM_FORWARD;
|
||||
|
||||
return status;
|
||||
}
|
||||
|
||||
struct dlm_lock * dlm_new_lock(int type, u8 node, u64 cookie,
|
||||
struct dlm_lockstatus *lksb);
|
||||
void dlm_lock_get(struct dlm_lock *lock);
|
||||
void dlm_lock_put(struct dlm_lock *lock);
|
||||
|
||||
void dlm_lock_attach_lockres(struct dlm_lock *lock,
|
||||
struct dlm_lock_resource *res);
|
||||
|
||||
int dlm_create_lock_handler(struct o2net_msg *msg, u32 len, void *data);
|
||||
int dlm_convert_lock_handler(struct o2net_msg *msg, u32 len, void *data);
|
||||
int dlm_proxy_ast_handler(struct o2net_msg *msg, u32 len, void *data);
|
||||
|
||||
void dlm_revert_pending_convert(struct dlm_lock_resource *res,
|
||||
struct dlm_lock *lock);
|
||||
void dlm_revert_pending_lock(struct dlm_lock_resource *res,
|
||||
struct dlm_lock *lock);
|
||||
|
||||
int dlm_unlock_lock_handler(struct o2net_msg *msg, u32 len, void *data);
|
||||
void dlm_commit_pending_cancel(struct dlm_lock_resource *res,
|
||||
struct dlm_lock *lock);
|
||||
void dlm_commit_pending_unlock(struct dlm_lock_resource *res,
|
||||
struct dlm_lock *lock);
|
||||
|
||||
int dlm_launch_thread(struct dlm_ctxt *dlm);
|
||||
void dlm_complete_thread(struct dlm_ctxt *dlm);
|
||||
int dlm_launch_recovery_thread(struct dlm_ctxt *dlm);
|
||||
void dlm_complete_recovery_thread(struct dlm_ctxt *dlm);
|
||||
void dlm_wait_for_recovery(struct dlm_ctxt *dlm);
|
||||
|
||||
void dlm_put(struct dlm_ctxt *dlm);
|
||||
struct dlm_ctxt *dlm_grab(struct dlm_ctxt *dlm);
|
||||
int dlm_domain_fully_joined(struct dlm_ctxt *dlm);
|
||||
|
||||
void __dlm_lockres_calc_usage(struct dlm_ctxt *dlm,
|
||||
struct dlm_lock_resource *res);
|
||||
void dlm_lockres_calc_usage(struct dlm_ctxt *dlm,
|
||||
struct dlm_lock_resource *res);
|
||||
void dlm_purge_lockres(struct dlm_ctxt *dlm,
|
||||
struct dlm_lock_resource *lockres);
|
||||
void dlm_lockres_get(struct dlm_lock_resource *res);
|
||||
void dlm_lockres_put(struct dlm_lock_resource *res);
|
||||
void __dlm_unhash_lockres(struct dlm_lock_resource *res);
|
||||
void __dlm_insert_lockres(struct dlm_ctxt *dlm,
|
||||
struct dlm_lock_resource *res);
|
||||
struct dlm_lock_resource * __dlm_lookup_lockres(struct dlm_ctxt *dlm,
|
||||
const char *name,
|
||||
unsigned int len);
|
||||
struct dlm_lock_resource * dlm_lookup_lockres(struct dlm_ctxt *dlm,
|
||||
const char *name,
|
||||
unsigned int len);
|
||||
|
||||
int dlm_is_host_down(int errno);
|
||||
void dlm_change_lockres_owner(struct dlm_ctxt *dlm,
|
||||
struct dlm_lock_resource *res,
|
||||
u8 owner);
|
||||
struct dlm_lock_resource * dlm_get_lock_resource(struct dlm_ctxt *dlm,
|
||||
const char *lockid,
|
||||
int flags);
|
||||
struct dlm_lock_resource *dlm_new_lockres(struct dlm_ctxt *dlm,
|
||||
const char *name,
|
||||
unsigned int namelen);
|
||||
|
||||
void dlm_queue_ast(struct dlm_ctxt *dlm, struct dlm_lock *lock);
|
||||
void dlm_queue_bast(struct dlm_ctxt *dlm, struct dlm_lock *lock);
|
||||
void dlm_do_local_ast(struct dlm_ctxt *dlm,
|
||||
struct dlm_lock_resource *res,
|
||||
struct dlm_lock *lock);
|
||||
int dlm_do_remote_ast(struct dlm_ctxt *dlm,
|
||||
struct dlm_lock_resource *res,
|
||||
struct dlm_lock *lock);
|
||||
void dlm_do_local_bast(struct dlm_ctxt *dlm,
|
||||
struct dlm_lock_resource *res,
|
||||
struct dlm_lock *lock,
|
||||
int blocked_type);
|
||||
int dlm_send_proxy_ast_msg(struct dlm_ctxt *dlm,
|
||||
struct dlm_lock_resource *res,
|
||||
struct dlm_lock *lock,
|
||||
int msg_type,
|
||||
int blocked_type, int flags);
|
||||
static inline int dlm_send_proxy_bast(struct dlm_ctxt *dlm,
|
||||
struct dlm_lock_resource *res,
|
||||
struct dlm_lock *lock,
|
||||
int blocked_type)
|
||||
{
|
||||
return dlm_send_proxy_ast_msg(dlm, res, lock, DLM_BAST,
|
||||
blocked_type, 0);
|
||||
}
|
||||
|
||||
static inline int dlm_send_proxy_ast(struct dlm_ctxt *dlm,
|
||||
struct dlm_lock_resource *res,
|
||||
struct dlm_lock *lock,
|
||||
int flags)
|
||||
{
|
||||
return dlm_send_proxy_ast_msg(dlm, res, lock, DLM_AST,
|
||||
0, flags);
|
||||
}
|
||||
|
||||
void dlm_print_one_lock_resource(struct dlm_lock_resource *res);
|
||||
void __dlm_print_one_lock_resource(struct dlm_lock_resource *res);
|
||||
|
||||
u8 dlm_nm_this_node(struct dlm_ctxt *dlm);
|
||||
void dlm_kick_thread(struct dlm_ctxt *dlm, struct dlm_lock_resource *res);
|
||||
void __dlm_dirty_lockres(struct dlm_ctxt *dlm, struct dlm_lock_resource *res);
|
||||
|
||||
|
||||
int dlm_nm_init(struct dlm_ctxt *dlm);
|
||||
int dlm_heartbeat_init(struct dlm_ctxt *dlm);
|
||||
void dlm_hb_node_down_cb(struct o2nm_node *node, int idx, void *data);
|
||||
void dlm_hb_node_up_cb(struct o2nm_node *node, int idx, void *data);
|
||||
|
||||
int dlm_lockres_is_dirty(struct dlm_ctxt *dlm, struct dlm_lock_resource *res);
|
||||
int dlm_migrate_lockres(struct dlm_ctxt *dlm,
|
||||
struct dlm_lock_resource *res,
|
||||
u8 target);
|
||||
int dlm_finish_migration(struct dlm_ctxt *dlm,
|
||||
struct dlm_lock_resource *res,
|
||||
u8 old_master);
|
||||
void dlm_lockres_release_ast(struct dlm_ctxt *dlm,
|
||||
struct dlm_lock_resource *res);
|
||||
void __dlm_lockres_reserve_ast(struct dlm_lock_resource *res);
|
||||
|
||||
int dlm_master_request_handler(struct o2net_msg *msg, u32 len, void *data);
|
||||
int dlm_assert_master_handler(struct o2net_msg *msg, u32 len, void *data);
|
||||
int dlm_migrate_request_handler(struct o2net_msg *msg, u32 len, void *data);
|
||||
int dlm_mig_lockres_handler(struct o2net_msg *msg, u32 len, void *data);
|
||||
int dlm_master_requery_handler(struct o2net_msg *msg, u32 len, void *data);
|
||||
int dlm_request_all_locks_handler(struct o2net_msg *msg, u32 len, void *data);
|
||||
int dlm_reco_data_done_handler(struct o2net_msg *msg, u32 len, void *data);
|
||||
int dlm_begin_reco_handler(struct o2net_msg *msg, u32 len, void *data);
|
||||
int dlm_finalize_reco_handler(struct o2net_msg *msg, u32 len, void *data);
|
||||
|
||||
int dlm_dispatch_assert_master(struct dlm_ctxt *dlm,
|
||||
struct dlm_lock_resource *res,
|
||||
int ignore_higher,
|
||||
u8 request_from,
|
||||
u32 flags);
|
||||
|
||||
|
||||
int dlm_send_one_lockres(struct dlm_ctxt *dlm,
|
||||
struct dlm_lock_resource *res,
|
||||
struct dlm_migratable_lockres *mres,
|
||||
u8 send_to,
|
||||
u8 flags);
|
||||
void dlm_move_lockres_to_recovery_list(struct dlm_ctxt *dlm,
|
||||
struct dlm_lock_resource *res);
|
||||
|
||||
/* will exit holding res->spinlock, but may drop in function */
|
||||
void __dlm_wait_on_lockres_flags(struct dlm_lock_resource *res, int flags);
|
||||
void __dlm_wait_on_lockres_flags_set(struct dlm_lock_resource *res, int flags);
|
||||
|
||||
/* will exit holding res->spinlock, but may drop in function */
|
||||
static inline void __dlm_wait_on_lockres(struct dlm_lock_resource *res)
|
||||
{
|
||||
__dlm_wait_on_lockres_flags(res, (DLM_LOCK_RES_IN_PROGRESS|
|
||||
DLM_LOCK_RES_RECOVERING|
|
||||
DLM_LOCK_RES_MIGRATING));
|
||||
}
|
||||
|
||||
|
||||
int dlm_init_mle_cache(void);
|
||||
void dlm_destroy_mle_cache(void);
|
||||
void dlm_hb_event_notify_attached(struct dlm_ctxt *dlm, int idx, int node_up);
|
||||
void dlm_clean_master_list(struct dlm_ctxt *dlm,
|
||||
u8 dead_node);
|
||||
int dlm_lock_basts_flushed(struct dlm_ctxt *dlm, struct dlm_lock *lock);
|
||||
|
||||
|
||||
static inline const char * dlm_lock_mode_name(int mode)
|
||||
{
|
||||
switch (mode) {
|
||||
case LKM_EXMODE:
|
||||
return "EX";
|
||||
case LKM_PRMODE:
|
||||
return "PR";
|
||||
case LKM_NLMODE:
|
||||
return "NL";
|
||||
}
|
||||
return "UNKNOWN";
|
||||
}
|
||||
|
||||
|
||||
static inline int dlm_lock_compatible(int existing, int request)
|
||||
{
|
||||
/* NO_LOCK compatible with all */
|
||||
if (request == LKM_NLMODE ||
|
||||
existing == LKM_NLMODE)
|
||||
return 1;
|
||||
|
||||
/* EX incompatible with all non-NO_LOCK */
|
||||
if (request == LKM_EXMODE)
|
||||
return 0;
|
||||
|
||||
/* request must be PR, which is compatible with PR */
|
||||
if (existing == LKM_PRMODE)
|
||||
return 1;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static inline int dlm_lock_on_list(struct list_head *head,
|
||||
struct dlm_lock *lock)
|
||||
{
|
||||
struct list_head *iter;
|
||||
struct dlm_lock *tmplock;
|
||||
|
||||
list_for_each(iter, head) {
|
||||
tmplock = list_entry(iter, struct dlm_lock, list);
|
||||
if (tmplock == lock)
|
||||
return 1;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
static inline enum dlm_status dlm_err_to_dlm_status(int err)
|
||||
{
|
||||
enum dlm_status ret;
|
||||
if (err == -ENOMEM)
|
||||
ret = DLM_SYSERR;
|
||||
else if (err == -ETIMEDOUT || o2net_link_down(err, NULL))
|
||||
ret = DLM_NOLOCKMGR;
|
||||
else if (err == -EINVAL)
|
||||
ret = DLM_BADPARAM;
|
||||
else if (err == -ENAMETOOLONG)
|
||||
ret = DLM_IVBUFLEN;
|
||||
else
|
||||
ret = DLM_BADARGS;
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
||||
static inline void dlm_node_iter_init(unsigned long *map,
|
||||
struct dlm_node_iter *iter)
|
||||
{
|
||||
memcpy(iter->node_map, map, sizeof(iter->node_map));
|
||||
iter->curnode = -1;
|
||||
}
|
||||
|
||||
static inline int dlm_node_iter_next(struct dlm_node_iter *iter)
|
||||
{
|
||||
int bit;
|
||||
bit = find_next_bit(iter->node_map, O2NM_MAX_NODES, iter->curnode+1);
|
||||
if (bit >= O2NM_MAX_NODES) {
|
||||
iter->curnode = O2NM_MAX_NODES;
|
||||
return -ENOENT;
|
||||
}
|
||||
iter->curnode = bit;
|
||||
return bit;
|
||||
}
|
||||
|
||||
|
||||
|
||||
#endif /* DLMCOMMON_H */
|
530
fs/ocfs2/dlm/dlmconvert.c
Normal file
530
fs/ocfs2/dlm/dlmconvert.c
Normal file
@ -0,0 +1,530 @@
|
||||
/* -*- mode: c; c-basic-offset: 8; -*-
|
||||
* vim: noexpandtab sw=8 ts=8 sts=0:
|
||||
*
|
||||
* dlmconvert.c
|
||||
*
|
||||
* underlying calls for lock conversion
|
||||
*
|
||||
* Copyright (C) 2004 Oracle. All rights reserved.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2 of the License, or (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public
|
||||
* License along with this program; if not, write to the
|
||||
* Free Software Foundation, Inc., 59 Temple Place - Suite 330,
|
||||
 * Boston, MA 02111-1307, USA.
|
||||
*
|
||||
*/
|
||||
|
||||
|
||||
#include <linux/module.h>
|
||||
#include <linux/fs.h>
|
||||
#include <linux/types.h>
|
||||
#include <linux/slab.h>
|
||||
#include <linux/highmem.h>
|
||||
#include <linux/utsname.h>
|
||||
#include <linux/init.h>
|
||||
#include <linux/sysctl.h>
|
||||
#include <linux/random.h>
|
||||
#include <linux/blkdev.h>
|
||||
#include <linux/socket.h>
|
||||
#include <linux/inet.h>
|
||||
#include <linux/spinlock.h>
|
||||
|
||||
|
||||
#include "cluster/heartbeat.h"
|
||||
#include "cluster/nodemanager.h"
|
||||
#include "cluster/tcp.h"
|
||||
|
||||
#include "dlmapi.h"
|
||||
#include "dlmcommon.h"
|
||||
|
||||
#include "dlmconvert.h"
|
||||
|
||||
#define MLOG_MASK_PREFIX ML_DLM
|
||||
#include "cluster/masklog.h"
|
||||
|
||||
/* NOTE: __dlmconvert_master is the only function in here that
|
||||
* needs a spinlock held on entry (res->spinlock) and it is the
|
||||
* only one that holds a lock on exit (res->spinlock).
|
||||
* All other functions in here need no locks and drop all of
|
||||
* the locks that they acquire. */
|
||||
static enum dlm_status __dlmconvert_master(struct dlm_ctxt *dlm,
|
||||
struct dlm_lock_resource *res,
|
||||
struct dlm_lock *lock, int flags,
|
||||
int type, int *call_ast,
|
||||
int *kick_thread);
|
||||
static enum dlm_status dlm_send_remote_convert_request(struct dlm_ctxt *dlm,
|
||||
struct dlm_lock_resource *res,
|
||||
struct dlm_lock *lock, int flags, int type);
|
||||
|
||||
/*
|
||||
* this is only called directly by dlmlock(), and only when the
|
||||
* local node is the owner of the lockres
|
||||
* locking:
|
||||
* caller needs: none
|
||||
* taken: takes and drops res->spinlock
|
||||
* held on exit: none
|
||||
* returns: see __dlmconvert_master
|
||||
*/
|
||||
enum dlm_status dlmconvert_master(struct dlm_ctxt *dlm,
|
||||
struct dlm_lock_resource *res,
|
||||
struct dlm_lock *lock, int flags, int type)
|
||||
{
|
||||
int call_ast = 0, kick_thread = 0;
|
||||
enum dlm_status status;
|
||||
|
||||
spin_lock(&res->spinlock);
|
||||
/* we are not in a network handler, this is fine */
|
||||
__dlm_wait_on_lockres(res);
|
||||
__dlm_lockres_reserve_ast(res);
|
||||
res->state |= DLM_LOCK_RES_IN_PROGRESS;
|
||||
|
||||
status = __dlmconvert_master(dlm, res, lock, flags, type,
|
||||
&call_ast, &kick_thread);
|
||||
|
||||
res->state &= ~DLM_LOCK_RES_IN_PROGRESS;
|
||||
spin_unlock(&res->spinlock);
|
||||
wake_up(&res->wq);
|
||||
if (status != DLM_NORMAL && status != DLM_NOTQUEUED)
|
||||
dlm_error(status);
|
||||
|
||||
/* either queue the ast or release it */
|
||||
if (call_ast)
|
||||
dlm_queue_ast(dlm, lock);
|
||||
else
|
||||
dlm_lockres_release_ast(dlm, res);
|
||||
|
||||
if (kick_thread)
|
||||
dlm_kick_thread(dlm, res);
|
||||
|
||||
return status;
|
||||
}
|
||||
|
||||
/* performs lock conversion at the lockres master site
|
||||
* locking:
|
||||
* caller needs: res->spinlock
|
||||
* taken: takes and drops lock->spinlock
|
||||
* held on exit: res->spinlock
|
||||
* returns: DLM_NORMAL, DLM_NOTQUEUED, DLM_DENIED
|
||||
* call_ast: whether ast should be called for this lock
|
||||
* kick_thread: whether dlm_kick_thread should be called
|
||||
*/
|
||||
static enum dlm_status __dlmconvert_master(struct dlm_ctxt *dlm,
|
||||
struct dlm_lock_resource *res,
|
||||
struct dlm_lock *lock, int flags,
|
||||
int type, int *call_ast,
|
||||
int *kick_thread)
|
||||
{
|
||||
enum dlm_status status = DLM_NORMAL;
|
||||
struct list_head *iter;
|
||||
struct dlm_lock *tmplock=NULL;
|
||||
|
||||
assert_spin_locked(&res->spinlock);
|
||||
|
||||
mlog_entry("type=%d, convert_type=%d, new convert_type=%d\n",
|
||||
lock->ml.type, lock->ml.convert_type, type);
|
||||
|
||||
spin_lock(&lock->spinlock);
|
||||
|
||||
/* already converting? */
|
||||
if (lock->ml.convert_type != LKM_IVMODE) {
|
||||
mlog(ML_ERROR, "attempted to convert a lock with a lock "
|
||||
"conversion pending\n");
|
||||
status = DLM_DENIED;
|
||||
goto unlock_exit;
|
||||
}
|
||||
|
||||
/* must be on grant queue to convert */
|
||||
if (!dlm_lock_on_list(&res->granted, lock)) {
|
||||
mlog(ML_ERROR, "attempted to convert a lock not on grant "
|
||||
"queue\n");
|
||||
status = DLM_DENIED;
|
||||
goto unlock_exit;
|
||||
}
|
||||
|
||||
if (flags & LKM_VALBLK) {
|
||||
switch (lock->ml.type) {
|
||||
case LKM_EXMODE:
|
||||
/* EX + LKM_VALBLK + convert == set lvb */
|
||||
mlog(0, "will set lvb: converting %s->%s\n",
|
||||
dlm_lock_mode_name(lock->ml.type),
|
||||
dlm_lock_mode_name(type));
|
||||
lock->lksb->flags |= DLM_LKSB_PUT_LVB;
|
||||
break;
|
||||
case LKM_PRMODE:
|
||||
case LKM_NLMODE:
|
||||
/* refetch if new level is not NL */
|
||||
if (type > LKM_NLMODE) {
|
||||
mlog(0, "will fetch new value into "
|
||||
"lvb: converting %s->%s\n",
|
||||
dlm_lock_mode_name(lock->ml.type),
|
||||
dlm_lock_mode_name(type));
|
||||
lock->lksb->flags |= DLM_LKSB_GET_LVB;
|
||||
} else {
|
||||
mlog(0, "will NOT fetch new value "
|
||||
"into lvb: converting %s->%s\n",
|
||||
dlm_lock_mode_name(lock->ml.type),
|
||||
dlm_lock_mode_name(type));
|
||||
flags &= ~(LKM_VALBLK);
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/* in-place downconvert? */
|
||||
if (type <= lock->ml.type)
|
||||
goto grant;
|
||||
|
||||
/* upconvert from here on */
|
||||
status = DLM_NORMAL;
|
||||
list_for_each(iter, &res->granted) {
|
||||
tmplock = list_entry(iter, struct dlm_lock, list);
|
||||
if (tmplock == lock)
|
||||
continue;
|
||||
if (!dlm_lock_compatible(tmplock->ml.type, type))
|
||||
goto switch_queues;
|
||||
}
|
||||
|
||||
list_for_each(iter, &res->converting) {
|
||||
tmplock = list_entry(iter, struct dlm_lock, list);
|
||||
if (!dlm_lock_compatible(tmplock->ml.type, type))
|
||||
goto switch_queues;
|
||||
/* existing conversion requests take precedence */
|
||||
if (!dlm_lock_compatible(tmplock->ml.convert_type, type))
|
||||
goto switch_queues;
|
||||
}
|
||||
|
||||
/* fall thru to grant */
|
||||
|
||||
grant:
|
||||
mlog(0, "res %.*s, granting %s lock\n", res->lockname.len,
|
||||
res->lockname.name, dlm_lock_mode_name(type));
|
||||
/* immediately grant the new lock type */
|
||||
lock->lksb->status = DLM_NORMAL;
|
||||
if (lock->ml.node == dlm->node_num)
|
||||
mlog(0, "doing in-place convert for nonlocal lock\n");
|
||||
lock->ml.type = type;
|
||||
status = DLM_NORMAL;
|
||||
*call_ast = 1;
|
||||
goto unlock_exit;
|
||||
|
||||
switch_queues:
|
||||
if (flags & LKM_NOQUEUE) {
|
||||
mlog(0, "failed to convert NOQUEUE lock %.*s from "
|
||||
"%d to %d...\n", res->lockname.len, res->lockname.name,
|
||||
lock->ml.type, type);
|
||||
status = DLM_NOTQUEUED;
|
||||
goto unlock_exit;
|
||||
}
|
||||
mlog(0, "res %.*s, queueing...\n", res->lockname.len,
|
||||
res->lockname.name);
|
||||
|
||||
lock->ml.convert_type = type;
|
||||
/* do not alter lock refcount. switching lists. */
|
||||
list_del_init(&lock->list);
|
||||
list_add_tail(&lock->list, &res->converting);
|
||||
|
||||
unlock_exit:
|
||||
spin_unlock(&lock->spinlock);
|
||||
if (status == DLM_DENIED) {
|
||||
__dlm_print_one_lock_resource(res);
|
||||
}
|
||||
if (status == DLM_NORMAL)
|
||||
*kick_thread = 1;
|
||||
return status;
|
||||
}
|
||||
|
||||
void dlm_revert_pending_convert(struct dlm_lock_resource *res,
|
||||
struct dlm_lock *lock)
|
||||
{
|
||||
/* do not alter lock refcount. switching lists. */
|
||||
list_del_init(&lock->list);
|
||||
list_add_tail(&lock->list, &res->granted);
|
||||
lock->ml.convert_type = LKM_IVMODE;
|
||||
lock->lksb->flags &= ~(DLM_LKSB_GET_LVB|DLM_LKSB_PUT_LVB);
|
||||
}
|
||||
|
||||
/* messages the master site to do lock conversion
|
||||
* locking:
|
||||
* caller needs: none
|
||||
* taken: takes and drops res->spinlock, uses DLM_LOCK_RES_IN_PROGRESS
|
||||
* held on exit: none
|
||||
* returns: DLM_NORMAL, DLM_RECOVERING, status from remote node
|
||||
*/
|
||||
enum dlm_status dlmconvert_remote(struct dlm_ctxt *dlm,
|
||||
struct dlm_lock_resource *res,
|
||||
struct dlm_lock *lock, int flags, int type)
|
||||
{
|
||||
enum dlm_status status;
|
||||
|
||||
mlog(0, "type=%d, convert_type=%d, busy=%d\n", lock->ml.type,
|
||||
lock->ml.convert_type, res->state & DLM_LOCK_RES_IN_PROGRESS);
|
||||
|
||||
spin_lock(&res->spinlock);
|
||||
if (res->state & DLM_LOCK_RES_RECOVERING) {
|
||||
mlog(0, "bailing out early since res is RECOVERING "
|
||||
"on secondary queue\n");
|
||||
/* __dlm_print_one_lock_resource(res); */
|
||||
status = DLM_RECOVERING;
|
||||
goto bail;
|
||||
}
|
||||
/* will exit this call with spinlock held */
|
||||
__dlm_wait_on_lockres(res);
|
||||
|
||||
if (lock->ml.convert_type != LKM_IVMODE) {
|
||||
__dlm_print_one_lock_resource(res);
|
||||
mlog(ML_ERROR, "converting a remote lock that is already "
|
||||
"converting! (cookie=%"MLFu64", conv=%d)\n",
|
||||
lock->ml.cookie, lock->ml.convert_type);
|
||||
status = DLM_DENIED;
|
||||
goto bail;
|
||||
}
|
||||
res->state |= DLM_LOCK_RES_IN_PROGRESS;
|
||||
/* move lock to local convert queue */
|
||||
/* do not alter lock refcount. switching lists. */
|
||||
list_del_init(&lock->list);
|
||||
list_add_tail(&lock->list, &res->converting);
|
||||
lock->convert_pending = 1;
|
||||
lock->ml.convert_type = type;
|
||||
|
||||
if (flags & LKM_VALBLK) {
|
||||
if (lock->ml.type == LKM_EXMODE) {
|
||||
flags |= LKM_PUT_LVB;
|
||||
lock->lksb->flags |= DLM_LKSB_PUT_LVB;
|
||||
} else {
|
||||
if (lock->ml.convert_type == LKM_NLMODE)
|
||||
flags &= ~LKM_VALBLK;
|
||||
else {
|
||||
flags |= LKM_GET_LVB;
|
||||
lock->lksb->flags |= DLM_LKSB_GET_LVB;
|
||||
}
|
||||
}
|
||||
}
|
||||
spin_unlock(&res->spinlock);
|
||||
|
||||
/* no locks held here.
|
||||
* need to wait for a reply as to whether it got queued or not. */
|
||||
status = dlm_send_remote_convert_request(dlm, res, lock, flags, type);
|
||||
|
||||
spin_lock(&res->spinlock);
|
||||
res->state &= ~DLM_LOCK_RES_IN_PROGRESS;
|
||||
lock->convert_pending = 0;
|
||||
/* if it failed, move it back to granted queue */
|
||||
if (status != DLM_NORMAL) {
|
||||
if (status != DLM_NOTQUEUED)
|
||||
dlm_error(status);
|
||||
dlm_revert_pending_convert(res, lock);
|
||||
}
|
||||
bail:
|
||||
spin_unlock(&res->spinlock);
|
||||
|
||||
/* TODO: should this be a wake_one? */
|
||||
/* wake up any IN_PROGRESS waiters */
|
||||
wake_up(&res->wq);
|
||||
|
||||
return status;
|
||||
}
|
||||
|
||||
/* sends DLM_CONVERT_LOCK_MSG to master site
|
||||
* locking:
|
||||
* caller needs: none
|
||||
* taken: none
|
||||
* held on exit: none
|
||||
* returns: DLM_NOLOCKMGR, status from remote node
|
||||
*/
|
||||
static enum dlm_status dlm_send_remote_convert_request(struct dlm_ctxt *dlm,
|
||||
struct dlm_lock_resource *res,
|
||||
struct dlm_lock *lock, int flags, int type)
|
||||
{
|
||||
struct dlm_convert_lock convert;
|
||||
int tmpret;
|
||||
enum dlm_status ret;
|
||||
int status = 0;
|
||||
struct kvec vec[2];
|
||||
size_t veclen = 1;
|
||||
|
||||
mlog_entry("%.*s\n", res->lockname.len, res->lockname.name);
|
||||
|
||||
memset(&convert, 0, sizeof(struct dlm_convert_lock));
|
||||
convert.node_idx = dlm->node_num;
|
||||
convert.requested_type = type;
|
||||
convert.cookie = lock->ml.cookie;
|
||||
convert.namelen = res->lockname.len;
|
||||
convert.flags = cpu_to_be32(flags);
|
||||
memcpy(convert.name, res->lockname.name, convert.namelen);
|
||||
|
||||
vec[0].iov_len = sizeof(struct dlm_convert_lock);
|
||||
vec[0].iov_base = &convert;
|
||||
|
||||
if (flags & LKM_PUT_LVB) {
|
||||
/* extra data to send if we are updating lvb */
|
||||
vec[1].iov_len = DLM_LVB_LEN;
|
||||
vec[1].iov_base = lock->lksb->lvb;
|
||||
veclen++;
|
||||
}
|
||||
|
||||
tmpret = o2net_send_message_vec(DLM_CONVERT_LOCK_MSG, dlm->key,
|
||||
vec, veclen, res->owner, &status);
|
||||
if (tmpret >= 0) {
|
||||
// successfully sent and received
|
||||
ret = status; // this is already a dlm_status
|
||||
if (ret == DLM_RECOVERING) {
|
||||
mlog(0, "node %u returned DLM_RECOVERING from convert "
|
||||
"message!\n", res->owner);
|
||||
} else if (ret == DLM_MIGRATING) {
|
||||
mlog(0, "node %u returned DLM_MIGRATING from convert "
|
||||
"message!\n", res->owner);
|
||||
} else if (ret == DLM_FORWARD) {
|
||||
mlog(0, "node %u returned DLM_FORWARD from convert "
|
||||
"message!\n", res->owner);
|
||||
} else if (ret != DLM_NORMAL && ret != DLM_NOTQUEUED)
|
||||
dlm_error(ret);
|
||||
} else {
|
||||
mlog_errno(tmpret);
|
||||
if (dlm_is_host_down(tmpret)) {
|
||||
ret = DLM_RECOVERING;
|
||||
mlog(0, "node %u died so returning DLM_RECOVERING "
|
||||
"from convert message!\n", res->owner);
|
||||
} else {
|
||||
ret = dlm_err_to_dlm_status(tmpret);
|
||||
}
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
/* handler for DLM_CONVERT_LOCK_MSG on master site
|
||||
* locking:
|
||||
* caller needs: none
|
||||
* taken: takes and drop res->spinlock
|
||||
* held on exit: none
|
||||
* returns: DLM_NORMAL, DLM_IVLOCKID, DLM_BADARGS,
|
||||
* status from __dlmconvert_master
|
||||
*/
|
||||
int dlm_convert_lock_handler(struct o2net_msg *msg, u32 len, void *data)
|
||||
{
|
||||
struct dlm_ctxt *dlm = data;
|
||||
struct dlm_convert_lock *cnv = (struct dlm_convert_lock *)msg->buf;
|
||||
struct dlm_lock_resource *res = NULL;
|
||||
struct list_head *iter;
|
||||
struct dlm_lock *lock = NULL;
|
||||
struct dlm_lockstatus *lksb;
|
||||
enum dlm_status status = DLM_NORMAL;
|
||||
u32 flags;
|
||||
int call_ast = 0, kick_thread = 0;
|
||||
|
||||
if (!dlm_grab(dlm)) {
|
||||
dlm_error(DLM_REJECTED);
|
||||
return DLM_REJECTED;
|
||||
}
|
||||
|
||||
mlog_bug_on_msg(!dlm_domain_fully_joined(dlm),
|
||||
"Domain %s not fully joined!\n", dlm->name);
|
||||
|
||||
if (cnv->namelen > DLM_LOCKID_NAME_MAX) {
|
||||
status = DLM_IVBUFLEN;
|
||||
dlm_error(status);
|
||||
goto leave;
|
||||
}
|
||||
|
||||
flags = be32_to_cpu(cnv->flags);
|
||||
|
||||
if ((flags & (LKM_PUT_LVB|LKM_GET_LVB)) ==
|
||||
(LKM_PUT_LVB|LKM_GET_LVB)) {
|
||||
mlog(ML_ERROR, "both PUT and GET lvb specified\n");
|
||||
status = DLM_BADARGS;
|
||||
goto leave;
|
||||
}
|
||||
|
||||
mlog(0, "lvb: %s\n", flags & LKM_PUT_LVB ? "put lvb" :
|
||||
(flags & LKM_GET_LVB ? "get lvb" : "none"));
|
||||
|
||||
status = DLM_IVLOCKID;
|
||||
res = dlm_lookup_lockres(dlm, cnv->name, cnv->namelen);
|
||||
if (!res) {
|
||||
dlm_error(status);
|
||||
goto leave;
|
||||
}
|
||||
|
||||
spin_lock(&res->spinlock);
|
||||
list_for_each(iter, &res->granted) {
|
||||
lock = list_entry(iter, struct dlm_lock, list);
|
||||
if (lock->ml.cookie == cnv->cookie &&
|
||||
lock->ml.node == cnv->node_idx) {
|
||||
dlm_lock_get(lock);
|
||||
break;
|
||||
}
|
||||
lock = NULL;
|
||||
}
|
||||
spin_unlock(&res->spinlock);
|
||||
if (!lock) {
|
||||
status = DLM_IVLOCKID;
|
||||
dlm_error(status);
|
||||
goto leave;
|
||||
}
|
||||
|
||||
/* found the lock */
|
||||
lksb = lock->lksb;
|
||||
|
||||
/* see if caller needed to get/put lvb */
|
||||
if (flags & LKM_PUT_LVB) {
|
||||
BUG_ON(lksb->flags & (DLM_LKSB_PUT_LVB|DLM_LKSB_GET_LVB));
|
||||
lksb->flags |= DLM_LKSB_PUT_LVB;
|
||||
memcpy(&lksb->lvb[0], &cnv->lvb[0], DLM_LVB_LEN);
|
||||
} else if (flags & LKM_GET_LVB) {
|
||||
BUG_ON(lksb->flags & (DLM_LKSB_PUT_LVB|DLM_LKSB_GET_LVB));
|
||||
lksb->flags |= DLM_LKSB_GET_LVB;
|
||||
}
|
||||
|
||||
spin_lock(&res->spinlock);
|
||||
status = __dlm_lockres_state_to_status(res);
|
||||
if (status == DLM_NORMAL) {
|
||||
__dlm_lockres_reserve_ast(res);
|
||||
res->state |= DLM_LOCK_RES_IN_PROGRESS;
|
||||
status = __dlmconvert_master(dlm, res, lock, flags,
|
||||
cnv->requested_type,
|
||||
&call_ast, &kick_thread);
|
||||
res->state &= ~DLM_LOCK_RES_IN_PROGRESS;
|
||||
}
|
||||
spin_unlock(&res->spinlock);
|
||||
|
||||
if (status != DLM_NORMAL) {
|
||||
if (status != DLM_NOTQUEUED)
|
||||
dlm_error(status);
|
||||
lksb->flags &= ~(DLM_LKSB_GET_LVB|DLM_LKSB_PUT_LVB);
|
||||
}
|
||||
|
||||
leave:
|
||||
if (!lock)
|
||||
mlog(ML_ERROR, "did not find lock to convert on grant queue! "
|
||||
"cookie=%"MLFu64"\n",
|
||||
cnv->cookie);
|
||||
else
|
||||
dlm_lock_put(lock);
|
||||
|
||||
/* either queue the ast or release it */
|
||||
if (call_ast)
|
||||
dlm_queue_ast(dlm, lock);
|
||||
else
|
||||
dlm_lockres_release_ast(dlm, res);
|
||||
|
||||
if (kick_thread)
|
||||
dlm_kick_thread(dlm, res);
|
||||
|
||||
if (res)
|
||||
dlm_lockres_put(res);
|
||||
|
||||
dlm_put(dlm);
|
||||
|
||||
return status;
|
||||
}
|
35
fs/ocfs2/dlm/dlmconvert.h
Normal file
35
fs/ocfs2/dlm/dlmconvert.h
Normal file
@ -0,0 +1,35 @@
|
||||
/* -*- mode: c; c-basic-offset: 8; -*-
|
||||
* vim: noexpandtab sw=8 ts=8 sts=0:
|
||||
*
|
||||
* dlmconvert.h
|
||||
*
|
||||
* Copyright (C) 2004 Oracle. All rights reserved.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2 of the License, or (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public
|
||||
* License along with this program; if not, write to the
|
||||
* Free Software Foundation, Inc., 59 Temple Place - Suite 330,
|
||||
* Boston, MA 021110-1307, USA.
|
||||
*
|
||||
*/
|
||||
|
||||
#ifndef DLMCONVERT_H
#define DLMCONVERT_H

/*
 * Lock conversion entry points.  Both take the domain, the lock resource,
 * the lock being converted, the LKM_* request flags and the requested lock
 * type, and report the outcome as a dlm_status.
 */

/* NOTE(review): by its name, the path used when the local node masters
 * @res -- confirm against the definition in dlmconvert.c */
enum dlm_status dlmconvert_master(struct dlm_ctxt *dlm,
				  struct dlm_lock_resource *res,
				  struct dlm_lock *lock,
				  int flags, int type);

/* conversion path that sends the convert request to the node owning @res */
enum dlm_status dlmconvert_remote(struct dlm_ctxt *dlm,
				  struct dlm_lock_resource *res,
				  struct dlm_lock *lock,
				  int flags, int type);

#endif
|
246
fs/ocfs2/dlm/dlmdebug.c
Normal file
246
fs/ocfs2/dlm/dlmdebug.c
Normal file
@ -0,0 +1,246 @@
|
||||
/* -*- mode: c; c-basic-offset: 8; -*-
|
||||
* vim: noexpandtab sw=8 ts=8 sts=0:
|
||||
*
|
||||
* dlmdebug.c
|
||||
*
|
||||
* debug functionality for the dlm
|
||||
*
|
||||
* Copyright (C) 2004 Oracle. All rights reserved.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2 of the License, or (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public
|
||||
* License along with this program; if not, write to the
|
||||
* Free Software Foundation, Inc., 59 Temple Place - Suite 330,
|
||||
* Boston, MA 021110-1307, USA.
|
||||
*
|
||||
*/
|
||||
|
||||
#include <linux/types.h>
|
||||
#include <linux/slab.h>
|
||||
#include <linux/highmem.h>
|
||||
#include <linux/utsname.h>
|
||||
#include <linux/sysctl.h>
|
||||
#include <linux/spinlock.h>
|
||||
|
||||
#include "cluster/heartbeat.h"
|
||||
#include "cluster/nodemanager.h"
|
||||
#include "cluster/tcp.h"
|
||||
|
||||
#include "dlmapi.h"
|
||||
#include "dlmcommon.h"
|
||||
#include "dlmdebug.h"
|
||||
|
||||
#include "dlmdomain.h"
|
||||
#include "dlmdebug.h"
|
||||
|
||||
#define MLOG_MASK_PREFIX ML_DLM
|
||||
#include "cluster/masklog.h"
|
||||
|
||||
void dlm_print_one_lock_resource(struct dlm_lock_resource *res)
|
||||
{
|
||||
mlog(ML_NOTICE, "lockres: %.*s, owner=%u, state=%u\n",
|
||||
res->lockname.len, res->lockname.name,
|
||||
res->owner, res->state);
|
||||
spin_lock(&res->spinlock);
|
||||
__dlm_print_one_lock_resource(res);
|
||||
spin_unlock(&res->spinlock);
|
||||
}
|
||||
|
||||
void __dlm_print_one_lock_resource(struct dlm_lock_resource *res)
|
||||
{
|
||||
struct list_head *iter2;
|
||||
struct dlm_lock *lock;
|
||||
|
||||
assert_spin_locked(&res->spinlock);
|
||||
|
||||
mlog(ML_NOTICE, "lockres: %.*s, owner=%u, state=%u\n",
|
||||
res->lockname.len, res->lockname.name,
|
||||
res->owner, res->state);
|
||||
mlog(ML_NOTICE, " last used: %lu, on purge list: %s\n",
|
||||
res->last_used, list_empty(&res->purge) ? "no" : "yes");
|
||||
mlog(ML_NOTICE, " granted queue: \n");
|
||||
list_for_each(iter2, &res->granted) {
|
||||
lock = list_entry(iter2, struct dlm_lock, list);
|
||||
spin_lock(&lock->spinlock);
|
||||
mlog(ML_NOTICE, " type=%d, conv=%d, node=%u, "
|
||||
"cookie=%"MLFu64", ast=(empty=%c,pend=%c), bast=(empty=%c,pend=%c)\n",
|
||||
lock->ml.type, lock->ml.convert_type, lock->ml.node, lock->ml.cookie,
|
||||
list_empty(&lock->ast_list) ? 'y' : 'n',
|
||||
lock->ast_pending ? 'y' : 'n',
|
||||
list_empty(&lock->bast_list) ? 'y' : 'n',
|
||||
lock->bast_pending ? 'y' : 'n');
|
||||
spin_unlock(&lock->spinlock);
|
||||
}
|
||||
mlog(ML_NOTICE, " converting queue: \n");
|
||||
list_for_each(iter2, &res->converting) {
|
||||
lock = list_entry(iter2, struct dlm_lock, list);
|
||||
spin_lock(&lock->spinlock);
|
||||
mlog(ML_NOTICE, " type=%d, conv=%d, node=%u, "
|
||||
"cookie=%"MLFu64", ast=(empty=%c,pend=%c), bast=(empty=%c,pend=%c)\n",
|
||||
lock->ml.type, lock->ml.convert_type, lock->ml.node, lock->ml.cookie,
|
||||
list_empty(&lock->ast_list) ? 'y' : 'n',
|
||||
lock->ast_pending ? 'y' : 'n',
|
||||
list_empty(&lock->bast_list) ? 'y' : 'n',
|
||||
lock->bast_pending ? 'y' : 'n');
|
||||
spin_unlock(&lock->spinlock);
|
||||
}
|
||||
mlog(ML_NOTICE, " blocked queue: \n");
|
||||
list_for_each(iter2, &res->blocked) {
|
||||
lock = list_entry(iter2, struct dlm_lock, list);
|
||||
spin_lock(&lock->spinlock);
|
||||
mlog(ML_NOTICE, " type=%d, conv=%d, node=%u, "
|
||||
"cookie=%"MLFu64", ast=(empty=%c,pend=%c), bast=(empty=%c,pend=%c)\n",
|
||||
lock->ml.type, lock->ml.convert_type, lock->ml.node, lock->ml.cookie,
|
||||
list_empty(&lock->ast_list) ? 'y' : 'n',
|
||||
lock->ast_pending ? 'y' : 'n',
|
||||
list_empty(&lock->bast_list) ? 'y' : 'n',
|
||||
lock->bast_pending ? 'y' : 'n');
|
||||
spin_unlock(&lock->spinlock);
|
||||
}
|
||||
}
|
||||
|
||||
void dlm_print_one_lock(struct dlm_lock *lockid)
|
||||
{
|
||||
dlm_print_one_lock_resource(lockid->lockres);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(dlm_print_one_lock);
|
||||
|
||||
void dlm_dump_lock_resources(struct dlm_ctxt *dlm)
|
||||
{
|
||||
struct dlm_lock_resource *res;
|
||||
struct list_head *iter;
|
||||
struct list_head *bucket;
|
||||
int i;
|
||||
|
||||
mlog(ML_NOTICE, "struct dlm_ctxt: %s, node=%u, key=%u\n",
|
||||
dlm->name, dlm->node_num, dlm->key);
|
||||
if (!dlm || !dlm->name) {
|
||||
mlog(ML_ERROR, "dlm=%p\n", dlm);
|
||||
return;
|
||||
}
|
||||
|
||||
spin_lock(&dlm->spinlock);
|
||||
for (i=0; i<DLM_HASH_SIZE; i++) {
|
||||
bucket = &(dlm->resources[i]);
|
||||
list_for_each(iter, bucket) {
|
||||
res = list_entry(iter, struct dlm_lock_resource, list);
|
||||
dlm_print_one_lock_resource(res);
|
||||
}
|
||||
}
|
||||
spin_unlock(&dlm->spinlock);
|
||||
}
|
||||
|
||||
static const char *dlm_errnames[] = {
|
||||
[DLM_NORMAL] = "DLM_NORMAL",
|
||||
[DLM_GRANTED] = "DLM_GRANTED",
|
||||
[DLM_DENIED] = "DLM_DENIED",
|
||||
[DLM_DENIED_NOLOCKS] = "DLM_DENIED_NOLOCKS",
|
||||
[DLM_WORKING] = "DLM_WORKING",
|
||||
[DLM_BLOCKED] = "DLM_BLOCKED",
|
||||
[DLM_BLOCKED_ORPHAN] = "DLM_BLOCKED_ORPHAN",
|
||||
[DLM_DENIED_GRACE_PERIOD] = "DLM_DENIED_GRACE_PERIOD",
|
||||
[DLM_SYSERR] = "DLM_SYSERR",
|
||||
[DLM_NOSUPPORT] = "DLM_NOSUPPORT",
|
||||
[DLM_CANCELGRANT] = "DLM_CANCELGRANT",
|
||||
[DLM_IVLOCKID] = "DLM_IVLOCKID",
|
||||
[DLM_SYNC] = "DLM_SYNC",
|
||||
[DLM_BADTYPE] = "DLM_BADTYPE",
|
||||
[DLM_BADRESOURCE] = "DLM_BADRESOURCE",
|
||||
[DLM_MAXHANDLES] = "DLM_MAXHANDLES",
|
||||
[DLM_NOCLINFO] = "DLM_NOCLINFO",
|
||||
[DLM_NOLOCKMGR] = "DLM_NOLOCKMGR",
|
||||
[DLM_NOPURGED] = "DLM_NOPURGED",
|
||||
[DLM_BADARGS] = "DLM_BADARGS",
|
||||
[DLM_VOID] = "DLM_VOID",
|
||||
[DLM_NOTQUEUED] = "DLM_NOTQUEUED",
|
||||
[DLM_IVBUFLEN] = "DLM_IVBUFLEN",
|
||||
[DLM_CVTUNGRANT] = "DLM_CVTUNGRANT",
|
||||
[DLM_BADPARAM] = "DLM_BADPARAM",
|
||||
[DLM_VALNOTVALID] = "DLM_VALNOTVALID",
|
||||
[DLM_REJECTED] = "DLM_REJECTED",
|
||||
[DLM_ABORT] = "DLM_ABORT",
|
||||
[DLM_CANCEL] = "DLM_CANCEL",
|
||||
[DLM_IVRESHANDLE] = "DLM_IVRESHANDLE",
|
||||
[DLM_DEADLOCK] = "DLM_DEADLOCK",
|
||||
[DLM_DENIED_NOASTS] = "DLM_DENIED_NOASTS",
|
||||
[DLM_FORWARD] = "DLM_FORWARD",
|
||||
[DLM_TIMEOUT] = "DLM_TIMEOUT",
|
||||
[DLM_IVGROUPID] = "DLM_IVGROUPID",
|
||||
[DLM_VERS_CONFLICT] = "DLM_VERS_CONFLICT",
|
||||
[DLM_BAD_DEVICE_PATH] = "DLM_BAD_DEVICE_PATH",
|
||||
[DLM_NO_DEVICE_PERMISSION] = "DLM_NO_DEVICE_PERMISSION",
|
||||
[DLM_NO_CONTROL_DEVICE ] = "DLM_NO_CONTROL_DEVICE ",
|
||||
[DLM_RECOVERING] = "DLM_RECOVERING",
|
||||
[DLM_MIGRATING] = "DLM_MIGRATING",
|
||||
[DLM_MAXSTATS] = "DLM_MAXSTATS",
|
||||
};
|
||||
|
||||
static const char *dlm_errmsgs[] = {
|
||||
[DLM_NORMAL] = "request in progress",
|
||||
[DLM_GRANTED] = "request granted",
|
||||
[DLM_DENIED] = "request denied",
|
||||
[DLM_DENIED_NOLOCKS] = "request denied, out of system resources",
|
||||
[DLM_WORKING] = "async request in progress",
|
||||
[DLM_BLOCKED] = "lock request blocked",
|
||||
[DLM_BLOCKED_ORPHAN] = "lock request blocked by a orphan lock",
|
||||
[DLM_DENIED_GRACE_PERIOD] = "topological change in progress",
|
||||
[DLM_SYSERR] = "system error",
|
||||
[DLM_NOSUPPORT] = "unsupported",
|
||||
[DLM_CANCELGRANT] = "can't cancel convert: already granted",
|
||||
[DLM_IVLOCKID] = "bad lockid",
|
||||
[DLM_SYNC] = "synchronous request granted",
|
||||
[DLM_BADTYPE] = "bad resource type",
|
||||
[DLM_BADRESOURCE] = "bad resource handle",
|
||||
[DLM_MAXHANDLES] = "no more resource handles",
|
||||
[DLM_NOCLINFO] = "can't contact cluster manager",
|
||||
[DLM_NOLOCKMGR] = "can't contact lock manager",
|
||||
[DLM_NOPURGED] = "can't contact purge daemon",
|
||||
[DLM_BADARGS] = "bad api args",
|
||||
[DLM_VOID] = "no status",
|
||||
[DLM_NOTQUEUED] = "NOQUEUE was specified and request failed",
|
||||
[DLM_IVBUFLEN] = "invalid resource name length",
|
||||
[DLM_CVTUNGRANT] = "attempted to convert ungranted lock",
|
||||
[DLM_BADPARAM] = "invalid lock mode specified",
|
||||
[DLM_VALNOTVALID] = "value block has been invalidated",
|
||||
[DLM_REJECTED] = "request rejected, unrecognized client",
|
||||
[DLM_ABORT] = "blocked lock request cancelled",
|
||||
[DLM_CANCEL] = "conversion request cancelled",
|
||||
[DLM_IVRESHANDLE] = "invalid resource handle",
|
||||
[DLM_DEADLOCK] = "deadlock recovery refused this request",
|
||||
[DLM_DENIED_NOASTS] = "failed to allocate AST",
|
||||
[DLM_FORWARD] = "request must wait for primary's response",
|
||||
[DLM_TIMEOUT] = "timeout value for lock has expired",
|
||||
[DLM_IVGROUPID] = "invalid group specification",
|
||||
[DLM_VERS_CONFLICT] = "version conflicts prevent request handling",
|
||||
[DLM_BAD_DEVICE_PATH] = "Locks device does not exist or path wrong",
|
||||
[DLM_NO_DEVICE_PERMISSION] = "Client has insufficient perms for device",
|
||||
[DLM_NO_CONTROL_DEVICE] = "Cannot set options on opened device ",
|
||||
[DLM_RECOVERING] = "lock resource being recovered",
|
||||
[DLM_MIGRATING] = "lock resource being migrated",
|
||||
[DLM_MAXSTATS] = "invalid error number",
|
||||
};
|
||||
|
||||
const char *dlm_errmsg(enum dlm_status err)
|
||||
{
|
||||
if (err >= DLM_MAXSTATS || err < 0)
|
||||
return dlm_errmsgs[DLM_MAXSTATS];
|
||||
return dlm_errmsgs[err];
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(dlm_errmsg);
|
||||
|
||||
const char *dlm_errname(enum dlm_status err)
|
||||
{
|
||||
if (err >= DLM_MAXSTATS || err < 0)
|
||||
return dlm_errnames[DLM_MAXSTATS];
|
||||
return dlm_errnames[err];
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(dlm_errname);
|
30
fs/ocfs2/dlm/dlmdebug.h
Normal file
30
fs/ocfs2/dlm/dlmdebug.h
Normal file
@ -0,0 +1,30 @@
|
||||
/* -*- mode: c; c-basic-offset: 8; -*-
|
||||
* vim: noexpandtab sw=8 ts=8 sts=0:
|
||||
*
|
||||
* dlmdebug.h
|
||||
*
|
||||
* Copyright (C) 2004 Oracle. All rights reserved.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2 of the License, or (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public
|
||||
* License along with this program; if not, write to the
|
||||
* Free Software Foundation, Inc., 59 Temple Place - Suite 330,
|
||||
* Boston, MA 021110-1307, USA.
|
||||
*
|
||||
*/
|
||||
|
||||
#ifndef DLMDEBUG_H
#define DLMDEBUG_H

/* Log every lock resource hashed in @dlm, with the locks on each of its
 * queues, at ML_NOTICE level. */
void dlm_dump_lock_resources(struct dlm_ctxt *dlm);

#endif
|
1469
fs/ocfs2/dlm/dlmdomain.c
Normal file
1469
fs/ocfs2/dlm/dlmdomain.c
Normal file
File diff suppressed because it is too large
Load Diff
36
fs/ocfs2/dlm/dlmdomain.h
Normal file
36
fs/ocfs2/dlm/dlmdomain.h
Normal file
@ -0,0 +1,36 @@
|
||||
/* -*- mode: c; c-basic-offset: 8; -*-
|
||||
* vim: noexpandtab sw=8 ts=8 sts=0:
|
||||
*
|
||||
* dlmdomain.h
|
||||
*
|
||||
* Copyright (C) 2004 Oracle. All rights reserved.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2 of the License, or (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public
|
||||
* License along with this program; if not, write to the
|
||||
* Free Software Foundation, Inc., 59 Temple Place - Suite 330,
|
||||
* Boston, MA 021110-1307, USA.
|
||||
*
|
||||
*/
|
||||
|
||||
#ifndef DLMDOMAIN_H
|
||||
#define DLMDOMAIN_H
|
||||
|
||||
extern spinlock_t dlm_domain_lock;
|
||||
extern struct list_head dlm_domains;
|
||||
|
||||
int dlm_joined(struct dlm_ctxt *dlm);
|
||||
int dlm_shutting_down(struct dlm_ctxt *dlm);
|
||||
void dlm_fire_domain_eviction_callbacks(struct dlm_ctxt *dlm,
|
||||
int node_num);
|
||||
|
||||
#endif
|
676
fs/ocfs2/dlm/dlmlock.c
Normal file
676
fs/ocfs2/dlm/dlmlock.c
Normal file
@ -0,0 +1,676 @@
|
||||
/* -*- mode: c; c-basic-offset: 8; -*-
|
||||
* vim: noexpandtab sw=8 ts=8 sts=0:
|
||||
*
|
||||
* dlmlock.c
|
||||
*
|
||||
* underlying calls for lock creation
|
||||
*
|
||||
* Copyright (C) 2004 Oracle. All rights reserved.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2 of the License, or (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public
|
||||
* License along with this program; if not, write to the
|
||||
* Free Software Foundation, Inc., 59 Temple Place - Suite 330,
|
||||
* Boston, MA 021110-1307, USA.
|
||||
*
|
||||
*/
|
||||
|
||||
|
||||
#include <linux/module.h>
|
||||
#include <linux/fs.h>
|
||||
#include <linux/types.h>
|
||||
#include <linux/slab.h>
|
||||
#include <linux/highmem.h>
|
||||
#include <linux/utsname.h>
|
||||
#include <linux/init.h>
|
||||
#include <linux/sysctl.h>
|
||||
#include <linux/random.h>
|
||||
#include <linux/blkdev.h>
|
||||
#include <linux/socket.h>
|
||||
#include <linux/inet.h>
|
||||
#include <linux/spinlock.h>
|
||||
#include <linux/delay.h>
|
||||
|
||||
|
||||
#include "cluster/heartbeat.h"
|
||||
#include "cluster/nodemanager.h"
|
||||
#include "cluster/tcp.h"
|
||||
|
||||
#include "dlmapi.h"
|
||||
#include "dlmcommon.h"
|
||||
|
||||
#include "dlmconvert.h"
|
||||
|
||||
#define MLOG_MASK_PREFIX ML_DLM
|
||||
#include "cluster/masklog.h"
|
||||
|
||||
static spinlock_t dlm_cookie_lock = SPIN_LOCK_UNLOCKED;
|
||||
static u64 dlm_next_cookie = 1;
|
||||
|
||||
static enum dlm_status dlm_send_remote_lock_request(struct dlm_ctxt *dlm,
|
||||
struct dlm_lock_resource *res,
|
||||
struct dlm_lock *lock, int flags);
|
||||
static void dlm_init_lock(struct dlm_lock *newlock, int type,
|
||||
u8 node, u64 cookie);
|
||||
static void dlm_lock_release(struct kref *kref);
|
||||
static void dlm_lock_detach_lockres(struct dlm_lock *lock);
|
||||
|
||||
/* Tell us whether we can grant a new lock request.
|
||||
* locking:
|
||||
* caller needs: res->spinlock
|
||||
* taken: none
|
||||
* held on exit: none
|
||||
* returns: 1 if the lock can be granted, 0 otherwise.
|
||||
*/
|
||||
static int dlm_can_grant_new_lock(struct dlm_lock_resource *res,
|
||||
struct dlm_lock *lock)
|
||||
{
|
||||
struct list_head *iter;
|
||||
struct dlm_lock *tmplock;
|
||||
|
||||
list_for_each(iter, &res->granted) {
|
||||
tmplock = list_entry(iter, struct dlm_lock, list);
|
||||
|
||||
if (!dlm_lock_compatible(tmplock->ml.type, lock->ml.type))
|
||||
return 0;
|
||||
}
|
||||
|
||||
list_for_each(iter, &res->converting) {
|
||||
tmplock = list_entry(iter, struct dlm_lock, list);
|
||||
|
||||
if (!dlm_lock_compatible(tmplock->ml.type, lock->ml.type))
|
||||
return 0;
|
||||
}
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
/* performs lock creation at the lockres master site
|
||||
* locking:
|
||||
* caller needs: none
|
||||
* taken: takes and drops res->spinlock
|
||||
* held on exit: none
|
||||
* returns: DLM_NORMAL, DLM_NOTQUEUED
|
||||
*/
|
||||
static enum dlm_status dlmlock_master(struct dlm_ctxt *dlm,
|
||||
struct dlm_lock_resource *res,
|
||||
struct dlm_lock *lock, int flags)
|
||||
{
|
||||
int call_ast = 0, kick_thread = 0;
|
||||
enum dlm_status status = DLM_NORMAL;
|
||||
|
||||
mlog_entry("type=%d\n", lock->ml.type);
|
||||
|
||||
spin_lock(&res->spinlock);
|
||||
/* if called from dlm_create_lock_handler, need to
|
||||
* ensure it will not sleep in dlm_wait_on_lockres */
|
||||
status = __dlm_lockres_state_to_status(res);
|
||||
if (status != DLM_NORMAL &&
|
||||
lock->ml.node != dlm->node_num) {
|
||||
/* erf. state changed after lock was dropped. */
|
||||
spin_unlock(&res->spinlock);
|
||||
dlm_error(status);
|
||||
return status;
|
||||
}
|
||||
__dlm_wait_on_lockres(res);
|
||||
__dlm_lockres_reserve_ast(res);
|
||||
|
||||
if (dlm_can_grant_new_lock(res, lock)) {
|
||||
mlog(0, "I can grant this lock right away\n");
|
||||
/* got it right away */
|
||||
lock->lksb->status = DLM_NORMAL;
|
||||
status = DLM_NORMAL;
|
||||
dlm_lock_get(lock);
|
||||
list_add_tail(&lock->list, &res->granted);
|
||||
|
||||
/* for the recovery lock, we can't allow the ast
|
||||
* to be queued since the dlmthread is already
|
||||
* frozen. but the recovery lock is always locked
|
||||
* with LKM_NOQUEUE so we do not need the ast in
|
||||
* this special case */
|
||||
if (!dlm_is_recovery_lock(res->lockname.name,
|
||||
res->lockname.len)) {
|
||||
kick_thread = 1;
|
||||
call_ast = 1;
|
||||
}
|
||||
} else {
|
||||
/* for NOQUEUE request, unless we get the
|
||||
* lock right away, return DLM_NOTQUEUED */
|
||||
if (flags & LKM_NOQUEUE)
|
||||
status = DLM_NOTQUEUED;
|
||||
else {
|
||||
dlm_lock_get(lock);
|
||||
list_add_tail(&lock->list, &res->blocked);
|
||||
kick_thread = 1;
|
||||
}
|
||||
}
|
||||
|
||||
spin_unlock(&res->spinlock);
|
||||
wake_up(&res->wq);
|
||||
|
||||
/* either queue the ast or release it */
|
||||
if (call_ast)
|
||||
dlm_queue_ast(dlm, lock);
|
||||
else
|
||||
dlm_lockres_release_ast(dlm, res);
|
||||
|
||||
dlm_lockres_calc_usage(dlm, res);
|
||||
if (kick_thread)
|
||||
dlm_kick_thread(dlm, res);
|
||||
|
||||
return status;
|
||||
}
|
||||
|
||||
void dlm_revert_pending_lock(struct dlm_lock_resource *res,
|
||||
struct dlm_lock *lock)
|
||||
{
|
||||
/* remove from local queue if it failed */
|
||||
list_del_init(&lock->list);
|
||||
lock->lksb->flags &= ~DLM_LKSB_GET_LVB;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* locking:
|
||||
* caller needs: none
|
||||
* taken: takes and drops res->spinlock
|
||||
* held on exit: none
|
||||
* returns: DLM_DENIED, DLM_RECOVERING, or net status
|
||||
*/
|
||||
static enum dlm_status dlmlock_remote(struct dlm_ctxt *dlm,
|
||||
struct dlm_lock_resource *res,
|
||||
struct dlm_lock *lock, int flags)
|
||||
{
|
||||
enum dlm_status status = DLM_DENIED;
|
||||
|
||||
mlog_entry("type=%d\n", lock->ml.type);
|
||||
mlog(0, "lockres %.*s, flags = 0x%x\n", res->lockname.len,
|
||||
res->lockname.name, flags);
|
||||
|
||||
spin_lock(&res->spinlock);
|
||||
|
||||
/* will exit this call with spinlock held */
|
||||
__dlm_wait_on_lockres(res);
|
||||
res->state |= DLM_LOCK_RES_IN_PROGRESS;
|
||||
|
||||
/* add lock to local (secondary) queue */
|
||||
dlm_lock_get(lock);
|
||||
list_add_tail(&lock->list, &res->blocked);
|
||||
lock->lock_pending = 1;
|
||||
spin_unlock(&res->spinlock);
|
||||
|
||||
/* spec seems to say that you will get DLM_NORMAL when the lock
|
||||
* has been queued, meaning we need to wait for a reply here. */
|
||||
status = dlm_send_remote_lock_request(dlm, res, lock, flags);
|
||||
|
||||
spin_lock(&res->spinlock);
|
||||
res->state &= ~DLM_LOCK_RES_IN_PROGRESS;
|
||||
lock->lock_pending = 0;
|
||||
if (status != DLM_NORMAL) {
|
||||
if (status != DLM_NOTQUEUED)
|
||||
dlm_error(status);
|
||||
dlm_revert_pending_lock(res, lock);
|
||||
dlm_lock_put(lock);
|
||||
}
|
||||
spin_unlock(&res->spinlock);
|
||||
|
||||
dlm_lockres_calc_usage(dlm, res);
|
||||
|
||||
wake_up(&res->wq);
|
||||
return status;
|
||||
}
|
||||
|
||||
|
||||
/* for remote lock creation.
|
||||
* locking:
|
||||
* caller needs: none, but need res->state & DLM_LOCK_RES_IN_PROGRESS
|
||||
* taken: none
|
||||
* held on exit: none
|
||||
* returns: DLM_NOLOCKMGR, or net status
|
||||
*/
|
||||
static enum dlm_status dlm_send_remote_lock_request(struct dlm_ctxt *dlm,
|
||||
struct dlm_lock_resource *res,
|
||||
struct dlm_lock *lock, int flags)
|
||||
{
|
||||
struct dlm_create_lock create;
|
||||
int tmpret, status = 0;
|
||||
enum dlm_status ret;
|
||||
|
||||
mlog_entry_void();
|
||||
|
||||
memset(&create, 0, sizeof(create));
|
||||
create.node_idx = dlm->node_num;
|
||||
create.requested_type = lock->ml.type;
|
||||
create.cookie = lock->ml.cookie;
|
||||
create.namelen = res->lockname.len;
|
||||
create.flags = cpu_to_be32(flags);
|
||||
memcpy(create.name, res->lockname.name, create.namelen);
|
||||
|
||||
tmpret = o2net_send_message(DLM_CREATE_LOCK_MSG, dlm->key, &create,
|
||||
sizeof(create), res->owner, &status);
|
||||
if (tmpret >= 0) {
|
||||
// successfully sent and received
|
||||
ret = status; // this is already a dlm_status
|
||||
} else {
|
||||
mlog_errno(tmpret);
|
||||
if (dlm_is_host_down(tmpret)) {
|
||||
ret = DLM_RECOVERING;
|
||||
mlog(0, "node %u died so returning DLM_RECOVERING "
|
||||
"from lock message!\n", res->owner);
|
||||
} else {
|
||||
ret = dlm_err_to_dlm_status(tmpret);
|
||||
}
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
void dlm_lock_get(struct dlm_lock *lock)
|
||||
{
|
||||
kref_get(&lock->lock_refs);
|
||||
}
|
||||
|
||||
void dlm_lock_put(struct dlm_lock *lock)
|
||||
{
|
||||
kref_put(&lock->lock_refs, dlm_lock_release);
|
||||
}
|
||||
|
||||
static void dlm_lock_release(struct kref *kref)
|
||||
{
|
||||
struct dlm_lock *lock;
|
||||
|
||||
lock = container_of(kref, struct dlm_lock, lock_refs);
|
||||
|
||||
BUG_ON(!list_empty(&lock->list));
|
||||
BUG_ON(!list_empty(&lock->ast_list));
|
||||
BUG_ON(!list_empty(&lock->bast_list));
|
||||
BUG_ON(lock->ast_pending);
|
||||
BUG_ON(lock->bast_pending);
|
||||
|
||||
dlm_lock_detach_lockres(lock);
|
||||
|
||||
if (lock->lksb_kernel_allocated) {
|
||||
mlog(0, "freeing kernel-allocated lksb\n");
|
||||
kfree(lock->lksb);
|
||||
}
|
||||
kfree(lock);
|
||||
}
|
||||
|
||||
/* associate a lock with it's lockres, getting a ref on the lockres */
|
||||
void dlm_lock_attach_lockres(struct dlm_lock *lock,
|
||||
struct dlm_lock_resource *res)
|
||||
{
|
||||
dlm_lockres_get(res);
|
||||
lock->lockres = res;
|
||||
}
|
||||
|
||||
/* drop ref on lockres, if there is still one associated with lock */
|
||||
static void dlm_lock_detach_lockres(struct dlm_lock *lock)
|
||||
{
|
||||
struct dlm_lock_resource *res;
|
||||
|
||||
res = lock->lockres;
|
||||
if (res) {
|
||||
lock->lockres = NULL;
|
||||
mlog(0, "removing lock's lockres reference\n");
|
||||
dlm_lockres_put(res);
|
||||
}
|
||||
}
|
||||
|
||||
static void dlm_init_lock(struct dlm_lock *newlock, int type,
|
||||
u8 node, u64 cookie)
|
||||
{
|
||||
INIT_LIST_HEAD(&newlock->list);
|
||||
INIT_LIST_HEAD(&newlock->ast_list);
|
||||
INIT_LIST_HEAD(&newlock->bast_list);
|
||||
spin_lock_init(&newlock->spinlock);
|
||||
newlock->ml.type = type;
|
||||
newlock->ml.convert_type = LKM_IVMODE;
|
||||
newlock->ml.highest_blocked = LKM_IVMODE;
|
||||
newlock->ml.node = node;
|
||||
newlock->ml.pad1 = 0;
|
||||
newlock->ml.list = 0;
|
||||
newlock->ml.flags = 0;
|
||||
newlock->ast = NULL;
|
||||
newlock->bast = NULL;
|
||||
newlock->astdata = NULL;
|
||||
newlock->ml.cookie = cpu_to_be64(cookie);
|
||||
newlock->ast_pending = 0;
|
||||
newlock->bast_pending = 0;
|
||||
newlock->convert_pending = 0;
|
||||
newlock->lock_pending = 0;
|
||||
newlock->unlock_pending = 0;
|
||||
newlock->cancel_pending = 0;
|
||||
newlock->lksb_kernel_allocated = 0;
|
||||
|
||||
kref_init(&newlock->lock_refs);
|
||||
}
|
||||
|
||||
/* Allocate and initialize a dlm_lock.
 *
 * @type, @node, @cookie: passed straight to dlm_init_lock()
 * @lksb: lock status block to attach, or NULL to have a zeroed one
 *        allocated here; an allocated lksb is marked with
 *        lksb_kernel_allocated so that dlm_lock_release() frees it.
 *
 * On success lock->lksb and lksb->lockid point at each other.
 * returns: the new lock, or NULL if either allocation failed.
 */
struct dlm_lock * dlm_new_lock(int type, u8 node, u64 cookie,
			       struct dlm_lockstatus *lksb)
{
	struct dlm_lock *lock;
	int kernel_allocated = 0;

	/* GFP_NOFS rather than GFP_KERNEL: this dlm backs a filesystem,
	 * so an allocation made on a locking path must not recurse into
	 * filesystem reclaim, which may itself need dlm locks. */
	lock = kcalloc(1, sizeof(*lock), GFP_NOFS);
	if (!lock)
		return NULL;

	if (!lksb) {
		/* zero memory only if kernel-allocated */
		lksb = kcalloc(1, sizeof(*lksb), GFP_NOFS);
		if (!lksb) {
			kfree(lock);
			return NULL;
		}
		kernel_allocated = 1;
	}

	dlm_init_lock(lock, type, node, cookie);
	/* dlm_init_lock() cleared the flag, so set it only afterwards */
	if (kernel_allocated)
		lock->lksb_kernel_allocated = 1;
	lock->lksb = lksb;
	lksb->lockid = lock;
	return lock;
}
|
||||
|
||||
/* handler for lock creation net message
|
||||
* locking:
|
||||
* caller needs: none
|
||||
* taken: takes and drops res->spinlock
|
||||
* held on exit: none
|
||||
* returns: DLM_NORMAL, DLM_SYSERR, DLM_IVLOCKID, DLM_NOTQUEUED
|
||||
*/
|
||||
int dlm_create_lock_handler(struct o2net_msg *msg, u32 len, void *data)
|
||||
{
|
||||
struct dlm_ctxt *dlm = data;
|
||||
struct dlm_create_lock *create = (struct dlm_create_lock *)msg->buf;
|
||||
struct dlm_lock_resource *res = NULL;
|
||||
struct dlm_lock *newlock = NULL;
|
||||
struct dlm_lockstatus *lksb = NULL;
|
||||
enum dlm_status status = DLM_NORMAL;
|
||||
char *name;
|
||||
unsigned int namelen;
|
||||
|
||||
BUG_ON(!dlm);
|
||||
|
||||
mlog_entry_void();
|
||||
|
||||
if (!dlm_grab(dlm))
|
||||
return DLM_REJECTED;
|
||||
|
||||
mlog_bug_on_msg(!dlm_domain_fully_joined(dlm),
|
||||
"Domain %s not fully joined!\n", dlm->name);
|
||||
|
||||
name = create->name;
|
||||
namelen = create->namelen;
|
||||
|
||||
status = DLM_IVBUFLEN;
|
||||
if (namelen > DLM_LOCKID_NAME_MAX) {
|
||||
dlm_error(status);
|
||||
goto leave;
|
||||
}
|
||||
|
||||
status = DLM_SYSERR;
|
||||
newlock = dlm_new_lock(create->requested_type,
|
||||
create->node_idx,
|
||||
be64_to_cpu(create->cookie), NULL);
|
||||
if (!newlock) {
|
||||
dlm_error(status);
|
||||
goto leave;
|
||||
}
|
||||
|
||||
lksb = newlock->lksb;
|
||||
|
||||
if (be32_to_cpu(create->flags) & LKM_GET_LVB) {
|
||||
lksb->flags |= DLM_LKSB_GET_LVB;
|
||||
mlog(0, "set DLM_LKSB_GET_LVB flag\n");
|
||||
}
|
||||
|
||||
status = DLM_IVLOCKID;
|
||||
res = dlm_lookup_lockres(dlm, name, namelen);
|
||||
if (!res) {
|
||||
dlm_error(status);
|
||||
goto leave;
|
||||
}
|
||||
|
||||
spin_lock(&res->spinlock);
|
||||
status = __dlm_lockres_state_to_status(res);
|
||||
spin_unlock(&res->spinlock);
|
||||
|
||||
if (status != DLM_NORMAL) {
|
||||
mlog(0, "lockres recovering/migrating/in-progress\n");
|
||||
goto leave;
|
||||
}
|
||||
|
||||
dlm_lock_attach_lockres(newlock, res);
|
||||
|
||||
status = dlmlock_master(dlm, res, newlock, be32_to_cpu(create->flags));
|
||||
leave:
|
||||
if (status != DLM_NORMAL)
|
||||
if (newlock)
|
||||
dlm_lock_put(newlock);
|
||||
|
||||
if (res)
|
||||
dlm_lockres_put(res);
|
||||
|
||||
dlm_put(dlm);
|
||||
|
||||
return status;
|
||||
}
|
||||
|
||||
|
||||
/* fetch next node-local (u8 nodenum + u56 cookie) into u64 */
|
||||
static inline void dlm_get_next_cookie(u8 node_num, u64 *cookie)
|
||||
{
|
||||
u64 tmpnode = node_num;
|
||||
|
||||
/* shift single byte of node num into top 8 bits */
|
||||
tmpnode <<= 56;
|
||||
|
||||
spin_lock(&dlm_cookie_lock);
|
||||
*cookie = (dlm_next_cookie | tmpnode);
|
||||
if (++dlm_next_cookie & 0xff00000000000000ull) {
|
||||
mlog(0, "This node's cookie will now wrap!\n");
|
||||
dlm_next_cookie = 1;
|
||||
}
|
||||
spin_unlock(&dlm_cookie_lock);
|
||||
}
|
||||
|
||||
enum dlm_status dlmlock(struct dlm_ctxt *dlm, int mode,
|
||||
struct dlm_lockstatus *lksb, int flags,
|
||||
const char *name, dlm_astlockfunc_t *ast, void *data,
|
||||
dlm_bastlockfunc_t *bast)
|
||||
{
|
||||
enum dlm_status status;
|
||||
struct dlm_lock_resource *res = NULL;
|
||||
struct dlm_lock *lock = NULL;
|
||||
int convert = 0, recovery = 0;
|
||||
|
||||
/* yes this function is a mess.
|
||||
* TODO: clean this up. lots of common code in the
|
||||
* lock and convert paths, especially in the retry blocks */
|
||||
if (!lksb) {
|
||||
dlm_error(DLM_BADARGS);
|
||||
return DLM_BADARGS;
|
||||
}
|
||||
|
||||
status = DLM_BADPARAM;
|
||||
if (mode != LKM_EXMODE && mode != LKM_PRMODE && mode != LKM_NLMODE) {
|
||||
dlm_error(status);
|
||||
goto error;
|
||||
}
|
||||
|
||||
if (flags & ~LKM_VALID_FLAGS) {
|
||||
dlm_error(status);
|
||||
goto error;
|
||||
}
|
||||
|
||||
convert = (flags & LKM_CONVERT);
|
||||
recovery = (flags & LKM_RECOVERY);
|
||||
|
||||
if (recovery &&
|
||||
(!dlm_is_recovery_lock(name, strlen(name)) || convert) ) {
|
||||
dlm_error(status);
|
||||
goto error;
|
||||
}
|
||||
if (convert && (flags & LKM_LOCAL)) {
|
||||
mlog(ML_ERROR, "strange LOCAL convert request!\n");
|
||||
goto error;
|
||||
}
|
||||
|
||||
if (convert) {
|
||||
/* CONVERT request */
|
||||
|
||||
/* if converting, must pass in a valid dlm_lock */
|
||||
lock = lksb->lockid;
|
||||
if (!lock) {
|
||||
mlog(ML_ERROR, "NULL lock pointer in convert "
|
||||
"request\n");
|
||||
goto error;
|
||||
}
|
||||
|
||||
res = lock->lockres;
|
||||
if (!res) {
|
||||
mlog(ML_ERROR, "NULL lockres pointer in convert "
|
||||
"request\n");
|
||||
goto error;
|
||||
}
|
||||
dlm_lockres_get(res);
|
||||
|
||||
/* XXX: for ocfs2 purposes, the ast/bast/astdata/lksb are
|
||||
* static after the original lock call. convert requests will
|
||||
* ensure that everything is the same, or return DLM_BADARGS.
|
||||
* this means that DLM_DENIED_NOASTS will never be returned.
|
||||
*/
|
||||
if (lock->lksb != lksb || lock->ast != ast ||
|
||||
lock->bast != bast || lock->astdata != data) {
|
||||
status = DLM_BADARGS;
|
||||
mlog(ML_ERROR, "new args: lksb=%p, ast=%p, bast=%p, "
|
||||
"astdata=%p\n", lksb, ast, bast, data);
|
||||
mlog(ML_ERROR, "orig args: lksb=%p, ast=%p, bast=%p, "
|
||||
"astdata=%p\n", lock->lksb, lock->ast,
|
||||
lock->bast, lock->astdata);
|
||||
goto error;
|
||||
}
|
||||
retry_convert:
|
||||
dlm_wait_for_recovery(dlm);
|
||||
|
||||
if (res->owner == dlm->node_num)
|
||||
status = dlmconvert_master(dlm, res, lock, flags, mode);
|
||||
else
|
||||
status = dlmconvert_remote(dlm, res, lock, flags, mode);
|
||||
if (status == DLM_RECOVERING || status == DLM_MIGRATING ||
|
||||
status == DLM_FORWARD) {
|
||||
/* for now, see how this works without sleeping
|
||||
* and just retry right away. I suspect the reco
|
||||
* or migration will complete fast enough that
|
||||
* no waiting will be necessary */
|
||||
mlog(0, "retrying convert with migration/recovery/"
|
||||
"in-progress\n");
|
||||
msleep(100);
|
||||
goto retry_convert;
|
||||
}
|
||||
} else {
|
||||
u64 tmpcookie;
|
||||
|
||||
/* LOCK request */
|
||||
status = DLM_BADARGS;
|
||||
if (!name) {
|
||||
dlm_error(status);
|
||||
goto error;
|
||||
}
|
||||
|
||||
status = DLM_IVBUFLEN;
|
||||
if (strlen(name) > DLM_LOCKID_NAME_MAX || strlen(name) < 1) {
|
||||
dlm_error(status);
|
||||
goto error;
|
||||
}
|
||||
|
||||
dlm_get_next_cookie(dlm->node_num, &tmpcookie);
|
||||
lock = dlm_new_lock(mode, dlm->node_num, tmpcookie, lksb);
|
||||
if (!lock) {
|
||||
dlm_error(status);
|
||||
goto error;
|
||||
}
|
||||
|
||||
if (!recovery)
|
||||
dlm_wait_for_recovery(dlm);
|
||||
|
||||
/* find or create the lock resource */
|
||||
res = dlm_get_lock_resource(dlm, name, flags);
|
||||
if (!res) {
|
||||
status = DLM_IVLOCKID;
|
||||
dlm_error(status);
|
||||
goto error;
|
||||
}
|
||||
|
||||
mlog(0, "type=%d, flags = 0x%x\n", mode, flags);
|
||||
mlog(0, "creating lock: lock=%p res=%p\n", lock, res);
|
||||
|
||||
dlm_lock_attach_lockres(lock, res);
|
||||
lock->ast = ast;
|
||||
lock->bast = bast;
|
||||
lock->astdata = data;
|
||||
|
||||
retry_lock:
|
||||
if (flags & LKM_VALBLK) {
|
||||
mlog(0, "LKM_VALBLK passed by caller\n");
|
||||
|
||||
/* LVB requests for non PR, PW or EX locks are
|
||||
* ignored. */
|
||||
if (mode < LKM_PRMODE)
|
||||
flags &= ~LKM_VALBLK;
|
||||
else {
|
||||
flags |= LKM_GET_LVB;
|
||||
lock->lksb->flags |= DLM_LKSB_GET_LVB;
|
||||
}
|
||||
}
|
||||
|
||||
if (res->owner == dlm->node_num)
|
||||
status = dlmlock_master(dlm, res, lock, flags);
|
||||
else
|
||||
status = dlmlock_remote(dlm, res, lock, flags);
|
||||
|
||||
if (status == DLM_RECOVERING || status == DLM_MIGRATING ||
|
||||
status == DLM_FORWARD) {
|
||||
mlog(0, "retrying lock with migration/"
|
||||
"recovery/in progress\n");
|
||||
msleep(100);
|
||||
dlm_wait_for_recovery(dlm);
|
||||
goto retry_lock;
|
||||
}
|
||||
|
||||
if (status != DLM_NORMAL) {
|
||||
lock->lksb->flags &= ~DLM_LKSB_GET_LVB;
|
||||
if (status != DLM_NOTQUEUED)
|
||||
dlm_error(status);
|
||||
goto error;
|
||||
}
|
||||
}
|
||||
|
||||
error:
|
||||
if (status != DLM_NORMAL) {
|
||||
if (lock && !convert)
|
||||
dlm_lock_put(lock);
|
||||
// this is kind of unnecessary
|
||||
lksb->status = status;
|
||||
}
|
||||
|
||||
/* put lockres ref from the convert path
|
||||
* or from dlm_get_lock_resource */
|
||||
if (res)
|
||||
dlm_lockres_put(res);
|
||||
|
||||
return status;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(dlmlock);
|
2666
fs/ocfs2/dlm/dlmmaster.c
Normal file
2666
fs/ocfs2/dlm/dlmmaster.c
Normal file
File diff suppressed because it is too large
Load Diff
2132
fs/ocfs2/dlm/dlmrecovery.c
Normal file
2132
fs/ocfs2/dlm/dlmrecovery.c
Normal file
File diff suppressed because it is too large
Load Diff
695
fs/ocfs2/dlm/dlmthread.c
Normal file
695
fs/ocfs2/dlm/dlmthread.c
Normal file
@ -0,0 +1,695 @@
|
||||
/* -*- mode: c; c-basic-offset: 8; -*-
|
||||
* vim: noexpandtab sw=8 ts=8 sts=0:
|
||||
*
|
||||
* dlmthread.c
|
||||
*
|
||||
* standalone DLM module
|
||||
*
|
||||
* Copyright (C) 2004 Oracle. All rights reserved.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2 of the License, or (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public
|
||||
* License along with this program; if not, write to the
|
||||
* Free Software Foundation, Inc., 59 Temple Place - Suite 330,
|
||||
* Boston, MA 02111-1307, USA.
|
||||
*
|
||||
*/
|
||||
|
||||
|
||||
#include <linux/module.h>
|
||||
#include <linux/fs.h>
|
||||
#include <linux/types.h>
|
||||
#include <linux/slab.h>
|
||||
#include <linux/highmem.h>
|
||||
#include <linux/utsname.h>
|
||||
#include <linux/init.h>
|
||||
#include <linux/sysctl.h>
|
||||
#include <linux/random.h>
|
||||
#include <linux/blkdev.h>
|
||||
#include <linux/socket.h>
|
||||
#include <linux/inet.h>
|
||||
#include <linux/timer.h>
|
||||
#include <linux/kthread.h>
|
||||
|
||||
|
||||
#include "cluster/heartbeat.h"
|
||||
#include "cluster/nodemanager.h"
|
||||
#include "cluster/tcp.h"
|
||||
|
||||
#include "dlmapi.h"
|
||||
#include "dlmcommon.h"
|
||||
#include "dlmdomain.h"
|
||||
|
||||
#define MLOG_MASK_PREFIX (ML_DLM|ML_DLM_THREAD)
|
||||
#include "cluster/masklog.h"
|
||||
|
||||
/* defined outside this file */
extern spinlock_t dlm_domain_lock;
|
||||
extern struct list_head dlm_domains;
|
||||
|
||||
/* forward declarations for helpers defined further down in this file */
static int dlm_thread(void *data);
|
||||
|
||||
static void dlm_flush_asts(struct dlm_ctxt *dlm);
|
||||
|
||||
/* true when the node recorded in the lock is not this dlm's own node */
#define dlm_lock_is_remote(dlm, lock) ((lock)->ml.node != (dlm)->node_num)
|
||||
|
||||
/* will exit holding res->spinlock, but may drop in function */
|
||||
/* waits until flags are cleared on res->state */
|
||||
void __dlm_wait_on_lockres_flags(struct dlm_lock_resource *res, int flags)
|
||||
{
|
||||
DECLARE_WAITQUEUE(wait, current);
|
||||
|
||||
assert_spin_locked(&res->spinlock);
|
||||
|
||||
add_wait_queue(&res->wq, &wait);
|
||||
repeat:
|
||||
set_current_state(TASK_UNINTERRUPTIBLE);
|
||||
if (res->state & flags) {
|
||||
spin_unlock(&res->spinlock);
|
||||
schedule();
|
||||
spin_lock(&res->spinlock);
|
||||
goto repeat;
|
||||
}
|
||||
remove_wait_queue(&res->wq, &wait);
|
||||
current->state = TASK_RUNNING;
|
||||
}
|
||||
|
||||
|
||||
static int __dlm_lockres_unused(struct dlm_lock_resource *res)
|
||||
{
|
||||
if (list_empty(&res->granted) &&
|
||||
list_empty(&res->converting) &&
|
||||
list_empty(&res->blocked) &&
|
||||
list_empty(&res->dirty))
|
||||
return 1;
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
/* Call whenever you may have added or deleted something from one of
|
||||
* the lockres queue's. This will figure out whether it belongs on the
|
||||
* unused list or not and does the appropriate thing. */
|
||||
void __dlm_lockres_calc_usage(struct dlm_ctxt *dlm,
|
||||
struct dlm_lock_resource *res)
|
||||
{
|
||||
mlog_entry("%.*s\n", res->lockname.len, res->lockname.name);
|
||||
|
||||
assert_spin_locked(&dlm->spinlock);
|
||||
assert_spin_locked(&res->spinlock);
|
||||
|
||||
if (__dlm_lockres_unused(res)){
|
||||
if (list_empty(&res->purge)) {
|
||||
mlog(0, "putting lockres %.*s from purge list\n",
|
||||
res->lockname.len, res->lockname.name);
|
||||
|
||||
res->last_used = jiffies;
|
||||
list_add_tail(&res->purge, &dlm->purge_list);
|
||||
dlm->purge_count++;
|
||||
}
|
||||
} else if (!list_empty(&res->purge)) {
|
||||
mlog(0, "removing lockres %.*s from purge list\n",
|
||||
res->lockname.len, res->lockname.name);
|
||||
|
||||
list_del_init(&res->purge);
|
||||
dlm->purge_count--;
|
||||
}
|
||||
}
|
||||
|
||||
void dlm_lockres_calc_usage(struct dlm_ctxt *dlm,
|
||||
struct dlm_lock_resource *res)
|
||||
{
|
||||
mlog_entry("%.*s\n", res->lockname.len, res->lockname.name);
|
||||
spin_lock(&dlm->spinlock);
|
||||
spin_lock(&res->spinlock);
|
||||
|
||||
__dlm_lockres_calc_usage(dlm, res);
|
||||
|
||||
spin_unlock(&res->spinlock);
|
||||
spin_unlock(&dlm->spinlock);
|
||||
}
|
||||
|
||||
/* TODO: Eventual API: Called with the dlm spinlock held, may drop it
|
||||
* to do migration, but will re-acquire before exit. */
|
||||
void dlm_purge_lockres(struct dlm_ctxt *dlm, struct dlm_lock_resource *lockres)
|
||||
{
|
||||
int master;
|
||||
int ret;
|
||||
|
||||
spin_lock(&lockres->spinlock);
|
||||
master = lockres->owner == dlm->node_num;
|
||||
spin_unlock(&lockres->spinlock);
|
||||
|
||||
mlog(0, "purging lockres %.*s, master = %d\n", lockres->lockname.len,
|
||||
lockres->lockname.name, master);
|
||||
|
||||
/* Non master is the easy case -- no migration required, just
|
||||
* quit. */
|
||||
if (!master)
|
||||
goto finish;
|
||||
|
||||
/* Wheee! Migrate lockres here! */
|
||||
spin_unlock(&dlm->spinlock);
|
||||
again:
|
||||
|
||||
ret = dlm_migrate_lockres(dlm, lockres, O2NM_MAX_NODES);
|
||||
if (ret == -ENOTEMPTY) {
|
||||
mlog(ML_ERROR, "lockres %.*s still has local locks!\n",
|
||||
lockres->lockname.len, lockres->lockname.name);
|
||||
|
||||
BUG();
|
||||
} else if (ret < 0) {
|
||||
mlog(ML_NOTICE, "lockres %.*s: migrate failed, retrying\n",
|
||||
lockres->lockname.len, lockres->lockname.name);
|
||||
goto again;
|
||||
}
|
||||
|
||||
spin_lock(&dlm->spinlock);
|
||||
|
||||
finish:
|
||||
if (!list_empty(&lockres->purge)) {
|
||||
list_del_init(&lockres->purge);
|
||||
dlm->purge_count--;
|
||||
}
|
||||
__dlm_unhash_lockres(lockres);
|
||||
}
|
||||
|
||||
static void dlm_run_purge_list(struct dlm_ctxt *dlm,
|
||||
int purge_now)
|
||||
{
|
||||
unsigned int run_max, unused;
|
||||
unsigned long purge_jiffies;
|
||||
struct dlm_lock_resource *lockres;
|
||||
|
||||
spin_lock(&dlm->spinlock);
|
||||
run_max = dlm->purge_count;
|
||||
|
||||
while(run_max && !list_empty(&dlm->purge_list)) {
|
||||
run_max--;
|
||||
|
||||
lockres = list_entry(dlm->purge_list.next,
|
||||
struct dlm_lock_resource, purge);
|
||||
|
||||
/* Status of the lockres *might* change so double
|
||||
* check. If the lockres is unused, holding the dlm
|
||||
* spinlock will prevent people from getting and more
|
||||
* refs on it -- there's no need to keep the lockres
|
||||
* spinlock. */
|
||||
spin_lock(&lockres->spinlock);
|
||||
unused = __dlm_lockres_unused(lockres);
|
||||
spin_unlock(&lockres->spinlock);
|
||||
|
||||
if (!unused)
|
||||
continue;
|
||||
|
||||
purge_jiffies = lockres->last_used +
|
||||
msecs_to_jiffies(DLM_PURGE_INTERVAL_MS);
|
||||
|
||||
/* Make sure that we want to be processing this guy at
|
||||
* this time. */
|
||||
if (!purge_now && time_after(purge_jiffies, jiffies)) {
|
||||
/* Since resources are added to the purge list
|
||||
* in tail order, we can stop at the first
|
||||
* unpurgable resource -- anyone added after
|
||||
* him will have a greater last_used value */
|
||||
break;
|
||||
}
|
||||
|
||||
list_del_init(&lockres->purge);
|
||||
dlm->purge_count--;
|
||||
|
||||
/* This may drop and reacquire the dlm spinlock if it
|
||||
* has to do migration. */
|
||||
mlog(0, "calling dlm_purge_lockres!\n");
|
||||
dlm_purge_lockres(dlm, lockres);
|
||||
mlog(0, "DONE calling dlm_purge_lockres!\n");
|
||||
|
||||
/* Avoid adding any scheduling latencies */
|
||||
cond_resched_lock(&dlm->spinlock);
|
||||
}
|
||||
|
||||
spin_unlock(&dlm->spinlock);
|
||||
}
|
||||
|
||||
static void dlm_shuffle_lists(struct dlm_ctxt *dlm,
|
||||
struct dlm_lock_resource *res)
|
||||
{
|
||||
struct dlm_lock *lock, *target;
|
||||
struct list_head *iter;
|
||||
struct list_head *head;
|
||||
int can_grant = 1;
|
||||
|
||||
//mlog(0, "res->lockname.len=%d\n", res->lockname.len);
|
||||
//mlog(0, "res->lockname.name=%p\n", res->lockname.name);
|
||||
//mlog(0, "shuffle res %.*s\n", res->lockname.len,
|
||||
// res->lockname.name);
|
||||
|
||||
/* because this function is called with the lockres
|
||||
* spinlock, and because we know that it is not migrating/
|
||||
* recovering/in-progress, it is fine to reserve asts and
|
||||
* basts right before queueing them all throughout */
|
||||
assert_spin_locked(&res->spinlock);
|
||||
BUG_ON((res->state & (DLM_LOCK_RES_MIGRATING|
|
||||
DLM_LOCK_RES_RECOVERING|
|
||||
DLM_LOCK_RES_IN_PROGRESS)));
|
||||
|
||||
converting:
|
||||
if (list_empty(&res->converting))
|
||||
goto blocked;
|
||||
mlog(0, "res %.*s has locks on a convert queue\n", res->lockname.len,
|
||||
res->lockname.name);
|
||||
|
||||
target = list_entry(res->converting.next, struct dlm_lock, list);
|
||||
if (target->ml.convert_type == LKM_IVMODE) {
|
||||
mlog(ML_ERROR, "%.*s: converting a lock with no "
|
||||
"convert_type!\n", res->lockname.len, res->lockname.name);
|
||||
BUG();
|
||||
}
|
||||
head = &res->granted;
|
||||
list_for_each(iter, head) {
|
||||
lock = list_entry(iter, struct dlm_lock, list);
|
||||
if (lock==target)
|
||||
continue;
|
||||
if (!dlm_lock_compatible(lock->ml.type,
|
||||
target->ml.convert_type)) {
|
||||
can_grant = 0;
|
||||
/* queue the BAST if not already */
|
||||
if (lock->ml.highest_blocked == LKM_IVMODE) {
|
||||
__dlm_lockres_reserve_ast(res);
|
||||
dlm_queue_bast(dlm, lock);
|
||||
}
|
||||
/* update the highest_blocked if needed */
|
||||
if (lock->ml.highest_blocked < target->ml.convert_type)
|
||||
lock->ml.highest_blocked =
|
||||
target->ml.convert_type;
|
||||
}
|
||||
}
|
||||
head = &res->converting;
|
||||
list_for_each(iter, head) {
|
||||
lock = list_entry(iter, struct dlm_lock, list);
|
||||
if (lock==target)
|
||||
continue;
|
||||
if (!dlm_lock_compatible(lock->ml.type,
|
||||
target->ml.convert_type)) {
|
||||
can_grant = 0;
|
||||
if (lock->ml.highest_blocked == LKM_IVMODE) {
|
||||
__dlm_lockres_reserve_ast(res);
|
||||
dlm_queue_bast(dlm, lock);
|
||||
}
|
||||
if (lock->ml.highest_blocked < target->ml.convert_type)
|
||||
lock->ml.highest_blocked =
|
||||
target->ml.convert_type;
|
||||
}
|
||||
}
|
||||
|
||||
/* we can convert the lock */
|
||||
if (can_grant) {
|
||||
spin_lock(&target->spinlock);
|
||||
BUG_ON(target->ml.highest_blocked != LKM_IVMODE);
|
||||
|
||||
mlog(0, "calling ast for converting lock: %.*s, have: %d, "
|
||||
"granting: %d, node: %u\n", res->lockname.len,
|
||||
res->lockname.name, target->ml.type,
|
||||
target->ml.convert_type, target->ml.node);
|
||||
|
||||
target->ml.type = target->ml.convert_type;
|
||||
target->ml.convert_type = LKM_IVMODE;
|
||||
list_del_init(&target->list);
|
||||
list_add_tail(&target->list, &res->granted);
|
||||
|
||||
BUG_ON(!target->lksb);
|
||||
target->lksb->status = DLM_NORMAL;
|
||||
|
||||
spin_unlock(&target->spinlock);
|
||||
|
||||
__dlm_lockres_reserve_ast(res);
|
||||
dlm_queue_ast(dlm, target);
|
||||
/* go back and check for more */
|
||||
goto converting;
|
||||
}
|
||||
|
||||
blocked:
|
||||
if (list_empty(&res->blocked))
|
||||
goto leave;
|
||||
target = list_entry(res->blocked.next, struct dlm_lock, list);
|
||||
|
||||
head = &res->granted;
|
||||
list_for_each(iter, head) {
|
||||
lock = list_entry(iter, struct dlm_lock, list);
|
||||
if (lock==target)
|
||||
continue;
|
||||
if (!dlm_lock_compatible(lock->ml.type, target->ml.type)) {
|
||||
can_grant = 0;
|
||||
if (lock->ml.highest_blocked == LKM_IVMODE) {
|
||||
__dlm_lockres_reserve_ast(res);
|
||||
dlm_queue_bast(dlm, lock);
|
||||
}
|
||||
if (lock->ml.highest_blocked < target->ml.type)
|
||||
lock->ml.highest_blocked = target->ml.type;
|
||||
}
|
||||
}
|
||||
|
||||
head = &res->converting;
|
||||
list_for_each(iter, head) {
|
||||
lock = list_entry(iter, struct dlm_lock, list);
|
||||
if (lock==target)
|
||||
continue;
|
||||
if (!dlm_lock_compatible(lock->ml.type, target->ml.type)) {
|
||||
can_grant = 0;
|
||||
if (lock->ml.highest_blocked == LKM_IVMODE) {
|
||||
__dlm_lockres_reserve_ast(res);
|
||||
dlm_queue_bast(dlm, lock);
|
||||
}
|
||||
if (lock->ml.highest_blocked < target->ml.type)
|
||||
lock->ml.highest_blocked = target->ml.type;
|
||||
}
|
||||
}
|
||||
|
||||
/* we can grant the blocked lock (only
|
||||
* possible if converting list empty) */
|
||||
if (can_grant) {
|
||||
spin_lock(&target->spinlock);
|
||||
BUG_ON(target->ml.highest_blocked != LKM_IVMODE);
|
||||
|
||||
mlog(0, "calling ast for blocked lock: %.*s, granting: %d, "
|
||||
"node: %u\n", res->lockname.len, res->lockname.name,
|
||||
target->ml.type, target->ml.node);
|
||||
|
||||
// target->ml.type is already correct
|
||||
list_del_init(&target->list);
|
||||
list_add_tail(&target->list, &res->granted);
|
||||
|
||||
BUG_ON(!target->lksb);
|
||||
target->lksb->status = DLM_NORMAL;
|
||||
|
||||
spin_unlock(&target->spinlock);
|
||||
|
||||
__dlm_lockres_reserve_ast(res);
|
||||
dlm_queue_ast(dlm, target);
|
||||
/* go back and check for more */
|
||||
goto converting;
|
||||
}
|
||||
|
||||
leave:
|
||||
return;
|
||||
}
|
||||
|
||||
/* must have NO locks when calling this with res !=NULL * */
|
||||
void dlm_kick_thread(struct dlm_ctxt *dlm, struct dlm_lock_resource *res)
|
||||
{
|
||||
mlog_entry("dlm=%p, res=%p\n", dlm, res);
|
||||
if (res) {
|
||||
spin_lock(&dlm->spinlock);
|
||||
spin_lock(&res->spinlock);
|
||||
__dlm_dirty_lockres(dlm, res);
|
||||
spin_unlock(&res->spinlock);
|
||||
spin_unlock(&dlm->spinlock);
|
||||
}
|
||||
wake_up(&dlm->dlm_thread_wq);
|
||||
}
|
||||
|
||||
void __dlm_dirty_lockres(struct dlm_ctxt *dlm, struct dlm_lock_resource *res)
|
||||
{
|
||||
mlog_entry("dlm=%p, res=%p\n", dlm, res);
|
||||
|
||||
assert_spin_locked(&dlm->spinlock);
|
||||
assert_spin_locked(&res->spinlock);
|
||||
|
||||
/* don't shuffle secondary queues */
|
||||
if ((res->owner == dlm->node_num) &&
|
||||
!(res->state & DLM_LOCK_RES_DIRTY)) {
|
||||
list_add_tail(&res->dirty, &dlm->dirty_list);
|
||||
res->state |= DLM_LOCK_RES_DIRTY;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/* Launch the NM thread for the mounted volume */
|
||||
int dlm_launch_thread(struct dlm_ctxt *dlm)
|
||||
{
|
||||
mlog(0, "starting dlm thread...\n");
|
||||
|
||||
dlm->dlm_thread_task = kthread_run(dlm_thread, dlm, "dlm_thread");
|
||||
if (IS_ERR(dlm->dlm_thread_task)) {
|
||||
mlog_errno(PTR_ERR(dlm->dlm_thread_task));
|
||||
dlm->dlm_thread_task = NULL;
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
void dlm_complete_thread(struct dlm_ctxt *dlm)
|
||||
{
|
||||
if (dlm->dlm_thread_task) {
|
||||
mlog(ML_KTHREAD, "waiting for dlm thread to exit\n");
|
||||
kthread_stop(dlm->dlm_thread_task);
|
||||
dlm->dlm_thread_task = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
static int dlm_dirty_list_empty(struct dlm_ctxt *dlm)
|
||||
{
|
||||
int empty;
|
||||
|
||||
spin_lock(&dlm->spinlock);
|
||||
empty = list_empty(&dlm->dirty_list);
|
||||
spin_unlock(&dlm->spinlock);
|
||||
|
||||
return empty;
|
||||
}
|
||||
|
||||
static void dlm_flush_asts(struct dlm_ctxt *dlm)
|
||||
{
|
||||
int ret;
|
||||
struct dlm_lock *lock;
|
||||
struct dlm_lock_resource *res;
|
||||
u8 hi;
|
||||
|
||||
spin_lock(&dlm->ast_lock);
|
||||
while (!list_empty(&dlm->pending_asts)) {
|
||||
lock = list_entry(dlm->pending_asts.next,
|
||||
struct dlm_lock, ast_list);
|
||||
/* get an extra ref on lock */
|
||||
dlm_lock_get(lock);
|
||||
res = lock->lockres;
|
||||
mlog(0, "delivering an ast for this lockres\n");
|
||||
|
||||
BUG_ON(!lock->ast_pending);
|
||||
|
||||
/* remove from list (including ref) */
|
||||
list_del_init(&lock->ast_list);
|
||||
dlm_lock_put(lock);
|
||||
spin_unlock(&dlm->ast_lock);
|
||||
|
||||
if (lock->ml.node != dlm->node_num) {
|
||||
ret = dlm_do_remote_ast(dlm, res, lock);
|
||||
if (ret < 0)
|
||||
mlog_errno(ret);
|
||||
} else
|
||||
dlm_do_local_ast(dlm, res, lock);
|
||||
|
||||
spin_lock(&dlm->ast_lock);
|
||||
|
||||
/* possible that another ast was queued while
|
||||
* we were delivering the last one */
|
||||
if (!list_empty(&lock->ast_list)) {
|
||||
mlog(0, "aha another ast got queued while "
|
||||
"we were finishing the last one. will "
|
||||
"keep the ast_pending flag set.\n");
|
||||
} else
|
||||
lock->ast_pending = 0;
|
||||
|
||||
/* drop the extra ref.
|
||||
* this may drop it completely. */
|
||||
dlm_lock_put(lock);
|
||||
dlm_lockres_release_ast(dlm, res);
|
||||
}
|
||||
|
||||
while (!list_empty(&dlm->pending_basts)) {
|
||||
lock = list_entry(dlm->pending_basts.next,
|
||||
struct dlm_lock, bast_list);
|
||||
/* get an extra ref on lock */
|
||||
dlm_lock_get(lock);
|
||||
res = lock->lockres;
|
||||
|
||||
BUG_ON(!lock->bast_pending);
|
||||
|
||||
/* get the highest blocked lock, and reset */
|
||||
spin_lock(&lock->spinlock);
|
||||
BUG_ON(lock->ml.highest_blocked <= LKM_IVMODE);
|
||||
hi = lock->ml.highest_blocked;
|
||||
lock->ml.highest_blocked = LKM_IVMODE;
|
||||
spin_unlock(&lock->spinlock);
|
||||
|
||||
/* remove from list (including ref) */
|
||||
list_del_init(&lock->bast_list);
|
||||
dlm_lock_put(lock);
|
||||
spin_unlock(&dlm->ast_lock);
|
||||
|
||||
mlog(0, "delivering a bast for this lockres "
|
||||
"(blocked = %d\n", hi);
|
||||
|
||||
if (lock->ml.node != dlm->node_num) {
|
||||
ret = dlm_send_proxy_bast(dlm, res, lock, hi);
|
||||
if (ret < 0)
|
||||
mlog_errno(ret);
|
||||
} else
|
||||
dlm_do_local_bast(dlm, res, lock, hi);
|
||||
|
||||
spin_lock(&dlm->ast_lock);
|
||||
|
||||
/* possible that another bast was queued while
|
||||
* we were delivering the last one */
|
||||
if (!list_empty(&lock->bast_list)) {
|
||||
mlog(0, "aha another bast got queued while "
|
||||
"we were finishing the last one. will "
|
||||
"keep the bast_pending flag set.\n");
|
||||
} else
|
||||
lock->bast_pending = 0;
|
||||
|
||||
/* drop the extra ref.
|
||||
* this may drop it completely. */
|
||||
dlm_lock_put(lock);
|
||||
dlm_lockres_release_ast(dlm, res);
|
||||
}
|
||||
wake_up(&dlm->ast_wq);
|
||||
spin_unlock(&dlm->ast_lock);
|
||||
}
|
||||
|
||||
|
||||
/* how long dlm_thread() sleeps waiting for work, in milliseconds */
#define DLM_THREAD_TIMEOUT_MS (4 * 1000)
|
||||
/* most dirty lock resources dlm_thread() handles before it yields */
#define DLM_THREAD_MAX_DIRTY 100
|
||||
/* NOTE(review): not referenced in the code visible here -- confirm use */
#define DLM_THREAD_MAX_ASTS 10
|
||||
|
||||
static int dlm_thread(void *data)
|
||||
{
|
||||
struct dlm_lock_resource *res;
|
||||
struct dlm_ctxt *dlm = data;
|
||||
unsigned long timeout = msecs_to_jiffies(DLM_THREAD_TIMEOUT_MS);
|
||||
|
||||
mlog(0, "dlm thread running for %s...\n", dlm->name);
|
||||
|
||||
while (!kthread_should_stop()) {
|
||||
int n = DLM_THREAD_MAX_DIRTY;
|
||||
|
||||
/* dlm_shutting_down is very point-in-time, but that
|
||||
* doesn't matter as we'll just loop back around if we
|
||||
* get false on the leading edge of a state
|
||||
* transition. */
|
||||
dlm_run_purge_list(dlm, dlm_shutting_down(dlm));
|
||||
|
||||
/* We really don't want to hold dlm->spinlock while
|
||||
* calling dlm_shuffle_lists on each lockres that
|
||||
* needs to have its queues adjusted and AST/BASTs
|
||||
* run. So let's pull each entry off the dirty_list
|
||||
* and drop dlm->spinlock ASAP. Once off the list,
|
||||
* res->spinlock needs to be taken again to protect
|
||||
* the queues while calling dlm_shuffle_lists. */
|
||||
spin_lock(&dlm->spinlock);
|
||||
while (!list_empty(&dlm->dirty_list)) {
|
||||
int delay = 0;
|
||||
res = list_entry(dlm->dirty_list.next,
|
||||
struct dlm_lock_resource, dirty);
|
||||
|
||||
/* peel a lockres off, remove it from the list,
|
||||
* unset the dirty flag and drop the dlm lock */
|
||||
BUG_ON(!res);
|
||||
dlm_lockres_get(res);
|
||||
|
||||
spin_lock(&res->spinlock);
|
||||
res->state &= ~DLM_LOCK_RES_DIRTY;
|
||||
list_del_init(&res->dirty);
|
||||
spin_unlock(&res->spinlock);
|
||||
spin_unlock(&dlm->spinlock);
|
||||
|
||||
/* lockres can be re-dirtied/re-added to the
|
||||
* dirty_list in this gap, but that is ok */
|
||||
|
||||
spin_lock(&res->spinlock);
|
||||
if (res->owner != dlm->node_num) {
|
||||
__dlm_print_one_lock_resource(res);
|
||||
mlog(ML_ERROR, "inprog:%s, mig:%s, reco:%s, dirty:%s\n",
|
||||
res->state & DLM_LOCK_RES_IN_PROGRESS ? "yes" : "no",
|
||||
res->state & DLM_LOCK_RES_MIGRATING ? "yes" : "no",
|
||||
res->state & DLM_LOCK_RES_RECOVERING ? "yes" : "no",
|
||||
res->state & DLM_LOCK_RES_DIRTY ? "yes" : "no");
|
||||
}
|
||||
BUG_ON(res->owner != dlm->node_num);
|
||||
|
||||
/* it is now ok to move lockreses in these states
|
||||
* to the dirty list, assuming that they will only be
|
||||
* dirty for a short while. */
|
||||
if (res->state & (DLM_LOCK_RES_IN_PROGRESS |
|
||||
DLM_LOCK_RES_MIGRATING |
|
||||
DLM_LOCK_RES_RECOVERING)) {
|
||||
/* move it to the tail and keep going */
|
||||
spin_unlock(&res->spinlock);
|
||||
mlog(0, "delaying list shuffling for in-"
|
||||
"progress lockres %.*s, state=%d\n",
|
||||
res->lockname.len, res->lockname.name,
|
||||
res->state);
|
||||
delay = 1;
|
||||
goto in_progress;
|
||||
}
|
||||
|
||||
/* at this point the lockres is not migrating/
|
||||
* recovering/in-progress. we have the lockres
|
||||
* spinlock and do NOT have the dlm lock.
|
||||
* safe to reserve/queue asts and run the lists. */
|
||||
|
||||
mlog(0, "calling dlm_shuffle_lists with dlm=%p, "
|
||||
"res=%p\n", dlm, res);
|
||||
|
||||
/* called while holding lockres lock */
|
||||
dlm_shuffle_lists(dlm, res);
|
||||
spin_unlock(&res->spinlock);
|
||||
|
||||
dlm_lockres_calc_usage(dlm, res);
|
||||
|
||||
in_progress:
|
||||
|
||||
spin_lock(&dlm->spinlock);
|
||||
/* if the lock was in-progress, stick
|
||||
* it on the back of the list */
|
||||
if (delay) {
|
||||
spin_lock(&res->spinlock);
|
||||
list_add_tail(&res->dirty, &dlm->dirty_list);
|
||||
res->state |= DLM_LOCK_RES_DIRTY;
|
||||
spin_unlock(&res->spinlock);
|
||||
}
|
||||
dlm_lockres_put(res);
|
||||
|
||||
/* unlikely, but we may need to give time to
|
||||
* other tasks */
|
||||
if (!--n) {
|
||||
mlog(0, "throttling dlm_thread\n");
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
spin_unlock(&dlm->spinlock);
|
||||
dlm_flush_asts(dlm);
|
||||
|
||||
/* yield and continue right away if there is more work to do */
|
||||
if (!n) {
|
||||
yield();
|
||||
continue;
|
||||
}
|
||||
|
||||
wait_event_interruptible_timeout(dlm->dlm_thread_wq,
|
||||
!dlm_dirty_list_empty(dlm) ||
|
||||
kthread_should_stop(),
|
||||
timeout);
|
||||
}
|
||||
|
||||
mlog(0, "quitting DLM thread\n");
|
||||
return 0;
|
||||
}
|
672
fs/ocfs2/dlm/dlmunlock.c
Normal file
672
fs/ocfs2/dlm/dlmunlock.c
Normal file
@ -0,0 +1,672 @@
|
||||
/* -*- mode: c; c-basic-offset: 8; -*-
|
||||
* vim: noexpandtab sw=8 ts=8 sts=0:
|
||||
*
|
||||
* dlmunlock.c
|
||||
*
|
||||
* underlying calls for unlocking locks
|
||||
*
|
||||
* Copyright (C) 2004 Oracle. All rights reserved.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2 of the License, or (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public
|
||||
* License along with this program; if not, write to the
|
||||
* Free Software Foundation, Inc., 59 Temple Place - Suite 330,
|
||||
* Boston, MA 02111-1307, USA.
|
||||
*
|
||||
*/
|
||||
|
||||
|
||||
#include <linux/module.h>
|
||||
#include <linux/fs.h>
|
||||
#include <linux/types.h>
|
||||
#include <linux/slab.h>
|
||||
#include <linux/highmem.h>
|
||||
#include <linux/utsname.h>
|
||||
#include <linux/init.h>
|
||||
#include <linux/sysctl.h>
|
||||
#include <linux/random.h>
|
||||
#include <linux/blkdev.h>
|
||||
#include <linux/socket.h>
|
||||
#include <linux/inet.h>
|
||||
#include <linux/spinlock.h>
|
||||
#include <linux/delay.h>
|
||||
|
||||
#include "cluster/heartbeat.h"
|
||||
#include "cluster/nodemanager.h"
|
||||
#include "cluster/tcp.h"
|
||||
|
||||
#include "dlmapi.h"
|
||||
#include "dlmcommon.h"
|
||||
|
||||
#define MLOG_MASK_PREFIX ML_DLM
|
||||
#include "cluster/masklog.h"
|
||||
|
||||
/*
 * Action bits computed by dlm_get_cancel_actions() and
 * dlm_get_unlock_actions(), then carried out by dlmunlock_common().
 */
/* drop one more ref on the lock once the spinlocks are released */
#define DLM_UNLOCK_FREE_LOCK           0x00000001
/* tell the caller (via *call_ast) that the unlockast should be run */
#define DLM_UNLOCK_CALL_AST            0x00000002
/* take the lock off whatever lockres queue it is on and drop that ref */
#define DLM_UNLOCK_REMOVE_LOCK         0x00000004
/* put the lock (back) on the tail of the granted queue, taking a ref */
#define DLM_UNLOCK_REGRANT_LOCK        0x00000008
/* reset lock->ml.convert_type to LKM_IVMODE */
#define DLM_UNLOCK_CLEAR_CONVERT_TYPE  0x00000010
|
||||
|
||||
|
||||
static enum dlm_status dlm_get_cancel_actions(struct dlm_ctxt *dlm,
|
||||
struct dlm_lock_resource *res,
|
||||
struct dlm_lock *lock,
|
||||
struct dlm_lockstatus *lksb,
|
||||
int *actions);
|
||||
static enum dlm_status dlm_get_unlock_actions(struct dlm_ctxt *dlm,
|
||||
struct dlm_lock_resource *res,
|
||||
struct dlm_lock *lock,
|
||||
struct dlm_lockstatus *lksb,
|
||||
int *actions);
|
||||
|
||||
static enum dlm_status dlm_send_remote_unlock_request(struct dlm_ctxt *dlm,
|
||||
struct dlm_lock_resource *res,
|
||||
struct dlm_lock *lock,
|
||||
struct dlm_lockstatus *lksb,
|
||||
int flags,
|
||||
u8 owner);
|
||||
|
||||
|
||||
/*
|
||||
* according to the spec:
|
||||
* http://opendlm.sourceforge.net/cvsmirror/opendlm/docs/dlmbook_final.pdf
|
||||
*
|
||||
* flags & LKM_CANCEL != 0: must be converting or blocked
|
||||
* flags & LKM_CANCEL == 0: must be granted
|
||||
*
|
||||
* So to unlock a converting lock, you must first cancel the
|
||||
* convert (passing LKM_CANCEL in flags), then call the unlock
|
||||
* again (with no LKM_CANCEL in flags).
|
||||
*/
|
||||
|
||||
|
||||
/*
|
||||
* locking:
|
||||
* caller needs: none
|
||||
* taken: res->spinlock and lock->spinlock taken and dropped
|
||||
* held on exit: none
|
||||
* returns: DLM_NORMAL, DLM_NOLOCKMGR, status from network
|
||||
* all callers should have taken an extra ref on lock coming in
|
||||
*/
|
||||
static enum dlm_status dlmunlock_common(struct dlm_ctxt *dlm,
|
||||
struct dlm_lock_resource *res,
|
||||
struct dlm_lock *lock,
|
||||
struct dlm_lockstatus *lksb,
|
||||
int flags, int *call_ast,
|
||||
int master_node)
|
||||
{
|
||||
enum dlm_status status;
|
||||
int actions = 0;
|
||||
int in_use;
|
||||
u8 owner;
|
||||
|
||||
mlog(0, "master_node = %d, valblk = %d\n", master_node,
|
||||
flags & LKM_VALBLK);
|
||||
|
||||
if (master_node)
|
||||
BUG_ON(res->owner != dlm->node_num);
|
||||
else
|
||||
BUG_ON(res->owner == dlm->node_num);
|
||||
|
||||
spin_lock(&dlm->spinlock);
|
||||
/* We want to be sure that we're not freeing a lock
|
||||
* that still has AST's pending... */
|
||||
in_use = !list_empty(&lock->ast_list);
|
||||
spin_unlock(&dlm->spinlock);
|
||||
if (in_use) {
|
||||
mlog(ML_ERROR, "lockres %.*s: Someone is calling dlmunlock "
|
||||
"while waiting for an ast!", res->lockname.len,
|
||||
res->lockname.name);
|
||||
return DLM_BADPARAM;
|
||||
}
|
||||
|
||||
spin_lock(&res->spinlock);
|
||||
if (res->state & DLM_LOCK_RES_IN_PROGRESS) {
|
||||
if (master_node) {
|
||||
mlog(ML_ERROR, "lockres in progress!\n");
|
||||
spin_unlock(&res->spinlock);
|
||||
return DLM_FORWARD;
|
||||
}
|
||||
/* ok for this to sleep if not in a network handler */
|
||||
__dlm_wait_on_lockres(res);
|
||||
res->state |= DLM_LOCK_RES_IN_PROGRESS;
|
||||
}
|
||||
spin_lock(&lock->spinlock);
|
||||
|
||||
if (res->state & DLM_LOCK_RES_RECOVERING) {
|
||||
status = DLM_RECOVERING;
|
||||
goto leave;
|
||||
}
|
||||
|
||||
|
||||
/* see above for what the spec says about
|
||||
* LKM_CANCEL and the lock queue state */
|
||||
if (flags & LKM_CANCEL)
|
||||
status = dlm_get_cancel_actions(dlm, res, lock, lksb, &actions);
|
||||
else
|
||||
status = dlm_get_unlock_actions(dlm, res, lock, lksb, &actions);
|
||||
|
||||
if (status != DLM_NORMAL)
|
||||
goto leave;
|
||||
|
||||
/* By now this has been masked out of cancel requests. */
|
||||
if (flags & LKM_VALBLK) {
|
||||
/* make the final update to the lvb */
|
||||
if (master_node)
|
||||
memcpy(res->lvb, lksb->lvb, DLM_LVB_LEN);
|
||||
else
|
||||
flags |= LKM_PUT_LVB; /* let the send function
|
||||
* handle it. */
|
||||
}
|
||||
|
||||
if (!master_node) {
|
||||
owner = res->owner;
|
||||
/* drop locks and send message */
|
||||
if (flags & LKM_CANCEL)
|
||||
lock->cancel_pending = 1;
|
||||
else
|
||||
lock->unlock_pending = 1;
|
||||
spin_unlock(&lock->spinlock);
|
||||
spin_unlock(&res->spinlock);
|
||||
status = dlm_send_remote_unlock_request(dlm, res, lock, lksb,
|
||||
flags, owner);
|
||||
spin_lock(&res->spinlock);
|
||||
spin_lock(&lock->spinlock);
|
||||
/* if the master told us the lock was already granted,
|
||||
* let the ast handle all of these actions */
|
||||
if (status == DLM_NORMAL &&
|
||||
lksb->status == DLM_CANCELGRANT) {
|
||||
actions &= ~(DLM_UNLOCK_REMOVE_LOCK|
|
||||
DLM_UNLOCK_REGRANT_LOCK|
|
||||
DLM_UNLOCK_CLEAR_CONVERT_TYPE);
|
||||
}
|
||||
if (flags & LKM_CANCEL)
|
||||
lock->cancel_pending = 0;
|
||||
else
|
||||
lock->unlock_pending = 0;
|
||||
|
||||
}
|
||||
|
||||
/* get an extra ref on lock. if we are just switching
|
||||
* lists here, we dont want the lock to go away. */
|
||||
dlm_lock_get(lock);
|
||||
|
||||
if (actions & DLM_UNLOCK_REMOVE_LOCK) {
|
||||
list_del_init(&lock->list);
|
||||
dlm_lock_put(lock);
|
||||
}
|
||||
if (actions & DLM_UNLOCK_REGRANT_LOCK) {
|
||||
dlm_lock_get(lock);
|
||||
list_add_tail(&lock->list, &res->granted);
|
||||
}
|
||||
if (actions & DLM_UNLOCK_CLEAR_CONVERT_TYPE) {
|
||||
mlog(0, "clearing convert_type at %smaster node\n",
|
||||
master_node ? "" : "non-");
|
||||
lock->ml.convert_type = LKM_IVMODE;
|
||||
}
|
||||
|
||||
/* remove the extra ref on lock */
|
||||
dlm_lock_put(lock);
|
||||
|
||||
leave:
|
||||
res->state &= ~DLM_LOCK_RES_IN_PROGRESS;
|
||||
if (!dlm_lock_on_list(&res->converting, lock))
|
||||
BUG_ON(lock->ml.convert_type != LKM_IVMODE);
|
||||
else
|
||||
BUG_ON(lock->ml.convert_type == LKM_IVMODE);
|
||||
spin_unlock(&lock->spinlock);
|
||||
spin_unlock(&res->spinlock);
|
||||
wake_up(&res->wq);
|
||||
|
||||
/* let the caller's final dlm_lock_put handle the actual kfree */
|
||||
if (actions & DLM_UNLOCK_FREE_LOCK) {
|
||||
/* this should always be coupled with list removal */
|
||||
BUG_ON(!(actions & DLM_UNLOCK_REMOVE_LOCK));
|
||||
mlog(0, "lock %"MLFu64" should be gone now! refs=%d\n",
|
||||
lock->ml.cookie, atomic_read(&lock->lock_refs.refcount)-1);
|
||||
dlm_lock_put(lock);
|
||||
}
|
||||
if (actions & DLM_UNLOCK_CALL_AST)
|
||||
*call_ast = 1;
|
||||
|
||||
/* if cancel or unlock succeeded, lvb work is done */
|
||||
if (status == DLM_NORMAL)
|
||||
lksb->flags &= ~(DLM_LKSB_PUT_LVB|DLM_LKSB_GET_LVB);
|
||||
|
||||
return status;
|
||||
}
|
||||
|
||||
void dlm_commit_pending_unlock(struct dlm_lock_resource *res,
|
||||
struct dlm_lock *lock)
|
||||
{
|
||||
/* leave DLM_LKSB_PUT_LVB on the lksb so any final
|
||||
* update of the lvb will be sent to the new master */
|
||||
list_del_init(&lock->list);
|
||||
}
|
||||
|
||||
void dlm_commit_pending_cancel(struct dlm_lock_resource *res,
|
||||
struct dlm_lock *lock)
|
||||
{
|
||||
list_del_init(&lock->list);
|
||||
list_add_tail(&lock->list, &res->granted);
|
||||
lock->ml.convert_type = LKM_IVMODE;
|
||||
}
|
||||
|
||||
|
||||
static inline enum dlm_status dlmunlock_master(struct dlm_ctxt *dlm,
|
||||
struct dlm_lock_resource *res,
|
||||
struct dlm_lock *lock,
|
||||
struct dlm_lockstatus *lksb,
|
||||
int flags,
|
||||
int *call_ast)
|
||||
{
|
||||
return dlmunlock_common(dlm, res, lock, lksb, flags, call_ast, 1);
|
||||
}
|
||||
|
||||
static inline enum dlm_status dlmunlock_remote(struct dlm_ctxt *dlm,
|
||||
struct dlm_lock_resource *res,
|
||||
struct dlm_lock *lock,
|
||||
struct dlm_lockstatus *lksb,
|
||||
int flags, int *call_ast)
|
||||
{
|
||||
return dlmunlock_common(dlm, res, lock, lksb, flags, call_ast, 0);
|
||||
}
|
||||
|
||||
/*
|
||||
* locking:
|
||||
* caller needs: none
|
||||
* taken: none
|
||||
* held on exit: none
|
||||
* returns: DLM_NORMAL, DLM_NOLOCKMGR, status from network
|
||||
*/
|
||||
static enum dlm_status dlm_send_remote_unlock_request(struct dlm_ctxt *dlm,
|
||||
struct dlm_lock_resource *res,
|
||||
struct dlm_lock *lock,
|
||||
struct dlm_lockstatus *lksb,
|
||||
int flags,
|
||||
u8 owner)
|
||||
{
|
||||
struct dlm_unlock_lock unlock;
|
||||
int tmpret;
|
||||
enum dlm_status ret;
|
||||
int status = 0;
|
||||
struct kvec vec[2];
|
||||
size_t veclen = 1;
|
||||
|
||||
mlog_entry("%.*s\n", res->lockname.len, res->lockname.name);
|
||||
|
||||
memset(&unlock, 0, sizeof(unlock));
|
||||
unlock.node_idx = dlm->node_num;
|
||||
unlock.flags = cpu_to_be32(flags);
|
||||
unlock.cookie = lock->ml.cookie;
|
||||
unlock.namelen = res->lockname.len;
|
||||
memcpy(unlock.name, res->lockname.name, unlock.namelen);
|
||||
|
||||
vec[0].iov_len = sizeof(struct dlm_unlock_lock);
|
||||
vec[0].iov_base = &unlock;
|
||||
|
||||
if (flags & LKM_PUT_LVB) {
|
||||
/* extra data to send if we are updating lvb */
|
||||
vec[1].iov_len = DLM_LVB_LEN;
|
||||
vec[1].iov_base = lock->lksb->lvb;
|
||||
veclen++;
|
||||
}
|
||||
|
||||
tmpret = o2net_send_message_vec(DLM_UNLOCK_LOCK_MSG, dlm->key,
|
||||
vec, veclen, owner, &status);
|
||||
if (tmpret >= 0) {
|
||||
// successfully sent and received
|
||||
if (status == DLM_CANCELGRANT)
|
||||
ret = DLM_NORMAL;
|
||||
else if (status == DLM_FORWARD) {
|
||||
mlog(0, "master was in-progress. retry\n");
|
||||
ret = DLM_FORWARD;
|
||||
} else
|
||||
ret = status;
|
||||
lksb->status = status;
|
||||
} else {
|
||||
mlog_errno(tmpret);
|
||||
if (dlm_is_host_down(tmpret)) {
|
||||
/* NOTE: this seems strange, but it is what we want.
|
||||
* when the master goes down during a cancel or
|
||||
* unlock, the recovery code completes the operation
|
||||
* as if the master had not died, then passes the
|
||||
* updated state to the recovery master. this thread
|
||||
* just needs to finish out the operation and call
|
||||
* the unlockast. */
|
||||
ret = DLM_NORMAL;
|
||||
} else {
|
||||
/* something bad. this will BUG in ocfs2 */
|
||||
ret = dlm_err_to_dlm_status(tmpret);
|
||||
}
|
||||
lksb->status = ret;
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
* locking:
|
||||
* caller needs: none
|
||||
* taken: takes and drops res->spinlock
|
||||
* held on exit: none
|
||||
* returns: DLM_NORMAL, DLM_BADARGS, DLM_IVLOCKID,
|
||||
* return value from dlmunlock_master
|
||||
*/
|
||||
int dlm_unlock_lock_handler(struct o2net_msg *msg, u32 len, void *data)
|
||||
{
|
||||
struct dlm_ctxt *dlm = data;
|
||||
struct dlm_unlock_lock *unlock = (struct dlm_unlock_lock *)msg->buf;
|
||||
struct dlm_lock_resource *res = NULL;
|
||||
struct list_head *iter;
|
||||
struct dlm_lock *lock = NULL;
|
||||
enum dlm_status status = DLM_NORMAL;
|
||||
int found = 0, i;
|
||||
struct dlm_lockstatus *lksb = NULL;
|
||||
int ignore;
|
||||
u32 flags;
|
||||
struct list_head *queue;
|
||||
|
||||
flags = be32_to_cpu(unlock->flags);
|
||||
|
||||
if (flags & LKM_GET_LVB) {
|
||||
mlog(ML_ERROR, "bad args! GET_LVB specified on unlock!\n");
|
||||
return DLM_BADARGS;
|
||||
}
|
||||
|
||||
if ((flags & (LKM_PUT_LVB|LKM_CANCEL)) == (LKM_PUT_LVB|LKM_CANCEL)) {
|
||||
mlog(ML_ERROR, "bad args! cannot modify lvb on a CANCEL "
|
||||
"request!\n");
|
||||
return DLM_BADARGS;
|
||||
}
|
||||
|
||||
if (unlock->namelen > DLM_LOCKID_NAME_MAX) {
|
||||
mlog(ML_ERROR, "Invalid name length in unlock handler!\n");
|
||||
return DLM_IVBUFLEN;
|
||||
}
|
||||
|
||||
if (!dlm_grab(dlm))
|
||||
return DLM_REJECTED;
|
||||
|
||||
mlog_bug_on_msg(!dlm_domain_fully_joined(dlm),
|
||||
"Domain %s not fully joined!\n", dlm->name);
|
||||
|
||||
mlog(0, "lvb: %s\n", flags & LKM_PUT_LVB ? "put lvb" : "none");
|
||||
|
||||
res = dlm_lookup_lockres(dlm, unlock->name, unlock->namelen);
|
||||
if (!res) {
|
||||
/* We assume here that a no lock resource simply means
|
||||
* it was migrated away and destroyed before the other
|
||||
* node could detect it. */
|
||||
mlog(0, "returning DLM_FORWARD -- res no longer exists\n");
|
||||
status = DLM_FORWARD;
|
||||
goto not_found;
|
||||
}
|
||||
|
||||
queue=&res->granted;
|
||||
found = 0;
|
||||
spin_lock(&res->spinlock);
|
||||
if (res->state & DLM_LOCK_RES_RECOVERING) {
|
||||
spin_unlock(&res->spinlock);
|
||||
mlog(0, "returning DLM_RECOVERING\n");
|
||||
status = DLM_RECOVERING;
|
||||
goto leave;
|
||||
}
|
||||
|
||||
if (res->state & DLM_LOCK_RES_MIGRATING) {
|
||||
spin_unlock(&res->spinlock);
|
||||
mlog(0, "returning DLM_MIGRATING\n");
|
||||
status = DLM_MIGRATING;
|
||||
goto leave;
|
||||
}
|
||||
|
||||
if (res->owner != dlm->node_num) {
|
||||
spin_unlock(&res->spinlock);
|
||||
mlog(0, "returning DLM_FORWARD -- not master\n");
|
||||
status = DLM_FORWARD;
|
||||
goto leave;
|
||||
}
|
||||
|
||||
for (i=0; i<3; i++) {
|
||||
list_for_each(iter, queue) {
|
||||
lock = list_entry(iter, struct dlm_lock, list);
|
||||
if (lock->ml.cookie == unlock->cookie &&
|
||||
lock->ml.node == unlock->node_idx) {
|
||||
dlm_lock_get(lock);
|
||||
found = 1;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (found)
|
||||
break;
|
||||
/* scan granted -> converting -> blocked queues */
|
||||
queue++;
|
||||
}
|
||||
spin_unlock(&res->spinlock);
|
||||
if (!found) {
|
||||
status = DLM_IVLOCKID;
|
||||
goto not_found;
|
||||
}
|
||||
|
||||
/* lock was found on queue */
|
||||
lksb = lock->lksb;
|
||||
/* unlockast only called on originating node */
|
||||
if (flags & LKM_PUT_LVB) {
|
||||
lksb->flags |= DLM_LKSB_PUT_LVB;
|
||||
memcpy(&lksb->lvb[0], &unlock->lvb[0], DLM_LVB_LEN);
|
||||
}
|
||||
|
||||
/* if this is in-progress, propagate the DLM_FORWARD
|
||||
* all the way back out */
|
||||
status = dlmunlock_master(dlm, res, lock, lksb, flags, &ignore);
|
||||
if (status == DLM_FORWARD)
|
||||
mlog(0, "lockres is in progress\n");
|
||||
|
||||
if (flags & LKM_PUT_LVB)
|
||||
lksb->flags &= ~DLM_LKSB_PUT_LVB;
|
||||
|
||||
dlm_lockres_calc_usage(dlm, res);
|
||||
dlm_kick_thread(dlm, res);
|
||||
|
||||
not_found:
|
||||
if (!found)
|
||||
mlog(ML_ERROR, "failed to find lock to unlock! "
|
||||
"cookie=%"MLFu64"\n",
|
||||
unlock->cookie);
|
||||
else {
|
||||
/* send the lksb->status back to the other node */
|
||||
status = lksb->status;
|
||||
dlm_lock_put(lock);
|
||||
}
|
||||
|
||||
leave:
|
||||
if (res)
|
||||
dlm_lockres_put(res);
|
||||
|
||||
dlm_put(dlm);
|
||||
|
||||
return status;
|
||||
}
|
||||
|
||||
|
||||
static enum dlm_status dlm_get_cancel_actions(struct dlm_ctxt *dlm,
|
||||
struct dlm_lock_resource *res,
|
||||
struct dlm_lock *lock,
|
||||
struct dlm_lockstatus *lksb,
|
||||
int *actions)
|
||||
{
|
||||
enum dlm_status status;
|
||||
|
||||
if (dlm_lock_on_list(&res->blocked, lock)) {
|
||||
/* cancel this outright */
|
||||
lksb->status = DLM_NORMAL;
|
||||
status = DLM_NORMAL;
|
||||
*actions = (DLM_UNLOCK_CALL_AST |
|
||||
DLM_UNLOCK_REMOVE_LOCK);
|
||||
} else if (dlm_lock_on_list(&res->converting, lock)) {
|
||||
/* cancel the request, put back on granted */
|
||||
lksb->status = DLM_NORMAL;
|
||||
status = DLM_NORMAL;
|
||||
*actions = (DLM_UNLOCK_CALL_AST |
|
||||
DLM_UNLOCK_REMOVE_LOCK |
|
||||
DLM_UNLOCK_REGRANT_LOCK |
|
||||
DLM_UNLOCK_CLEAR_CONVERT_TYPE);
|
||||
} else if (dlm_lock_on_list(&res->granted, lock)) {
|
||||
/* too late, already granted. DLM_CANCELGRANT */
|
||||
lksb->status = DLM_CANCELGRANT;
|
||||
status = DLM_NORMAL;
|
||||
*actions = DLM_UNLOCK_CALL_AST;
|
||||
} else {
|
||||
mlog(ML_ERROR, "lock to cancel is not on any list!\n");
|
||||
lksb->status = DLM_IVLOCKID;
|
||||
status = DLM_IVLOCKID;
|
||||
*actions = 0;
|
||||
}
|
||||
return status;
|
||||
}
|
||||
|
||||
static enum dlm_status dlm_get_unlock_actions(struct dlm_ctxt *dlm,
|
||||
struct dlm_lock_resource *res,
|
||||
struct dlm_lock *lock,
|
||||
struct dlm_lockstatus *lksb,
|
||||
int *actions)
|
||||
{
|
||||
enum dlm_status status;
|
||||
|
||||
/* unlock request */
|
||||
if (!dlm_lock_on_list(&res->granted, lock)) {
|
||||
lksb->status = DLM_DENIED;
|
||||
status = DLM_DENIED;
|
||||
dlm_error(status);
|
||||
*actions = 0;
|
||||
} else {
|
||||
/* unlock granted lock */
|
||||
lksb->status = DLM_NORMAL;
|
||||
status = DLM_NORMAL;
|
||||
*actions = (DLM_UNLOCK_FREE_LOCK |
|
||||
DLM_UNLOCK_CALL_AST |
|
||||
DLM_UNLOCK_REMOVE_LOCK);
|
||||
}
|
||||
return status;
|
||||
}
|
||||
|
||||
/* there seems to be no point in doing this async
|
||||
* since (even for the remote case) there is really
|
||||
* no work to queue up... so just do it and fire the
|
||||
* unlockast by hand when done... */
|
||||
enum dlm_status dlmunlock(struct dlm_ctxt *dlm, struct dlm_lockstatus *lksb,
|
||||
int flags, dlm_astunlockfunc_t *unlockast, void *data)
|
||||
{
|
||||
enum dlm_status status;
|
||||
struct dlm_lock_resource *res;
|
||||
struct dlm_lock *lock = NULL;
|
||||
int call_ast, is_master;
|
||||
|
||||
mlog_entry_void();
|
||||
|
||||
if (!lksb) {
|
||||
dlm_error(DLM_BADARGS);
|
||||
return DLM_BADARGS;
|
||||
}
|
||||
|
||||
if (flags & ~(LKM_CANCEL | LKM_VALBLK | LKM_INVVALBLK)) {
|
||||
dlm_error(DLM_BADPARAM);
|
||||
return DLM_BADPARAM;
|
||||
}
|
||||
|
||||
if ((flags & (LKM_VALBLK | LKM_CANCEL)) == (LKM_VALBLK | LKM_CANCEL)) {
|
||||
mlog(0, "VALBLK given with CANCEL: ignoring VALBLK\n");
|
||||
flags &= ~LKM_VALBLK;
|
||||
}
|
||||
|
||||
if (!lksb->lockid || !lksb->lockid->lockres) {
|
||||
dlm_error(DLM_BADPARAM);
|
||||
return DLM_BADPARAM;
|
||||
}
|
||||
|
||||
lock = lksb->lockid;
|
||||
BUG_ON(!lock);
|
||||
dlm_lock_get(lock);
|
||||
|
||||
res = lock->lockres;
|
||||
BUG_ON(!res);
|
||||
dlm_lockres_get(res);
|
||||
retry:
|
||||
call_ast = 0;
|
||||
/* need to retry up here because owner may have changed */
|
||||
mlog(0, "lock=%p res=%p\n", lock, res);
|
||||
|
||||
spin_lock(&res->spinlock);
|
||||
is_master = (res->owner == dlm->node_num);
|
||||
spin_unlock(&res->spinlock);
|
||||
|
||||
if (is_master) {
|
||||
status = dlmunlock_master(dlm, res, lock, lksb, flags,
|
||||
&call_ast);
|
||||
mlog(0, "done calling dlmunlock_master: returned %d, "
|
||||
"call_ast is %d\n", status, call_ast);
|
||||
} else {
|
||||
status = dlmunlock_remote(dlm, res, lock, lksb, flags,
|
||||
&call_ast);
|
||||
mlog(0, "done calling dlmunlock_remote: returned %d, "
|
||||
"call_ast is %d\n", status, call_ast);
|
||||
}
|
||||
|
||||
if (status == DLM_RECOVERING ||
|
||||
status == DLM_MIGRATING ||
|
||||
status == DLM_FORWARD) {
|
||||
/* We want to go away for a tiny bit to allow recovery
|
||||
* / migration to complete on this resource. I don't
|
||||
* know of any wait queue we could sleep on as this
|
||||
* may be happening on another node. Perhaps the
|
||||
* proper solution is to queue up requests on the
|
||||
* other end? */
|
||||
|
||||
/* do we want to yield(); ?? */
|
||||
msleep(50);
|
||||
|
||||
mlog(0, "retrying unlock due to pending recovery/"
|
||||
"migration/in-progress\n");
|
||||
goto retry;
|
||||
}
|
||||
|
||||
if (call_ast) {
|
||||
mlog(0, "calling unlockast(%p, %d)\n", data, lksb->status);
|
||||
if (is_master) {
|
||||
/* it is possible that there is one last bast
|
||||
* pending. make sure it is flushed, then
|
||||
* call the unlockast.
|
||||
* not an issue if this is a mastered remotely,
|
||||
* since this lock has been removed from the
|
||||
* lockres queues and cannot be found. */
|
||||
dlm_kick_thread(dlm, NULL);
|
||||
wait_event(dlm->ast_wq,
|
||||
dlm_lock_basts_flushed(dlm, lock));
|
||||
}
|
||||
(*unlockast)(data, lksb->status);
|
||||
}
|
||||
|
||||
if (status == DLM_NORMAL) {
|
||||
mlog(0, "kicking the thread\n");
|
||||
dlm_kick_thread(dlm, res);
|
||||
} else
|
||||
dlm_error(status);
|
||||
|
||||
dlm_lockres_calc_usage(dlm, res);
|
||||
dlm_lockres_put(res);
|
||||
dlm_lock_put(lock);
|
||||
|
||||
mlog(0, "returning status=%d!\n", status);
|
||||
return status;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(dlmunlock);
|
||||
|
42
fs/ocfs2/dlm/dlmver.c
Normal file
42
fs/ocfs2/dlm/dlmver.c
Normal file
@ -0,0 +1,42 @@
|
||||
/* -*- mode: c; c-basic-offset: 8; -*-
|
||||
* vim: noexpandtab sw=8 ts=8 sts=0:
|
||||
*
|
||||
* dlmver.c
|
||||
*
|
||||
* version string
|
||||
*
|
||||
* Copyright (C) 2002, 2005 Oracle. All rights reserved.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2 of the License, or (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public
|
||||
* License along with this program; if not, write to the
|
||||
* Free Software Foundation, Inc., 59 Temple Place - Suite 330,
|
||||
* Boston, MA 02111-1307, USA.
|
||||
*/
|
||||
|
||||
#include <linux/module.h>
|
||||
#include <linux/kernel.h>
|
||||
|
||||
#include "dlmver.h"
|
||||
|
||||
#define DLM_BUILD_VERSION "1.3.3"
|
||||
|
||||
#define VERSION_STR "OCFS2 DLM " DLM_BUILD_VERSION
|
||||
|
||||
void dlm_print_version(void)
|
||||
{
|
||||
printk(KERN_INFO "%s\n", VERSION_STR);
|
||||
}
|
||||
|
||||
MODULE_DESCRIPTION(VERSION_STR);
|
||||
|
||||
MODULE_VERSION(DLM_BUILD_VERSION);
|
31
fs/ocfs2/dlm/dlmver.h
Normal file
31
fs/ocfs2/dlm/dlmver.h
Normal file
@ -0,0 +1,31 @@
|
||||
/* -*- mode: c; c-basic-offset: 8; -*-
|
||||
* vim: noexpandtab sw=8 ts=8 sts=0:
|
||||
*
|
||||
* dlmver.h
|
||||
*
|
||||
* Function prototypes
|
||||
*
|
||||
* Copyright (C) 2005 Oracle. All rights reserved.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2 of the License, or (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public
|
||||
* License along with this program; if not, write to the
|
||||
* Free Software Foundation, Inc., 59 Temple Place - Suite 330,
|
||||
* Boston, MA 02111-1307, USA.
|
||||
*/
|
||||
|
||||
#ifndef DLM_VER_H
|
||||
#define DLM_VER_H
|
||||
|
||||
void dlm_print_version(void);
|
||||
|
||||
#endif /* DLM_VER_H */
|
Loading…
Reference in New Issue
Block a user