mirror of
https://github.com/AuxXxilium/linux_dsm_epyc7002.git
synced 2024-11-24 14:41:02 +07:00
Merge branch 'fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/jlbec/ocfs2
* 'fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/jlbec/ocfs2:
  ocfs2/cluster: Make o2net connect messages KERN_NOTICE
  ocfs2/dlm: Fix printing of lockname
  ocfs2: Fix contiguousness check in ocfs2_try_to_merge_extent_map()
  ocfs2/dlm: Remove BUG_ON in dlm recovery when freeing locks of a dead node
  ocfs2: Plugs race between the dc thread and an unlock ast message
  ocfs2: Remove overzealous BUG_ON during blocked lock processing
  ocfs2: Do not downconvert if the lock level is already compatible
  ocfs2: Prevent a livelock in dlmglue
  ocfs2: Fix setting of OCFS2_LOCK_BLOCKED during bast
  ocfs2: Use compat_ptr in reflink_arguments.
  ocfs2/dlm: Handle EAGAIN for compatibility - v2
  ocfs2: Add parenthesis to wrap the check for O_DIRECT.
  ocfs2: Only bug out when page size is larger than cluster size.
  ocfs2: Fix memory overflow in cow_by_page.
  ocfs2/dlm: Print more messages during lock migration
  ocfs2/dlm: Ignore LVBs of locks in the Blocked list
  ocfs2/trivial: Remove trailing whitespaces
  ocfs2: fix a misleading variable name
  ocfs2: Sync max_inline_data_with_xattr from tools.
  ocfs2: Fix refcnt leak on ocfs2_fast_follow_link() error path
This commit is contained in:
commit
a5f28ae4df
@ -599,7 +599,7 @@ static int ocfs2_direct_IO_get_blocks(struct inode *inode, sector_t iblock,
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
/*
|
||||
* ocfs2_dio_end_io is called by the dio core when a dio is finished. We're
|
||||
* particularly interested in the aio/dio case. Like the core uses
|
||||
* i_alloc_sem, we use the rw_lock DLM lock to protect io on one node from
|
||||
@ -670,7 +670,7 @@ static ssize_t ocfs2_direct_IO(int rw,
|
||||
|
||||
ret = blockdev_direct_IO_no_locking(rw, iocb, inode,
|
||||
inode->i_sb->s_bdev, iov, offset,
|
||||
nr_segs,
|
||||
nr_segs,
|
||||
ocfs2_direct_IO_get_blocks,
|
||||
ocfs2_dio_end_io);
|
||||
|
||||
|
@ -368,7 +368,7 @@ int ocfs2_read_blocks(struct ocfs2_caching_info *ci, u64 block, int nr,
|
||||
}
|
||||
ocfs2_metadata_cache_io_unlock(ci);
|
||||
|
||||
mlog(ML_BH_IO, "block=(%llu), nr=(%d), cached=%s, flags=0x%x\n",
|
||||
mlog(ML_BH_IO, "block=(%llu), nr=(%d), cached=%s, flags=0x%x\n",
|
||||
(unsigned long long)block, nr,
|
||||
((flags & OCFS2_BH_IGNORE_CACHE) || ignore_cache) ? "no" : "yes",
|
||||
flags);
|
||||
|
@ -78,7 +78,7 @@ static struct o2hb_callback *hbcall_from_type(enum o2hb_callback_type type);
|
||||
|
||||
unsigned int o2hb_dead_threshold = O2HB_DEFAULT_DEAD_THRESHOLD;
|
||||
|
||||
/* Only sets a new threshold if there are no active regions.
|
||||
/* Only sets a new threshold if there are no active regions.
|
||||
*
|
||||
* No locking or otherwise interesting code is required for reading
|
||||
* o2hb_dead_threshold as it can't change once regions are active and
|
||||
@ -170,7 +170,7 @@ static void o2hb_write_timeout(struct work_struct *work)
|
||||
|
||||
mlog(ML_ERROR, "Heartbeat write timeout to device %s after %u "
|
||||
"milliseconds\n", reg->hr_dev_name,
|
||||
jiffies_to_msecs(jiffies - reg->hr_last_timeout_start));
|
||||
jiffies_to_msecs(jiffies - reg->hr_last_timeout_start));
|
||||
o2quo_disk_timeout();
|
||||
}
|
||||
|
||||
@ -624,7 +624,7 @@ static int o2hb_check_slot(struct o2hb_region *reg,
|
||||
"seq %llu last %llu changed %u equal %u\n",
|
||||
slot->ds_node_num, (long long)slot->ds_last_generation,
|
||||
le32_to_cpu(hb_block->hb_cksum),
|
||||
(unsigned long long)le64_to_cpu(hb_block->hb_seq),
|
||||
(unsigned long long)le64_to_cpu(hb_block->hb_seq),
|
||||
(unsigned long long)slot->ds_last_time, slot->ds_changed_samples,
|
||||
slot->ds_equal_samples);
|
||||
|
||||
|
@ -485,7 +485,7 @@ static void o2net_set_nn_state(struct o2net_node *nn,
|
||||
}
|
||||
|
||||
if (was_valid && !valid) {
|
||||
printk(KERN_INFO "o2net: no longer connected to "
|
||||
printk(KERN_NOTICE "o2net: no longer connected to "
|
||||
SC_NODEF_FMT "\n", SC_NODEF_ARGS(old_sc));
|
||||
o2net_complete_nodes_nsw(nn);
|
||||
}
|
||||
@ -493,7 +493,7 @@ static void o2net_set_nn_state(struct o2net_node *nn,
|
||||
if (!was_valid && valid) {
|
||||
o2quo_conn_up(o2net_num_from_nn(nn));
|
||||
cancel_delayed_work(&nn->nn_connect_expired);
|
||||
printk(KERN_INFO "o2net: %s " SC_NODEF_FMT "\n",
|
||||
printk(KERN_NOTICE "o2net: %s " SC_NODEF_FMT "\n",
|
||||
o2nm_this_node() > sc->sc_node->nd_num ?
|
||||
"connected to" : "accepted connection from",
|
||||
SC_NODEF_ARGS(sc));
|
||||
@ -930,7 +930,7 @@ static void o2net_sendpage(struct o2net_sock_container *sc,
|
||||
cond_resched();
|
||||
continue;
|
||||
}
|
||||
mlog(ML_ERROR, "sendpage of size %zu to " SC_NODEF_FMT
|
||||
mlog(ML_ERROR, "sendpage of size %zu to " SC_NODEF_FMT
|
||||
" failed with %zd\n", size, SC_NODEF_ARGS(sc), ret);
|
||||
o2net_ensure_shutdown(nn, sc, 0);
|
||||
break;
|
||||
@ -1476,14 +1476,14 @@ static void o2net_idle_timer(unsigned long data)
|
||||
|
||||
do_gettimeofday(&now);
|
||||
|
||||
printk(KERN_INFO "o2net: connection to " SC_NODEF_FMT " has been idle for %u.%u "
|
||||
printk(KERN_NOTICE "o2net: connection to " SC_NODEF_FMT " has been idle for %u.%u "
|
||||
"seconds, shutting it down.\n", SC_NODEF_ARGS(sc),
|
||||
o2net_idle_timeout() / 1000,
|
||||
o2net_idle_timeout() % 1000);
|
||||
mlog(ML_NOTICE, "here are some times that might help debug the "
|
||||
"situation: (tmr %ld.%ld now %ld.%ld dr %ld.%ld adv "
|
||||
"%ld.%ld:%ld.%ld func (%08x:%u) %ld.%ld:%ld.%ld)\n",
|
||||
sc->sc_tv_timer.tv_sec, (long) sc->sc_tv_timer.tv_usec,
|
||||
sc->sc_tv_timer.tv_sec, (long) sc->sc_tv_timer.tv_usec,
|
||||
now.tv_sec, (long) now.tv_usec,
|
||||
sc->sc_tv_data_ready.tv_sec, (long) sc->sc_tv_data_ready.tv_usec,
|
||||
sc->sc_tv_advance_start.tv_sec,
|
||||
|
@ -32,10 +32,10 @@
|
||||
* on their number */
|
||||
#define O2NET_QUORUM_DELAY_MS ((o2hb_dead_threshold + 2) * O2HB_REGION_TIMEOUT_MS)
|
||||
|
||||
/*
|
||||
/*
|
||||
* This version number represents quite a lot, unfortunately. It not
|
||||
* only represents the raw network message protocol on the wire but also
|
||||
* locking semantics of the file system using the protocol. It should
|
||||
* locking semantics of the file system using the protocol. It should
|
||||
* be somewhere else, I'm sure, but right now it isn't.
|
||||
*
|
||||
* With version 11, we separate out the filesystem locking portion. The
|
||||
|
@ -95,7 +95,7 @@ const char *dlm_errname(enum dlm_status err);
|
||||
mlog(ML_ERROR, "dlm status = %s\n", dlm_errname((st))); \
|
||||
} while (0)
|
||||
|
||||
#define DLM_LKSB_UNUSED1 0x01
|
||||
#define DLM_LKSB_UNUSED1 0x01
|
||||
#define DLM_LKSB_PUT_LVB 0x02
|
||||
#define DLM_LKSB_GET_LVB 0x04
|
||||
#define DLM_LKSB_UNUSED2 0x08
|
||||
|
@ -123,7 +123,7 @@ static void __dlm_queue_ast(struct dlm_ctxt *dlm, struct dlm_lock *lock)
|
||||
dlm_lock_put(lock);
|
||||
/* free up the reserved bast that we are cancelling.
|
||||
* guaranteed that this will not be the last reserved
|
||||
* ast because *both* an ast and a bast were reserved
|
||||
* ast because *both* an ast and a bast were reserved
|
||||
* to get to this point. the res->spinlock will not be
|
||||
* taken here */
|
||||
dlm_lockres_release_ast(dlm, res);
|
||||
|
@ -396,7 +396,7 @@ static enum dlm_status dlm_send_remote_convert_request(struct dlm_ctxt *dlm,
|
||||
/* instead of logging the same network error over
|
||||
* and over, sleep here and wait for the heartbeat
|
||||
* to notice the node is dead. times out after 5s. */
|
||||
dlm_wait_for_node_death(dlm, res->owner,
|
||||
dlm_wait_for_node_death(dlm, res->owner,
|
||||
DLM_NODE_DEATH_WAIT_MAX);
|
||||
ret = DLM_RECOVERING;
|
||||
mlog(0, "node %u died so returning DLM_RECOVERING "
|
||||
|
@ -102,7 +102,7 @@ void __dlm_print_one_lock_resource(struct dlm_lock_resource *res)
|
||||
assert_spin_locked(&res->spinlock);
|
||||
|
||||
stringify_lockname(res->lockname.name, res->lockname.len,
|
||||
buf, sizeof(buf) - 1);
|
||||
buf, sizeof(buf));
|
||||
printk("lockres: %s, owner=%u, state=%u\n",
|
||||
buf, res->owner, res->state);
|
||||
printk(" last used: %lu, refcnt: %u, on purge list: %s\n",
|
||||
|
@ -816,7 +816,7 @@ static int dlm_query_join_handler(struct o2net_msg *msg, u32 len, void *data,
|
||||
}
|
||||
|
||||
/* Once the dlm ctxt is marked as leaving then we don't want
|
||||
* to be put in someone's domain map.
|
||||
* to be put in someone's domain map.
|
||||
* Also, explicitly disallow joining at certain troublesome
|
||||
* times (ie. during recovery). */
|
||||
if (dlm && dlm->dlm_state != DLM_CTXT_LEAVING) {
|
||||
|
@ -269,7 +269,7 @@ static enum dlm_status dlmlock_remote(struct dlm_ctxt *dlm,
|
||||
}
|
||||
dlm_revert_pending_lock(res, lock);
|
||||
dlm_lock_put(lock);
|
||||
} else if (dlm_is_recovery_lock(res->lockname.name,
|
||||
} else if (dlm_is_recovery_lock(res->lockname.name,
|
||||
res->lockname.len)) {
|
||||
/* special case for the $RECOVERY lock.
|
||||
* there will never be an AST delivered to put
|
||||
|
@ -366,7 +366,7 @@ void dlm_hb_event_notify_attached(struct dlm_ctxt *dlm, int idx, int node_up)
|
||||
struct dlm_master_list_entry *mle;
|
||||
|
||||
assert_spin_locked(&dlm->spinlock);
|
||||
|
||||
|
||||
list_for_each_entry(mle, &dlm->mle_hb_events, hb_events) {
|
||||
if (node_up)
|
||||
dlm_mle_node_up(dlm, mle, NULL, idx);
|
||||
@ -833,7 +833,7 @@ struct dlm_lock_resource * dlm_get_lock_resource(struct dlm_ctxt *dlm,
|
||||
__dlm_insert_mle(dlm, mle);
|
||||
|
||||
/* still holding the dlm spinlock, check the recovery map
|
||||
* to see if there are any nodes that still need to be
|
||||
* to see if there are any nodes that still need to be
|
||||
* considered. these will not appear in the mle nodemap
|
||||
* but they might own this lockres. wait on them. */
|
||||
bit = find_next_bit(dlm->recovery_map, O2NM_MAX_NODES, 0);
|
||||
@ -883,7 +883,7 @@ struct dlm_lock_resource * dlm_get_lock_resource(struct dlm_ctxt *dlm,
|
||||
msleep(500);
|
||||
}
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
dlm_kick_recovery_thread(dlm);
|
||||
msleep(1000);
|
||||
@ -939,8 +939,8 @@ struct dlm_lock_resource * dlm_get_lock_resource(struct dlm_ctxt *dlm,
|
||||
res->lockname.name, blocked);
|
||||
if (++tries > 20) {
|
||||
mlog(ML_ERROR, "%s:%.*s: spinning on "
|
||||
"dlm_wait_for_lock_mastery, blocked=%d\n",
|
||||
dlm->name, res->lockname.len,
|
||||
"dlm_wait_for_lock_mastery, blocked=%d\n",
|
||||
dlm->name, res->lockname.len,
|
||||
res->lockname.name, blocked);
|
||||
dlm_print_one_lock_resource(res);
|
||||
dlm_print_one_mle(mle);
|
||||
@ -1029,7 +1029,7 @@ static int dlm_wait_for_lock_mastery(struct dlm_ctxt *dlm,
|
||||
ret = dlm_restart_lock_mastery(dlm, res, mle, *blocked);
|
||||
b = (mle->type == DLM_MLE_BLOCK);
|
||||
if ((*blocked && !b) || (!*blocked && b)) {
|
||||
mlog(0, "%s:%.*s: status change: old=%d new=%d\n",
|
||||
mlog(0, "%s:%.*s: status change: old=%d new=%d\n",
|
||||
dlm->name, res->lockname.len, res->lockname.name,
|
||||
*blocked, b);
|
||||
*blocked = b;
|
||||
@ -1602,7 +1602,7 @@ int dlm_master_request_handler(struct o2net_msg *msg, u32 len, void *data,
|
||||
}
|
||||
mlog(0, "%u is the owner of %.*s, cleaning everyone else\n",
|
||||
dlm->node_num, res->lockname.len, res->lockname.name);
|
||||
ret = dlm_dispatch_assert_master(dlm, res, 0, request->node_idx,
|
||||
ret = dlm_dispatch_assert_master(dlm, res, 0, request->node_idx,
|
||||
DLM_ASSERT_MASTER_MLE_CLEANUP);
|
||||
if (ret < 0) {
|
||||
mlog(ML_ERROR, "failed to dispatch assert master work\n");
|
||||
@ -1701,7 +1701,7 @@ static int dlm_do_assert_master(struct dlm_ctxt *dlm,
|
||||
|
||||
if (r & DLM_ASSERT_RESPONSE_REASSERT) {
|
||||
mlog(0, "%.*s: node %u create mles on other "
|
||||
"nodes and requests a re-assert\n",
|
||||
"nodes and requests a re-assert\n",
|
||||
namelen, lockname, to);
|
||||
reassert = 1;
|
||||
}
|
||||
@ -1812,7 +1812,7 @@ int dlm_assert_master_handler(struct o2net_msg *msg, u32 len, void *data,
|
||||
spin_unlock(&dlm->master_lock);
|
||||
spin_unlock(&dlm->spinlock);
|
||||
goto done;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
spin_unlock(&dlm->master_lock);
|
||||
@ -1883,7 +1883,7 @@ int dlm_assert_master_handler(struct o2net_msg *msg, u32 len, void *data,
|
||||
int extra_ref = 0;
|
||||
int nn = -1;
|
||||
int rr, err = 0;
|
||||
|
||||
|
||||
spin_lock(&mle->spinlock);
|
||||
if (mle->type == DLM_MLE_BLOCK || mle->type == DLM_MLE_MIGRATION)
|
||||
extra_ref = 1;
|
||||
@ -1891,7 +1891,7 @@ int dlm_assert_master_handler(struct o2net_msg *msg, u32 len, void *data,
|
||||
/* MASTER mle: if any bits set in the response map
|
||||
* then the calling node needs to re-assert to clear
|
||||
* up nodes that this node contacted */
|
||||
while ((nn = find_next_bit (mle->response_map, O2NM_MAX_NODES,
|
||||
while ((nn = find_next_bit (mle->response_map, O2NM_MAX_NODES,
|
||||
nn+1)) < O2NM_MAX_NODES) {
|
||||
if (nn != dlm->node_num && nn != assert->node_idx)
|
||||
master_request = 1;
|
||||
@ -2002,7 +2002,7 @@ int dlm_assert_master_handler(struct o2net_msg *msg, u32 len, void *data,
|
||||
__dlm_print_one_lock_resource(res);
|
||||
spin_unlock(&res->spinlock);
|
||||
spin_unlock(&dlm->spinlock);
|
||||
*ret_data = (void *)res;
|
||||
*ret_data = (void *)res;
|
||||
dlm_put(dlm);
|
||||
return -EINVAL;
|
||||
}
|
||||
@ -2040,10 +2040,10 @@ int dlm_dispatch_assert_master(struct dlm_ctxt *dlm,
|
||||
item->u.am.request_from = request_from;
|
||||
item->u.am.flags = flags;
|
||||
|
||||
if (ignore_higher)
|
||||
mlog(0, "IGNORE HIGHER: %.*s\n", res->lockname.len,
|
||||
if (ignore_higher)
|
||||
mlog(0, "IGNORE HIGHER: %.*s\n", res->lockname.len,
|
||||
res->lockname.name);
|
||||
|
||||
|
||||
spin_lock(&dlm->work_lock);
|
||||
list_add_tail(&item->list, &dlm->work_list);
|
||||
spin_unlock(&dlm->work_lock);
|
||||
@ -2133,7 +2133,7 @@ static void dlm_assert_master_worker(struct dlm_work_item *item, void *data)
|
||||
* think that $RECOVERY is currently mastered by a dead node. If so,
|
||||
* we wait a short time to allow that node to get notified by its own
|
||||
* heartbeat stack, then check again. All $RECOVERY lock resources
|
||||
* mastered by dead nodes are purged when the hearbeat callback is
|
||||
* mastered by dead nodes are purged when the hearbeat callback is
|
||||
* fired, so we can know for sure that it is safe to continue once
|
||||
* the node returns a live node or no node. */
|
||||
static int dlm_pre_master_reco_lockres(struct dlm_ctxt *dlm,
|
||||
@ -2174,7 +2174,7 @@ static int dlm_pre_master_reco_lockres(struct dlm_ctxt *dlm,
|
||||
ret = -EAGAIN;
|
||||
}
|
||||
spin_unlock(&dlm->spinlock);
|
||||
mlog(0, "%s: reco lock master is %u\n", dlm->name,
|
||||
mlog(0, "%s: reco lock master is %u\n", dlm->name,
|
||||
master);
|
||||
break;
|
||||
}
|
||||
@ -2602,7 +2602,7 @@ static int dlm_migrate_lockres(struct dlm_ctxt *dlm,
|
||||
|
||||
mlog(0, "%s:%.*s: timed out during migration\n",
|
||||
dlm->name, res->lockname.len, res->lockname.name);
|
||||
/* avoid hang during shutdown when migrating lockres
|
||||
/* avoid hang during shutdown when migrating lockres
|
||||
* to a node which also goes down */
|
||||
if (dlm_is_node_dead(dlm, target)) {
|
||||
mlog(0, "%s:%.*s: expected migration "
|
||||
@ -2738,7 +2738,7 @@ static int dlm_migration_can_proceed(struct dlm_ctxt *dlm,
|
||||
can_proceed = !!(res->state & DLM_LOCK_RES_MIGRATING);
|
||||
spin_unlock(&res->spinlock);
|
||||
|
||||
/* target has died, so make the caller break out of the
|
||||
/* target has died, so make the caller break out of the
|
||||
* wait_event, but caller must recheck the domain_map */
|
||||
spin_lock(&dlm->spinlock);
|
||||
if (!test_bit(mig_target, dlm->domain_map))
|
||||
|
@ -1050,7 +1050,7 @@ static void dlm_move_reco_locks_to_list(struct dlm_ctxt *dlm,
|
||||
if (lock->ml.node == dead_node) {
|
||||
mlog(0, "AHA! there was "
|
||||
"a $RECOVERY lock for dead "
|
||||
"node %u (%s)!\n",
|
||||
"node %u (%s)!\n",
|
||||
dead_node, dlm->name);
|
||||
list_del_init(&lock->list);
|
||||
dlm_lock_put(lock);
|
||||
@ -1164,6 +1164,39 @@ static void dlm_init_migratable_lockres(struct dlm_migratable_lockres *mres,
|
||||
mres->master = master;
|
||||
}
|
||||
|
||||
/*
 * dlm_prepare_lvb_for_migration - pick up a lock's lvb for a lockres
 * that is being packed into a migratable lockres message.
 *
 * @lock:  lock whose lksb->lvb is examined
 * @mres:  migratable lockres whose lvb field is filled in or checked
 * @queue: list the lock sits on; DLM_BLOCKED_LIST entries are skipped
 *
 * Does nothing when the lock has no lksb, when it is on the blocked
 * list, or when its type is neither LKM_EXMODE nor LKM_PRMODE.
 * Otherwise, if mres->lvb is still empty, DLM_LVB_LEN bytes are copied
 * into it from the lock's lvb.  If mres->lvb is already set, the lock's
 * lvb must compare equal to it; a mismatch is logged, the lock resource
 * is printed, and BUG() is invoked.
 */
static void dlm_prepare_lvb_for_migration(struct dlm_lock *lock,
|
||||
struct dlm_migratable_lockres *mres,
|
||||
int queue)
|
||||
{
|
||||
/* No lksb attached to this lock, so there is no lvb to read */
if (!lock->lksb)
|
||||
return;
|
||||
|
||||
/* Ignore lvb in all locks in the blocked list */
|
||||
if (queue == DLM_BLOCKED_LIST)
|
||||
return;
|
||||
|
||||
/* Only consider lvbs in locks with granted EX or PR lock levels */
|
||||
if (lock->ml.type != LKM_EXMODE && lock->ml.type != LKM_PRMODE)
|
||||
return;
|
||||
|
||||
/* mres->lvb is still empty: take this lock's lvb for the migration */
if (dlm_lvb_is_empty(mres->lvb)) {
|
||||
memcpy(mres->lvb, lock->lksb->lvb, DLM_LVB_LEN);
|
||||
return;
|
||||
}
|
||||
|
||||
/* Ensure the lvb copied for migration matches in other valid locks */
|
||||
if (!memcmp(mres->lvb, lock->lksb->lvb, DLM_LVB_LEN))
|
||||
return;
|
||||
|
||||
/* The lvbs differ: report the offending lock, dump the lockres, BUG() */
mlog(ML_ERROR, "Mismatched lvb in lock cookie=%u:%llu, name=%.*s, "
|
||||
"node=%u\n",
|
||||
dlm_get_lock_cookie_node(be64_to_cpu(lock->ml.cookie)),
|
||||
dlm_get_lock_cookie_seq(be64_to_cpu(lock->ml.cookie)),
|
||||
lock->lockres->lockname.len, lock->lockres->lockname.name,
|
||||
lock->ml.node);
|
||||
dlm_print_one_lock_resource(lock->lockres);
|
||||
BUG();
|
||||
}
|
||||
|
||||
/* returns 1 if this lock fills the network structure,
|
||||
* 0 otherwise */
|
||||
@ -1181,20 +1214,7 @@ static int dlm_add_lock_to_array(struct dlm_lock *lock,
|
||||
ml->list = queue;
|
||||
if (lock->lksb) {
|
||||
ml->flags = lock->lksb->flags;
|
||||
/* send our current lvb */
|
||||
if (ml->type == LKM_EXMODE ||
|
||||
ml->type == LKM_PRMODE) {
|
||||
/* if it is already set, this had better be a PR
|
||||
* and it has to match */
|
||||
if (!dlm_lvb_is_empty(mres->lvb) &&
|
||||
(ml->type == LKM_EXMODE ||
|
||||
memcmp(mres->lvb, lock->lksb->lvb, DLM_LVB_LEN))) {
|
||||
mlog(ML_ERROR, "mismatched lvbs!\n");
|
||||
dlm_print_one_lock_resource(lock->lockres);
|
||||
BUG();
|
||||
}
|
||||
memcpy(mres->lvb, lock->lksb->lvb, DLM_LVB_LEN);
|
||||
}
|
||||
dlm_prepare_lvb_for_migration(lock, mres, queue);
|
||||
}
|
||||
ml->node = lock->ml.node;
|
||||
mres->num_locks++;
|
||||
@ -1730,6 +1750,7 @@ static int dlm_process_recovery_data(struct dlm_ctxt *dlm,
|
||||
struct dlm_lock *lock = NULL;
|
||||
u8 from = O2NM_MAX_NODES;
|
||||
unsigned int added = 0;
|
||||
__be64 c;
|
||||
|
||||
mlog(0, "running %d locks for this lockres\n", mres->num_locks);
|
||||
for (i=0; i<mres->num_locks; i++) {
|
||||
@ -1777,19 +1798,48 @@ static int dlm_process_recovery_data(struct dlm_ctxt *dlm,
|
||||
/* lock is always created locally first, and
|
||||
* destroyed locally last. it must be on the list */
|
||||
if (!lock) {
|
||||
__be64 c = ml->cookie;
|
||||
mlog(ML_ERROR, "could not find local lock "
|
||||
"with cookie %u:%llu!\n",
|
||||
c = ml->cookie;
|
||||
mlog(ML_ERROR, "Could not find local lock "
|
||||
"with cookie %u:%llu, node %u, "
|
||||
"list %u, flags 0x%x, type %d, "
|
||||
"conv %d, highest blocked %d\n",
|
||||
dlm_get_lock_cookie_node(be64_to_cpu(c)),
|
||||
dlm_get_lock_cookie_seq(be64_to_cpu(c)));
|
||||
dlm_get_lock_cookie_seq(be64_to_cpu(c)),
|
||||
ml->node, ml->list, ml->flags, ml->type,
|
||||
ml->convert_type, ml->highest_blocked);
|
||||
__dlm_print_one_lock_resource(res);
|
||||
BUG();
|
||||
}
|
||||
|
||||
if (lock->ml.node != ml->node) {
|
||||
c = lock->ml.cookie;
|
||||
mlog(ML_ERROR, "Mismatched node# in lock "
|
||||
"cookie %u:%llu, name %.*s, node %u\n",
|
||||
dlm_get_lock_cookie_node(be64_to_cpu(c)),
|
||||
dlm_get_lock_cookie_seq(be64_to_cpu(c)),
|
||||
res->lockname.len, res->lockname.name,
|
||||
lock->ml.node);
|
||||
c = ml->cookie;
|
||||
mlog(ML_ERROR, "Migrate lock cookie %u:%llu, "
|
||||
"node %u, list %u, flags 0x%x, type %d, "
|
||||
"conv %d, highest blocked %d\n",
|
||||
dlm_get_lock_cookie_node(be64_to_cpu(c)),
|
||||
dlm_get_lock_cookie_seq(be64_to_cpu(c)),
|
||||
ml->node, ml->list, ml->flags, ml->type,
|
||||
ml->convert_type, ml->highest_blocked);
|
||||
__dlm_print_one_lock_resource(res);
|
||||
BUG();
|
||||
}
|
||||
BUG_ON(lock->ml.node != ml->node);
|
||||
|
||||
if (tmpq != queue) {
|
||||
mlog(0, "lock was on %u instead of %u for %.*s\n",
|
||||
j, ml->list, res->lockname.len, res->lockname.name);
|
||||
c = ml->cookie;
|
||||
mlog(0, "Lock cookie %u:%llu was on list %u "
|
||||
"instead of list %u for %.*s\n",
|
||||
dlm_get_lock_cookie_node(be64_to_cpu(c)),
|
||||
dlm_get_lock_cookie_seq(be64_to_cpu(c)),
|
||||
j, ml->list, res->lockname.len,
|
||||
res->lockname.name);
|
||||
__dlm_print_one_lock_resource(res);
|
||||
spin_unlock(&res->spinlock);
|
||||
continue;
|
||||
}
|
||||
@ -1839,7 +1889,7 @@ static int dlm_process_recovery_data(struct dlm_ctxt *dlm,
|
||||
* the lvb. */
|
||||
memcpy(res->lvb, mres->lvb, DLM_LVB_LEN);
|
||||
} else {
|
||||
/* otherwise, the node is sending its
|
||||
/* otherwise, the node is sending its
|
||||
* most recent valid lvb info */
|
||||
BUG_ON(ml->type != LKM_EXMODE &&
|
||||
ml->type != LKM_PRMODE);
|
||||
@ -1886,7 +1936,7 @@ static int dlm_process_recovery_data(struct dlm_ctxt *dlm,
|
||||
spin_lock(&res->spinlock);
|
||||
list_for_each_entry(lock, queue, list) {
|
||||
if (lock->ml.cookie == ml->cookie) {
|
||||
__be64 c = lock->ml.cookie;
|
||||
c = lock->ml.cookie;
|
||||
mlog(ML_ERROR, "%s:%.*s: %u:%llu: lock already "
|
||||
"exists on this lockres!\n", dlm->name,
|
||||
res->lockname.len, res->lockname.name,
|
||||
@ -2114,7 +2164,7 @@ static void dlm_revalidate_lvb(struct dlm_ctxt *dlm,
|
||||
assert_spin_locked(&res->spinlock);
|
||||
|
||||
if (res->owner == dlm->node_num)
|
||||
/* if this node owned the lockres, and if the dead node
|
||||
/* if this node owned the lockres, and if the dead node
|
||||
* had an EX when he died, blank out the lvb */
|
||||
search_node = dead_node;
|
||||
else {
|
||||
@ -2152,7 +2202,7 @@ static void dlm_free_dead_locks(struct dlm_ctxt *dlm,
|
||||
|
||||
/* this node is the lockres master:
|
||||
* 1) remove any stale locks for the dead node
|
||||
* 2) if the dead node had an EX when he died, blank out the lvb
|
||||
* 2) if the dead node had an EX when he died, blank out the lvb
|
||||
*/
|
||||
assert_spin_locked(&dlm->spinlock);
|
||||
assert_spin_locked(&res->spinlock);
|
||||
@ -2193,7 +2243,12 @@ static void dlm_free_dead_locks(struct dlm_ctxt *dlm,
|
||||
mlog(0, "%s:%.*s: freed %u locks for dead node %u, "
|
||||
"dropping ref from lockres\n", dlm->name,
|
||||
res->lockname.len, res->lockname.name, freed, dead_node);
|
||||
BUG_ON(!test_bit(dead_node, res->refmap));
|
||||
if(!test_bit(dead_node, res->refmap)) {
|
||||
mlog(ML_ERROR, "%s:%.*s: freed %u locks for dead node %u, "
|
||||
"but ref was not set\n", dlm->name,
|
||||
res->lockname.len, res->lockname.name, freed, dead_node);
|
||||
__dlm_print_one_lock_resource(res);
|
||||
}
|
||||
dlm_lockres_clear_refmap_bit(dead_node, res);
|
||||
} else if (test_bit(dead_node, res->refmap)) {
|
||||
mlog(0, "%s:%.*s: dead node %u had a ref, but had "
|
||||
@ -2260,7 +2315,7 @@ static void dlm_do_local_recovery_cleanup(struct dlm_ctxt *dlm, u8 dead_node)
|
||||
}
|
||||
spin_unlock(&res->spinlock);
|
||||
continue;
|
||||
}
|
||||
}
|
||||
spin_lock(&res->spinlock);
|
||||
/* zero the lvb if necessary */
|
||||
dlm_revalidate_lvb(dlm, res, dead_node);
|
||||
@ -2411,7 +2466,7 @@ static void dlm_reco_unlock_ast(void *astdata, enum dlm_status st)
|
||||
* this function on each node racing to become the recovery
|
||||
* master will not stop attempting this until either:
|
||||
* a) this node gets the EX (and becomes the recovery master),
|
||||
* or b) dlm->reco.new_master gets set to some nodenum
|
||||
* or b) dlm->reco.new_master gets set to some nodenum
|
||||
* != O2NM_INVALID_NODE_NUM (another node will do the reco).
|
||||
* so each time a recovery master is needed, the entire cluster
|
||||
* will sync at this point. if the new master dies, that will
|
||||
@ -2424,7 +2479,7 @@ static int dlm_pick_recovery_master(struct dlm_ctxt *dlm)
|
||||
|
||||
mlog(0, "starting recovery of %s at %lu, dead=%u, this=%u\n",
|
||||
dlm->name, jiffies, dlm->reco.dead_node, dlm->node_num);
|
||||
again:
|
||||
again:
|
||||
memset(&lksb, 0, sizeof(lksb));
|
||||
|
||||
ret = dlmlock(dlm, LKM_EXMODE, &lksb, LKM_NOQUEUE|LKM_RECOVERY,
|
||||
@ -2437,8 +2492,8 @@ static int dlm_pick_recovery_master(struct dlm_ctxt *dlm)
|
||||
if (ret == DLM_NORMAL) {
|
||||
mlog(0, "dlm=%s dlmlock says I got it (this=%u)\n",
|
||||
dlm->name, dlm->node_num);
|
||||
|
||||
/* got the EX lock. check to see if another node
|
||||
|
||||
/* got the EX lock. check to see if another node
|
||||
* just became the reco master */
|
||||
if (dlm_reco_master_ready(dlm)) {
|
||||
mlog(0, "%s: got reco EX lock, but %u will "
|
||||
@ -2451,12 +2506,12 @@ static int dlm_pick_recovery_master(struct dlm_ctxt *dlm)
|
||||
/* see if recovery was already finished elsewhere */
|
||||
spin_lock(&dlm->spinlock);
|
||||
if (dlm->reco.dead_node == O2NM_INVALID_NODE_NUM) {
|
||||
status = -EINVAL;
|
||||
status = -EINVAL;
|
||||
mlog(0, "%s: got reco EX lock, but "
|
||||
"node got recovered already\n", dlm->name);
|
||||
if (dlm->reco.new_master != O2NM_INVALID_NODE_NUM) {
|
||||
mlog(ML_ERROR, "%s: new master is %u "
|
||||
"but no dead node!\n",
|
||||
"but no dead node!\n",
|
||||
dlm->name, dlm->reco.new_master);
|
||||
BUG();
|
||||
}
|
||||
@ -2468,7 +2523,7 @@ static int dlm_pick_recovery_master(struct dlm_ctxt *dlm)
|
||||
* set the master and send the messages to begin recovery */
|
||||
if (!status) {
|
||||
mlog(0, "%s: dead=%u, this=%u, sending "
|
||||
"begin_reco now\n", dlm->name,
|
||||
"begin_reco now\n", dlm->name,
|
||||
dlm->reco.dead_node, dlm->node_num);
|
||||
status = dlm_send_begin_reco_message(dlm,
|
||||
dlm->reco.dead_node);
|
||||
@ -2501,7 +2556,7 @@ static int dlm_pick_recovery_master(struct dlm_ctxt *dlm)
|
||||
mlog(0, "dlm=%s dlmlock says another node got it (this=%u)\n",
|
||||
dlm->name, dlm->node_num);
|
||||
/* another node is master. wait on
|
||||
* reco.new_master != O2NM_INVALID_NODE_NUM
|
||||
* reco.new_master != O2NM_INVALID_NODE_NUM
|
||||
* for at most one second */
|
||||
wait_event_timeout(dlm->dlm_reco_thread_wq,
|
||||
dlm_reco_master_ready(dlm),
|
||||
@ -2589,7 +2644,13 @@ static int dlm_send_begin_reco_message(struct dlm_ctxt *dlm, u8 dead_node)
|
||||
"begin reco msg (%d)\n", dlm->name, nodenum, ret);
|
||||
ret = 0;
|
||||
}
|
||||
if (ret == -EAGAIN) {
|
||||
|
||||
/*
|
||||
* Prior to commit aad1b15310b9bcd59fa81ab8f2b1513b59553ea8,
|
||||
* dlm_begin_reco_handler() returned EAGAIN and not -EAGAIN.
|
||||
* We are handling both for compatibility reasons.
|
||||
*/
|
||||
if (ret == -EAGAIN || ret == EAGAIN) {
|
||||
mlog(0, "%s: trying to start recovery of node "
|
||||
"%u, but node %u is waiting for last recovery "
|
||||
"to complete, backoff for a bit\n", dlm->name,
|
||||
@ -2599,7 +2660,7 @@ static int dlm_send_begin_reco_message(struct dlm_ctxt *dlm, u8 dead_node)
|
||||
}
|
||||
if (ret < 0) {
|
||||
struct dlm_lock_resource *res;
|
||||
/* this is now a serious problem, possibly ENOMEM
|
||||
/* this is now a serious problem, possibly ENOMEM
|
||||
* in the network stack. must retry */
|
||||
mlog_errno(ret);
|
||||
mlog(ML_ERROR, "begin reco of dlm %s to node %u "
|
||||
@ -2612,7 +2673,7 @@ static int dlm_send_begin_reco_message(struct dlm_ctxt *dlm, u8 dead_node)
|
||||
} else {
|
||||
mlog(ML_ERROR, "recovery lock not found\n");
|
||||
}
|
||||
/* sleep for a bit in hopes that we can avoid
|
||||
/* sleep for a bit in hopes that we can avoid
|
||||
* another ENOMEM */
|
||||
msleep(100);
|
||||
goto retry;
|
||||
@ -2664,7 +2725,7 @@ int dlm_begin_reco_handler(struct o2net_msg *msg, u32 len, void *data,
|
||||
}
|
||||
if (dlm->reco.dead_node != O2NM_INVALID_NODE_NUM) {
|
||||
mlog(ML_NOTICE, "%s: dead_node previously set to %u, "
|
||||
"node %u changing it to %u\n", dlm->name,
|
||||
"node %u changing it to %u\n", dlm->name,
|
||||
dlm->reco.dead_node, br->node_idx, br->dead_node);
|
||||
}
|
||||
dlm_set_reco_master(dlm, br->node_idx);
|
||||
@ -2730,8 +2791,8 @@ static int dlm_send_finalize_reco_message(struct dlm_ctxt *dlm)
|
||||
if (ret < 0) {
|
||||
mlog_errno(ret);
|
||||
if (dlm_is_host_down(ret)) {
|
||||
/* this has no effect on this recovery
|
||||
* session, so set the status to zero to
|
||||
/* this has no effect on this recovery
|
||||
* session, so set the status to zero to
|
||||
* finish out the last recovery */
|
||||
mlog(ML_ERROR, "node %u went down after this "
|
||||
"node finished recovery.\n", nodenum);
|
||||
@ -2768,7 +2829,7 @@ int dlm_finalize_reco_handler(struct o2net_msg *msg, u32 len, void *data,
|
||||
mlog(0, "%s: node %u finalizing recovery stage%d of "
|
||||
"node %u (%u:%u)\n", dlm->name, fr->node_idx, stage,
|
||||
fr->dead_node, dlm->reco.dead_node, dlm->reco.new_master);
|
||||
|
||||
|
||||
spin_lock(&dlm->spinlock);
|
||||
|
||||
if (dlm->reco.new_master != fr->node_idx) {
|
||||
|
@ -190,8 +190,8 @@ static enum dlm_status dlmunlock_common(struct dlm_ctxt *dlm,
|
||||
actions &= ~(DLM_UNLOCK_REMOVE_LOCK|
|
||||
DLM_UNLOCK_REGRANT_LOCK|
|
||||
DLM_UNLOCK_CLEAR_CONVERT_TYPE);
|
||||
} else if (status == DLM_RECOVERING ||
|
||||
status == DLM_MIGRATING ||
|
||||
} else if (status == DLM_RECOVERING ||
|
||||
status == DLM_MIGRATING ||
|
||||
status == DLM_FORWARD) {
|
||||
/* must clear the actions because this unlock
|
||||
* is about to be retried. cannot free or do
|
||||
@ -661,14 +661,14 @@ enum dlm_status dlmunlock(struct dlm_ctxt *dlm, struct dlm_lockstatus *lksb,
|
||||
if (call_ast) {
|
||||
mlog(0, "calling unlockast(%p, %d)\n", data, status);
|
||||
if (is_master) {
|
||||
/* it is possible that there is one last bast
|
||||
/* it is possible that there is one last bast
|
||||
* pending. make sure it is flushed, then
|
||||
* call the unlockast.
|
||||
* not an issue if this is a mastered remotely,
|
||||
* since this lock has been removed from the
|
||||
* lockres queues and cannot be found. */
|
||||
dlm_kick_thread(dlm, NULL);
|
||||
wait_event(dlm->ast_wq,
|
||||
wait_event(dlm->ast_wq,
|
||||
dlm_lock_basts_flushed(dlm, lock));
|
||||
}
|
||||
(*unlockast)(data, status);
|
||||
|
@ -875,6 +875,14 @@ static inline void ocfs2_generic_handle_convert_action(struct ocfs2_lock_res *lo
|
||||
lockres_or_flags(lockres, OCFS2_LOCK_NEEDS_REFRESH);
|
||||
|
||||
lockres->l_level = lockres->l_requested;
|
||||
|
||||
/*
|
||||
* We set the OCFS2_LOCK_UPCONVERT_FINISHING flag before clearing
|
||||
* the OCFS2_LOCK_BUSY flag to prevent the dc thread from
|
||||
* downconverting the lock before the upconvert has fully completed.
|
||||
*/
|
||||
lockres_or_flags(lockres, OCFS2_LOCK_UPCONVERT_FINISHING);
|
||||
|
||||
lockres_clear_flags(lockres, OCFS2_LOCK_BUSY);
|
||||
|
||||
mlog_exit_void();
|
||||
@ -907,8 +915,6 @@ static int ocfs2_generic_handle_bast(struct ocfs2_lock_res *lockres,
|
||||
|
||||
assert_spin_locked(&lockres->l_lock);
|
||||
|
||||
lockres_or_flags(lockres, OCFS2_LOCK_BLOCKED);
|
||||
|
||||
if (level > lockres->l_blocking) {
|
||||
/* only schedule a downconvert if we haven't already scheduled
|
||||
* one that goes low enough to satisfy the level we're
|
||||
@ -921,6 +927,9 @@ static int ocfs2_generic_handle_bast(struct ocfs2_lock_res *lockres,
|
||||
lockres->l_blocking = level;
|
||||
}
|
||||
|
||||
if (needs_downconvert)
|
||||
lockres_or_flags(lockres, OCFS2_LOCK_BLOCKED);
|
||||
|
||||
mlog_exit(needs_downconvert);
|
||||
return needs_downconvert;
|
||||
}
|
||||
@ -1133,6 +1142,7 @@ static inline void ocfs2_recover_from_dlm_error(struct ocfs2_lock_res *lockres,
|
||||
mlog_entry_void();
|
||||
spin_lock_irqsave(&lockres->l_lock, flags);
|
||||
lockres_clear_flags(lockres, OCFS2_LOCK_BUSY);
|
||||
lockres_clear_flags(lockres, OCFS2_LOCK_UPCONVERT_FINISHING);
|
||||
if (convert)
|
||||
lockres->l_action = OCFS2_AST_INVALID;
|
||||
else
|
||||
@ -1323,13 +1333,13 @@ static int __ocfs2_cluster_lock(struct ocfs2_super *osb,
|
||||
again:
|
||||
wait = 0;
|
||||
|
||||
spin_lock_irqsave(&lockres->l_lock, flags);
|
||||
|
||||
if (catch_signals && signal_pending(current)) {
|
||||
ret = -ERESTARTSYS;
|
||||
goto out;
|
||||
goto unlock;
|
||||
}
|
||||
|
||||
spin_lock_irqsave(&lockres->l_lock, flags);
|
||||
|
||||
mlog_bug_on_msg(lockres->l_flags & OCFS2_LOCK_FREEING,
|
||||
"Cluster lock called on freeing lockres %s! flags "
|
||||
"0x%lx\n", lockres->l_name, lockres->l_flags);
|
||||
@ -1346,6 +1356,25 @@ static int __ocfs2_cluster_lock(struct ocfs2_super *osb,
|
||||
goto unlock;
|
||||
}
|
||||
|
||||
if (lockres->l_flags & OCFS2_LOCK_UPCONVERT_FINISHING) {
|
||||
/*
|
||||
* We've upconverted. If the lock now has a level we can
|
||||
* work with, we take it. If, however, the lock is not at the
|
||||
* required level, we go thru the full cycle. One way this could
|
||||
* happen is if a process requesting an upconvert to PR is
|
||||
* closely followed by another requesting upconvert to an EX.
|
||||
* If the process requesting EX lands here, we want it to
|
||||
* continue attempting to upconvert and let the process
|
||||
* requesting PR take the lock.
|
||||
* If multiple processes request upconvert to PR, the first one
|
||||
* here will take the lock. The others will have to go thru the
|
||||
* OCFS2_LOCK_BLOCKED check to ensure that there is no pending
|
||||
* downconvert request.
|
||||
*/
|
||||
if (level <= lockres->l_level)
|
||||
goto update_holders;
|
||||
}
|
||||
|
||||
if (lockres->l_flags & OCFS2_LOCK_BLOCKED &&
|
||||
!ocfs2_may_continue_on_blocked_lock(lockres, level)) {
|
||||
/* is the lock is currently blocked on behalf of
|
||||
@ -1416,11 +1445,14 @@ static int __ocfs2_cluster_lock(struct ocfs2_super *osb,
|
||||
goto again;
|
||||
}
|
||||
|
||||
update_holders:
|
||||
/* Ok, if we get here then we're good to go. */
|
||||
ocfs2_inc_holders(lockres, level);
|
||||
|
||||
ret = 0;
|
||||
unlock:
|
||||
lockres_clear_flags(lockres, OCFS2_LOCK_UPCONVERT_FINISHING);
|
||||
|
||||
spin_unlock_irqrestore(&lockres->l_lock, flags);
|
||||
out:
|
||||
/*
|
||||
@ -3155,7 +3187,7 @@ static int ocfs2_drop_lock(struct ocfs2_super *osb,
|
||||
/* Mark the lockres as being dropped. It will no longer be
|
||||
* queued if blocking, but we still may have to wait on it
|
||||
* being dequeued from the downconvert thread before we can consider
|
||||
* it safe to drop.
|
||||
* it safe to drop.
|
||||
*
|
||||
* You can *not* attempt to call cluster_lock on this lockres anymore. */
|
||||
void ocfs2_mark_lockres_freeing(struct ocfs2_lock_res *lockres)
|
||||
@ -3352,6 +3384,7 @@ static int ocfs2_unblock_lock(struct ocfs2_super *osb,
|
||||
unsigned long flags;
|
||||
int blocking;
|
||||
int new_level;
|
||||
int level;
|
||||
int ret = 0;
|
||||
int set_lvb = 0;
|
||||
unsigned int gen;
|
||||
@ -3360,9 +3393,17 @@ static int ocfs2_unblock_lock(struct ocfs2_super *osb,
|
||||
|
||||
spin_lock_irqsave(&lockres->l_lock, flags);
|
||||
|
||||
BUG_ON(!(lockres->l_flags & OCFS2_LOCK_BLOCKED));
|
||||
|
||||
recheck:
|
||||
/*
|
||||
* Is it still blocking? If not, we have no more work to do.
|
||||
*/
|
||||
if (!(lockres->l_flags & OCFS2_LOCK_BLOCKED)) {
|
||||
BUG_ON(lockres->l_blocking != DLM_LOCK_NL);
|
||||
spin_unlock_irqrestore(&lockres->l_lock, flags);
|
||||
ret = 0;
|
||||
goto leave;
|
||||
}
|
||||
|
||||
if (lockres->l_flags & OCFS2_LOCK_BUSY) {
|
||||
/* XXX
|
||||
* This is a *big* race. The OCFS2_LOCK_PENDING flag
|
||||
@ -3401,6 +3442,31 @@ static int ocfs2_unblock_lock(struct ocfs2_super *osb,
|
||||
goto leave;
|
||||
}
|
||||
|
||||
/*
|
||||
* This prevents livelocks. OCFS2_LOCK_UPCONVERT_FINISHING flag is
|
||||
* set when the ast is received for an upconvert just before the
|
||||
* OCFS2_LOCK_BUSY flag is cleared. Now if the fs received a bast
|
||||
* on the heels of the ast, we want to delay the downconvert just
|
||||
* enough to allow the up requestor to do its task. Because this
|
||||
* lock is in the blocked queue, the lock will be downconverted
|
||||
* as soon as the requestor is done with the lock.
|
||||
*/
|
||||
if (lockres->l_flags & OCFS2_LOCK_UPCONVERT_FINISHING)
|
||||
goto leave_requeue;
|
||||
|
||||
/*
|
||||
* How can we block and yet be at NL? We were trying to upconvert
|
||||
* from NL and got canceled. The code comes back here, and now
|
||||
* we notice and clear BLOCKING.
|
||||
*/
|
||||
if (lockres->l_level == DLM_LOCK_NL) {
|
||||
BUG_ON(lockres->l_ex_holders || lockres->l_ro_holders);
|
||||
lockres->l_blocking = DLM_LOCK_NL;
|
||||
lockres_clear_flags(lockres, OCFS2_LOCK_BLOCKED);
|
||||
spin_unlock_irqrestore(&lockres->l_lock, flags);
|
||||
goto leave;
|
||||
}
|
||||
|
||||
/* if we're blocking an exclusive and we have *any* holders,
|
||||
* then requeue. */
|
||||
if ((lockres->l_blocking == DLM_LOCK_EX)
|
||||
@ -3438,6 +3504,7 @@ static int ocfs2_unblock_lock(struct ocfs2_super *osb,
|
||||
* may sleep, so we save off a copy of what we're blocking as
|
||||
* it may change while we're not holding the spin lock. */
|
||||
blocking = lockres->l_blocking;
|
||||
level = lockres->l_level;
|
||||
spin_unlock_irqrestore(&lockres->l_lock, flags);
|
||||
|
||||
ctl->unblock_action = lockres->l_ops->downconvert_worker(lockres, blocking);
|
||||
@ -3446,7 +3513,7 @@ static int ocfs2_unblock_lock(struct ocfs2_super *osb,
|
||||
goto leave;
|
||||
|
||||
spin_lock_irqsave(&lockres->l_lock, flags);
|
||||
if (blocking != lockres->l_blocking) {
|
||||
if ((blocking != lockres->l_blocking) || (level != lockres->l_level)) {
|
||||
/* If this changed underneath us, then we can't drop
|
||||
* it just yet. */
|
||||
goto recheck;
|
||||
|
@ -239,7 +239,7 @@ static int ocfs2_encode_fh(struct dentry *dentry, u32 *fh_in, int *max_len,
|
||||
mlog(0, "Encoding parent: blkno: %llu, generation: %u\n",
|
||||
(unsigned long long)blkno, generation);
|
||||
}
|
||||
|
||||
|
||||
*max_len = len;
|
||||
|
||||
bail:
|
||||
|
@ -192,7 +192,7 @@ static int ocfs2_try_to_merge_extent_map(struct ocfs2_extent_map_item *emi,
|
||||
emi->ei_clusters += ins->ei_clusters;
|
||||
return 1;
|
||||
} else if ((ins->ei_phys + ins->ei_clusters) == emi->ei_phys &&
|
||||
(ins->ei_cpos + ins->ei_clusters) == emi->ei_phys &&
|
||||
(ins->ei_cpos + ins->ei_clusters) == emi->ei_cpos &&
|
||||
ins->ei_flags == emi->ei_flags) {
|
||||
emi->ei_phys = ins->ei_phys;
|
||||
emi->ei_cpos = ins->ei_cpos;
|
||||
|
@ -749,7 +749,7 @@ static int ocfs2_write_zero_page(struct inode *inode,
|
||||
int ret;
|
||||
|
||||
offset = (size & (PAGE_CACHE_SIZE-1)); /* Within page */
|
||||
/* ugh. in prepare/commit_write, if from==to==start of block, we
|
||||
/* ugh. in prepare/commit_write, if from==to==start of block, we
|
||||
** skip the prepare. make sure we never send an offset for the start
|
||||
** of a block
|
||||
*/
|
||||
@ -1779,7 +1779,7 @@ static int ocfs2_prepare_inode_for_write(struct dentry *dentry,
|
||||
struct inode *inode = dentry->d_inode;
|
||||
loff_t saved_pos, end;
|
||||
|
||||
/*
|
||||
/*
|
||||
* We start with a read level meta lock and only jump to an ex
|
||||
* if we need to make modifications here.
|
||||
*/
|
||||
@ -2013,8 +2013,8 @@ static ssize_t ocfs2_file_aio_write(struct kiocb *iocb,
|
||||
/* buffered aio wouldn't have proper lock coverage today */
|
||||
BUG_ON(ret == -EIOCBQUEUED && !(file->f_flags & O_DIRECT));
|
||||
|
||||
if ((file->f_flags & O_DSYNC && !direct_io) || IS_SYNC(inode) ||
|
||||
(file->f_flags & O_DIRECT && has_refcount)) {
|
||||
if (((file->f_flags & O_DSYNC) && !direct_io) || IS_SYNC(inode) ||
|
||||
((file->f_flags & O_DIRECT) && has_refcount)) {
|
||||
ret = filemap_fdatawrite_range(file->f_mapping, pos,
|
||||
pos + count - 1);
|
||||
if (ret < 0)
|
||||
@ -2033,7 +2033,7 @@ static ssize_t ocfs2_file_aio_write(struct kiocb *iocb,
|
||||
pos + count - 1);
|
||||
}
|
||||
|
||||
/*
|
||||
/*
|
||||
* deep in g_f_a_w_n()->ocfs2_direct_IO we pass in a ocfs2_dio_end_io
|
||||
* function pointer which is called when o_direct io completes so that
|
||||
* it can unlock our rw lock. (it's the clustered equivalent of
|
||||
@ -2198,7 +2198,7 @@ static ssize_t ocfs2_file_aio_read(struct kiocb *iocb,
|
||||
goto bail;
|
||||
}
|
||||
|
||||
/*
|
||||
/*
|
||||
* buffered reads protect themselves in ->readpage(). O_DIRECT reads
|
||||
* need locks to protect pending reads from racing with truncate.
|
||||
*/
|
||||
@ -2220,10 +2220,10 @@ static ssize_t ocfs2_file_aio_read(struct kiocb *iocb,
|
||||
* We're fine letting folks race truncates and extending
|
||||
* writes with read across the cluster, just like they can
|
||||
* locally. Hence no rw_lock during read.
|
||||
*
|
||||
*
|
||||
* Take and drop the meta data lock to update inode fields
|
||||
* like i_size. This allows the checks down below
|
||||
* generic_file_aio_read() a chance of actually working.
|
||||
* generic_file_aio_read() a chance of actually working.
|
||||
*/
|
||||
ret = ocfs2_inode_lock_atime(inode, filp->f_vfsmnt, &lock_level);
|
||||
if (ret < 0) {
|
||||
@ -2248,7 +2248,7 @@ static ssize_t ocfs2_file_aio_read(struct kiocb *iocb,
|
||||
bail:
|
||||
if (have_alloc_sem)
|
||||
up_read(&inode->i_alloc_sem);
|
||||
if (rw_level != -1)
|
||||
if (rw_level != -1)
|
||||
ocfs2_rw_unlock(inode, rw_level);
|
||||
mlog_exit(ret);
|
||||
|
||||
|
@ -475,7 +475,7 @@ static int ocfs2_read_locked_inode(struct inode *inode,
|
||||
if (args->fi_flags & OCFS2_FI_FLAG_ORPHAN_RECOVERY) {
|
||||
status = ocfs2_try_open_lock(inode, 0);
|
||||
if (status) {
|
||||
make_bad_inode(inode);
|
||||
make_bad_inode(inode);
|
||||
return status;
|
||||
}
|
||||
}
|
||||
@ -684,7 +684,7 @@ static int ocfs2_remove_inode(struct inode *inode,
|
||||
return status;
|
||||
}
|
||||
|
||||
/*
|
||||
/*
|
||||
* Serialize with orphan dir recovery. If the process doing
|
||||
* recovery on this orphan dir does an iget() with the dir
|
||||
* i_mutex held, we'll deadlock here. Instead we detect this
|
||||
|
@ -7,6 +7,7 @@
|
||||
|
||||
#include <linux/fs.h>
|
||||
#include <linux/mount.h>
|
||||
#include <linux/compat.h>
|
||||
|
||||
#define MLOG_MASK_PREFIX ML_INODE
|
||||
#include <cluster/masklog.h>
|
||||
@ -181,6 +182,10 @@ long ocfs2_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
|
||||
#ifdef CONFIG_COMPAT
|
||||
long ocfs2_compat_ioctl(struct file *file, unsigned cmd, unsigned long arg)
|
||||
{
|
||||
bool preserve;
|
||||
struct reflink_arguments args;
|
||||
struct inode *inode = file->f_path.dentry->d_inode;
|
||||
|
||||
switch (cmd) {
|
||||
case OCFS2_IOC32_GETFLAGS:
|
||||
cmd = OCFS2_IOC_GETFLAGS;
|
||||
@ -195,8 +200,15 @@ long ocfs2_compat_ioctl(struct file *file, unsigned cmd, unsigned long arg)
|
||||
case OCFS2_IOC_GROUP_EXTEND:
|
||||
case OCFS2_IOC_GROUP_ADD:
|
||||
case OCFS2_IOC_GROUP_ADD64:
|
||||
case OCFS2_IOC_REFLINK:
|
||||
break;
|
||||
case OCFS2_IOC_REFLINK:
|
||||
if (copy_from_user(&args, (struct reflink_arguments *)arg,
|
||||
sizeof(args)))
|
||||
return -EFAULT;
|
||||
preserve = (args.preserve != 0);
|
||||
|
||||
return ocfs2_reflink_ioctl(inode, compat_ptr(args.old_path),
|
||||
compat_ptr(args.new_path), preserve);
|
||||
default:
|
||||
return -ENOIOCTLCMD;
|
||||
}
|
||||
|
@ -2034,7 +2034,7 @@ static int ocfs2_queue_orphans(struct ocfs2_super *osb,
|
||||
status = -ENOENT;
|
||||
mlog_errno(status);
|
||||
return status;
|
||||
}
|
||||
}
|
||||
|
||||
mutex_lock(&orphan_dir_inode->i_mutex);
|
||||
status = ocfs2_inode_lock(orphan_dir_inode, NULL, 0);
|
||||
|
@ -136,6 +136,10 @@ enum ocfs2_unlock_action {
|
||||
#define OCFS2_LOCK_PENDING (0x00000400) /* This lockres is pending a
|
||||
call to dlm_lock. Only
|
||||
exists with BUSY set. */
|
||||
#define OCFS2_LOCK_UPCONVERT_FINISHING (0x00000800) /* blocks the dc thread
|
||||
* from downconverting
|
||||
* before the upconvert
|
||||
* has completed */
|
||||
|
||||
struct ocfs2_lock_res_ops;
|
||||
|
||||
|
@ -1417,9 +1417,16 @@ static inline int ocfs2_fast_symlink_chars(int blocksize)
|
||||
return blocksize - offsetof(struct ocfs2_dinode, id2.i_symlink);
|
||||
}
|
||||
|
||||
static inline int ocfs2_max_inline_data(int blocksize)
|
||||
static inline int ocfs2_max_inline_data_with_xattr(int blocksize,
|
||||
struct ocfs2_dinode *di)
|
||||
{
|
||||
return blocksize - offsetof(struct ocfs2_dinode, id2.i_data.id_data);
|
||||
if (di && (di->i_dyn_features & OCFS2_INLINE_XATTR_FL))
|
||||
return blocksize -
|
||||
offsetof(struct ocfs2_dinode, id2.i_data.id_data) -
|
||||
di->i_xattr_inline_size;
|
||||
else
|
||||
return blocksize -
|
||||
offsetof(struct ocfs2_dinode, id2.i_data.id_data);
|
||||
}
|
||||
|
||||
static inline int ocfs2_extent_recs_per_inode(int blocksize)
|
||||
|
@ -2945,7 +2945,7 @@ static int ocfs2_duplicate_clusters_by_page(handle_t *handle,
|
||||
|
||||
while (offset < end) {
|
||||
page_index = offset >> PAGE_CACHE_SHIFT;
|
||||
map_end = (page_index + 1) << PAGE_CACHE_SHIFT;
|
||||
map_end = ((loff_t)page_index + 1) << PAGE_CACHE_SHIFT;
|
||||
if (map_end > end)
|
||||
map_end = end;
|
||||
|
||||
@ -2957,8 +2957,12 @@ static int ocfs2_duplicate_clusters_by_page(handle_t *handle,
|
||||
|
||||
page = grab_cache_page(mapping, page_index);
|
||||
|
||||
/* This page can't be dirtied before we CoW it out. */
|
||||
BUG_ON(PageDirty(page));
|
||||
/*
|
||||
* In case PAGE_CACHE_SIZE <= CLUSTER_SIZE, This page
|
||||
* can't be dirtied before we CoW it out.
|
||||
*/
|
||||
if (PAGE_CACHE_SIZE <= OCFS2_SB(sb)->s_clustersize)
|
||||
BUG_ON(PageDirty(page));
|
||||
|
||||
if (!PageUptodate(page)) {
|
||||
ret = block_read_full_page(page, ocfs2_get_block);
|
||||
@ -3170,7 +3174,7 @@ static int ocfs2_cow_sync_writeback(struct super_block *sb,
|
||||
|
||||
while (offset < end) {
|
||||
page_index = offset >> PAGE_CACHE_SHIFT;
|
||||
map_end = (page_index + 1) << PAGE_CACHE_SHIFT;
|
||||
map_end = ((loff_t)page_index + 1) << PAGE_CACHE_SHIFT;
|
||||
if (map_end > end)
|
||||
map_end = end;
|
||||
|
||||
|
@ -277,7 +277,7 @@ static int o2cb_cluster_connect(struct ocfs2_cluster_connection *conn)
|
||||
u32 dlm_key;
|
||||
struct dlm_ctxt *dlm;
|
||||
struct o2dlm_private *priv;
|
||||
struct dlm_protocol_version dlm_version;
|
||||
struct dlm_protocol_version fs_version;
|
||||
|
||||
BUG_ON(conn == NULL);
|
||||
BUG_ON(o2cb_stack.sp_proto == NULL);
|
||||
@ -304,18 +304,18 @@ static int o2cb_cluster_connect(struct ocfs2_cluster_connection *conn)
|
||||
/* used by the dlm code to make message headers unique, each
|
||||
* node in this domain must agree on this. */
|
||||
dlm_key = crc32_le(0, conn->cc_name, conn->cc_namelen);
|
||||
dlm_version.pv_major = conn->cc_version.pv_major;
|
||||
dlm_version.pv_minor = conn->cc_version.pv_minor;
|
||||
fs_version.pv_major = conn->cc_version.pv_major;
|
||||
fs_version.pv_minor = conn->cc_version.pv_minor;
|
||||
|
||||
dlm = dlm_register_domain(conn->cc_name, dlm_key, &dlm_version);
|
||||
dlm = dlm_register_domain(conn->cc_name, dlm_key, &fs_version);
|
||||
if (IS_ERR(dlm)) {
|
||||
rc = PTR_ERR(dlm);
|
||||
mlog_errno(rc);
|
||||
goto out_free;
|
||||
}
|
||||
|
||||
conn->cc_version.pv_major = dlm_version.pv_major;
|
||||
conn->cc_version.pv_minor = dlm_version.pv_minor;
|
||||
conn->cc_version.pv_major = fs_version.pv_major;
|
||||
conn->cc_version.pv_minor = fs_version.pv_minor;
|
||||
conn->cc_lockspace = dlm;
|
||||
|
||||
dlm_register_eviction_cb(dlm, &priv->op_eviction_cb);
|
||||
|
@ -1062,7 +1062,7 @@ static int ocfs2_fill_super(struct super_block *sb, void *data, int silent)
|
||||
"file system, but write access is "
|
||||
"unavailable.\n");
|
||||
else
|
||||
mlog_errno(status);
|
||||
mlog_errno(status);
|
||||
goto read_super_error;
|
||||
}
|
||||
|
||||
|
@ -137,20 +137,20 @@ static void *ocfs2_fast_follow_link(struct dentry *dentry,
|
||||
}
|
||||
|
||||
memcpy(link, target, len);
|
||||
nd_set_link(nd, link);
|
||||
|
||||
bail:
|
||||
nd_set_link(nd, status ? ERR_PTR(status) : link);
|
||||
brelse(bh);
|
||||
|
||||
mlog_exit(status);
|
||||
return status ? ERR_PTR(status) : link;
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static void ocfs2_fast_put_link(struct dentry *dentry, struct nameidata *nd, void *cookie)
|
||||
{
|
||||
char *link = cookie;
|
||||
|
||||
kfree(link);
|
||||
char *link = nd_get_link(nd);
|
||||
if (!IS_ERR(link))
|
||||
kfree(link);
|
||||
}
|
||||
|
||||
const struct inode_operations ocfs2_symlink_inode_operations = {
|
||||
|
@ -267,8 +267,8 @@ static int ocfs2_buffer_cached(struct ocfs2_caching_info *ci,
|
||||
}
|
||||
|
||||
/* Warning: even if it returns true, this does *not* guarantee that
|
||||
* the block is stored in our inode metadata cache.
|
||||
*
|
||||
* the block is stored in our inode metadata cache.
|
||||
*
|
||||
* This can be called under lock_buffer()
|
||||
*/
|
||||
int ocfs2_buffer_uptodate(struct ocfs2_caching_info *ci,
|
||||
|
Loading…
Reference in New Issue
Block a user