linux_dsm_epyc7002/fs/fscache/cache.c

418 lines
11 KiB
C
Raw Normal View History

// SPDX-License-Identifier: GPL-2.0-or-later
/* FS-Cache cache handling
*
* Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
* Written by David Howells (dhowells@redhat.com)
*/
#define FSCACHE_DEBUG_LEVEL CACHE
#include <linux/module.h>
#include <linux/slab.h>
#include "internal.h"
/* List of the caches that have been added with fscache_add_cache() */
LIST_HEAD(fscache_cache_list);
/* Taken around lookups of and changes to the cache list and the tag list */
DECLARE_RWSEM(fscache_addremove_sem);
/* Waited on by fscache_withdraw_cache() until the cache's objects are gone */
DECLARE_WAIT_QUEUE_HEAD(fscache_cache_cleared_wq);
EXPORT_SYMBOL(fscache_cache_cleared_wq);
/* List of the cache tags created by __fscache_lookup_cache_tag() */
static LIST_HEAD(fscache_cache_tag_list);
/*
 * Look up a cache tag by name, creating and registering it if it does not
 * exist yet.
 * - a reference is taken on the returned tag on behalf of the caller
 * - ERR_PTR(-ENOMEM) is returned if a new tag could not be allocated
 */
struct fscache_cache_tag *__fscache_lookup_cache_tag(const char *name)
{
	struct fscache_cache_tag *cur, *candidate;

	/* first pass: search the existing tags under the read lock only */
	down_read(&fscache_addremove_sem);
	list_for_each_entry(cur, &fscache_cache_tag_list, link) {
		if (strcmp(cur->name, name) != 0)
			continue;
		atomic_inc(&cur->usage);
		up_read(&fscache_addremove_sem);
		return cur;
	}
	up_read(&fscache_addremove_sem);

	/* no such tag - prepare a candidate whilst not holding any lock */
	candidate = kzalloc(sizeof(*candidate) + strlen(name) + 1, GFP_KERNEL);
	if (!candidate) {
		/* out of memory: hand back the error-pointer dummy tag */
		return ERR_PTR(-ENOMEM);
	}

	strcpy(candidate->name, name);
	atomic_set(&candidate->usage, 1);

	/* second pass under the write lock: the tag may have been added by
	 * someone else whilst the lock was dropped */
	down_write(&fscache_addremove_sem);
	list_for_each_entry(cur, &fscache_cache_tag_list, link) {
		if (strcmp(cur->name, name) == 0)
			goto already_present;
	}

	list_add_tail(&candidate->link, &fscache_cache_tag_list);
	up_write(&fscache_addremove_sem);
	return candidate;

already_present:
	/* use the tag that is already listed and discard the candidate */
	atomic_inc(&cur->usage);
	up_write(&fscache_addremove_sem);
	kfree(candidate);
	return cur;
}
/*
 * Drop a reference to a cache tag.
 * - the ERR_PTR(-ENOMEM) dummy tag is accepted and ignored
 * - the tag is unlisted and freed when its last reference goes away
 */
void __fscache_release_cache_tag(struct fscache_cache_tag *tag)
{
	if (tag == ERR_PTR(-ENOMEM))
		return;

	down_write(&fscache_addremove_sem);

	if (!atomic_dec_and_test(&tag->usage)) {
		/* other references remain - leave the tag where it is */
		up_write(&fscache_addremove_sem);
		return;
	}

	/* that was the last reference: unlist under the lock, free outside */
	list_del_init(&tag->link);
	up_write(&fscache_addremove_sem);
	kfree(tag);
}
/*
 * select a cache in which to store an object
 * - the cache addremove semaphore must be at least read-locked by the caller
 * - the object will never be an index
 *
 * The cookie passed in is the parent of the object being stored.  The result
 * is chosen as follows:
 * - NULL if no caches are registered
 * - if the cookie has backing objects, the cache of the first one, or NULL if
 *   that object is dying or its cache has seen an I/O error
 * - NULL if the cookie is unbacked and is not an index
 * - otherwise the cache of the tag chosen by the netfs's select_cache() op,
 *   or NULL if that tag is the out-of-memory dummy, has no cache attached or
 *   its cache has seen an I/O error
 * - the first cache on the list if the netfs expresses no preference
 */
struct fscache_cache *fscache_select_cache_for_object(
	struct fscache_cookie *cookie)
{
	struct fscache_cache_tag *tag;
	struct fscache_object *object;
	struct fscache_cache *cache;

	_enter("");

	if (list_empty(&fscache_cache_list)) {
		_leave(" = NULL [no cache]");
		return NULL;
	}

	/* we check the parent to determine the cache to use */
	spin_lock(&cookie->lock);

	/* the first in the parent's backing list should be the preferred
	 * cache */
	if (!hlist_empty(&cookie->backing_objects)) {
		object = hlist_entry(cookie->backing_objects.first,
				     struct fscache_object, cookie_link);

		cache = object->cache;
		if (fscache_object_is_dying(object) ||
		    test_bit(FSCACHE_IOERROR, &cache->flags))
			cache = NULL;

		spin_unlock(&cookie->lock);
		_leave(" = %p [parent]", cache);
		return cache;
	}

	/* the parent is unbacked */
	if (cookie->type != FSCACHE_COOKIE_TYPE_INDEX) {
		/* cookie not an index and is unbacked */
		spin_unlock(&cookie->lock);
		_leave(" = NULL [cookie ub,ni]");
		return NULL;
	}

	spin_unlock(&cookie->lock);

	if (!cookie->def->select_cache)
		goto no_preference;

	/* ask the netfs for its preference */
	tag = cookie->def->select_cache(cookie->parent->netfs_data,
					cookie->netfs_data);
	if (!tag)
		goto no_preference;

	if (tag == ERR_PTR(-ENOMEM)) {
		_leave(" = NULL [nomem tag]");
		return NULL;
	}

	if (!tag->cache) {
		_leave(" = NULL [unbacked tag]");
		return NULL;
	}

	if (test_bit(FSCACHE_IOERROR, &tag->cache->flags)) {
		/* trace this exit like every other one so that the
		 * _enter/_leave debugging output stays balanced */
		_leave(" = NULL [tag ioerror]");
		return NULL;
	}

	_leave(" = %p [specific]", tag->cache);
	return tag->cache;

no_preference:
	/* netfs has no preference - just select first cache */
	cache = list_entry(fscache_cache_list.next,
			   struct fscache_cache, link);
	_leave(" = %p [first]", cache);
	return cache;
}
/**
* fscache_init_cache - Initialise a cache record
* @cache: The cache record to be initialised
* @ops: The cache operations to be installed in that record
* @idfmt: Format string to define identifier
* @...: sprintf-style arguments
*
* Initialise a record of a cache and fill in the name.
*
* See Documentation/filesystems/caching/backend-api.rst for a complete
* description.
*/
void fscache_init_cache(struct fscache_cache *cache,
const struct fscache_cache_ops *ops,
const char *idfmt,
...)
{
va_list va;
memset(cache, 0, sizeof(*cache));
cache->ops = ops;
va_start(va, idfmt);
vsnprintf(cache->identifier, sizeof(cache->identifier), idfmt, va);
va_end(va);
FS-Cache: Add and document asynchronous operation handling Add and document asynchronous operation handling for use by FS-Cache's data storage and retrieval routines. The following documentation is added to: Documentation/filesystems/caching/operations.txt ================================ ASYNCHRONOUS OPERATIONS HANDLING ================================ ======== OVERVIEW ======== FS-Cache has an asynchronous operations handling facility that it uses for its data storage and retrieval routines. Its operations are represented by fscache_operation structs, though these are usually embedded into some other structure. This facility is available to and expected to be be used by the cache backends, and FS-Cache will create operations and pass them off to the appropriate cache backend for completion. To make use of this facility, <linux/fscache-cache.h> should be #included. =============================== OPERATION RECORD INITIALISATION =============================== An operation is recorded in an fscache_operation struct: struct fscache_operation { union { struct work_struct fast_work; struct slow_work slow_work; }; unsigned long flags; fscache_operation_processor_t processor; ... }; Someone wanting to issue an operation should allocate something with this struct embedded in it. They should initialise it by calling: void fscache_operation_init(struct fscache_operation *op, fscache_operation_release_t release); with the operation to be initialised and the release function to use. The op->flags parameter should be set to indicate the CPU time provision and the exclusivity (see the Parameters section). The op->fast_work, op->slow_work and op->processor flags should be set as appropriate for the CPU time provision (see the Parameters section). FSCACHE_OP_WAITING may be set in op->flags prior to each submission of the operation and waited for afterwards. ========== PARAMETERS ========== There are a number of parameters that can be set in the operation record's flag parameter. 
There are three options for the provision of CPU time in these operations: (1) The operation may be done synchronously (FSCACHE_OP_MYTHREAD). A thread may decide it wants to handle an operation itself without deferring it to another thread. This is, for example, used in read operations for calling readpages() on the backing filesystem in CacheFiles. Although readpages() does an asynchronous data fetch, the determination of whether pages exist is done synchronously - and the netfs does not proceed until this has been determined. If this option is to be used, FSCACHE_OP_WAITING must be set in op->flags before submitting the operation, and the operating thread must wait for it to be cleared before proceeding: wait_on_bit(&op->flags, FSCACHE_OP_WAITING, fscache_wait_bit, TASK_UNINTERRUPTIBLE); (2) The operation may be fast asynchronous (FSCACHE_OP_FAST), in which case it will be given to keventd to process. Such an operation is not permitted to sleep on I/O. This is, for example, used by CacheFiles to copy data from a backing fs page to a netfs page after the backing fs has read the page in. If this option is used, op->fast_work and op->processor must be initialised before submitting the operation: INIT_WORK(&op->fast_work, do_some_work); (3) The operation may be slow asynchronous (FSCACHE_OP_SLOW), in which case it will be given to the slow work facility to process. Such an operation is permitted to sleep on I/O. This is, for example, used by FS-Cache to handle background writes of pages that have just been fetched from a remote server. If this option is used, op->slow_work and op->processor must be initialised before submitting the operation: fscache_operation_init_slow(op, processor) Furthermore, operations may be one of two types: (1) Exclusive (FSCACHE_OP_EXCLUSIVE). Operations of this type may not run in conjunction with any other operation on the object being operated upon. 
An example of this is the attribute change operation, in which the file being written to may need truncation. (2) Shareable. Operations of this type may be running simultaneously. It's up to the operation implementation to prevent interference between other operations running at the same time. ========= PROCEDURE ========= Operations are used through the following procedure: (1) The submitting thread must allocate the operation and initialise it itself. Normally this would be part of a more specific structure with the generic op embedded within. (2) The submitting thread must then submit the operation for processing using one of the following two functions: int fscache_submit_op(struct fscache_object *object, struct fscache_operation *op); int fscache_submit_exclusive_op(struct fscache_object *object, struct fscache_operation *op); The first function should be used to submit non-exclusive ops and the second to submit exclusive ones. The caller must still set the FSCACHE_OP_EXCLUSIVE flag. If successful, both functions will assign the operation to the specified object and return 0. -ENOBUFS will be returned if the object specified is permanently unavailable. The operation manager will defer operations on an object that is still undergoing lookup or creation. The operation will also be deferred if an operation of conflicting exclusivity is in progress on the object. If the operation is asynchronous, the manager will retain a reference to it, so the caller should put their reference to it by passing it to: void fscache_put_operation(struct fscache_operation *op); (3) If the submitting thread wants to do the work itself, and has marked the operation with FSCACHE_OP_MYTHREAD, then it should monitor FSCACHE_OP_WAITING as described above and check the state of the object if necessary (the object might have died whilst the thread was waiting). When it has finished doing its processing, it should call fscache_put_operation() on it. 
(4) The operation holds an effective lock upon the object, preventing other exclusive ops conflicting until it is released. The operation can be enqueued for further immediate asynchronous processing by adjusting the CPU time provisioning option if necessary, eg: op->flags &= ~FSCACHE_OP_TYPE; op->flags |= ~FSCACHE_OP_FAST; and calling: void fscache_enqueue_operation(struct fscache_operation *op) This can be used to allow other things to have use of the worker thread pools. ===================== ASYNCHRONOUS CALLBACK ===================== When used in asynchronous mode, the worker thread pool will invoke the processor method with a pointer to the operation. This should then get at the container struct by using container_of(): static void fscache_write_op(struct fscache_operation *_op) { struct fscache_storage *op = container_of(_op, struct fscache_storage, op); ... } The caller holds a reference on the operation, and will invoke fscache_put_operation() when the processor function returns. The processor function is at liberty to call fscache_enqueue_operation() or to take extra references. Signed-off-by: David Howells <dhowells@redhat.com> Acked-by: Steve Dickson <steved@redhat.com> Acked-by: Trond Myklebust <Trond.Myklebust@netapp.com> Acked-by: Al Viro <viro@zeniv.linux.org.uk> Tested-by: Daire Byrne <Daire.Byrne@framestore.com>
2009-04-03 22:42:39 +07:00
INIT_WORK(&cache->op_gc, fscache_operation_gc);
INIT_LIST_HEAD(&cache->link);
INIT_LIST_HEAD(&cache->object_list);
INIT_LIST_HEAD(&cache->op_gc_list);
spin_lock_init(&cache->object_list_lock);
spin_lock_init(&cache->op_gc_list_lock);
}
EXPORT_SYMBOL(fscache_init_cache);
/**
* fscache_add_cache - Declare a cache as being open for business
* @cache: The record describing the cache
* @ifsdef: The record of the cache object describing the top-level index
* @tagname: The tag describing this cache
*
* Add a cache to the system, making it available for netfs's to use.
*
* See Documentation/filesystems/caching/backend-api.rst for a complete
* description.
*/
int fscache_add_cache(struct fscache_cache *cache,
struct fscache_object *ifsdef,
const char *tagname)
{
struct fscache_cache_tag *tag;
fscache: Fix reference overput in fscache_attach_object() error handling When a cookie is allocated that causes fscache_object structs to be allocated, those objects are initialised with the cookie pointer, but aren't blessed with a ref on that cookie unless the attachment is successfully completed in fscache_attach_object(). If attachment fails because the parent object was dying or there was a collision, fscache_attach_object() returns without incrementing the cookie counter - but upon failure of this function, the object is released which then puts the cookie, whether or not a ref was taken on the cookie. Fix this by taking a ref on the cookie when it is assigned in fscache_object_init(), even when we're creating a root object. Analysis from Kiran Kumar: This bug has been seen in 4.4.0-124-generic #148-Ubuntu kernel BugLink: https://bugs.launchpad.net/ubuntu/+source/linux/+bug/1776277 fscache cookie ref count updated incorrectly during fscache object allocation resulting in following Oops. kernel BUG at /build/linux-Y09MKI/linux-4.4.0/fs/fscache/internal.h:321! kernel BUG at /build/linux-Y09MKI/linux-4.4.0/fs/fscache/cookie.c:639! [Cause] Two threads are trying to do operate on a cookie and two objects. (1) One thread tries to unmount the filesystem and in process goes over a huge list of objects marking them dead and deleting the objects. cookie->usage is also decremented in following path: nfs_fscache_release_super_cookie -> __fscache_relinquish_cookie ->__fscache_cookie_put ->BUG_ON(atomic_read(&cookie->usage) <= 0); (2) A second thread tries to lookup an object for reading data in following path: fscache_alloc_object 1) cachefiles_alloc_object -> fscache_object_init -> assign cookie, but usage not bumped. 
2) fscache_attach_object -> fails in cant_attach_object because the cookie's backing object or cookie's->parent object are going away 3) fscache_put_object -> cachefiles_put_object ->fscache_object_destroy ->fscache_cookie_put ->BUG_ON(atomic_read(&cookie->usage) <= 0); [NOTE from dhowells] It's unclear as to the circumstances in which (2) can take place, given that thread (1) is in nfs_kill_super(), however a conflicting NFS mount with slightly different parameters that creates a different superblock would do it. A backtrace from Kiran seems to show that this is a possibility: kernel BUG at/build/linux-Y09MKI/linux-4.4.0/fs/fscache/cookie.c:639! ... RIP: __fscache_cookie_put+0x3a/0x40 [fscache] Call Trace: __fscache_relinquish_cookie+0x87/0x120 [fscache] nfs_fscache_release_super_cookie+0x2d/0xb0 [nfs] nfs_kill_super+0x29/0x40 [nfs] deactivate_locked_super+0x48/0x80 deactivate_super+0x5c/0x60 cleanup_mnt+0x3f/0x90 __cleanup_mnt+0x12/0x20 task_work_run+0x86/0xb0 exit_to_usermode_loop+0xc2/0xd0 syscall_return_slowpath+0x4e/0x60 int_ret_from_sys_call+0x25/0x9f [Fix] Bump up the cookie usage in fscache_object_init, when it is first being assigned a cookie atomically such that the cookie is added and bumped up if its refcount is not zero. Remove the assignment in fscache_attach_object(). [Testcase] I have run ~100 hours of NFS stress tests and not seen this bug recur. [Regression Potential] - Limited to fscache/cachefiles. Fixes: ccc4fc3d11e9 ("FS-Cache: Implement the cookie management part of the netfs API") Signed-off-by: Kiran Kumar Modukuri <kiran.modukuri@gmail.com> Signed-off-by: David Howells <dhowells@redhat.com>
2018-06-22 03:31:44 +07:00
ASSERTCMP(ifsdef->cookie, ==, &fscache_fsdef_index);
BUG_ON(!cache->ops);
BUG_ON(!ifsdef);
cache->flags = 0;
FS-Cache: Fix object state machine to have separate work and wait states Fix object state machine to have separate work and wait states as that makes it easier to envision. There are now three kinds of state: (1) Work state. This is an execution state. No event processing is performed by a work state. The function attached to a work state returns a pointer indicating the next state to which the OSM should transition. Returning NO_TRANSIT repeats the current state, but goes back to the scheduler first. (2) Wait state. This is an event processing state. No execution is performed by a wait state. Wait states are just tables of "if event X occurs, clear it and transition to state Y". The dispatcher returns to the scheduler if none of the events in which the wait state has an interest are currently pending. (3) Out-of-band state. This is a special work state. Transitions to normal states can be overridden when an unexpected event occurs (eg. I/O error). Instead the dispatcher disables and clears the OOB event and transits to the specified work state. This then acts as an ordinary work state, though object->state points to the overridden destination. Returning NO_TRANSIT resumes the overridden transition. In addition, the states have names in their definitions, so there's no need for tables of state names. Further, the EV_REQUEUE event is no longer necessary as that is automatic for work states. Since the states are now separate structs rather than values in an enum, it's not possible to use comparisons other than (non-)equality between them, so use some object->flags to indicate what phase an object is in. The EV_RELEASE, EV_RETIRE and EV_WITHDRAW events have been squished into one (EV_KILL). An object flag now carries the information about retirement. Similarly, the RELEASING, RECYCLING and WITHDRAWING states have been merged into an KILL_OBJECT state and additional states have been added for handling waiting dependent objects (JUMPSTART_DEPS and KILL_DEPENDENTS). 
A state has also been added for synchronising with parent object initialisation (WAIT_FOR_PARENT) and another for initiating look up (PARENT_READY). Signed-off-by: David Howells <dhowells@redhat.com> Tested-By: Milosz Tanski <milosz@adfin.com> Acked-by: Jeff Layton <jlayton@redhat.com>
2013-05-11 01:50:26 +07:00
ifsdef->event_mask =
((1 << NR_FSCACHE_OBJECT_EVENTS) - 1) &
~(1 << FSCACHE_OBJECT_EV_CLEARED);
__set_bit(FSCACHE_OBJECT_IS_AVAILABLE, &ifsdef->flags);
if (!tagname)
tagname = cache->identifier;
BUG_ON(!tagname[0]);
_enter("{%s.%s},,%s", cache->ops->name, cache->identifier, tagname);
/* we use the cache tag to uniquely identify caches */
tag = __fscache_lookup_cache_tag(tagname);
if (IS_ERR(tag))
goto nomem;
if (test_and_set_bit(FSCACHE_TAG_RESERVED, &tag->flags))
goto tag_in_use;
cache->kobj = kobject_create_and_add(tagname, fscache_root);
if (!cache->kobj)
goto error;
ifsdef->cache = cache;
cache->fsdef = ifsdef;
down_write(&fscache_addremove_sem);
tag->cache = cache;
cache->tag = tag;
/* add the cache to the list */
list_add(&cache->link, &fscache_cache_list);
/* add the cache's netfs definition index object to the cache's
* list */
spin_lock(&cache->object_list_lock);
list_add_tail(&ifsdef->cache_link, &cache->object_list);
spin_unlock(&cache->object_list_lock);
fscache_objlist_add(ifsdef);
/* add the cache's netfs definition index object to the top level index
* cookie as a known backing object */
spin_lock(&fscache_fsdef_index.lock);
hlist_add_head(&ifsdef->cookie_link,
&fscache_fsdef_index.backing_objects);
atomic_inc(&fscache_fsdef_index.usage);
/* done */
spin_unlock(&fscache_fsdef_index.lock);
up_write(&fscache_addremove_sem);
pr_notice("Cache \"%s\" added (type %s)\n",
cache->tag->name, cache->ops->name);
kobject_uevent(cache->kobj, KOBJ_ADD);
_leave(" = 0 [%s]", cache->identifier);
return 0;
tag_in_use:
pr_err("Cache tag '%s' already in use\n", tagname);
__fscache_release_cache_tag(tag);
_leave(" = -EXIST");
return -EEXIST;
error:
__fscache_release_cache_tag(tag);
_leave(" = -EINVAL");
return -EINVAL;
nomem:
_leave(" = -ENOMEM");
return -ENOMEM;
}
EXPORT_SYMBOL(fscache_add_cache);
/**
* fscache_io_error - Note a cache I/O error
* @cache: The record describing the cache
*
* Note that an I/O error occurred in a cache and that it should no longer be
* used for anything. This also reports the error into the kernel log.
*
* See Documentation/filesystems/caching/backend-api.rst for a complete
* description.
*/
void fscache_io_error(struct fscache_cache *cache)
{
if (!test_and_set_bit(FSCACHE_IOERROR, &cache->flags))
pr_err("Cache '%s' stopped due to I/O error\n",
cache->ops->name);
}
EXPORT_SYMBOL(fscache_io_error);
/*
* request withdrawal of all the objects in a cache
* - all the objects being withdrawn are moved onto the supplied list
*/
static void fscache_withdraw_all_objects(struct fscache_cache *cache,
struct list_head *dying_objects)
{
struct fscache_object *object;
while (!list_empty(&cache->object_list)) {
FS-Cache: Fix object state machine to have separate work and wait states Fix object state machine to have separate work and wait states as that makes it easier to envision. There are now three kinds of state: (1) Work state. This is an execution state. No event processing is performed by a work state. The function attached to a work state returns a pointer indicating the next state to which the OSM should transition. Returning NO_TRANSIT repeats the current state, but goes back to the scheduler first. (2) Wait state. This is an event processing state. No execution is performed by a wait state. Wait states are just tables of "if event X occurs, clear it and transition to state Y". The dispatcher returns to the scheduler if none of the events in which the wait state has an interest are currently pending. (3) Out-of-band state. This is a special work state. Transitions to normal states can be overridden when an unexpected event occurs (eg. I/O error). Instead the dispatcher disables and clears the OOB event and transits to the specified work state. This then acts as an ordinary work state, though object->state points to the overridden destination. Returning NO_TRANSIT resumes the overridden transition. In addition, the states have names in their definitions, so there's no need for tables of state names. Further, the EV_REQUEUE event is no longer necessary as that is automatic for work states. Since the states are now separate structs rather than values in an enum, it's not possible to use comparisons other than (non-)equality between them, so use some object->flags to indicate what phase an object is in. The EV_RELEASE, EV_RETIRE and EV_WITHDRAW events have been squished into one (EV_KILL). An object flag now carries the information about retirement. Similarly, the RELEASING, RECYCLING and WITHDRAWING states have been merged into an KILL_OBJECT state and additional states have been added for handling waiting dependent objects (JUMPSTART_DEPS and KILL_DEPENDENTS). 
A state has also been added for synchronising with parent object initialisation (WAIT_FOR_PARENT) and another for initiating look up (PARENT_READY). Signed-off-by: David Howells <dhowells@redhat.com> Tested-By: Milosz Tanski <milosz@adfin.com> Acked-by: Jeff Layton <jlayton@redhat.com>
2013-05-11 01:50:26 +07:00
spin_lock(&cache->object_list_lock);
FS-Cache: Fix object state machine to have separate work and wait states Fix object state machine to have separate work and wait states as that makes it easier to envision. There are now three kinds of state: (1) Work state. This is an execution state. No event processing is performed by a work state. The function attached to a work state returns a pointer indicating the next state to which the OSM should transition. Returning NO_TRANSIT repeats the current state, but goes back to the scheduler first. (2) Wait state. This is an event processing state. No execution is performed by a wait state. Wait states are just tables of "if event X occurs, clear it and transition to state Y". The dispatcher returns to the scheduler if none of the events in which the wait state has an interest are currently pending. (3) Out-of-band state. This is a special work state. Transitions to normal states can be overridden when an unexpected event occurs (eg. I/O error). Instead the dispatcher disables and clears the OOB event and transits to the specified work state. This then acts as an ordinary work state, though object->state points to the overridden destination. Returning NO_TRANSIT resumes the overridden transition. In addition, the states have names in their definitions, so there's no need for tables of state names. Further, the EV_REQUEUE event is no longer necessary as that is automatic for work states. Since the states are now separate structs rather than values in an enum, it's not possible to use comparisons other than (non-)equality between them, so use some object->flags to indicate what phase an object is in. The EV_RELEASE, EV_RETIRE and EV_WITHDRAW events have been squished into one (EV_KILL). An object flag now carries the information about retirement. Similarly, the RELEASING, RECYCLING and WITHDRAWING states have been merged into an KILL_OBJECT state and additional states have been added for handling waiting dependent objects (JUMPSTART_DEPS and KILL_DEPENDENTS). 
A state has also been added for synchronising with parent object initialisation (WAIT_FOR_PARENT) and another for initiating look up (PARENT_READY). Signed-off-by: David Howells <dhowells@redhat.com> Tested-By: Milosz Tanski <milosz@adfin.com> Acked-by: Jeff Layton <jlayton@redhat.com>
2013-05-11 01:50:26 +07:00
if (!list_empty(&cache->object_list)) {
object = list_entry(cache->object_list.next,
struct fscache_object, cache_link);
list_move_tail(&object->cache_link, dying_objects);
FS-Cache: Fix object state machine to have separate work and wait states Fix object state machine to have separate work and wait states as that makes it easier to envision. There are now three kinds of state: (1) Work state. This is an execution state. No event processing is performed by a work state. The function attached to a work state returns a pointer indicating the next state to which the OSM should transition. Returning NO_TRANSIT repeats the current state, but goes back to the scheduler first. (2) Wait state. This is an event processing state. No execution is performed by a wait state. Wait states are just tables of "if event X occurs, clear it and transition to state Y". The dispatcher returns to the scheduler if none of the events in which the wait state has an interest are currently pending. (3) Out-of-band state. This is a special work state. Transitions to normal states can be overridden when an unexpected event occurs (eg. I/O error). Instead the dispatcher disables and clears the OOB event and transits to the specified work state. This then acts as an ordinary work state, though object->state points to the overridden destination. Returning NO_TRANSIT resumes the overridden transition. In addition, the states have names in their definitions, so there's no need for tables of state names. Further, the EV_REQUEUE event is no longer necessary as that is automatic for work states. Since the states are now separate structs rather than values in an enum, it's not possible to use comparisons other than (non-)equality between them, so use some object->flags to indicate what phase an object is in. The EV_RELEASE, EV_RETIRE and EV_WITHDRAW events have been squished into one (EV_KILL). An object flag now carries the information about retirement. Similarly, the RELEASING, RECYCLING and WITHDRAWING states have been merged into an KILL_OBJECT state and additional states have been added for handling waiting dependent objects (JUMPSTART_DEPS and KILL_DEPENDENTS). 
A state has also been added for synchronising with parent object initialisation (WAIT_FOR_PARENT) and another for initiating look up (PARENT_READY). Signed-off-by: David Howells <dhowells@redhat.com> Tested-By: Milosz Tanski <milosz@adfin.com> Acked-by: Jeff Layton <jlayton@redhat.com>
2013-05-11 01:50:26 +07:00
_debug("withdraw %p", object->cookie);
/* This must be done under object_list_lock to prevent
* a race with fscache_drop_object().
*/
fscache_raise_event(object, FSCACHE_OBJECT_EV_KILL);
}
FS-Cache: Fix object state machine to have separate work and wait states Fix object state machine to have separate work and wait states as that makes it easier to envision. There are now three kinds of state: (1) Work state. This is an execution state. No event processing is performed by a work state. The function attached to a work state returns a pointer indicating the next state to which the OSM should transition. Returning NO_TRANSIT repeats the current state, but goes back to the scheduler first. (2) Wait state. This is an event processing state. No execution is performed by a wait state. Wait states are just tables of "if event X occurs, clear it and transition to state Y". The dispatcher returns to the scheduler if none of the events in which the wait state has an interest are currently pending. (3) Out-of-band state. This is a special work state. Transitions to normal states can be overridden when an unexpected event occurs (eg. I/O error). Instead the dispatcher disables and clears the OOB event and transits to the specified work state. This then acts as an ordinary work state, though object->state points to the overridden destination. Returning NO_TRANSIT resumes the overridden transition. In addition, the states have names in their definitions, so there's no need for tables of state names. Further, the EV_REQUEUE event is no longer necessary as that is automatic for work states. Since the states are now separate structs rather than values in an enum, it's not possible to use comparisons other than (non-)equality between them, so use some object->flags to indicate what phase an object is in. The EV_RELEASE, EV_RETIRE and EV_WITHDRAW events have been squished into one (EV_KILL). An object flag now carries the information about retirement. Similarly, the RELEASING, RECYCLING and WITHDRAWING states have been merged into an KILL_OBJECT state and additional states have been added for handling waiting dependent objects (JUMPSTART_DEPS and KILL_DEPENDENTS). 
A state has also been added for synchronising with parent object initialisation (WAIT_FOR_PARENT) and another for initiating look up (PARENT_READY). Signed-off-by: David Howells <dhowells@redhat.com> Tested-By: Milosz Tanski <milosz@adfin.com> Acked-by: Jeff Layton <jlayton@redhat.com>
2013-05-11 01:50:26 +07:00
spin_unlock(&cache->object_list_lock);
cond_resched();
}
}
/**
* fscache_withdraw_cache - Withdraw a cache from the active service
* @cache: The record describing the cache
*
* Withdraw a cache from service, unbinding all its cache objects from the
* netfs cookies they're currently representing.
*
* See Documentation/filesystems/caching/backend-api.rst for a complete
* description.
*/
void fscache_withdraw_cache(struct fscache_cache *cache)
{
LIST_HEAD(dying_objects);
_enter("");
pr_notice("Withdrawing cache \"%s\"\n",
cache->tag->name);
/* make the cache unavailable for cookie acquisition */
if (test_and_set_bit(FSCACHE_CACHE_WITHDRAWN, &cache->flags))
BUG();
down_write(&fscache_addremove_sem);
list_del_init(&cache->link);
cache->tag->cache = NULL;
up_write(&fscache_addremove_sem);
/* make sure all pages pinned by operations on behalf of the netfs are
* written to disk */
fscache_stat(&fscache_n_cop_sync_cache);
cache->ops->sync_cache(cache);
fscache_stat_d(&fscache_n_cop_sync_cache);
/* dissociate all the netfs pages backed by this cache from the block
* mappings in the cache */
fscache_stat(&fscache_n_cop_dissociate_pages);
cache->ops->dissociate_pages(cache);
fscache_stat_d(&fscache_n_cop_dissociate_pages);
/* we now have to destroy all the active objects pertaining to this
* cache - which we do by passing them off to thread pool to be
* disposed of */
_debug("destroy");
fscache_withdraw_all_objects(cache, &dying_objects);
/* wait for all extant objects to finish their outstanding operations
* and go away */
_debug("wait for finish");
wait_event(fscache_cache_cleared_wq,
atomic_read(&cache->object_count) == 0);
_debug("wait for clearance");
wait_event(fscache_cache_cleared_wq,
list_empty(&cache->object_list));
_debug("cleared");
ASSERT(list_empty(&dying_objects));
kobject_put(cache->kobj);
clear_bit(FSCACHE_TAG_RESERVED, &cache->tag->flags);
fscache_release_cache_tag(cache->tag);
cache->tag = NULL;
_leave("");
}
EXPORT_SYMBOL(fscache_withdraw_cache);