2009-10-07 01:31:10 +07:00
|
|
|
#ifndef _FS_CEPH_OSD_CLIENT_H
|
|
|
|
#define _FS_CEPH_OSD_CLIENT_H
|
|
|
|
|
|
|
|
#include <linux/completion.h>
|
2009-12-08 04:37:03 +07:00
|
|
|
#include <linux/kref.h>
|
2009-10-07 01:31:10 +07:00
|
|
|
#include <linux/mempool.h>
|
|
|
|
#include <linux/rbtree.h>
|
|
|
|
|
2012-05-17 03:16:38 +07:00
|
|
|
#include <linux/ceph/types.h>
|
|
|
|
#include <linux/ceph/osdmap.h>
|
|
|
|
#include <linux/ceph/messenger.h>
|
2016-06-08 02:57:15 +07:00
|
|
|
#include <linux/ceph/msgpool.h>
|
2012-05-17 03:16:38 +07:00
|
|
|
#include <linux/ceph/auth.h>
|
2012-11-14 10:11:15 +07:00
|
|
|
#include <linux/ceph/pagelist.h>
|
2009-10-07 01:31:10 +07:00
|
|
|
|
|
|
|
/*
 * Forward declarations.  The declarations in this header refer to these
 * types through pointers, in some cases before the full definition has
 * been seen (struct ceph_osd_request and struct ceph_osd_client are
 * defined further down in this file).
 */
struct ceph_msg;
struct ceph_snap_context;
struct ceph_osd_request;
struct ceph_osd_client;
|
|
|
|
|
|
|
|
/*
 * completion callback for async writepages
 *
 * Takes a pointer to an osd request.  This is the type of
 * ceph_osd_request::r_callback.
 */
typedef void (*ceph_osdc_callback_t)(struct ceph_osd_request *);
|
libceph: change how "safe" callback is used
An osd request currently has two callbacks. They inform the
initiator of the request when we've received confirmation for the
target osd that a request was received, and when the osd indicates
all changes described by the request are durable.
The only time the second callback is used is in the ceph file system
for a synchronous write. There's a race that makes some handling of
this case unsafe. This patch addresses this problem. The error
handling for this callback is also kind of gross, and this patch
changes that as well.
In ceph_sync_write(), if a safe callback is requested we want to add
the request on the ceph inode's unsafe items list. Because items on
this list must have their tid set (by ceph_osd_start_request()), the
request is added *after* the call to that function returns.  The
problem with this is that there's a race between starting the
request and adding it to the unsafe items list; the request may
already be complete before ceph_sync_write() even begins to put it
on the list.
To address this, we change the way the "safe" callback is used.
Rather than just calling it when the request is "safe", we use it to
notify the initiator the bounds (start and end) of the period during
which the request is *unsafe*. So the initiator gets notified just
before the request gets sent to the osd (when it is "unsafe"), and
again when it's known the results are durable (it's no longer
unsafe). The first call will get made in __send_request(), just
before the request message gets sent to the messenger for the first
time. That function is only called by __send_queued(), which is
always called with the osd client's request mutex held.
We then have this callback function insert the request on the ceph
inode's unsafe list when we're told the request is unsafe. This
will avoid the race because this call will be made under protection
of the osd client's request mutex. It also nicely groups the setup
and cleanup of the state associated with managing unsafe requests.
The name of the "safe" callback field is changed to "unsafe" to
better reflect its new purpose. It has a Boolean "unsafe" parameter
to indicate whether the request is becoming unsafe or is now safe.
Because the "msg" parameter wasn't used, we drop that.
This resolves the original problem reported in:
http://tracker.ceph.com/issues/4706
Reported-by: Yan, Zheng <zheng.z.yan@intel.com>
Signed-off-by: Alex Elder <elder@inktank.com>
Reviewed-by: Yan, Zheng <zheng.z.yan@intel.com>
Reviewed-by: Sage Weil <sage@inktank.com>
2013-04-15 23:20:42 +07:00
|
|
|
/*
 * "unsafe" callback: notifies the initiator of a request of the bounds
 * (start and end) of the period during which the request is unsafe.
 * The bool argument indicates whether the request is becoming unsafe
 * or is now safe (presumably true == becoming unsafe -- confirm
 * against the callers).  This is the type of
 * ceph_osd_request::r_unsafe_callback.
 */
typedef void (*ceph_osdc_unsafe_callback_t)(struct ceph_osd_request *, bool);
|
2009-10-07 01:31:10 +07:00
|
|
|
|
2016-04-28 21:07:23 +07:00
|
|
|
/*
 * Sentinel osd number.
 * NOTE(review): presumably the o_osd value used for
 * ceph_osd_client::homeless_osd -- confirm in osd_client.c.
 * Parenthesized so the negative literal cannot bind to neighbouring
 * tokens when the macro is expanded inside a larger expression.
 */
#define CEPH_HOMELESS_OSD	(-1)
|
|
|
|
|
2009-10-07 01:31:10 +07:00
|
|
|
/* a given osd we're communicating with */
|
|
|
|
struct ceph_osd {
|
|
|
|
atomic_t o_ref;
|
|
|
|
struct ceph_osd_client *o_osdc;
|
|
|
|
int o_osd;
|
|
|
|
int o_incarnation;
|
|
|
|
struct rb_node o_node;
|
|
|
|
struct ceph_connection o_con;
|
2016-04-28 21:07:26 +07:00
|
|
|
struct rb_root o_requests;
|
2016-05-26 06:15:02 +07:00
|
|
|
struct rb_root o_linger_requests;
|
2010-02-04 02:00:26 +07:00
|
|
|
struct list_head o_osd_lru;
|
2012-05-17 03:16:38 +07:00
|
|
|
struct ceph_auth_handshake o_auth;
|
2010-02-04 02:00:26 +07:00
|
|
|
unsigned long lru_ttl;
|
2010-02-27 06:32:31 +07:00
|
|
|
struct list_head o_keepalive_item;
|
2016-04-28 21:07:26 +07:00
|
|
|
struct mutex lock;
|
2009-10-07 01:31:10 +07:00
|
|
|
};
|
|
|
|
|
2016-02-09 23:50:15 +07:00
|
|
|
/*
 * Op-count constants.
 * NOTE(review): presumably CEPH_OSD_SLAB_OPS is the r_ops[] capacity of
 * slab-allocated requests and CEPH_OSD_MAX_OPS the upper bound on
 * ceph_osd_request::r_num_ops -- confirm against osd_client.c.
 */
#define CEPH_OSD_SLAB_OPS	2
#define CEPH_OSD_MAX_OPS	16
|
2013-02-26 07:11:12 +07:00
|
|
|
|
2013-02-15 01:16:43 +07:00
|
|
|
/*
 * Kind of payload described by a struct ceph_osd_data (stored in its
 * 'type' member).  The BIO kind only exists when CONFIG_BLOCK is set.
 */
enum ceph_osd_data_type {
	CEPH_OSD_DATA_TYPE_NONE = 0,
	CEPH_OSD_DATA_TYPE_PAGES,
	CEPH_OSD_DATA_TYPE_PAGELIST,
#ifdef CONFIG_BLOCK
	CEPH_OSD_DATA_TYPE_BIO,
#endif /* CONFIG_BLOCK */
};
|
|
|
|
|
2013-02-15 01:16:43 +07:00
|
|
|
struct ceph_osd_data {
|
2013-02-15 01:16:43 +07:00
|
|
|
enum ceph_osd_data_type type;
|
|
|
|
union {
|
2013-02-15 01:16:43 +07:00
|
|
|
struct {
|
|
|
|
struct page **pages;
|
2013-03-08 04:38:25 +07:00
|
|
|
u64 length;
|
2013-02-15 01:16:43 +07:00
|
|
|
u32 alignment;
|
|
|
|
bool pages_from_pool;
|
|
|
|
bool own_pages;
|
|
|
|
};
|
2013-03-09 02:35:36 +07:00
|
|
|
struct ceph_pagelist *pagelist;
|
2013-02-15 01:16:43 +07:00
|
|
|
#ifdef CONFIG_BLOCK
|
2013-03-15 02:09:06 +07:00
|
|
|
struct {
|
|
|
|
struct bio *bio; /* list of bios */
|
|
|
|
size_t bio_length; /* total in list */
|
|
|
|
};
|
2013-02-15 01:16:43 +07:00
|
|
|
#endif /* CONFIG_BLOCK */
|
|
|
|
};
|
|
|
|
};
|
|
|
|
|
2013-04-04 09:32:51 +07:00
|
|
|
struct ceph_osd_req_op {
|
|
|
|
u16 op; /* CEPH_OSD_OP_* */
|
2014-02-25 21:22:26 +07:00
|
|
|
u32 flags; /* CEPH_OSD_OP_FLAG_* */
|
2016-02-08 19:39:46 +07:00
|
|
|
u32 indata_len; /* request */
|
2016-01-07 15:48:57 +07:00
|
|
|
u32 outdata_len; /* reply */
|
|
|
|
s32 rval;
|
|
|
|
|
2013-04-04 09:32:51 +07:00
|
|
|
union {
|
2013-02-12 01:33:24 +07:00
|
|
|
struct ceph_osd_data raw_data_in;
|
2013-04-04 09:32:51 +07:00
|
|
|
struct {
|
|
|
|
u64 offset, length;
|
|
|
|
u64 truncate_size;
|
|
|
|
u32 truncate_seq;
|
2013-04-05 13:27:12 +07:00
|
|
|
struct ceph_osd_data osd_data;
|
2013-04-04 09:32:51 +07:00
|
|
|
} extent;
|
2014-11-12 13:00:43 +07:00
|
|
|
struct {
|
2014-12-19 18:00:41 +07:00
|
|
|
u32 name_len;
|
|
|
|
u32 value_len;
|
2014-11-12 13:00:43 +07:00
|
|
|
__u8 cmp_op; /* CEPH_OSD_CMPXATTR_OP_* */
|
|
|
|
__u8 cmp_mode; /* CEPH_OSD_CMPXATTR_MODE_* */
|
|
|
|
struct ceph_osd_data osd_data;
|
|
|
|
} xattr;
|
2013-04-04 09:32:51 +07:00
|
|
|
struct {
|
|
|
|
const char *class_name;
|
|
|
|
const char *method_name;
|
2013-04-05 13:27:12 +07:00
|
|
|
struct ceph_osd_data request_info;
|
2013-04-06 02:46:02 +07:00
|
|
|
struct ceph_osd_data request_data;
|
2013-04-05 13:27:12 +07:00
|
|
|
struct ceph_osd_data response_data;
|
2013-04-04 09:32:51 +07:00
|
|
|
__u8 class_len;
|
|
|
|
__u8 method_len;
|
2016-05-26 05:29:52 +07:00
|
|
|
u32 indata_len;
|
2013-04-04 09:32:51 +07:00
|
|
|
} cls;
|
|
|
|
struct {
|
|
|
|
u64 cookie;
|
2016-05-26 06:15:02 +07:00
|
|
|
__u8 op; /* CEPH_OSD_WATCH_OP_ */
|
|
|
|
u32 gen;
|
2013-04-04 09:32:51 +07:00
|
|
|
} watch;
|
2016-05-26 06:15:02 +07:00
|
|
|
struct {
|
|
|
|
struct ceph_osd_data request_data;
|
|
|
|
} notify_ack;
|
2016-04-28 21:07:27 +07:00
|
|
|
struct {
|
|
|
|
u64 cookie;
|
|
|
|
struct ceph_osd_data request_data;
|
|
|
|
struct ceph_osd_data response_data;
|
|
|
|
} notify;
|
2014-02-25 21:22:27 +07:00
|
|
|
struct {
|
|
|
|
u64 expected_object_size;
|
|
|
|
u64 expected_write_size;
|
|
|
|
} alloc_hint;
|
2013-04-04 09:32:51 +07:00
|
|
|
};
|
|
|
|
};
|
|
|
|
|
2016-04-28 21:07:23 +07:00
|
|
|
struct ceph_osd_request_target {
|
|
|
|
struct ceph_object_id base_oid;
|
|
|
|
struct ceph_object_locator base_oloc;
|
|
|
|
struct ceph_object_id target_oid;
|
|
|
|
struct ceph_object_locator target_oloc;
|
|
|
|
|
|
|
|
struct ceph_pg pgid;
|
|
|
|
u32 pg_num;
|
|
|
|
u32 pg_num_mask;
|
|
|
|
struct ceph_osds acting;
|
|
|
|
struct ceph_osds up;
|
|
|
|
int size;
|
|
|
|
int min_size;
|
|
|
|
bool sort_bitwise;
|
|
|
|
|
|
|
|
unsigned int flags; /* CEPH_OSD_FLAG_* */
|
|
|
|
bool paused;
|
|
|
|
|
|
|
|
int osd;
|
|
|
|
};
|
|
|
|
|
2009-10-07 01:31:10 +07:00
|
|
|
/* an in-flight request */
|
|
|
|
struct ceph_osd_request {
|
|
|
|
u64 r_tid; /* unique for this client */
|
|
|
|
struct rb_node r_node;
|
2016-04-28 21:07:27 +07:00
|
|
|
struct rb_node r_mc_node; /* map check */
|
2009-10-07 01:31:10 +07:00
|
|
|
struct ceph_osd *r_osd;
|
2016-04-28 21:07:23 +07:00
|
|
|
|
|
|
|
struct ceph_osd_request_target r_t;
|
|
|
|
#define r_base_oid r_t.base_oid
|
|
|
|
#define r_base_oloc r_t.base_oloc
|
|
|
|
#define r_flags r_t.flags
|
2009-10-07 01:31:10 +07:00
|
|
|
|
|
|
|
struct ceph_msg *r_request, *r_reply;
|
|
|
|
u32 r_sent; /* >0 if r_request is sending/sent */
|
2013-02-26 07:11:12 +07:00
|
|
|
|
2013-04-04 09:32:51 +07:00
|
|
|
/* request osd ops array */
|
|
|
|
unsigned int r_num_ops;
|
|
|
|
|
2013-02-26 07:11:12 +07:00
|
|
|
int r_result;
|
2016-04-28 21:07:24 +07:00
|
|
|
bool r_got_reply;
|
2009-10-07 01:31:10 +07:00
|
|
|
|
|
|
|
struct ceph_osd_client *r_osdc;
|
2009-12-08 04:37:03 +07:00
|
|
|
struct kref r_kref;
|
2009-10-07 01:31:10 +07:00
|
|
|
bool r_mempool;
|
2016-04-28 21:07:24 +07:00
|
|
|
struct completion r_completion;
|
|
|
|
struct completion r_safe_completion; /* fsync waiter */
|
libceph: change how "safe" callback is used
An osd request currently has two callbacks. They inform the
initiator of the request when we've received confirmation for the
target osd that a request was received, and when the osd indicates
all changes described by the request are durable.
The only time the second callback is used is in the ceph file system
for a synchronous write. There's a race that makes some handling of
this case unsafe. This patch addresses this problem. The error
handling for this callback is also kind of gross, and this patch
changes that as well.
In ceph_sync_write(), if a safe callback is requested we want to add
the request on the ceph inode's unsafe items list. Because items on
this list must have their tid set (by ceph_osd_start_request()), the
request added *after* the call to that function returns. The
problem with this is that there's a race between starting the
request and adding it to the unsafe items list; the request may
already be complete before ceph_sync_write() even begins to put it
on the list.
To address this, we change the way the "safe" callback is used.
Rather than just calling it when the request is "safe", we use it to
notify the initiator the bounds (start and end) of the period during
which the request is *unsafe*. So the initiator gets notified just
before the request gets sent to the osd (when it is "unsafe"), and
again when it's known the results are durable (it's no longer
unsafe). The first call will get made in __send_request(), just
before the request message gets sent to the messenger for the first
time. That function is only called by __send_queued(), which is
always called with the osd client's request mutex held.
We then have this callback function insert the request on the ceph
inode's unsafe list when we're told the request is unsafe. This
will avoid the race because this call will be made under protection
of the osd client's request mutex. It also nicely groups the setup
and cleanup of the state associated with managing unsafe requests.
The name of the "safe" callback field is changed to "unsafe" to
better reflect its new purpose. It has a Boolean "unsafe" parameter
to indicate whether the request is becoming unsafe or is now safe.
Because the "msg" parameter wasn't used, we drop that.
This resolves the original problem reportedin:
http://tracker.ceph.com/issues/4706
Reported-by: Yan, Zheng <zheng.z.yan@intel.com>
Signed-off-by: Alex Elder <elder@inktank.com>
Reviewed-by: Yan, Zheng <zheng.z.yan@intel.com>
Reviewed-by: Sage Weil <sage@inktank.com>
2013-04-15 23:20:42 +07:00
|
|
|
ceph_osdc_callback_t r_callback;
|
|
|
|
ceph_osdc_unsafe_callback_t r_unsafe_callback;
|
2009-10-07 01:31:10 +07:00
|
|
|
struct list_head r_unsafe_item;
|
|
|
|
|
|
|
|
struct inode *r_inode; /* for use by callbacks */
|
2010-04-07 05:14:15 +07:00
|
|
|
void *r_priv; /* ditto */
|
2009-10-07 01:31:10 +07:00
|
|
|
|
2016-05-26 05:29:52 +07:00
|
|
|
/* set by submitter */
|
|
|
|
u64 r_snapid; /* for reads, CEPH_NOSNAP o/w */
|
|
|
|
struct ceph_snap_context *r_snapc; /* for writes */
|
|
|
|
struct timespec r_mtime; /* ditto */
|
|
|
|
u64 r_data_offset; /* ditto */
|
2016-05-26 06:15:02 +07:00
|
|
|
bool r_linger; /* don't resend on failure */
|
2009-10-07 01:31:10 +07:00
|
|
|
|
2016-05-26 05:29:52 +07:00
|
|
|
/* internal */
|
|
|
|
unsigned long r_stamp; /* jiffies, send or check time */
|
|
|
|
int r_attempts;
|
|
|
|
struct ceph_eversion r_replay_version; /* aka reassert_version */
|
|
|
|
u32 r_last_force_resend;
|
2016-04-28 21:07:27 +07:00
|
|
|
u32 r_map_dne_bound;
|
2016-02-09 23:50:15 +07:00
|
|
|
|
|
|
|
struct ceph_osd_req_op r_ops[];
|
2009-10-07 01:31:10 +07:00
|
|
|
};
|
|
|
|
|
libceph: follow redirect replies from osds
Follow redirect replies from osds, for details see ceph.git commit
fbbe3ad1220799b7bb00ea30fce581c5eadaf034.
v1 (current) version of redirect reply consists of oloc and oid, which
expands to pool, key, nspace, hash and oid. However, server-side code
that would populate anything other than pool doesn't exist yet, and
hence this commit adds support for pool redirects only. To make sure
that future server-side updates don't break us, we decode all fields
and, if any of key, nspace, hash or oid have a non-default value, error
out with "corrupt osd_op_reply ..." message.
Signed-off-by: Ilya Dryomov <ilya.dryomov@inktank.com>
Reviewed-by: Sage Weil <sage@inktank.com>
2014-01-27 22:40:20 +07:00
|
|
|
struct ceph_request_redirect {
|
|
|
|
struct ceph_object_locator oloc;
|
|
|
|
};
|
|
|
|
|
2016-05-26 06:15:02 +07:00
|
|
|
/*
 * Watch callbacks, passed to ceph_osdc_watch() and stored in
 * ceph_osd_linger_request::wcb / ::errcb.
 *
 * rados_watchcb2_t   - called with notify_id, cookie, notifier_id and a
 *                      data buffer of data_len bytes.
 * rados_watcherrcb_t - called with the cookie and an error code.
 *
 * NOTE(review): 'arg' is presumably the 'data' pointer supplied to
 * ceph_osdc_watch() -- confirm against osd_client.c.
 */
typedef void (*rados_watchcb2_t)(void *arg, u64 notify_id, u64 cookie,
				 u64 notifier_id, void *data, size_t data_len);
typedef void (*rados_watcherrcb_t)(void *arg, u64 cookie, int err);
|
|
|
|
|
|
|
|
struct ceph_osd_linger_request {
|
2011-03-22 05:07:16 +07:00
|
|
|
struct ceph_osd_client *osdc;
|
2016-05-26 06:15:02 +07:00
|
|
|
u64 linger_id;
|
|
|
|
bool committed;
|
2016-04-28 21:07:27 +07:00
|
|
|
bool is_watch; /* watch or notify */
|
2016-05-26 06:15:02 +07:00
|
|
|
|
|
|
|
struct ceph_osd *osd;
|
|
|
|
struct ceph_osd_request *reg_req;
|
|
|
|
struct ceph_osd_request *ping_req;
|
|
|
|
unsigned long ping_sent;
|
2016-04-28 21:07:27 +07:00
|
|
|
unsigned long watch_valid_thru;
|
|
|
|
struct list_head pending_lworks;
|
2016-05-26 06:15:02 +07:00
|
|
|
|
|
|
|
struct ceph_osd_request_target t;
|
|
|
|
u32 last_force_resend;
|
2016-04-28 21:07:27 +07:00
|
|
|
u32 map_dne_bound;
|
2016-05-26 06:15:02 +07:00
|
|
|
|
|
|
|
struct timespec mtime;
|
|
|
|
|
2011-03-22 05:07:16 +07:00
|
|
|
struct kref kref;
|
2016-05-26 06:15:02 +07:00
|
|
|
struct mutex lock;
|
|
|
|
struct rb_node node; /* osd */
|
|
|
|
struct rb_node osdc_node; /* osdc */
|
2016-04-28 21:07:27 +07:00
|
|
|
struct rb_node mc_node; /* map check */
|
2016-05-26 06:15:02 +07:00
|
|
|
struct list_head scan_item;
|
|
|
|
|
|
|
|
struct completion reg_commit_wait;
|
2016-04-28 21:07:27 +07:00
|
|
|
struct completion notify_finish_wait;
|
2016-05-26 06:15:02 +07:00
|
|
|
int reg_commit_error;
|
2016-04-28 21:07:27 +07:00
|
|
|
int notify_finish_error;
|
2016-05-26 06:15:02 +07:00
|
|
|
int last_error;
|
|
|
|
|
|
|
|
u32 register_gen;
|
2016-04-28 21:07:27 +07:00
|
|
|
u64 notify_id;
|
2011-03-22 05:07:16 +07:00
|
|
|
|
2016-05-26 06:15:02 +07:00
|
|
|
rados_watchcb2_t wcb;
|
|
|
|
rados_watcherrcb_t errcb;
|
|
|
|
void *data;
|
2016-04-28 21:07:27 +07:00
|
|
|
|
|
|
|
struct page ***preply_pages;
|
|
|
|
size_t *preply_len;
|
2011-03-22 05:07:16 +07:00
|
|
|
};
|
|
|
|
|
2009-10-07 01:31:10 +07:00
|
|
|
struct ceph_osd_client {
|
|
|
|
struct ceph_client *client;
|
|
|
|
|
|
|
|
struct ceph_osdmap *osdmap; /* current map */
|
2016-04-28 21:07:26 +07:00
|
|
|
struct rw_semaphore lock;
|
2009-10-07 01:31:10 +07:00
|
|
|
|
|
|
|
struct rb_root osds; /* osds */
|
2010-02-04 02:00:26 +07:00
|
|
|
struct list_head osd_lru; /* idle osds */
|
2016-04-28 21:07:26 +07:00
|
|
|
spinlock_t osd_lru_lock;
|
2016-04-28 21:07:26 +07:00
|
|
|
struct ceph_osd homeless_osd;
|
|
|
|
atomic64_t last_tid; /* tid of last request */
|
2016-05-26 06:15:02 +07:00
|
|
|
u64 last_linger_id;
|
|
|
|
struct rb_root linger_requests; /* lingering requests */
|
2016-04-28 21:07:27 +07:00
|
|
|
struct rb_root map_checks;
|
|
|
|
struct rb_root linger_map_checks;
|
2016-04-28 21:07:26 +07:00
|
|
|
atomic_t num_requests;
|
|
|
|
atomic_t num_homeless;
|
2009-10-07 01:31:10 +07:00
|
|
|
struct delayed_work timeout_work;
|
2010-02-04 02:00:26 +07:00
|
|
|
struct delayed_work osds_timeout_work;
|
2009-11-13 06:05:52 +07:00
|
|
|
#ifdef CONFIG_DEBUG_FS
|
2009-10-07 01:31:10 +07:00
|
|
|
struct dentry *debugfs_file;
|
2009-11-13 06:05:52 +07:00
|
|
|
#endif
|
2009-10-07 01:31:10 +07:00
|
|
|
|
|
|
|
mempool_t *req_mempool;
|
|
|
|
|
2010-01-14 08:03:23 +07:00
|
|
|
struct ceph_msgpool msgpool_op;
|
2010-03-02 04:02:00 +07:00
|
|
|
struct ceph_msgpool msgpool_op_reply;
|
2011-03-22 05:07:16 +07:00
|
|
|
|
|
|
|
struct workqueue_struct *notify_wq;
|
2009-10-07 01:31:10 +07:00
|
|
|
};
|
|
|
|
|
2016-04-28 21:07:25 +07:00
|
|
|
static inline bool ceph_osdmap_flag(struct ceph_osd_client *osdc, int flag)
|
|
|
|
{
|
|
|
|
return osdc->osdmap->flags & flag;
|
|
|
|
}
|
|
|
|
|
2013-05-02 00:43:04 +07:00
|
|
|
/*
 * Setup/cleanup entry points that take no osd client instance.
 * NOTE(review): presumably one-time module-level init/exit, with
 * ceph_osdc_setup() returning 0 or a negative errno -- confirm in
 * osd_client.c.
 */
extern int ceph_osdc_setup(void);
extern void ceph_osdc_cleanup(void);
|
|
|
|
|
2009-10-07 01:31:10 +07:00
|
|
|
/*
 * Per-client lifecycle: ceph_osdc_init() takes the osd client and the
 * ceph_client passed alongside it; ceph_osdc_stop() takes the osd
 * client only.
 * NOTE(review): return convention of ceph_osdc_init() is presumably
 * 0 / negative errno -- confirm.
 */
extern int ceph_osdc_init(struct ceph_osd_client *osdc,
			  struct ceph_client *client);
extern void ceph_osdc_stop(struct ceph_osd_client *osdc);

/*
 * Handlers for incoming messages (an osd reply and an osdmap,
 * respectively, going by the names).
 */
extern void ceph_osdc_handle_reply(struct ceph_osd_client *osdc,
				   struct ceph_msg *msg);
extern void ceph_osdc_handle_map(struct ceph_osd_client *osdc,
				 struct ceph_msg *msg);
|
|
|
|
|
2013-02-12 01:33:24 +07:00
|
|
|
extern void osd_req_op_init(struct ceph_osd_request *osd_req,
|
2015-04-27 10:09:54 +07:00
|
|
|
unsigned int which, u16 opcode, u32 flags);
|
2013-02-12 01:33:24 +07:00
|
|
|
|
|
|
|
extern void osd_req_op_raw_data_in_pages(struct ceph_osd_request *,
|
|
|
|
unsigned int which,
|
|
|
|
struct page **pages, u64 length,
|
|
|
|
u32 alignment, bool pages_from_pool,
|
|
|
|
bool own_pages);
|
|
|
|
|
2013-04-05 13:27:11 +07:00
|
|
|
/*
 * Extent op helpers.  The offset/length/truncate_size/truncate_seq
 * arguments of osd_req_op_extent_init() mirror the members of
 * ceph_osd_req_op::extent.
 * NOTE(review): the exact semantics of _update() (new length) and
 * _dup_last() (offset increment) are not visible from this header --
 * see the definitions.
 */
extern void osd_req_op_extent_init(struct ceph_osd_request *osd_req,
					unsigned int which, u16 opcode,
					u64 offset, u64 length,
					u64 truncate_size, u32 truncate_seq);
extern void osd_req_op_extent_update(struct ceph_osd_request *osd_req,
					unsigned int which, u64 length);
extern void osd_req_op_extent_dup_last(struct ceph_osd_request *osd_req,
				       unsigned int which, u64 offset_inc);
|
2013-04-05 13:27:12 +07:00
|
|
|
|
|
|
|
extern struct ceph_osd_data *osd_req_op_extent_osd_data(
|
|
|
|
struct ceph_osd_request *osd_req,
|
2013-04-16 02:50:36 +07:00
|
|
|
unsigned int which);
|
2013-04-05 13:27:12 +07:00
|
|
|
|
|
|
|
extern void osd_req_op_extent_osd_data_pages(struct ceph_osd_request *,
|
2013-04-16 02:50:36 +07:00
|
|
|
unsigned int which,
|
2013-04-05 13:27:12 +07:00
|
|
|
struct page **pages, u64 length,
|
|
|
|
u32 alignment, bool pages_from_pool,
|
|
|
|
bool own_pages);
|
|
|
|
extern void osd_req_op_extent_osd_data_pagelist(struct ceph_osd_request *,
|
2013-04-16 02:50:36 +07:00
|
|
|
unsigned int which,
|
2013-04-05 13:27:12 +07:00
|
|
|
struct ceph_pagelist *pagelist);
|
|
|
|
#ifdef CONFIG_BLOCK
|
|
|
|
extern void osd_req_op_extent_osd_data_bio(struct ceph_osd_request *,
|
2013-04-16 02:50:36 +07:00
|
|
|
unsigned int which,
|
2013-04-05 13:27:12 +07:00
|
|
|
struct bio *bio, size_t bio_length);
|
|
|
|
#endif /* CONFIG_BLOCK */
|
|
|
|
|
2013-04-06 02:46:02 +07:00
|
|
|
extern void osd_req_op_cls_request_data_pagelist(struct ceph_osd_request *,
|
|
|
|
unsigned int which,
|
|
|
|
struct ceph_pagelist *pagelist);
|
2013-04-20 03:34:49 +07:00
|
|
|
extern void osd_req_op_cls_request_data_pages(struct ceph_osd_request *,
|
|
|
|
unsigned int which,
|
|
|
|
struct page **pages, u64 length,
|
|
|
|
u32 alignment, bool pages_from_pool,
|
|
|
|
bool own_pages);
|
2013-04-05 13:27:12 +07:00
|
|
|
extern void osd_req_op_cls_response_data_pages(struct ceph_osd_request *,
|
2013-04-05 13:27:11 +07:00
|
|
|
unsigned int which,
|
2013-04-05 13:27:12 +07:00
|
|
|
struct page **pages, u64 length,
|
|
|
|
u32 alignment, bool pages_from_pool,
|
|
|
|
bool own_pages);
|
|
|
|
|
2013-04-05 13:27:11 +07:00
|
|
|
extern void osd_req_op_cls_init(struct ceph_osd_request *osd_req,
|
|
|
|
unsigned int which, u16 opcode,
|
2013-04-06 02:46:02 +07:00
|
|
|
const char *class, const char *method);
|
2014-11-12 13:00:43 +07:00
|
|
|
extern int osd_req_op_xattr_init(struct ceph_osd_request *osd_req, unsigned int which,
|
|
|
|
u16 opcode, const char *name, const void *value,
|
|
|
|
size_t size, u8 cmp_op, u8 cmp_mode);
|
2014-02-25 21:22:27 +07:00
|
|
|
extern void osd_req_op_alloc_hint_init(struct ceph_osd_request *osd_req,
|
|
|
|
unsigned int which,
|
|
|
|
u64 expected_object_size,
|
|
|
|
u64 expected_write_size);
|
libceph: define source request op functions
The rbd code has a function that allocates and populates a
ceph_osd_req_op structure (the in-core version of an osd request
operation). When reviewed, Josh suggested two things: that the
big varargs function might be better split into type-specific
functions; and that this functionality really belongs in the osd
client rather than rbd.
This patch implements both of Josh's suggestions. It breaks
up the rbd function into separate functions and defines them
in the osd client module as exported interfaces. Unlike the
rbd version, however, the functions don't allocate an osd_req_op
structure; they are provided the address of one and that is
initialized instead.
The rbd function has been eliminated and calls to it have been
replaced by calls to the new routines.  The rbd code now uses a
stack (struct) variable to hold the op rather than allocating and
freeing it each time.
For now only the capabilities used by rbd are implemented.
Implementing all the other osd op types, and making the rest of the
code use it will be done separately, in the next few patches.
Note that only the extent, cls, and watch portions of the
ceph_osd_req_op structure are currently used. Delete the others
(xattr, pgls, and snap) from its definition so nobody thinks it's
actually implemented or needed. We can add it back again later
if needed, when we know it's been tested.
This (and a few follow-on patches) resolves:
http://tracker.ceph.com/issues/3861
Signed-off-by: Alex Elder <elder@inktank.com>
Reviewed-by: Josh Durgin <josh.durgin@inktank.com>
2013-03-14 08:50:00 +07:00
|
|
|
|
2010-04-07 04:51:47 +07:00
|
|
|
extern struct ceph_osd_request *ceph_osdc_alloc_request(struct ceph_osd_client *osdc,
|
|
|
|
struct ceph_snap_context *snapc,
|
2013-03-15 02:09:05 +07:00
|
|
|
unsigned int num_ops,
|
2010-04-07 04:51:47 +07:00
|
|
|
bool use_mempool,
|
2012-11-14 10:11:15 +07:00
|
|
|
gfp_t gfp_flags);
|
2016-04-27 19:15:51 +07:00
|
|
|
int ceph_osdc_alloc_messages(struct ceph_osd_request *req, gfp_t gfp);
|
2010-04-07 04:51:47 +07:00
|
|
|
|
2009-10-07 01:31:10 +07:00
|
|
|
extern struct ceph_osd_request *ceph_osdc_new_request(struct ceph_osd_client *,
|
|
|
|
struct ceph_file_layout *layout,
|
|
|
|
struct ceph_vino vino,
|
2013-03-15 02:09:05 +07:00
|
|
|
u64 offset, u64 *len,
|
2014-11-13 13:40:37 +07:00
|
|
|
unsigned int which, int num_ops,
|
|
|
|
int opcode, int flags,
|
2009-10-07 01:31:10 +07:00
|
|
|
struct ceph_snap_context *snapc,
|
2013-03-15 02:09:05 +07:00
|
|
|
u32 truncate_seq, u64 truncate_size,
|
libceph: don't assign page info in ceph_osdc_new_request()
Currently ceph_osdc_new_request() assigns an osd request's
r_num_pages and r_alignment fields. The only thing it does
after that is call ceph_osdc_build_request(), and that doesn't
need those fields to be assigned.
Move the assignment of those fields out of ceph_osdc_new_request()
and into its caller. As a result, the page_align parameter is no
longer used, so get rid of it.
Note that in ceph_sync_write(), the value for req->r_num_pages had
already been calculated earlier (as num_pages, and fortunately
it was computed the same way). So don't bother recomputing it,
but because it's not needed earlier, move that calculation after the
call to ceph_osdc_new_request(). Hold off making the assignment to
r_alignment, doing it instead r_pages and r_num_pages are
getting set.
Similarly, in start_read(), nr_pages already holds the number of
pages in the array (and is calculated the same way), so there's no
need to recompute it. Move the assignment of the page alignment
down with the others there as well.
This and the next few patches are preparation work for:
http://tracker.ceph.com/issues/4127
Signed-off-by: Alex Elder <elder@inktank.com>
Reviewed-by: Josh Durgin <josh.durgin@inktank.com>
2013-03-02 07:00:15 +07:00
|
|
|
bool use_mempool);
|
2009-10-07 01:31:10 +07:00
|
|
|
|
2014-06-20 17:14:42 +07:00
|
|
|
/*
 * Request reference counting (presumably on ceph_osd_request::r_kref
 * -- confirm).
 */
extern void ceph_osdc_get_request(struct ceph_osd_request *req);
extern void ceph_osdc_put_request(struct ceph_osd_request *req);

/*
 * Submitting, cancelling and waiting for requests.
 * NOTE(review): the int returns are presumably 0 / negative errno; the
 * meaning of 'nofail' is not visible from this header -- see the
 * definitions.
 */
extern int ceph_osdc_start_request(struct ceph_osd_client *osdc,
				   struct ceph_osd_request *req,
				   bool nofail);
extern void ceph_osdc_cancel_request(struct ceph_osd_request *req);
extern int ceph_osdc_wait_request(struct ceph_osd_client *osdc,
				  struct ceph_osd_request *req);
extern void ceph_osdc_sync(struct ceph_osd_client *osdc);

extern void ceph_osdc_flush_notifies(struct ceph_osd_client *osdc);
void ceph_osdc_maybe_request_map(struct ceph_osd_client *osdc);
|
2013-08-29 11:43:09 +07:00
|
|
|
|
2009-10-07 01:31:10 +07:00
|
|
|
extern int ceph_osdc_readpages(struct ceph_osd_client *osdc,
|
|
|
|
struct ceph_vino vino,
|
|
|
|
struct ceph_file_layout *layout,
|
|
|
|
u64 off, u64 *plen,
|
|
|
|
u32 truncate_seq, u64 truncate_size,
|
2010-11-10 03:43:12 +07:00
|
|
|
struct page **pages, int nr_pages,
|
|
|
|
int page_align);
|
2009-10-07 01:31:10 +07:00
|
|
|
|
|
|
|
extern int ceph_osdc_writepages(struct ceph_osd_client *osdc,
|
|
|
|
struct ceph_vino vino,
|
|
|
|
struct ceph_file_layout *layout,
|
|
|
|
struct ceph_snap_context *sc,
|
|
|
|
u64 off, u64 len,
|
|
|
|
u32 truncate_seq, u64 truncate_size,
|
|
|
|
struct timespec *mtime,
|
2013-02-16 00:42:29 +07:00
|
|
|
struct page **pages, int nr_pages);
|
2009-10-07 01:31:10 +07:00
|
|
|
|
2016-05-26 06:15:02 +07:00
|
|
|
/* watch/notify */
|
|
|
|
struct ceph_osd_linger_request *
|
|
|
|
ceph_osdc_watch(struct ceph_osd_client *osdc,
|
|
|
|
struct ceph_object_id *oid,
|
|
|
|
struct ceph_object_locator *oloc,
|
|
|
|
rados_watchcb2_t wcb,
|
|
|
|
rados_watcherrcb_t errcb,
|
|
|
|
void *data);
|
|
|
|
int ceph_osdc_unwatch(struct ceph_osd_client *osdc,
|
|
|
|
struct ceph_osd_linger_request *lreq);
|
|
|
|
|
|
|
|
int ceph_osdc_notify_ack(struct ceph_osd_client *osdc,
|
|
|
|
struct ceph_object_id *oid,
|
|
|
|
struct ceph_object_locator *oloc,
|
|
|
|
u64 notify_id,
|
|
|
|
u64 cookie,
|
|
|
|
void *payload,
|
|
|
|
size_t payload_len);
|
2016-04-28 21:07:27 +07:00
|
|
|
int ceph_osdc_notify(struct ceph_osd_client *osdc,
|
|
|
|
struct ceph_object_id *oid,
|
|
|
|
struct ceph_object_locator *oloc,
|
|
|
|
void *payload,
|
|
|
|
size_t payload_len,
|
|
|
|
u32 timeout,
|
|
|
|
struct page ***preply_pages,
|
|
|
|
size_t *preply_len);
|
2016-04-28 21:07:27 +07:00
|
|
|
int ceph_osdc_watch_check(struct ceph_osd_client *osdc,
|
|
|
|
struct ceph_osd_linger_request *lreq);
|
2009-10-07 01:31:10 +07:00
|
|
|
#endif
|
|
|
|
|