block-5.10-2020-10-12

-----BEGIN PGP SIGNATURE-----
 
 iQJEBAABCAAuFiEEwPw5LcreJtl1+l5K99NY+ylx4KYFAl+EWUgQHGF4Ym9lQGtl
 cm5lbC5kawAKCRD301j7KXHgpnoxEADCVSNBRkpV0OVkOEC3wf8EGhXhk01Jnjtl
 u5Mg2V55hcgJ0thQxBV/V28XyqmsEBrmAVi0Yf8Vr9Qbq4Ze08Wae4ChS4rEOyh1
 jTcGYWx5aJB3ChLvV/HI0nWQ3bkj03mMrL3SW8rhhf5DTyKHsVeTenpx42Qu/FKf
 fRzi09FSr3Pjd0B+EX6gunwJnlyXQC5Fa4AA0GhnXJzAznANXxHkkcXu8a6Yw75x
 e28CfhIBliORsK8sRHLoUnPpeTe1vtxCBhBMsE+gJAj9ZUOWMzvNFIPP4FvfawDy
 6cCQo2m1azJ/IdZZCDjFUWyjh+wxdKMp+NNryEcoV+VlqIoc3n98rFwrSL+GIq5Z
 WVwEwq+AcwoMCsD29Lu1ytL2PQ/RVqcJP5UheMrbL4vzefNfJFumQVZLIcX0k943
 8dFL2QHL+H/hM9Dx5y5rjeiWkAlq75v4xPKVjh/DHb4nehddCqn/+DD5HDhNANHf
 c1kmmEuYhvLpIaC4DHjE6DwLh8TPKahJjwsGuBOTr7D93NUQD+OOWsIhX6mNISIl
 FFhP8cd0/ZZVV//9j+q+5B4BaJsT+ZtwmrelKFnPdwPSnh+3iu8zPRRWO+8P8fRC
 YvddxuJAmE6BLmsAYrdz6Xb/wqfyV44cEiyivF0oBQfnhbtnXwDnkDWSfJD1bvCm
 ZwfpDh2+Tg==
 =LzyE
 -----END PGP SIGNATURE-----

Merge tag 'block-5.10-2020-10-12' of git://git.kernel.dk/linux-block

Pull block updates from Jens Axboe:

 - Series of merge handling cleanups (Baolin, Christoph)

 - Series of blk-throttle fixes and cleanups (Baolin)

 - Series cleaning up BDI, separating the block device from the
   backing_dev_info (Christoph)

 - Removal of bdget() as a generic API (Christoph)

 - Removal of blkdev_get() as a generic API (Christoph)

 - Cleanup of is-partition checks (Christoph)

 - Series reworking disk revalidation (Christoph)

 - Series cleaning up bio flags (Christoph)

 - bio crypt fixes (Eric)

 - IO stats inflight tweak (Gabriel)

 - blk-mq tags fixes (Hannes)

 - Buffer invalidation fixes (Jan)

 - Allow soft limits for zone append (Johannes)

 - Shared tag set improvements (John, Kashyap)

 - Allow IOPRIO_CLASS_RT for CAP_SYS_NICE (Khazhismel)

 - DM no-wait support (Mike, Konstantin)

 - Request allocation improvements (Ming)

 - Allow md/dm/bcache to use IO stat helpers (Song)

 - Series improving blk-iocost (Tejun)

 - Various cleanups (Geert, Damien, Danny, Julia, Tetsuo, Tian, Wang,
   Xianting, Yang, Yufen, yangerkun)

* tag 'block-5.10-2020-10-12' of git://git.kernel.dk/linux-block: (191 commits)
  block: fix uapi blkzoned.h comments
  blk-mq: move cancel of hctx->run_work to the front of blk_exit_queue
  blk-mq: get rid of the dead flush handle code path
  block: get rid of unnecessary local variable
  block: fix comment and add lockdep assert
  blk-mq: use helper function to test hw stopped
  block: use helper function to test queue register
  block: remove redundant mq check
  block: invoke blk_mq_exit_sched no matter whether have .exit_sched
  percpu_ref: don't refer to ref->data if it isn't allocated
  block: ratelimit handle_bad_sector() message
  blk-throttle: Re-use the throtl_set_slice_end()
  blk-throttle: Open code __throtl_de/enqueue_tg()
  blk-throttle: Move service tree validation out of the throtl_rb_first()
  blk-throttle: Move the list operation after list validation
  blk-throttle: Fix IO hang for a corner case
  blk-throttle: Avoid tracking latency if low limit is invalid
  blk-throttle: Avoid getting the current time if tg->last_finish_time is 0
  blk-throttle: Remove a meaningless parameter for throtl_downgrade_state()
  block: Remove redundant 'return' statement
  ...
Linus Torvalds 2020-10-13 12:12:44 -07:00
commit 3ad11d7ac8
144 changed files with 3218 additions and 2435 deletions


@@ -488,9 +488,6 @@ getgeo: no
 swap_slot_free_notify: no (see below)
 ======================= ===================
 
-unlock_native_capacity and revalidate_disk are called only from
-check_disk_change().
-
 swap_slot_free_notify is called with swap_lock and sometimes the page lock
 held.


@@ -181,7 +181,7 @@ HDIO_SET_UNMASKINTR
	error return:
-	  - EINVAL	(bdev != bdev->bd_contains) (not sure what this means)
+	  - EINVAL	Called on a partition instead of the whole disk device
	  - EACCES	Access denied: requires CAP_SYS_ADMIN
	  - EINVAL	value out of range [0 1]
	  - EBUSY	Controller busy
@@ -231,7 +231,7 @@ HDIO_SET_MULTCOUNT
	error return:
-	  - EINVAL	(bdev != bdev->bd_contains) (not sure what this means)
+	  - EINVAL	Called on a partition instead of the whole disk device
	  - EACCES	Access denied: requires CAP_SYS_ADMIN
	  - EINVAL	value out of range supported by disk.
	  - EBUSY	Controller busy or blockmode already set.
@@ -295,7 +295,7 @@ HDIO_GET_IDENTITY
	  the ATA specification.

	error returns:
-	  - EINVAL	(bdev != bdev->bd_contains) (not sure what this means)
+	  - EINVAL	Called on a partition instead of the whole disk device
	  - ENOMSG	IDENTIFY DEVICE information not available

	notes:
@@ -355,7 +355,7 @@ HDIO_SET_KEEPSETTINGS
	error return:
-	  - EINVAL	(bdev != bdev->bd_contains) (not sure what this means)
+	  - EINVAL	Called on a partition instead of the whole disk device
	  - EACCES	Access denied: requires CAP_SYS_ADMIN
	  - EINVAL	value out of range [0 1]
	  - EBUSY	Controller busy
@@ -1055,7 +1055,7 @@ HDIO_SET_32BIT
	error return:
-	  - EINVAL	(bdev != bdev->bd_contains) (not sure what this means)
+	  - EINVAL	Called on a partition instead of the whole disk device
	  - EACCES	Access denied: requires CAP_SYS_ADMIN
	  - EINVAL	value out of range [0 3]
	  - EBUSY	Controller busy
@@ -1085,7 +1085,7 @@ HDIO_SET_NOWERR
	error return:
-	  - EINVAL	(bdev != bdev->bd_contains) (not sure what this means)
+	  - EINVAL	Called on a partition instead of the whole disk device
	  - EACCES	Access denied: requires CAP_SYS_ADMIN
	  - EINVAL	value out of range [0 1]
	  - EBUSY	Controller busy
@@ -1113,7 +1113,7 @@ HDIO_SET_DMA
	error return:
-	  - EINVAL	(bdev != bdev->bd_contains) (not sure what this means)
+	  - EINVAL	Called on a partition instead of the whole disk device
	  - EACCES	Access denied: requires CAP_SYS_ADMIN
	  - EINVAL	value out of range [0 1]
	  - EBUSY	Controller busy
@@ -1141,7 +1141,7 @@ HDIO_SET_PIO_MODE
	error return:
-	  - EINVAL	(bdev != bdev->bd_contains) (not sure what this means)
+	  - EINVAL	Called on a partition instead of the whole disk device
	  - EACCES	Access denied: requires CAP_SYS_ADMIN
	  - EINVAL	value out of range [0 255]
	  - EBUSY	Controller busy
@@ -1237,7 +1237,7 @@ HDIO_SET_WCACHE
	error return:
-	  - EINVAL	(bdev != bdev->bd_contains) (not sure what this means)
+	  - EINVAL	Called on a partition instead of the whole disk device
	  - EACCES	Access denied: requires CAP_SYS_ADMIN
	  - EINVAL	value out of range [0 1]
	  - EBUSY	Controller busy
@@ -1265,7 +1265,7 @@ HDIO_SET_ACOUSTIC
	error return:
-	  - EINVAL	(bdev != bdev->bd_contains) (not sure what this means)
+	  - EINVAL	Called on a partition instead of the whole disk device
	  - EACCES	Access denied: requires CAP_SYS_ADMIN
	  - EINVAL	value out of range [0 254]
	  - EBUSY	Controller busy
@@ -1305,7 +1305,7 @@ HDIO_SET_ADDRESS
	error return:
-	  - EINVAL	(bdev != bdev->bd_contains) (not sure what this means)
+	  - EINVAL	Called on a partition instead of the whole disk device
	  - EACCES	Access denied: requires CAP_SYS_ADMIN
	  - EINVAL	value out of range [0 2]
	  - EBUSY	Controller busy
@@ -1331,7 +1331,7 @@ HDIO_SET_IDE_SCSI
	error return:
-	  - EINVAL	(bdev != bdev->bd_contains) (not sure what this means)
+	  - EINVAL	Called on a partition instead of the whole disk device
	  - EACCES	Access denied: requires CAP_SYS_ADMIN
	  - EINVAL	value out of range [0 1]
	  - EBUSY	Controller busy
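
The replacement text above says these HDIO_SET_* ioctls return EINVAL when issued against a partition rather than the whole-disk node. A minimal userspace sketch of that constraint (hypothetical program; HDIO_SET_UNMASKINTR and its calling convention are taken from this document, and CAP_SYS_ADMIN is still required):

#include <fcntl.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <linux/hdreg.h>

int main(void)
{
	/*
	 * Open the whole-disk node (e.g. /dev/sda); opening a partition such
	 * as /dev/sda1 would make the ioctl fail with EINVAL per the text
	 * above.  Without CAP_SYS_ADMIN the call fails with EACCES instead.
	 */
	int fd = open("/dev/sda", O_RDONLY);

	if (fd < 0)
		return 1;
	if (ioctl(fd, HDIO_SET_UNMASKINTR, 0UL) < 0)
		perror("HDIO_SET_UNMASKINTR");
	return 0;
}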


@@ -161,8 +161,6 @@ config BLK_WBT_MQ
	depends on BLK_WBT
	help
	 Enable writeback throttling by default on multiqueue devices.
-	 Multiqueue currently doesn't have support for IO scheduling,
-	 enabling this option is recommended.

 config BLK_DEBUG_FS
	bool "Block layer debugging information in debugfs"


@@ -4640,6 +4640,9 @@ static bool bfq_has_work(struct blk_mq_hw_ctx *hctx)
 {
	struct bfq_data *bfqd = hctx->queue->elevator->elevator_data;

+	if (!atomic_read(&hctx->elevator_queued))
+		return false;
+
	/*
	 * Avoiding lock: a race on bfqd->busy_queues should cause at
	 * most a call to dispatch for nothing
@@ -5554,6 +5557,7 @@ static void bfq_insert_requests(struct blk_mq_hw_ctx *hctx,
		rq = list_first_entry(list, struct request, queuelist);
		list_del_init(&rq->queuelist);
		bfq_insert_request(hctx, rq, at_head);
+		atomic_inc(&hctx->elevator_queued);
	}
 }
@@ -5921,6 +5925,7 @@ static void bfq_finish_requeue_request(struct request *rq)
		bfq_completed_request(bfqq, bfqd);
		bfq_finish_requeue_request_body(bfqq);
+		atomic_dec(&rq->mq_hctx->elevator_queued);

		spin_unlock_irqrestore(&bfqd->lock, flags);
	} else {
@@ -6360,8 +6365,8 @@ static void bfq_depth_updated(struct blk_mq_hw_ctx *hctx)
	struct blk_mq_tags *tags = hctx->sched_tags;
	unsigned int min_shallow;

-	min_shallow = bfq_update_depths(bfqd, &tags->bitmap_tags);
-	sbitmap_queue_min_shallow_depth(&tags->bitmap_tags, min_shallow);
+	min_shallow = bfq_update_depths(bfqd, tags->bitmap_tags);
+	sbitmap_queue_min_shallow_depth(tags->bitmap_tags, min_shallow);
 }

 static int bfq_init_hctx(struct blk_mq_hw_ctx *hctx, unsigned int index)


@@ -713,20 +713,18 @@ struct bio *bio_clone_fast(struct bio *bio, gfp_t gfp_mask, struct bio_set *bs)
	__bio_clone_fast(b, bio);

-	bio_crypt_clone(b, bio, gfp_mask);
+	if (bio_crypt_clone(b, bio, gfp_mask) < 0)
+		goto err_put;

-	if (bio_integrity(bio)) {
-		int ret;
-
-		ret = bio_integrity_clone(b, bio, gfp_mask);
-
-		if (ret < 0) {
-			bio_put(b);
-			return NULL;
-		}
-	}
+	if (bio_integrity(bio) &&
+	    bio_integrity_clone(b, bio, gfp_mask) < 0)
+		goto err_put;

	return b;
+
+err_put:
+	bio_put(b);
+	return NULL;
 }
 EXPORT_SYMBOL(bio_clone_fast);


@@ -119,6 +119,8 @@ static void blkg_async_bio_workfn(struct work_struct *work)
					     async_bio_work);
	struct bio_list bios = BIO_EMPTY_LIST;
	struct bio *bio;
+	struct blk_plug plug;
+	bool need_plug = false;

	/* as long as there are pending bios, @blkg can't go away */
	spin_lock_bh(&blkg->async_bio_lock);
@@ -126,8 +128,15 @@ static void blkg_async_bio_workfn(struct work_struct *work)
	bio_list_init(&blkg->async_bios);
	spin_unlock_bh(&blkg->async_bio_lock);

+	/* start plug only when bio_list contains at least 2 bios */
+	if (bios.head && bios.head->bi_next) {
+		need_plug = true;
+		blk_start_plug(&plug);
+	}
+
	while ((bio = bio_list_pop(&bios)))
		submit_bio(bio);
+
+	if (need_plug)
+		blk_finish_plug(&plug);
 }

 /**
@@ -1613,16 +1622,24 @@ static void blkcg_scale_delay(struct blkcg_gq *blkg, u64 now)
 static void blkcg_maybe_throttle_blkg(struct blkcg_gq *blkg, bool use_memdelay)
 {
	unsigned long pflags;
+	bool clamp;
	u64 now = ktime_to_ns(ktime_get());
	u64 exp;
	u64 delay_nsec = 0;
	int tok;

	while (blkg->parent) {
-		if (atomic_read(&blkg->use_delay)) {
+		int use_delay = atomic_read(&blkg->use_delay);
+
+		if (use_delay) {
+			u64 this_delay;
+
			blkcg_scale_delay(blkg, now);
-			delay_nsec = max_t(u64, delay_nsec,
-					   atomic64_read(&blkg->delay_nsec));
+			this_delay = atomic64_read(&blkg->delay_nsec);
+			if (this_delay > delay_nsec) {
+				delay_nsec = this_delay;
+				clamp = use_delay > 0;
+			}
		}
		blkg = blkg->parent;
	}
@@ -1634,9 +1651,12 @@ static void blkcg_maybe_throttle_blkg(struct blkcg_gq *blkg, bool use_memdelay)
	 * Let's not sleep for all eternity if we've amassed a huge delay.
	 * Swapping or metadata IO can accumulate 10's of seconds worth of
	 * delay, and we want userspace to be able to do _something_ so cap the
-	 * delays at 1 second. If there's 10's of seconds worth of delay then
-	 * the tasks will be delayed for 1 second for every syscall.
+	 * delays at 0.25s. If there's 10's of seconds worth of delay then the
+	 * tasks will be delayed for 0.25 second for every syscall. If
+	 * blkcg_set_delay() was used as indicated by negative use_delay, the
+	 * caller is responsible for regulating the range.
	 */
-	delay_nsec = min_t(u64, delay_nsec, 250 * NSEC_PER_MSEC);
+	if (clamp)
+		delay_nsec = min_t(u64, delay_nsec, 250 * NSEC_PER_MSEC);

	if (use_memdelay)


@@ -116,8 +116,8 @@ void blk_rq_init(struct request_queue *q, struct request *rq)
	rq->__sector = (sector_t) -1;
	INIT_HLIST_NODE(&rq->hash);
	RB_CLEAR_NODE(&rq->rb_node);
-	rq->tag = -1;
-	rq->internal_tag = -1;
+	rq->tag = BLK_MQ_NO_TAG;
+	rq->internal_tag = BLK_MQ_NO_TAG;
	rq->start_time_ns = ktime_get_ns();
	rq->part = NULL;
	refcount_set(&rq->ref, 1);
@@ -538,11 +538,10 @@ struct request_queue *blk_alloc_queue(int node_id)
	if (!q->stats)
		goto fail_stats;

-	q->backing_dev_info->ra_pages = VM_READAHEAD_PAGES;
-	q->backing_dev_info->io_pages = VM_READAHEAD_PAGES;
-	q->backing_dev_info->capabilities = BDI_CAP_CGROUP_WRITEBACK;
	q->node = node_id;

+	atomic_set(&q->nr_active_requests_shared_sbitmap, 0);
+
	timer_setup(&q->backing_dev_info->laptop_mode_wb_timer,
		    laptop_mode_timer_fn, 0);
	timer_setup(&q->timeout, blk_rq_timed_out_timer, 0);
@@ -643,171 +642,14 @@ void blk_put_request(struct request *req)
 }
 EXPORT_SYMBOL(blk_put_request);

-static void blk_account_io_merge_bio(struct request *req)
-{
-	if (!blk_do_io_stat(req))
-		return;
-
-	part_stat_lock();
-	part_stat_inc(req->part, merges[op_stat_group(req_op(req))]);
-	part_stat_unlock();
-}
-
-bool bio_attempt_back_merge(struct request *req, struct bio *bio,
-		unsigned int nr_segs)
-{
-	const int ff = bio->bi_opf & REQ_FAILFAST_MASK;
-
-	if (!ll_back_merge_fn(req, bio, nr_segs))
-		return false;
-
-	trace_block_bio_backmerge(req->q, req, bio);
-	rq_qos_merge(req->q, req, bio);
-
-	if ((req->cmd_flags & REQ_FAILFAST_MASK) != ff)
-		blk_rq_set_mixed_merge(req);
-
-	req->biotail->bi_next = bio;
-	req->biotail = bio;
-	req->__data_len += bio->bi_iter.bi_size;
-
-	bio_crypt_free_ctx(bio);
-
-	blk_account_io_merge_bio(req);
-	return true;
-}
-
-bool bio_attempt_front_merge(struct request *req, struct bio *bio,
-		unsigned int nr_segs)
-{
-	const int ff = bio->bi_opf & REQ_FAILFAST_MASK;
-
-	if (!ll_front_merge_fn(req, bio, nr_segs))
-		return false;
-
-	trace_block_bio_frontmerge(req->q, req, bio);
-	rq_qos_merge(req->q, req, bio);
-
-	if ((req->cmd_flags & REQ_FAILFAST_MASK) != ff)
-		blk_rq_set_mixed_merge(req);
-
-	bio->bi_next = req->bio;
-	req->bio = bio;
-
-	req->__sector = bio->bi_iter.bi_sector;
-	req->__data_len += bio->bi_iter.bi_size;
-
-	bio_crypt_do_front_merge(req, bio);
-
-	blk_account_io_merge_bio(req);
-	return true;
-}
-
-bool bio_attempt_discard_merge(struct request_queue *q, struct request *req,
-		struct bio *bio)
-{
-	unsigned short segments = blk_rq_nr_discard_segments(req);
-
-	if (segments >= queue_max_discard_segments(q))
-		goto no_merge;
-	if (blk_rq_sectors(req) + bio_sectors(bio) >
-	    blk_rq_get_max_sectors(req, blk_rq_pos(req)))
-		goto no_merge;
-
-	rq_qos_merge(q, req, bio);
-
-	req->biotail->bi_next = bio;
-	req->biotail = bio;
-	req->__data_len += bio->bi_iter.bi_size;
-	req->nr_phys_segments = segments + 1;
-
-	blk_account_io_merge_bio(req);
-	return true;
-no_merge:
-	req_set_nomerge(q, req);
-	return false;
-}
-
-/**
- * blk_attempt_plug_merge - try to merge with %current's plugged list
- * @q: request_queue new bio is being queued at
- * @bio: new bio being queued
- * @nr_segs: number of segments in @bio
- * @same_queue_rq: pointer to &struct request that gets filled in when
- * another request associated with @q is found on the plug list
- * (optional, may be %NULL)
- *
- * Determine whether @bio being queued on @q can be merged with a request
- * on %current's plugged list. Returns %true if merge was successful,
- * otherwise %false.
- *
- * Plugging coalesces IOs from the same issuer for the same purpose without
- * going through @q->queue_lock. As such it's more of an issuing mechanism
- * than scheduling, and the request, while may have elvpriv data, is not
- * added on the elevator at this point. In addition, we don't have
- * reliable access to the elevator outside queue lock. Only check basic
- * merging parameters without querying the elevator.
- *
- * Caller must ensure !blk_queue_nomerges(q) beforehand.
- */
-bool blk_attempt_plug_merge(struct request_queue *q, struct bio *bio,
-		unsigned int nr_segs, struct request **same_queue_rq)
-{
-	struct blk_plug *plug;
-	struct request *rq;
-	struct list_head *plug_list;
-
-	plug = blk_mq_plug(q, bio);
-	if (!plug)
-		return false;
-
-	plug_list = &plug->mq_list;
-
-	list_for_each_entry_reverse(rq, plug_list, queuelist) {
-		bool merged = false;
-
-		if (rq->q == q && same_queue_rq) {
-			/*
-			 * Only blk-mq multiple hardware queues case checks the
-			 * rq in the same queue, there should be only one such
-			 * rq in a queue
-			 **/
-			*same_queue_rq = rq;
-		}
-
-		if (rq->q != q || !blk_rq_merge_ok(rq, bio))
-			continue;
-
-		switch (blk_try_merge(rq, bio)) {
-		case ELEVATOR_BACK_MERGE:
-			merged = bio_attempt_back_merge(rq, bio, nr_segs);
-			break;
-		case ELEVATOR_FRONT_MERGE:
-			merged = bio_attempt_front_merge(rq, bio, nr_segs);
-			break;
-		case ELEVATOR_DISCARD_MERGE:
-			merged = bio_attempt_discard_merge(q, rq, bio);
-			break;
-		default:
-			break;
-		}
-
-		if (merged)
-			return true;
-	}
-
-	return false;
-}
-
 static void handle_bad_sector(struct bio *bio, sector_t maxsector)
 {
	char b[BDEVNAME_SIZE];

-	printk(KERN_INFO "attempt to access beyond end of device\n");
-	printk(KERN_INFO "%s: rw=%d, want=%Lu, limit=%Lu\n",
-			bio_devname(bio, b), bio->bi_opf,
-			(unsigned long long)bio_end_sector(bio),
-			(long long)maxsector);
+	pr_info_ratelimited("attempt to access beyond end of device\n"
+			    "%s: rw=%d, want=%llu, limit=%llu\n",
+			    bio_devname(bio, b), bio->bi_opf,
+			    bio_end_sector(bio), maxsector);
 }

 #ifdef CONFIG_FAIL_MAKE_REQUEST
@@ -971,9 +813,9 @@ static noinline_for_stack bool submit_bio_checks(struct bio *bio)
	/*
	 * For a REQ_NOWAIT based request, return -EOPNOTSUPP
-	 * if queue is not a request based queue.
+	 * if queue does not support NOWAIT.
	 */
-	if ((bio->bi_opf & REQ_NOWAIT) && !queue_is_mq(q))
+	if ((bio->bi_opf & REQ_NOWAIT) && !blk_queue_nowait(q))
		goto not_supported;

	if (should_fail_bio(bio))
@@ -1301,14 +1143,28 @@ EXPORT_SYMBOL(submit_bio);
 * limits when retrying requests on other queues. Those requests need
 * to be checked against the new queue limits again during dispatch.
 */
-static int blk_cloned_rq_check_limits(struct request_queue *q,
+static blk_status_t blk_cloned_rq_check_limits(struct request_queue *q,
				      struct request *rq)
 {
-	if (blk_rq_sectors(rq) > blk_queue_get_max_sectors(q, req_op(rq))) {
+	unsigned int max_sectors = blk_queue_get_max_sectors(q, req_op(rq));
+
+	if (blk_rq_sectors(rq) > max_sectors) {
+		/*
+		 * SCSI device does not have a good way to return if
+		 * Write Same/Zero is actually supported. If a device rejects
+		 * a non-read/write command (discard, write same,etc.) the
+		 * low-level device driver will set the relevant queue limit to
+		 * 0 to prevent blk-lib from issuing more of the offending
+		 * operations. Commands queued prior to the queue limit being
+		 * reset need to be completed with BLK_STS_NOTSUPP to avoid I/O
+		 * errors being propagated to upper layers.
+		 */
+		if (max_sectors == 0)
+			return BLK_STS_NOTSUPP;
+
		printk(KERN_ERR "%s: over max size limit. (%u > %u)\n",
-			__func__, blk_rq_sectors(rq),
-			blk_queue_get_max_sectors(q, req_op(rq)));
-		return -EIO;
+			__func__, blk_rq_sectors(rq), max_sectors);
+		return BLK_STS_IOERR;
	}

	/*
@@ -1321,10 +1177,10 @@ static int blk_cloned_rq_check_limits(struct request_queue *q,
	if (rq->nr_phys_segments > queue_max_segments(q)) {
		printk(KERN_ERR "%s: over max segments limit. (%hu > %hu)\n",
			__func__, rq->nr_phys_segments, queue_max_segments(q));
-		return -EIO;
+		return BLK_STS_IOERR;
	}

-	return 0;
+	return BLK_STS_OK;
 }

 /*
@@ -1334,8 +1190,11 @@ static int blk_cloned_rq_check_limits(struct request_queue *q,
 */
 blk_status_t blk_insert_cloned_request(struct request_queue *q, struct request *rq)
 {
-	if (blk_cloned_rq_check_limits(q, rq))
-		return BLK_STS_IOERR;
+	blk_status_t ret;
+
+	ret = blk_cloned_rq_check_limits(q, rq);
+	if (ret != BLK_STS_OK)
+		return ret;

	if (rq->rq_disk &&
	    should_fail_request(&rq->rq_disk->part0, blk_rq_bytes(rq)))
@@ -1461,10 +1320,9 @@ void blk_account_io_start(struct request *rq)
	part_stat_unlock();
 }

-unsigned long disk_start_io_acct(struct gendisk *disk, unsigned int sectors,
-		unsigned int op)
+static unsigned long __part_start_io_acct(struct hd_struct *part,
+					  unsigned int sectors, unsigned int op)
 {
-	struct hd_struct *part = &disk->part0;
	const int sgrp = op_stat_group(op);
	unsigned long now = READ_ONCE(jiffies);
@@ -1477,12 +1335,26 @@ unsigned long disk_start_io_acct(struct gendisk *disk, unsigned int sectors,
	return now;
 }
+
+unsigned long part_start_io_acct(struct gendisk *disk, struct hd_struct **part,
+				 struct bio *bio)
+{
+	*part = disk_map_sector_rcu(disk, bio->bi_iter.bi_sector);
+
+	return __part_start_io_acct(*part, bio_sectors(bio), bio_op(bio));
+}
+EXPORT_SYMBOL_GPL(part_start_io_acct);
+
+unsigned long disk_start_io_acct(struct gendisk *disk, unsigned int sectors,
+				 unsigned int op)
+{
+	return __part_start_io_acct(&disk->part0, sectors, op);
+}
 EXPORT_SYMBOL(disk_start_io_acct);

-void disk_end_io_acct(struct gendisk *disk, unsigned int op,
-		unsigned long start_time)
+static void __part_end_io_acct(struct hd_struct *part, unsigned int op,
+			       unsigned long start_time)
 {
-	struct hd_struct *part = &disk->part0;
	const int sgrp = op_stat_group(op);
	unsigned long now = READ_ONCE(jiffies);
	unsigned long duration = now - start_time;
@@ -1493,6 +1365,20 @@ void disk_end_io_acct(struct gendisk *disk, unsigned int op,
	part_stat_local_dec(part, in_flight[op_is_write(op)]);
	part_stat_unlock();
 }
+
+void part_end_io_acct(struct hd_struct *part, struct bio *bio,
+		      unsigned long start_time)
+{
+	__part_end_io_acct(part, bio_op(bio), start_time);
+	hd_struct_put(part);
+}
+EXPORT_SYMBOL_GPL(part_end_io_acct);
+
+void disk_end_io_acct(struct gendisk *disk, unsigned int op,
+		      unsigned long start_time)
+{
+	__part_end_io_acct(&disk->part0, op, start_time);
+}
 EXPORT_SYMBOL(disk_end_io_acct);

 /*
@@ -1730,9 +1616,11 @@ int blk_rq_prep_clone(struct request *rq, struct request *rq_src,
		if (rq->bio) {
			rq->biotail->bi_next = bio;
			rq->biotail = bio;
-		} else
+		} else {
			rq->bio = rq->biotail = bio;
+		}
+		bio = NULL;
	}

	/* Copy attributes of the original request to the clone request. */
	rq->__sector = blk_rq_pos(rq_src);
@@ -1744,8 +1632,8 @@ int blk_rq_prep_clone(struct request *rq, struct request *rq_src,
	rq->nr_phys_segments = rq_src->nr_phys_segments;
	rq->ioprio = rq_src->ioprio;

-	if (rq->bio)
-		blk_crypto_rq_bio_prep(rq, rq->bio, gfp_mask);
+	if (rq->bio && blk_crypto_rq_bio_prep(rq, rq->bio, gfp_mask) < 0)
+		goto free_and_out;

	return 0;
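
The accounting hunks above split disk_start_io_acct()/disk_end_io_acct() into an internal __part_*_io_acct() core and export per-partition wrappers, part_start_io_acct() and part_end_io_acct(), which is what lets bio-based drivers such as md/dm/bcache reuse the generic IO stat helpers mentioned in the pull message. A rough caller sketch, using hypothetical driver names and only the signatures visible above:

#include <linux/blkdev.h>
#include <linux/bio.h>

/* Hypothetical per-IO bookkeeping in a bio-based driver (names made up). */
struct example_io {
	struct hd_struct *part;
	unsigned long start_jiffies;
};

static void example_start_bio(struct gendisk *disk, struct bio *bio,
			      struct example_io *io)
{
	/* Looks up the partition hit by the bio and starts accounting on it. */
	io->start_jiffies = part_start_io_acct(disk, &io->part, bio);
}

static void example_end_bio(struct bio *bio, struct example_io *io)
{
	/* Finishes the stats and drops the partition reference taken above. */
	part_end_io_acct(io->part, bio, io->start_jiffies);
}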


@@ -142,13 +142,24 @@ static inline void blk_crypto_free_request(struct request *rq)
		__blk_crypto_free_request(rq);
 }

-void __blk_crypto_rq_bio_prep(struct request *rq, struct bio *bio,
+int __blk_crypto_rq_bio_prep(struct request *rq, struct bio *bio,
			     gfp_t gfp_mask);

-static inline void blk_crypto_rq_bio_prep(struct request *rq, struct bio *bio,
+/**
+ * blk_crypto_rq_bio_prep - Prepare a request's crypt_ctx when its first bio
+ *			    is inserted
+ * @rq: The request to prepare
+ * @bio: The first bio being inserted into the request
+ * @gfp_mask: Memory allocation flags
+ *
+ * Return: 0 on success, -ENOMEM if out of memory. -ENOMEM is only possible if
+ *	   @gfp_mask doesn't include %__GFP_DIRECT_RECLAIM.
+ */
+static inline int blk_crypto_rq_bio_prep(struct request *rq, struct bio *bio,
					  gfp_t gfp_mask)
 {
	if (bio_has_crypt_ctx(bio))
-		__blk_crypto_rq_bio_prep(rq, bio, gfp_mask);
+		return __blk_crypto_rq_bio_prep(rq, bio, gfp_mask);
+	return 0;
 }

 /**


@@ -81,7 +81,15 @@ subsys_initcall(bio_crypt_ctx_init);
 void bio_crypt_set_ctx(struct bio *bio, const struct blk_crypto_key *key,
		       const u64 dun[BLK_CRYPTO_DUN_ARRAY_SIZE], gfp_t gfp_mask)
 {
-	struct bio_crypt_ctx *bc = mempool_alloc(bio_crypt_ctx_pool, gfp_mask);
+	struct bio_crypt_ctx *bc;
+
+	/*
+	 * The caller must use a gfp_mask that contains __GFP_DIRECT_RECLAIM so
+	 * that the mempool_alloc() can't fail.
+	 */
+	WARN_ON_ONCE(!(gfp_mask & __GFP_DIRECT_RECLAIM));
+
+	bc = mempool_alloc(bio_crypt_ctx_pool, gfp_mask);

	bc->bc_key = key;
	memcpy(bc->bc_dun, dun, sizeof(bc->bc_dun));
@@ -95,10 +103,13 @@ void __bio_crypt_free_ctx(struct bio *bio)
	bio->bi_crypt_context = NULL;
 }

-void __bio_crypt_clone(struct bio *dst, struct bio *src, gfp_t gfp_mask)
+int __bio_crypt_clone(struct bio *dst, struct bio *src, gfp_t gfp_mask)
 {
	dst->bi_crypt_context = mempool_alloc(bio_crypt_ctx_pool, gfp_mask);
+	if (!dst->bi_crypt_context)
+		return -ENOMEM;
	*dst->bi_crypt_context = *src->bi_crypt_context;
+	return 0;
 }
 EXPORT_SYMBOL_GPL(__bio_crypt_clone);
@@ -280,20 +291,16 @@ bool __blk_crypto_bio_prep(struct bio **bio_ptr)
	return false;
 }

-/**
- * __blk_crypto_rq_bio_prep - Prepare a request's crypt_ctx when its first bio
- *			      is inserted
- *
- * @rq: The request to prepare
- * @bio: The first bio being inserted into the request
- * @gfp_mask: gfp mask
- */
-void __blk_crypto_rq_bio_prep(struct request *rq, struct bio *bio,
+int __blk_crypto_rq_bio_prep(struct request *rq, struct bio *bio,
		gfp_t gfp_mask)
 {
-	if (!rq->crypt_ctx)
+	if (!rq->crypt_ctx) {
		rq->crypt_ctx = mempool_alloc(bio_crypt_ctx_pool, gfp_mask);
+		if (!rq->crypt_ctx)
+			return -ENOMEM;
+	}
	*rq->crypt_ctx = *bio->bi_crypt_context;
+	return 0;
 }

 /**
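
The WARN_ON_ONCE() added above documents the allocation contract for bio_crypt_set_ctx(): the caller's gfp_mask must allow direct reclaim so the mempool allocation cannot fail. A hypothetical caller sketch (illustrative names only; GFP_NOIO includes __GFP_DIRECT_RECLAIM, so it satisfies the assertion):

#include <linux/bio.h>
#include <linux/blk-crypto.h>
#include <linux/gfp.h>

/*
 * Hypothetical submitter attaching an inline-encryption context to a bio it
 * is about to issue.  Because GFP_NOIO contains __GFP_DIRECT_RECLAIM, the
 * mempool allocation inside bio_crypt_set_ctx() cannot fail and the
 * WARN_ON_ONCE() above stays silent.
 */
static void example_attach_crypt_ctx(struct bio *bio,
				     const struct blk_crypto_key *key,
				     u64 first_dun)
{
	u64 dun[BLK_CRYPTO_DUN_ARRAY_SIZE] = { first_dun };

	bio_crypt_set_ctx(bio, key, dun, GFP_NOIO);
}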


@@ -183,7 +183,6 @@ bool blk_integrity_merge_rq(struct request_queue *q, struct request *req,
	return true;
 }
-EXPORT_SYMBOL(blk_integrity_merge_rq);

 bool blk_integrity_merge_bio(struct request_queue *q, struct request *req,
			     struct bio *bio)
@@ -212,7 +211,6 @@ bool blk_integrity_merge_bio(struct request_queue *q, struct request *req,
	return true;
 }
-EXPORT_SYMBOL(blk_integrity_merge_bio);

 struct integrity_sysfs_entry {
	struct attribute attr;
@@ -408,7 +406,7 @@ void blk_integrity_register(struct gendisk *disk, struct blk_integrity *template)
	bi->tuple_size = template->tuple_size;
	bi->tag_size = template->tag_size;

-	disk->queue->backing_dev_info->capabilities |= BDI_CAP_STABLE_WRITES;
+	blk_queue_flag_set(QUEUE_FLAG_STABLE_WRITES, disk->queue);

 #ifdef CONFIG_BLK_INLINE_ENCRYPTION
	if (disk->queue->ksm) {
@@ -428,7 +426,7 @@ EXPORT_SYMBOL(blk_integrity_register);
 */
 void blk_integrity_unregister(struct gendisk *disk)
 {
-	disk->queue->backing_dev_info->capabilities &= ~BDI_CAP_STABLE_WRITES;
+	blk_queue_flag_clear(QUEUE_FLAG_STABLE_WRITES, disk->queue);
	memset(&disk->queue->integrity, 0, sizeof(struct blk_integrity));
 }
 EXPORT_SYMBOL(blk_integrity_unregister);

File diff suppressed because it is too large.


@@ -1046,7 +1046,7 @@ static int __init iolatency_init(void)
 static void __exit iolatency_exit(void)
 {
-	return blkcg_policy_unregister(&blkcg_policy_iolatency);
+	blkcg_policy_unregister(&blkcg_policy_iolatency);
 }

 module_init(iolatency_init);


@@ -64,7 +64,7 @@ int __blkdev_issue_discard(struct block_device *bdev, sector_t sector,
		return -EINVAL;

	/* In case the discard request is in a partition */
-	if (bdev->bd_partno)
+	if (bdev_is_partition(bdev))
		part_offset = bdev->bd_part->start_sect;

	while (nr_sects) {


@@ -12,7 +12,8 @@
 #include "blk.h"

 struct bio_map_data {
-	int is_our_pages;
+	bool is_our_pages : 1;
+	bool is_null_mapped : 1;
	struct iov_iter iter;
	struct iovec iov[];
 };
@@ -108,7 +109,7 @@ static int bio_uncopy_user(struct bio *bio)
	struct bio_map_data *bmd = bio->bi_private;
	int ret = 0;

-	if (!bio_flagged(bio, BIO_NULL_MAPPED)) {
+	if (!bmd->is_null_mapped) {
		/*
		 * if we're in a workqueue, the request is orphaned, so
		 * don't copy into a random user address space, just free
@@ -126,24 +127,12 @@ static int bio_uncopy_user(struct bio *bio)
	return ret;
 }

-/**
- *	bio_copy_user_iov	-	copy user data to bio
- *	@q:		destination block queue
- *	@map_data:	pointer to the rq_map_data holding pages (if necessary)
- *	@iter:		iovec iterator
- *	@gfp_mask:	memory allocation flags
- *
- *	Prepares and returns a bio for indirect user io, bouncing data
- *	to/from kernel pages as necessary. Must be paired with
- *	call bio_uncopy_user() on io completion.
- */
-static struct bio *bio_copy_user_iov(struct request_queue *q,
-		struct rq_map_data *map_data, struct iov_iter *iter,
-		gfp_t gfp_mask)
+static int bio_copy_user_iov(struct request *rq, struct rq_map_data *map_data,
+		struct iov_iter *iter, gfp_t gfp_mask)
 {
	struct bio_map_data *bmd;
	struct page *page;
-	struct bio *bio;
+	struct bio *bio, *bounce_bio;
	int i = 0, ret;
	int nr_pages;
	unsigned int len = iter->count;
@@ -151,14 +140,15 @@ static struct bio *bio_copy_user_iov(struct request_queue *q,
	bmd = bio_alloc_map_data(iter, gfp_mask);
	if (!bmd)
-		return ERR_PTR(-ENOMEM);
+		return -ENOMEM;

	/*
	 * We need to do a deep copy of the iov_iter including the iovecs.
	 * The caller provided iov might point to an on-stack or otherwise
	 * shortlived one.
	 */
-	bmd->is_our_pages = map_data ? 0 : 1;
+	bmd->is_our_pages = !map_data;
+	bmd->is_null_mapped = (map_data && map_data->null_mapped);

	nr_pages = DIV_ROUND_UP(offset + len, PAGE_SIZE);
	if (nr_pages > BIO_MAX_PAGES)
@@ -168,8 +158,7 @@ static struct bio *bio_copy_user_iov(struct request_queue *q,
	bio = bio_kmalloc(gfp_mask, nr_pages);
	if (!bio)
		goto out_bmd;
-
-	ret = 0;
+	bio->bi_opf |= req_op(rq);

	if (map_data) {
		nr_pages = 1 << map_data->page_order;
@@ -186,7 +175,7 @@ static struct bio *bio_copy_user_iov(struct request_queue *q,
		if (map_data) {
			if (i == map_data->nr_entries * nr_pages) {
				ret = -ENOMEM;
-				break;
+				goto cleanup;
			}

			page = map_data->pages[i / nr_pages];
@@ -194,14 +183,14 @@ static struct bio *bio_copy_user_iov(struct request_queue *q,
			i++;
		} else {
-			page = alloc_page(q->bounce_gfp | gfp_mask);
+			page = alloc_page(rq->q->bounce_gfp | gfp_mask);
			if (!page) {
				ret = -ENOMEM;
-				break;
+				goto cleanup;
			}
		}

-		if (bio_add_pc_page(q, bio, page, bytes, offset) < bytes) {
+		if (bio_add_pc_page(rq->q, bio, page, bytes, offset) < bytes) {
			if (!map_data)
				__free_page(page);
			break;
@@ -211,9 +200,6 @@ static struct bio *bio_copy_user_iov(struct request_queue *q,
		offset = 0;
	}

-	if (ret)
-		goto cleanup;
-
	if (map_data)
		map_data->offset += bio->bi_iter.bi_size;
@@ -233,41 +219,42 @@ static struct bio *bio_copy_user_iov(struct request_queue *q,
	}

	bio->bi_private = bmd;
-	if (map_data && map_data->null_mapped)
-		bio_set_flag(bio, BIO_NULL_MAPPED);
-	return bio;
+
+	bounce_bio = bio;
+	ret = blk_rq_append_bio(rq, &bounce_bio);
+	if (ret)
+		goto cleanup;
+
+	/*
+	 * We link the bounce buffer in and could have to traverse it later, so
+	 * we have to get a ref to prevent it from being freed
+	 */
+	bio_get(bounce_bio);
+	return 0;
 cleanup:
	if (!map_data)
		bio_free_pages(bio);
	bio_put(bio);
 out_bmd:
	kfree(bmd);
-	return ERR_PTR(ret);
+	return ret;
 }

-/**
- *	bio_map_user_iov - map user iovec into bio
- *	@q:		the struct request_queue for the bio
- *	@iter:		iovec iterator
- *	@gfp_mask:	memory allocation flags
- *
- *	Map the user space address into a bio suitable for io to a block
- *	device. Returns an error pointer in case of error.
- */
-static struct bio *bio_map_user_iov(struct request_queue *q,
-		struct iov_iter *iter, gfp_t gfp_mask)
+static int bio_map_user_iov(struct request *rq, struct iov_iter *iter,
+		gfp_t gfp_mask)
 {
-	unsigned int max_sectors = queue_max_hw_sectors(q);
-	int j;
-	struct bio *bio;
+	unsigned int max_sectors = queue_max_hw_sectors(rq->q);
+	struct bio *bio, *bounce_bio;
	int ret;
+	int j;

	if (!iov_iter_count(iter))
-		return ERR_PTR(-EINVAL);
+		return -EINVAL;

	bio = bio_kmalloc(gfp_mask, iov_iter_npages(iter, BIO_MAX_PAGES));
	if (!bio)
-		return ERR_PTR(-ENOMEM);
+		return -ENOMEM;
+	bio->bi_opf |= req_op(rq);

	while (iov_iter_count(iter)) {
		struct page **pages;
@@ -283,7 +270,7 @@ static struct bio *bio_map_user_iov(struct request_queue *q,
		npages = DIV_ROUND_UP(offs + bytes, PAGE_SIZE);

-		if (unlikely(offs & queue_dma_alignment(q))) {
+		if (unlikely(offs & queue_dma_alignment(rq->q))) {
			ret = -EINVAL;
			j = 0;
		} else {
@@ -295,7 +282,7 @@ static struct bio *bio_map_user_iov(struct request_queue *q,
				if (n > bytes)
					n = bytes;

-				if (!bio_add_hw_page(q, bio, page, n, offs,
+				if (!bio_add_hw_page(rq->q, bio, page, n, offs,
						     max_sectors, &same_page)) {
					if (same_page)
						put_page(page);
@@ -319,21 +306,31 @@ static struct bio *bio_map_user_iov(struct request_queue *q,
			break;
	}

-	bio_set_flag(bio, BIO_USER_MAPPED);
-
	/*
-	 * subtle -- if bio_map_user_iov() ended up bouncing a bio,
-	 * it would normally disappear when its bi_end_io is run.
-	 * however, we need it for the unmap, so grab an extra
-	 * reference to it
+	 * Subtle: if we end up needing to bounce a bio, it would normally
+	 * disappear when its bi_end_io is run. However, we need the original
+	 * bio for the unmap, so grab an extra reference to it
	 */
	bio_get(bio);
-	return bio;

+	bounce_bio = bio;
+	ret = blk_rq_append_bio(rq, &bounce_bio);
+	if (ret)
+		goto out_put_orig;
+
+	/*
+	 * We link the bounce buffer in and could have to traverse it
+	 * later, so we have to get a ref to prevent it from being freed
+	 */
+	bio_get(bounce_bio);
+	return 0;
+
+out_put_orig:
+	bio_put(bio);
 out_unmap:
	bio_release_pages(bio, false);
	bio_put(bio);
-	return ERR_PTR(ret);
+	return ret;
 }

 /**
@@ -557,55 +554,6 @@ int blk_rq_append_bio(struct request *rq, struct bio **bio)
 }
 EXPORT_SYMBOL(blk_rq_append_bio);

-static int __blk_rq_unmap_user(struct bio *bio)
-{
-	int ret = 0;
-
-	if (bio) {
-		if (bio_flagged(bio, BIO_USER_MAPPED))
-			bio_unmap_user(bio);
-		else
-			ret = bio_uncopy_user(bio);
-	}
-
-	return ret;
-}
-
-static int __blk_rq_map_user_iov(struct request *rq,
-		struct rq_map_data *map_data, struct iov_iter *iter,
-		gfp_t gfp_mask, bool copy)
-{
-	struct request_queue *q = rq->q;
-	struct bio *bio, *orig_bio;
-	int ret;
-
-	if (copy)
-		bio = bio_copy_user_iov(q, map_data, iter, gfp_mask);
-	else
-		bio = bio_map_user_iov(q, iter, gfp_mask);
-
-	if (IS_ERR(bio))
-		return PTR_ERR(bio);
-
-	bio->bi_opf &= ~REQ_OP_MASK;
-	bio->bi_opf |= req_op(rq);
-
-	orig_bio = bio;
-
-	/*
-	 * We link the bounce buffer in and could have to traverse it
-	 * later so we have to get a ref to prevent it from being freed
-	 */
-	ret = blk_rq_append_bio(rq, &bio);
-	if (ret) {
-		__blk_rq_unmap_user(orig_bio);
-		return ret;
-	}
-	bio_get(bio);
-
-	return 0;
-}
-
 /**
 * blk_rq_map_user_iov - map user data to a request, for passthrough requests
 * @q:		request queue where request should be inserted
@@ -649,7 +597,10 @@ int blk_rq_map_user_iov(struct request_queue *q, struct request *rq,
	i = *iter;
	do {
-		ret =__blk_rq_map_user_iov(rq, map_data, &i, gfp_mask, copy);
+		if (copy)
+			ret = bio_copy_user_iov(rq, map_data, &i, gfp_mask);
+		else
+			ret = bio_map_user_iov(rq, &i, gfp_mask);
		if (ret)
			goto unmap_rq;
		if (!bio)
@@ -700,9 +651,13 @@ int blk_rq_unmap_user(struct bio *bio)
		if (unlikely(bio_flagged(bio, BIO_BOUNCED)))
			mapped_bio = bio->bi_private;

-		ret2 = __blk_rq_unmap_user(mapped_bio);
+		if (bio->bi_private) {
+			ret2 = bio_uncopy_user(mapped_bio);
			if (ret2 && !ret)
				ret = ret2;
+		} else {
+			bio_unmap_user(mapped_bio);
+		}

		mapped_bio = bio;
		bio = bio->bi_next;


@@ -11,6 +11,7 @@
 #include <trace/events/block.h>

 #include "blk.h"
+#include "blk-rq-qos.h"

 static inline bool bio_will_gap(struct request_queue *q,
		struct request *prev_rq, struct bio *prev, struct bio *next)
@@ -579,7 +580,8 @@ int ll_back_merge_fn(struct request *req, struct bio *bio, unsigned int nr_segs)
	return ll_new_hw_segment(req, bio, nr_segs);
 }

-int ll_front_merge_fn(struct request *req, struct bio *bio, unsigned int nr_segs)
+static int ll_front_merge_fn(struct request *req, struct bio *bio,
+		unsigned int nr_segs)
 {
	if (req_gap_front_merge(req, bio))
		return 0;
@@ -809,7 +811,8 @@ static struct request *attempt_merge(struct request_queue *q,
	return next;
 }

-struct request *attempt_back_merge(struct request_queue *q, struct request *rq)
+static struct request *attempt_back_merge(struct request_queue *q,
+		struct request *rq)
 {
	struct request *next = elv_latter_request(q, rq);
@@ -819,7 +822,8 @@ struct request *attempt_back_merge(struct request_queue *q, struct request *rq)
	return NULL;
 }

-struct request *attempt_front_merge(struct request_queue *q, struct request *rq)
+static struct request *attempt_front_merge(struct request_queue *q,
+		struct request *rq)
 {
	struct request *prev = elv_former_request(q, rq);
@@ -895,3 +899,238 @@ enum elv_merge blk_try_merge(struct request *rq, struct bio *bio)
		return ELEVATOR_FRONT_MERGE;
	return ELEVATOR_NO_MERGE;
 }
+
+static void blk_account_io_merge_bio(struct request *req)
+{
+	if (!blk_do_io_stat(req))
+		return;
+
+	part_stat_lock();
+	part_stat_inc(req->part, merges[op_stat_group(req_op(req))]);
+	part_stat_unlock();
+}
+
+enum bio_merge_status {
+	BIO_MERGE_OK,
+	BIO_MERGE_NONE,
+	BIO_MERGE_FAILED,
+};
+
+static enum bio_merge_status bio_attempt_back_merge(struct request *req,
+		struct bio *bio, unsigned int nr_segs)
+{
+	const int ff = bio->bi_opf & REQ_FAILFAST_MASK;
+
+	if (!ll_back_merge_fn(req, bio, nr_segs))
+		return BIO_MERGE_FAILED;
+
+	trace_block_bio_backmerge(req->q, req, bio);
+	rq_qos_merge(req->q, req, bio);
+
+	if ((req->cmd_flags & REQ_FAILFAST_MASK) != ff)
+		blk_rq_set_mixed_merge(req);
+
+	req->biotail->bi_next = bio;
+	req->biotail = bio;
+	req->__data_len += bio->bi_iter.bi_size;
+
+	bio_crypt_free_ctx(bio);
+
+	blk_account_io_merge_bio(req);
+	return BIO_MERGE_OK;
+}
+
+static enum bio_merge_status bio_attempt_front_merge(struct request *req,
+		struct bio *bio, unsigned int nr_segs)
+{
+	const int ff = bio->bi_opf & REQ_FAILFAST_MASK;
+
+	if (!ll_front_merge_fn(req, bio, nr_segs))
+		return BIO_MERGE_FAILED;
+
+	trace_block_bio_frontmerge(req->q, req, bio);
+	rq_qos_merge(req->q, req, bio);
+
+	if ((req->cmd_flags & REQ_FAILFAST_MASK) != ff)
+		blk_rq_set_mixed_merge(req);
+
+	bio->bi_next = req->bio;
+	req->bio = bio;
+
+	req->__sector = bio->bi_iter.bi_sector;
+	req->__data_len += bio->bi_iter.bi_size;
+
+	bio_crypt_do_front_merge(req, bio);
+
+	blk_account_io_merge_bio(req);
+	return BIO_MERGE_OK;
+}
+
+static enum bio_merge_status bio_attempt_discard_merge(struct request_queue *q,
+		struct request *req, struct bio *bio)
+{
+	unsigned short segments = blk_rq_nr_discard_segments(req);
+
+	if (segments >= queue_max_discard_segments(q))
+		goto no_merge;
+	if (blk_rq_sectors(req) + bio_sectors(bio) >
+	    blk_rq_get_max_sectors(req, blk_rq_pos(req)))
+		goto no_merge;
+
+	rq_qos_merge(q, req, bio);
+
+	req->biotail->bi_next = bio;
+	req->biotail = bio;
+	req->__data_len += bio->bi_iter.bi_size;
+	req->nr_phys_segments = segments + 1;
+
+	blk_account_io_merge_bio(req);
+	return BIO_MERGE_OK;
+no_merge:
+	req_set_nomerge(q, req);
+	return BIO_MERGE_FAILED;
+}
+
+static enum bio_merge_status blk_attempt_bio_merge(struct request_queue *q,
+						   struct request *rq,
+						   struct bio *bio,
+						   unsigned int nr_segs,
+						   bool sched_allow_merge)
+{
+	if (!blk_rq_merge_ok(rq, bio))
+		return BIO_MERGE_NONE;
+
+	switch (blk_try_merge(rq, bio)) {
+	case ELEVATOR_BACK_MERGE:
+		if (!sched_allow_merge || blk_mq_sched_allow_merge(q, rq, bio))
+			return bio_attempt_back_merge(rq, bio, nr_segs);
+		break;
+	case ELEVATOR_FRONT_MERGE:
+		if (!sched_allow_merge || blk_mq_sched_allow_merge(q, rq, bio))
+			return bio_attempt_front_merge(rq, bio, nr_segs);
+		break;
+	case ELEVATOR_DISCARD_MERGE:
+		return bio_attempt_discard_merge(q, rq, bio);
+	default:
+		return BIO_MERGE_NONE;
+	}
+
+	return BIO_MERGE_FAILED;
+}
+
+/**
+ * blk_attempt_plug_merge - try to merge with %current's plugged list
+ * @q: request_queue new bio is being queued at
+ * @bio: new bio being queued
+ * @nr_segs: number of segments in @bio
+ * @same_queue_rq: pointer to &struct request that gets filled in when
+ * another request associated with @q is found on the plug list
+ * (optional, may be %NULL)
+ *
+ * Determine whether @bio being queued on @q can be merged with a request
+ * on %current's plugged list. Returns %true if merge was successful,
+ * otherwise %false.
+ *
+ * Plugging coalesces IOs from the same issuer for the same purpose without
+ * going through @q->queue_lock. As such it's more of an issuing mechanism
+ * than scheduling, and the request, while may have elvpriv data, is not
+ * added on the elevator at this point. In addition, we don't have
+ * reliable access to the elevator outside queue lock. Only check basic
+ * merging parameters without querying the elevator.
+ *
+ * Caller must ensure !blk_queue_nomerges(q) beforehand.
+ */
+bool blk_attempt_plug_merge(struct request_queue *q, struct bio *bio,
+		unsigned int nr_segs, struct request **same_queue_rq)
+{
+	struct blk_plug *plug;
+	struct request *rq;
+	struct list_head *plug_list;
+
+	plug = blk_mq_plug(q, bio);
+	if (!plug)
+		return false;
+
+	plug_list = &plug->mq_list;
+
+	list_for_each_entry_reverse(rq, plug_list, queuelist) {
+		if (rq->q == q && same_queue_rq) {
+			/*
+			 * Only blk-mq multiple hardware queues case checks the
+			 * rq in the same queue, there should be only one such
+			 * rq in a queue
+			 **/
+			*same_queue_rq = rq;
+		}
+
+		if (rq->q != q)
+			continue;
+
+		if (blk_attempt_bio_merge(q, rq, bio, nr_segs, false) ==
+		    BIO_MERGE_OK)
+			return true;
+	}
+
+	return false;
+}
+
+/*
+ * Iterate list of requests and see if we can merge this bio with any
+ * of them.
+ */
+bool blk_bio_list_merge(struct request_queue *q, struct list_head *list,
+			struct bio *bio, unsigned int nr_segs)
+{
+	struct request *rq;
+	int checked = 8;
+
+	list_for_each_entry_reverse(rq, list, queuelist) {
+		if (!checked--)
+			break;
+
+		switch (blk_attempt_bio_merge(q, rq, bio, nr_segs, true)) {
+		case BIO_MERGE_NONE:
+			continue;
+		case BIO_MERGE_OK:
+			return true;
+		case BIO_MERGE_FAILED:
+			return false;
+		}
+	}
+
+	return false;
+}
+EXPORT_SYMBOL_GPL(blk_bio_list_merge);
+
+bool blk_mq_sched_try_merge(struct request_queue *q, struct bio *bio,
+		unsigned int nr_segs, struct request **merged_request)
+{
+	struct request *rq;
+
+	switch (elv_merge(q, &rq, bio)) {
+	case ELEVATOR_BACK_MERGE:
+		if (!blk_mq_sched_allow_merge(q, rq, bio))
+			return false;
+		if (bio_attempt_back_merge(rq, bio, nr_segs) != BIO_MERGE_OK)
+			return false;
+		*merged_request = attempt_back_merge(q, rq);
+		if (!*merged_request)
+			elv_merged_request(q, rq, ELEVATOR_BACK_MERGE);
+		return true;
+	case ELEVATOR_FRONT_MERGE:
+		if (!blk_mq_sched_allow_merge(q, rq, bio))
+			return false;
+		if (bio_attempt_front_merge(rq, bio, nr_segs) != BIO_MERGE_OK)
+			return false;
+		*merged_request = attempt_front_merge(q, rq);
+		if (!*merged_request)
+			elv_merged_request(q, rq, ELEVATOR_FRONT_MERGE);
+		return true;
+	case ELEVATOR_DISCARD_MERGE:
+		return bio_attempt_discard_merge(q, rq, bio) == BIO_MERGE_OK;
+	default:
+		return false;
+	}
+}
+EXPORT_SYMBOL_GPL(blk_mq_sched_try_merge);
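
The blk_attempt_plug_merge() comment above frames plugging as an issuing-side mechanism: bios submitted by one task while a plug is active sit on a per-task list where they can be merged before reaching the elevator. A minimal caller-side sketch of that pattern, not taken from this series (hypothetical helper function; blk_start_plug(), blk_finish_plug() and submit_bio() are the standard block-layer APIs):

#include <linux/bio.h>
#include <linux/blkdev.h>

/*
 * Hypothetical batch submitter: while the plug is active, bios queued to the
 * same request_queue get a chance to merge via blk_attempt_plug_merge();
 * finishing the plug issues whatever remains to the driver.
 */
static void example_submit_batch(struct bio **bios, int nr)
{
	struct blk_plug plug;
	int i;

	blk_start_plug(&plug);
	for (i = 0; i < nr; i++)
		submit_bio(bios[i]);
	blk_finish_plug(&plug);
}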


@@ -116,6 +116,7 @@ static const char *const blk_queue_flag_name[] = {
	QUEUE_FLAG_NAME(SAME_FORCE),
	QUEUE_FLAG_NAME(DEAD),
	QUEUE_FLAG_NAME(INIT_DONE),
+	QUEUE_FLAG_NAME(STABLE_WRITES),
	QUEUE_FLAG_NAME(POLL),
	QUEUE_FLAG_NAME(WC),
	QUEUE_FLAG_NAME(FUA),
@@ -240,7 +241,7 @@ static const char *const alloc_policy_name[] = {
 #define HCTX_FLAG_NAME(name) [ilog2(BLK_MQ_F_##name)] = #name
 static const char *const hctx_flag_name[] = {
	HCTX_FLAG_NAME(SHOULD_MERGE),
-	HCTX_FLAG_NAME(TAG_SHARED),
+	HCTX_FLAG_NAME(TAG_QUEUE_SHARED),
	HCTX_FLAG_NAME(BLOCKING),
	HCTX_FLAG_NAME(NO_SCHED),
	HCTX_FLAG_NAME(STACKING),
@@ -452,11 +453,11 @@ static void blk_mq_debugfs_tags_show(struct seq_file *m,
		   atomic_read(&tags->active_queues));

	seq_puts(m, "\nbitmap_tags:\n");
-	sbitmap_queue_show(&tags->bitmap_tags, m);
+	sbitmap_queue_show(tags->bitmap_tags, m);

	if (tags->nr_reserved_tags) {
		seq_puts(m, "\nbreserved_tags:\n");
-		sbitmap_queue_show(&tags->breserved_tags, m);
+		sbitmap_queue_show(tags->breserved_tags, m);
	}
 }
@@ -487,7 +488,7 @@ static int hctx_tags_bitmap_show(void *data, struct seq_file *m)
	if (res)
		goto out;
	if (hctx->tags)
-		sbitmap_bitmap_show(&hctx->tags->bitmap_tags.sb, m);
+		sbitmap_bitmap_show(&hctx->tags->bitmap_tags->sb, m);
	mutex_unlock(&q->sysfs_lock);

 out:
@@ -521,7 +522,7 @@ static int hctx_sched_tags_bitmap_show(void *data, struct seq_file *m)
	if (res)
		goto out;
	if (hctx->sched_tags)
-		sbitmap_bitmap_show(&hctx->sched_tags->bitmap_tags.sb, m);
+		sbitmap_bitmap_show(&hctx->sched_tags->bitmap_tags->sb, m);
	mutex_unlock(&q->sysfs_lock);

 out:


@@ -18,21 +18,6 @@
 #include "blk-mq-tag.h"
 #include "blk-wbt.h"

-void blk_mq_sched_free_hctx_data(struct request_queue *q,
-				 void (*exit)(struct blk_mq_hw_ctx *))
-{
-	struct blk_mq_hw_ctx *hctx;
-	int i;
-
-	queue_for_each_hw_ctx(q, hctx, i) {
-		if (exit && hctx->sched_data)
-			exit(hctx);
-		kfree(hctx->sched_data);
-		hctx->sched_data = NULL;
-	}
-}
-EXPORT_SYMBOL_GPL(blk_mq_sched_free_hctx_data);
-
 void blk_mq_sched_assign_ioc(struct request *rq)
 {
	struct request_queue *q = rq->q;
@@ -359,104 +344,6 @@ void blk_mq_sched_dispatch_requests(struct blk_mq_hw_ctx *hctx)
	}
 }

-bool blk_mq_sched_try_merge(struct request_queue *q, struct bio *bio,
-		unsigned int nr_segs, struct request **merged_request)
-{
-	struct request *rq;
-
-	switch (elv_merge(q, &rq, bio)) {
-	case ELEVATOR_BACK_MERGE:
-		if (!blk_mq_sched_allow_merge(q, rq, bio))
-			return false;
-		if (!bio_attempt_back_merge(rq, bio, nr_segs))
-			return false;
-		*merged_request = attempt_back_merge(q, rq);
-		if (!*merged_request)
-			elv_merged_request(q, rq, ELEVATOR_BACK_MERGE);
-		return true;
-	case ELEVATOR_FRONT_MERGE:
-		if (!blk_mq_sched_allow_merge(q, rq, bio))
-			return false;
-		if (!bio_attempt_front_merge(rq, bio, nr_segs))
-			return false;
-		*merged_request = attempt_front_merge(q, rq);
-		if (!*merged_request)
-			elv_merged_request(q, rq, ELEVATOR_FRONT_MERGE);
-		return true;
-	case ELEVATOR_DISCARD_MERGE:
-		return bio_attempt_discard_merge(q, rq, bio);
-	default:
-		return false;
-	}
-}
-EXPORT_SYMBOL_GPL(blk_mq_sched_try_merge);
-
-/*
- * Iterate list of requests and see if we can merge this bio with any
- * of them.
- */
-bool blk_mq_bio_list_merge(struct request_queue *q, struct list_head *list,
-			   struct bio *bio, unsigned int nr_segs)
-{
-	struct request *rq;
-	int checked = 8;
-
-	list_for_each_entry_reverse(rq, list, queuelist) {
-		bool merged = false;
-
-		if (!checked--)
-			break;
-
-		if (!blk_rq_merge_ok(rq, bio))
-			continue;
-
-		switch (blk_try_merge(rq, bio)) {
-		case ELEVATOR_BACK_MERGE:
-			if (blk_mq_sched_allow_merge(q, rq, bio))
-				merged = bio_attempt_back_merge(rq, bio,
-						nr_segs);
-			break;
-		case ELEVATOR_FRONT_MERGE:
-			if (blk_mq_sched_allow_merge(q, rq, bio))
-				merged = bio_attempt_front_merge(rq, bio,
-						nr_segs);
-			break;
-		case ELEVATOR_DISCARD_MERGE:
-			merged = bio_attempt_discard_merge(q, rq, bio);
-			break;
-		default:
-			continue;
-		}
-
-		return merged;
-	}
-
-	return false;
-}
-EXPORT_SYMBOL_GPL(blk_mq_bio_list_merge);
-
-/*
- * Reverse check our software queue for entries that we could potentially
- * merge with. Currently includes a hand-wavy stop count of 8, to not spend
- * too much time checking for merges.
- */
-static bool blk_mq_attempt_merge(struct request_queue *q,
-				 struct blk_mq_hw_ctx *hctx,
-				 struct blk_mq_ctx *ctx, struct bio *bio,
-				 unsigned int nr_segs)
-{
-	enum hctx_type type = hctx->type;
-
-	lockdep_assert_held(&ctx->lock);
-
-	if (blk_mq_bio_list_merge(q, &ctx->rq_lists[type], bio, nr_segs)) {
-		ctx->rq_merged++;
-		return true;
-	}
-
-	return false;
-}
-
 bool __blk_mq_sched_bio_merge(struct request_queue *q, struct bio *bio,
			      unsigned int nr_segs)
 {
@@ -470,14 +357,24 @@ bool __blk_mq_sched_bio_merge(struct request_queue *q, struct bio *bio,
		return e->type->ops.bio_merge(hctx, bio, nr_segs);

	type = hctx->type;
-	if ((hctx->flags & BLK_MQ_F_SHOULD_MERGE) &&
-			!list_empty_careful(&ctx->rq_lists[type])) {
-		/* default per sw-queue merge */
-		spin_lock(&ctx->lock);
-		ret = blk_mq_attempt_merge(q, hctx, ctx, bio, nr_segs);
-		spin_unlock(&ctx->lock);
+	if (!(hctx->flags & BLK_MQ_F_SHOULD_MERGE) ||
+	    list_empty_careful(&ctx->rq_lists[type]))
+		return false;
+
+	/* default per sw-queue merge */
+	spin_lock(&ctx->lock);
+	/*
+	 * Reverse check our software queue for entries that we could
+	 * potentially merge with. Currently includes a hand-wavy stop
+	 * count of 8, to not spend too much time checking for merges.
+	 */
+	if (blk_bio_list_merge(q, &ctx->rq_lists[type], bio, nr_segs)) {
+		ctx->rq_merged++;
+		ret = true;
	}

+	spin_unlock(&ctx->lock);
+
	return ret;
 }
@@ -525,13 +422,7 @@ void blk_mq_sched_insert_request(struct request *rq, bool at_head,
	struct blk_mq_ctx *ctx = rq->mq_ctx;
	struct blk_mq_hw_ctx *hctx = rq->mq_hctx;

-	/* flush rq in flush machinery need to be dispatched directly */
-	if (!(rq->rq_flags & RQF_FLUSH_SEQ) && op_is_flush(rq->cmd_flags)) {
-		blk_insert_flush(rq);
-		goto run;
-	}
-
-	WARN_ON(e && (rq->tag != -1));
+	WARN_ON(e && (rq->tag != BLK_MQ_NO_TAG));

	if (blk_mq_sched_bypass_insert(hctx, !!e, rq)) {
		/*
@@ -616,9 +507,11 @@ static void blk_mq_sched_free_tags(struct blk_mq_tag_set *set,
				   struct blk_mq_hw_ctx *hctx,
				   unsigned int hctx_idx)
 {
+	unsigned int flags = set->flags & ~BLK_MQ_F_TAG_HCTX_SHARED;
+
	if (hctx->sched_tags) {
		blk_mq_free_rqs(set, hctx->sched_tags, hctx_idx);
-		blk_mq_free_rq_map(hctx->sched_tags);
+		blk_mq_free_rq_map(hctx->sched_tags, flags);
		hctx->sched_tags = NULL;
	}
 }
@@ -628,10 +521,12 @@ static int blk_mq_sched_alloc_tags(struct request_queue *q,
				   unsigned int hctx_idx)
 {
	struct blk_mq_tag_set *set = q->tag_set;
+	/* Clear HCTX_SHARED so tags are init'ed */
+	unsigned int flags = set->flags & ~BLK_MQ_F_TAG_HCTX_SHARED;
	int ret;

	hctx->sched_tags = blk_mq_alloc_rq_map(set, hctx_idx, q->nr_requests,
-					       set->reserved_tags);
+					       set->reserved_tags, flags);
	if (!hctx->sched_tags)
		return -ENOMEM;
@@ -649,8 +544,11 @@ static void blk_mq_sched_tags_teardown(struct request_queue *q)
	int i;

	queue_for_each_hw_ctx(q, hctx, i) {
+		/* Clear HCTX_SHARED so tags are freed */
+		unsigned int flags = hctx->flags & ~BLK_MQ_F_TAG_HCTX_SHARED;
+
		if (hctx->sched_tags) {
-			blk_mq_free_rq_map(hctx->sched_tags);
+			blk_mq_free_rq_map(hctx->sched_tags, flags);
			hctx->sched_tags = NULL;
		}
	}

block/blk-mq-sched.h  View File

@ -5,9 +5,6 @@
#include "blk-mq.h" #include "blk-mq.h"
#include "blk-mq-tag.h" #include "blk-mq-tag.h"
void blk_mq_sched_free_hctx_data(struct request_queue *q,
void (*exit)(struct blk_mq_hw_ctx *));
void blk_mq_sched_assign_ioc(struct request *rq); void blk_mq_sched_assign_ioc(struct request *rq);
void blk_mq_sched_request_inserted(struct request *rq); void blk_mq_sched_request_inserted(struct request *rq);

block/blk-mq-sysfs.c  View File

@ -36,8 +36,6 @@ static void blk_mq_hw_sysfs_release(struct kobject *kobj)
struct blk_mq_hw_ctx *hctx = container_of(kobj, struct blk_mq_hw_ctx, struct blk_mq_hw_ctx *hctx = container_of(kobj, struct blk_mq_hw_ctx,
kobj); kobj);
cancel_delayed_work_sync(&hctx->run_work);
if (hctx->flags & BLK_MQ_F_BLOCKING) if (hctx->flags & BLK_MQ_F_BLOCKING)
cleanup_srcu_struct(hctx->srcu); cleanup_srcu_struct(hctx->srcu);
blk_free_flush_queue(hctx->fq); blk_free_flush_queue(hctx->fq);

block/blk-mq-tag.c  View File

@ -23,9 +23,18 @@
*/ */
bool __blk_mq_tag_busy(struct blk_mq_hw_ctx *hctx) bool __blk_mq_tag_busy(struct blk_mq_hw_ctx *hctx)
{ {
if (blk_mq_is_sbitmap_shared(hctx->flags)) {
struct request_queue *q = hctx->queue;
struct blk_mq_tag_set *set = q->tag_set;
if (!test_bit(QUEUE_FLAG_HCTX_ACTIVE, &q->queue_flags) &&
!test_and_set_bit(QUEUE_FLAG_HCTX_ACTIVE, &q->queue_flags))
atomic_inc(&set->active_queues_shared_sbitmap);
} else {
if (!test_bit(BLK_MQ_S_TAG_ACTIVE, &hctx->state) && if (!test_bit(BLK_MQ_S_TAG_ACTIVE, &hctx->state) &&
!test_and_set_bit(BLK_MQ_S_TAG_ACTIVE, &hctx->state)) !test_and_set_bit(BLK_MQ_S_TAG_ACTIVE, &hctx->state))
atomic_inc(&hctx->tags->active_queues); atomic_inc(&hctx->tags->active_queues);
}
return true; return true;
} }
@ -35,9 +44,9 @@ bool __blk_mq_tag_busy(struct blk_mq_hw_ctx *hctx)
*/ */
void blk_mq_tag_wakeup_all(struct blk_mq_tags *tags, bool include_reserve) void blk_mq_tag_wakeup_all(struct blk_mq_tags *tags, bool include_reserve)
{ {
sbitmap_queue_wake_all(&tags->bitmap_tags); sbitmap_queue_wake_all(tags->bitmap_tags);
if (include_reserve) if (include_reserve)
sbitmap_queue_wake_all(&tags->breserved_tags); sbitmap_queue_wake_all(tags->breserved_tags);
} }
/* /*
@ -47,11 +56,19 @@ void blk_mq_tag_wakeup_all(struct blk_mq_tags *tags, bool include_reserve)
void __blk_mq_tag_idle(struct blk_mq_hw_ctx *hctx) void __blk_mq_tag_idle(struct blk_mq_hw_ctx *hctx)
{ {
struct blk_mq_tags *tags = hctx->tags; struct blk_mq_tags *tags = hctx->tags;
struct request_queue *q = hctx->queue;
struct blk_mq_tag_set *set = q->tag_set;
if (blk_mq_is_sbitmap_shared(hctx->flags)) {
if (!test_and_clear_bit(QUEUE_FLAG_HCTX_ACTIVE,
&q->queue_flags))
return;
atomic_dec(&set->active_queues_shared_sbitmap);
} else {
if (!test_and_clear_bit(BLK_MQ_S_TAG_ACTIVE, &hctx->state)) if (!test_and_clear_bit(BLK_MQ_S_TAG_ACTIVE, &hctx->state))
return; return;
atomic_dec(&tags->active_queues); atomic_dec(&tags->active_queues);
}
blk_mq_tag_wakeup_all(tags, false); blk_mq_tag_wakeup_all(tags, false);
} }
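
The busy/idle accounting above, in both the shared-sbitmap and per-hctx branches, only falls through to the atomic test_and_set/test_and_clear when a plain test says the bit actually needs changing, keeping the hot path free of read-modify-write traffic once the state is settled. A standalone sketch of that idiom with C11 atomics (not the kernel bitops API):

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

static atomic_bool flag;
static atomic_int  active_queues;

/* Mark this queue active exactly once; cheap when it is already marked. */
static void mark_active(void)
{
	/* Plain load first: skip the read-modify-write in the common case. */
	if (!atomic_load(&flag) &&
	    !atomic_exchange(&flag, true))
		atomic_fetch_add(&active_queues, 1);
}

int main(void)
{
	mark_active();
	mark_active();                               /* second call is a no-op */
	printf("%d\n", atomic_load(&active_queues)); /* 1 */
	return 0;
}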
@ -59,7 +76,8 @@ void __blk_mq_tag_idle(struct blk_mq_hw_ctx *hctx)
static int __blk_mq_get_tag(struct blk_mq_alloc_data *data, static int __blk_mq_get_tag(struct blk_mq_alloc_data *data,
struct sbitmap_queue *bt) struct sbitmap_queue *bt)
{ {
if (!data->q->elevator && !hctx_may_queue(data->hctx, bt)) if (!data->q->elevator && !(data->flags & BLK_MQ_REQ_RESERVED) &&
!hctx_may_queue(data->hctx, bt))
return BLK_MQ_NO_TAG; return BLK_MQ_NO_TAG;
if (data->shallow_depth) if (data->shallow_depth)
@ -82,10 +100,10 @@ unsigned int blk_mq_get_tag(struct blk_mq_alloc_data *data)
WARN_ON_ONCE(1); WARN_ON_ONCE(1);
return BLK_MQ_NO_TAG; return BLK_MQ_NO_TAG;
} }
bt = &tags->breserved_tags; bt = tags->breserved_tags;
tag_offset = 0; tag_offset = 0;
} else { } else {
bt = &tags->bitmap_tags; bt = tags->bitmap_tags;
tag_offset = tags->nr_reserved_tags; tag_offset = tags->nr_reserved_tags;
} }
@ -131,9 +149,9 @@ unsigned int blk_mq_get_tag(struct blk_mq_alloc_data *data)
data->ctx); data->ctx);
tags = blk_mq_tags_from_data(data); tags = blk_mq_tags_from_data(data);
if (data->flags & BLK_MQ_REQ_RESERVED) if (data->flags & BLK_MQ_REQ_RESERVED)
bt = &tags->breserved_tags; bt = tags->breserved_tags;
else else
bt = &tags->bitmap_tags; bt = tags->bitmap_tags;
/* /*
* If destination hw queue is changed, fake wake up on * If destination hw queue is changed, fake wake up on
@ -167,10 +185,10 @@ void blk_mq_put_tag(struct blk_mq_tags *tags, struct blk_mq_ctx *ctx,
const int real_tag = tag - tags->nr_reserved_tags; const int real_tag = tag - tags->nr_reserved_tags;
BUG_ON(real_tag >= tags->nr_tags); BUG_ON(real_tag >= tags->nr_tags);
sbitmap_queue_clear(&tags->bitmap_tags, real_tag, ctx->cpu); sbitmap_queue_clear(tags->bitmap_tags, real_tag, ctx->cpu);
} else { } else {
BUG_ON(tag >= tags->nr_reserved_tags); BUG_ON(tag >= tags->nr_reserved_tags);
sbitmap_queue_clear(&tags->breserved_tags, tag, ctx->cpu); sbitmap_queue_clear(tags->breserved_tags, tag, ctx->cpu);
} }
} }
@ -197,7 +215,7 @@ static bool bt_iter(struct sbitmap *bitmap, unsigned int bitnr, void *data)
* We can hit rq == NULL here, because the tagging functions * We can hit rq == NULL here, because the tagging functions
* test and set the bit before assigning ->rqs[]. * test and set the bit before assigning ->rqs[].
*/ */
if (rq && rq->q == hctx->queue) if (rq && rq->q == hctx->queue && rq->mq_hctx == hctx)
return iter_data->fn(hctx, rq, iter_data->data, reserved); return iter_data->fn(hctx, rq, iter_data->data, reserved);
return true; return true;
} }
@ -298,9 +316,9 @@ static void __blk_mq_all_tag_iter(struct blk_mq_tags *tags,
WARN_ON_ONCE(flags & BT_TAG_ITER_RESERVED); WARN_ON_ONCE(flags & BT_TAG_ITER_RESERVED);
if (tags->nr_reserved_tags) if (tags->nr_reserved_tags)
bt_tags_for_each(tags, &tags->breserved_tags, fn, priv, bt_tags_for_each(tags, tags->breserved_tags, fn, priv,
flags | BT_TAG_ITER_RESERVED); flags | BT_TAG_ITER_RESERVED);
bt_tags_for_each(tags, &tags->bitmap_tags, fn, priv, flags); bt_tags_for_each(tags, tags->bitmap_tags, fn, priv, flags);
} }
/** /**
@ -398,9 +416,7 @@ void blk_mq_queue_tag_busy_iter(struct request_queue *q, busy_iter_fn *fn,
/* /*
* __blk_mq_update_nr_hw_queues() updates nr_hw_queues and queue_hw_ctx * __blk_mq_update_nr_hw_queues() updates nr_hw_queues and queue_hw_ctx
* while the queue is frozen. So we can use q_usage_counter to avoid * while the queue is frozen. So we can use q_usage_counter to avoid
* racing with it. __blk_mq_update_nr_hw_queues() uses * racing with it.
* synchronize_rcu() to ensure this function left the critical section
* below.
*/ */
if (!percpu_ref_tryget(&q->q_usage_counter)) if (!percpu_ref_tryget(&q->q_usage_counter))
return; return;
@ -416,8 +432,8 @@ void blk_mq_queue_tag_busy_iter(struct request_queue *q, busy_iter_fn *fn,
continue; continue;
if (tags->nr_reserved_tags) if (tags->nr_reserved_tags)
bt_for_each(hctx, &tags->breserved_tags, fn, priv, true); bt_for_each(hctx, tags->breserved_tags, fn, priv, true);
bt_for_each(hctx, &tags->bitmap_tags, fn, priv, false); bt_for_each(hctx, tags->bitmap_tags, fn, priv, false);
} }
blk_queue_exit(q); blk_queue_exit(q);
} }
@ -429,30 +445,64 @@ static int bt_alloc(struct sbitmap_queue *bt, unsigned int depth,
node); node);
} }
static struct blk_mq_tags *blk_mq_init_bitmap_tags(struct blk_mq_tags *tags, static int blk_mq_init_bitmap_tags(struct blk_mq_tags *tags,
int node, int alloc_policy) int node, int alloc_policy)
{ {
unsigned int depth = tags->nr_tags - tags->nr_reserved_tags; unsigned int depth = tags->nr_tags - tags->nr_reserved_tags;
bool round_robin = alloc_policy == BLK_TAG_ALLOC_RR; bool round_robin = alloc_policy == BLK_TAG_ALLOC_RR;
if (bt_alloc(&tags->bitmap_tags, depth, round_robin, node)) if (bt_alloc(&tags->__bitmap_tags, depth, round_robin, node))
goto free_tags; return -ENOMEM;
if (bt_alloc(&tags->breserved_tags, tags->nr_reserved_tags, round_robin, if (bt_alloc(&tags->__breserved_tags, tags->nr_reserved_tags,
node)) round_robin, node))
goto free_bitmap_tags; goto free_bitmap_tags;
return tags; tags->bitmap_tags = &tags->__bitmap_tags;
tags->breserved_tags = &tags->__breserved_tags;
return 0;
free_bitmap_tags: free_bitmap_tags:
sbitmap_queue_free(&tags->bitmap_tags); sbitmap_queue_free(&tags->__bitmap_tags);
free_tags: return -ENOMEM;
kfree(tags); }
return NULL;
int blk_mq_init_shared_sbitmap(struct blk_mq_tag_set *set, unsigned int flags)
{
unsigned int depth = set->queue_depth - set->reserved_tags;
int alloc_policy = BLK_MQ_FLAG_TO_ALLOC_POLICY(set->flags);
bool round_robin = alloc_policy == BLK_TAG_ALLOC_RR;
int i, node = set->numa_node;
if (bt_alloc(&set->__bitmap_tags, depth, round_robin, node))
return -ENOMEM;
if (bt_alloc(&set->__breserved_tags, set->reserved_tags,
round_robin, node))
goto free_bitmap_tags;
for (i = 0; i < set->nr_hw_queues; i++) {
struct blk_mq_tags *tags = set->tags[i];
tags->bitmap_tags = &set->__bitmap_tags;
tags->breserved_tags = &set->__breserved_tags;
}
return 0;
free_bitmap_tags:
sbitmap_queue_free(&set->__bitmap_tags);
return -ENOMEM;
}
void blk_mq_exit_shared_sbitmap(struct blk_mq_tag_set *set)
{
sbitmap_queue_free(&set->__bitmap_tags);
sbitmap_queue_free(&set->__breserved_tags);
} }
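
With this change bitmap_tags/breserved_tags become pointers: a regular hctx points them at its own embedded __bitmap_tags/__breserved_tags, while BLK_MQ_F_TAG_HCTX_SHARED makes every hctx in the set point at the single pair embedded in the tag set. A simplified sketch of that ownership scheme (toy types standing in for sbitmap_queue and the tag structures, not the kernel layout):

#include <stdio.h>

struct bitmap { int depth; };    /* stand-in for struct sbitmap_queue */

struct tags {                    /* stand-in for struct blk_mq_tags */
	struct bitmap *bitmap_tags;  /* what the allocation paths dereference */
	struct bitmap own_bitmap;    /* backing storage (kernel: __bitmap_tags) */
};

struct tag_set {                 /* stand-in for struct blk_mq_tag_set */
	struct bitmap shared_bitmap; /* backing storage (kernel: __bitmap_tags) */
	struct tags *hctx_tags[2];
};

static void init_private(struct tags *t, int depth)
{
	t->own_bitmap.depth = depth;
	t->bitmap_tags = &t->own_bitmap;     /* each hctx gets its own pool */
}

static void init_shared(struct tag_set *set, int depth)
{
	set->shared_bitmap.depth = depth;
	for (int i = 0; i < 2; i++)          /* every hctx aliases the set's pool */
		set->hctx_tags[i]->bitmap_tags = &set->shared_bitmap;
}

int main(void)
{
	struct tags a, b;
	struct tag_set set = { .hctx_tags = { &a, &b } };

	init_shared(&set, 64);
	printf("%d %d\n", a.bitmap_tags->depth, b.bitmap_tags->depth); /* 64 64 */

	init_private(&a, 32);
	printf("%d\n", a.bitmap_tags->depth);                          /* 32 */
	return 0;
}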
struct blk_mq_tags *blk_mq_init_tags(unsigned int total_tags, struct blk_mq_tags *blk_mq_init_tags(unsigned int total_tags,
unsigned int reserved_tags, unsigned int reserved_tags,
int node, int alloc_policy) int node, unsigned int flags)
{ {
int alloc_policy = BLK_MQ_FLAG_TO_ALLOC_POLICY(flags);
struct blk_mq_tags *tags; struct blk_mq_tags *tags;
if (total_tags > BLK_MQ_TAG_MAX) { if (total_tags > BLK_MQ_TAG_MAX) {
@ -467,13 +517,22 @@ struct blk_mq_tags *blk_mq_init_tags(unsigned int total_tags,
tags->nr_tags = total_tags; tags->nr_tags = total_tags;
tags->nr_reserved_tags = reserved_tags; tags->nr_reserved_tags = reserved_tags;
return blk_mq_init_bitmap_tags(tags, node, alloc_policy); if (flags & BLK_MQ_F_TAG_HCTX_SHARED)
return tags;
if (blk_mq_init_bitmap_tags(tags, node, alloc_policy) < 0) {
kfree(tags);
return NULL;
}
return tags;
} }
void blk_mq_free_tags(struct blk_mq_tags *tags) void blk_mq_free_tags(struct blk_mq_tags *tags, unsigned int flags)
{ {
sbitmap_queue_free(&tags->bitmap_tags); if (!(flags & BLK_MQ_F_TAG_HCTX_SHARED)) {
sbitmap_queue_free(&tags->breserved_tags); sbitmap_queue_free(tags->bitmap_tags);
sbitmap_queue_free(tags->breserved_tags);
}
kfree(tags); kfree(tags);
} }
@ -492,6 +551,8 @@ int blk_mq_tag_update_depth(struct blk_mq_hw_ctx *hctx,
*/ */
if (tdepth > tags->nr_tags) { if (tdepth > tags->nr_tags) {
struct blk_mq_tag_set *set = hctx->queue->tag_set; struct blk_mq_tag_set *set = hctx->queue->tag_set;
/* Only sched tags can grow, so clear HCTX_SHARED flag */
unsigned int flags = set->flags & ~BLK_MQ_F_TAG_HCTX_SHARED;
struct blk_mq_tags *new; struct blk_mq_tags *new;
bool ret; bool ret;
@ -506,30 +567,35 @@ int blk_mq_tag_update_depth(struct blk_mq_hw_ctx *hctx,
return -EINVAL; return -EINVAL;
new = blk_mq_alloc_rq_map(set, hctx->queue_num, tdepth, new = blk_mq_alloc_rq_map(set, hctx->queue_num, tdepth,
tags->nr_reserved_tags); tags->nr_reserved_tags, flags);
if (!new) if (!new)
return -ENOMEM; return -ENOMEM;
ret = blk_mq_alloc_rqs(set, new, hctx->queue_num, tdepth); ret = blk_mq_alloc_rqs(set, new, hctx->queue_num, tdepth);
if (ret) { if (ret) {
blk_mq_free_rq_map(new); blk_mq_free_rq_map(new, flags);
return -ENOMEM; return -ENOMEM;
} }
blk_mq_free_rqs(set, *tagsptr, hctx->queue_num); blk_mq_free_rqs(set, *tagsptr, hctx->queue_num);
blk_mq_free_rq_map(*tagsptr); blk_mq_free_rq_map(*tagsptr, flags);
*tagsptr = new; *tagsptr = new;
} else { } else {
/* /*
* Don't need (or can't) update reserved tags here, they * Don't need (or can't) update reserved tags here, they
* remain static and should never need resizing. * remain static and should never need resizing.
*/ */
sbitmap_queue_resize(&tags->bitmap_tags, sbitmap_queue_resize(tags->bitmap_tags,
tdepth - tags->nr_reserved_tags); tdepth - tags->nr_reserved_tags);
} }
return 0; return 0;
} }
void blk_mq_tag_resize_shared_sbitmap(struct blk_mq_tag_set *set, unsigned int size)
{
sbitmap_queue_resize(&set->__bitmap_tags, size - set->reserved_tags);
}
/** /**
* blk_mq_unique_tag() - return a tag that is unique queue-wide * blk_mq_unique_tag() - return a tag that is unique queue-wide
* @rq: request for which to compute a unique tag * @rq: request for which to compute a unique tag

block/blk-mq-tag.h  View File

@ -2,8 +2,6 @@
#ifndef INT_BLK_MQ_TAG_H #ifndef INT_BLK_MQ_TAG_H
#define INT_BLK_MQ_TAG_H #define INT_BLK_MQ_TAG_H
#include "blk-mq.h"
/* /*
* Tag address space map. * Tag address space map.
*/ */
@ -13,17 +11,25 @@ struct blk_mq_tags {
atomic_t active_queues; atomic_t active_queues;
struct sbitmap_queue bitmap_tags; struct sbitmap_queue *bitmap_tags;
struct sbitmap_queue breserved_tags; struct sbitmap_queue *breserved_tags;
struct sbitmap_queue __bitmap_tags;
struct sbitmap_queue __breserved_tags;
struct request **rqs; struct request **rqs;
struct request **static_rqs; struct request **static_rqs;
struct list_head page_list; struct list_head page_list;
}; };
extern struct blk_mq_tags *blk_mq_init_tags(unsigned int nr_tags,
unsigned int reserved_tags,
int node, unsigned int flags);
extern void blk_mq_free_tags(struct blk_mq_tags *tags, unsigned int flags);
extern struct blk_mq_tags *blk_mq_init_tags(unsigned int nr_tags, unsigned int reserved_tags, int node, int alloc_policy); extern int blk_mq_init_shared_sbitmap(struct blk_mq_tag_set *set,
extern void blk_mq_free_tags(struct blk_mq_tags *tags); unsigned int flags);
extern void blk_mq_exit_shared_sbitmap(struct blk_mq_tag_set *set);
extern unsigned int blk_mq_get_tag(struct blk_mq_alloc_data *data); extern unsigned int blk_mq_get_tag(struct blk_mq_alloc_data *data);
extern void blk_mq_put_tag(struct blk_mq_tags *tags, struct blk_mq_ctx *ctx, extern void blk_mq_put_tag(struct blk_mq_tags *tags, struct blk_mq_ctx *ctx,
@ -31,6 +37,9 @@ extern void blk_mq_put_tag(struct blk_mq_tags *tags, struct blk_mq_ctx *ctx,
extern int blk_mq_tag_update_depth(struct blk_mq_hw_ctx *hctx, extern int blk_mq_tag_update_depth(struct blk_mq_hw_ctx *hctx,
struct blk_mq_tags **tags, struct blk_mq_tags **tags,
unsigned int depth, bool can_grow); unsigned int depth, bool can_grow);
extern void blk_mq_tag_resize_shared_sbitmap(struct blk_mq_tag_set *set,
unsigned int size);
extern void blk_mq_tag_wakeup_all(struct blk_mq_tags *tags, bool); extern void blk_mq_tag_wakeup_all(struct blk_mq_tags *tags, bool);
void blk_mq_queue_tag_busy_iter(struct request_queue *q, busy_iter_fn *fn, void blk_mq_queue_tag_busy_iter(struct request_queue *q, busy_iter_fn *fn,
void *priv); void *priv);
@ -56,7 +65,7 @@ extern void __blk_mq_tag_idle(struct blk_mq_hw_ctx *);
static inline bool blk_mq_tag_busy(struct blk_mq_hw_ctx *hctx) static inline bool blk_mq_tag_busy(struct blk_mq_hw_ctx *hctx)
{ {
if (!(hctx->flags & BLK_MQ_F_TAG_SHARED)) if (!(hctx->flags & BLK_MQ_F_TAG_QUEUE_SHARED))
return false; return false;
return __blk_mq_tag_busy(hctx); return __blk_mq_tag_busy(hctx);
@ -64,43 +73,12 @@ static inline bool blk_mq_tag_busy(struct blk_mq_hw_ctx *hctx)
static inline void blk_mq_tag_idle(struct blk_mq_hw_ctx *hctx) static inline void blk_mq_tag_idle(struct blk_mq_hw_ctx *hctx)
{ {
if (!(hctx->flags & BLK_MQ_F_TAG_SHARED)) if (!(hctx->flags & BLK_MQ_F_TAG_QUEUE_SHARED))
return; return;
__blk_mq_tag_idle(hctx); __blk_mq_tag_idle(hctx);
} }
/*
* For shared tag users, we track the number of currently active users
* and attempt to provide a fair share of the tag depth for each of them.
*/
static inline bool hctx_may_queue(struct blk_mq_hw_ctx *hctx,
struct sbitmap_queue *bt)
{
unsigned int depth, users;
if (!hctx || !(hctx->flags & BLK_MQ_F_TAG_SHARED))
return true;
if (!test_bit(BLK_MQ_S_TAG_ACTIVE, &hctx->state))
return true;
/*
* Don't try dividing an ant
*/
if (bt->sb.depth == 1)
return true;
users = atomic_read(&hctx->tags->active_queues);
if (!users)
return true;
/*
* Allow at least some tags
*/
depth = max((bt->sb.depth + users - 1) / users, 4U);
return atomic_read(&hctx->nr_active) < depth;
}
static inline bool blk_mq_tag_is_reserved(struct blk_mq_tags *tags, static inline bool blk_mq_tag_is_reserved(struct blk_mq_tags *tags,
unsigned int tag) unsigned int tag)
{ {

block/blk-mq.c  View File

@ -105,7 +105,7 @@ static bool blk_mq_check_inflight(struct blk_mq_hw_ctx *hctx,
{ {
struct mq_inflight *mi = priv; struct mq_inflight *mi = priv;
if (rq->part == mi->part) if (rq->part == mi->part && blk_mq_rq_state(rq) == MQ_RQ_IN_FLIGHT)
mi->inflight[rq_data_dir(rq)]++; mi->inflight[rq_data_dir(rq)]++;
return true; return true;
@ -519,7 +519,7 @@ void blk_mq_free_request(struct request *rq)
ctx->rq_completed[rq_is_sync(rq)]++; ctx->rq_completed[rq_is_sync(rq)]++;
if (rq->rq_flags & RQF_MQ_INFLIGHT) if (rq->rq_flags & RQF_MQ_INFLIGHT)
atomic_dec(&hctx->nr_active); __blk_mq_dec_active_requests(hctx);
if (unlikely(laptop_mode && !blk_rq_is_passthrough(rq))) if (unlikely(laptop_mode && !blk_rq_is_passthrough(rq)))
laptop_io_completion(q->backing_dev_info); laptop_io_completion(q->backing_dev_info);
@ -1096,19 +1096,20 @@ static inline unsigned int queued_to_index(unsigned int queued)
static bool __blk_mq_get_driver_tag(struct request *rq) static bool __blk_mq_get_driver_tag(struct request *rq)
{ {
struct sbitmap_queue *bt = &rq->mq_hctx->tags->bitmap_tags; struct sbitmap_queue *bt = rq->mq_hctx->tags->bitmap_tags;
unsigned int tag_offset = rq->mq_hctx->tags->nr_reserved_tags; unsigned int tag_offset = rq->mq_hctx->tags->nr_reserved_tags;
int tag; int tag;
blk_mq_tag_busy(rq->mq_hctx); blk_mq_tag_busy(rq->mq_hctx);
if (blk_mq_tag_is_reserved(rq->mq_hctx->sched_tags, rq->internal_tag)) { if (blk_mq_tag_is_reserved(rq->mq_hctx->sched_tags, rq->internal_tag)) {
bt = &rq->mq_hctx->tags->breserved_tags; bt = rq->mq_hctx->tags->breserved_tags;
tag_offset = 0; tag_offset = 0;
} } else {
if (!hctx_may_queue(rq->mq_hctx, bt)) if (!hctx_may_queue(rq->mq_hctx, bt))
return false; return false;
}
tag = __sbitmap_queue_get(bt); tag = __sbitmap_queue_get(bt);
if (tag == BLK_MQ_NO_TAG) if (tag == BLK_MQ_NO_TAG)
return false; return false;
@ -1124,10 +1125,10 @@ static bool blk_mq_get_driver_tag(struct request *rq)
if (rq->tag == BLK_MQ_NO_TAG && !__blk_mq_get_driver_tag(rq)) if (rq->tag == BLK_MQ_NO_TAG && !__blk_mq_get_driver_tag(rq))
return false; return false;
if ((hctx->flags & BLK_MQ_F_TAG_SHARED) && if ((hctx->flags & BLK_MQ_F_TAG_QUEUE_SHARED) &&
!(rq->rq_flags & RQF_MQ_INFLIGHT)) { !(rq->rq_flags & RQF_MQ_INFLIGHT)) {
rq->rq_flags |= RQF_MQ_INFLIGHT; rq->rq_flags |= RQF_MQ_INFLIGHT;
atomic_inc(&hctx->nr_active); __blk_mq_inc_active_requests(hctx);
} }
hctx->tags->rqs[rq->tag] = rq; hctx->tags->rqs[rq->tag] = rq;
return true; return true;
@ -1145,7 +1146,7 @@ static int blk_mq_dispatch_wake(wait_queue_entry_t *wait, unsigned mode,
struct sbitmap_queue *sbq; struct sbitmap_queue *sbq;
list_del_init(&wait->entry); list_del_init(&wait->entry);
sbq = &hctx->tags->bitmap_tags; sbq = hctx->tags->bitmap_tags;
atomic_dec(&sbq->ws_active); atomic_dec(&sbq->ws_active);
} }
spin_unlock(&hctx->dispatch_wait_lock); spin_unlock(&hctx->dispatch_wait_lock);
@ -1163,12 +1164,12 @@ static int blk_mq_dispatch_wake(wait_queue_entry_t *wait, unsigned mode,
static bool blk_mq_mark_tag_wait(struct blk_mq_hw_ctx *hctx, static bool blk_mq_mark_tag_wait(struct blk_mq_hw_ctx *hctx,
struct request *rq) struct request *rq)
{ {
struct sbitmap_queue *sbq = &hctx->tags->bitmap_tags; struct sbitmap_queue *sbq = hctx->tags->bitmap_tags;
struct wait_queue_head *wq; struct wait_queue_head *wq;
wait_queue_entry_t *wait; wait_queue_entry_t *wait;
bool ret; bool ret;
if (!(hctx->flags & BLK_MQ_F_TAG_SHARED)) { if (!(hctx->flags & BLK_MQ_F_TAG_QUEUE_SHARED)) {
blk_mq_sched_mark_restart_hctx(hctx); blk_mq_sched_mark_restart_hctx(hctx);
/* /*
@ -1425,7 +1426,7 @@ bool blk_mq_dispatch_rq_list(struct blk_mq_hw_ctx *hctx, struct list_head *list,
bool needs_restart; bool needs_restart;
/* For non-shared tags, the RESTART check will suffice */ /* For non-shared tags, the RESTART check will suffice */
bool no_tag = prep == PREP_DISPATCH_NO_TAG && bool no_tag = prep == PREP_DISPATCH_NO_TAG &&
(hctx->flags & BLK_MQ_F_TAG_SHARED); (hctx->flags & BLK_MQ_F_TAG_QUEUE_SHARED);
bool no_budget_avail = prep == PREP_DISPATCH_NO_BUDGET; bool no_budget_avail = prep == PREP_DISPATCH_NO_BUDGET;
blk_mq_release_budgets(q, nr_budgets); blk_mq_release_budgets(q, nr_budgets);
@ -1803,7 +1804,7 @@ static void blk_mq_run_work_fn(struct work_struct *work)
/* /*
* If we are stopped, don't run the queue. * If we are stopped, don't run the queue.
*/ */
if (test_bit(BLK_MQ_S_STOPPED, &hctx->state)) if (blk_mq_hctx_stopped(hctx))
return; return;
__blk_mq_run_hw_queue(hctx); __blk_mq_run_hw_queue(hctx);
@ -1936,13 +1937,18 @@ void blk_mq_flush_plug_list(struct blk_plug *plug, bool from_schedule)
static void blk_mq_bio_to_request(struct request *rq, struct bio *bio, static void blk_mq_bio_to_request(struct request *rq, struct bio *bio,
unsigned int nr_segs) unsigned int nr_segs)
{ {
int err;
if (bio->bi_opf & REQ_RAHEAD) if (bio->bi_opf & REQ_RAHEAD)
rq->cmd_flags |= REQ_FAILFAST_MASK; rq->cmd_flags |= REQ_FAILFAST_MASK;
rq->__sector = bio->bi_iter.bi_sector; rq->__sector = bio->bi_iter.bi_sector;
rq->write_hint = bio->bi_write_hint; rq->write_hint = bio->bi_write_hint;
blk_rq_bio_prep(rq, bio, nr_segs); blk_rq_bio_prep(rq, bio, nr_segs);
blk_crypto_rq_bio_prep(rq, bio, GFP_NOIO);
/* This can't fail, since GFP_NOIO includes __GFP_DIRECT_RECLAIM. */
err = blk_crypto_rq_bio_prep(rq, bio, GFP_NOIO);
WARN_ON_ONCE(err);
blk_account_io_start(rq); blk_account_io_start(rq);
} }
@ -2296,20 +2302,21 @@ void blk_mq_free_rqs(struct blk_mq_tag_set *set, struct blk_mq_tags *tags,
} }
} }
void blk_mq_free_rq_map(struct blk_mq_tags *tags) void blk_mq_free_rq_map(struct blk_mq_tags *tags, unsigned int flags)
{ {
kfree(tags->rqs); kfree(tags->rqs);
tags->rqs = NULL; tags->rqs = NULL;
kfree(tags->static_rqs); kfree(tags->static_rqs);
tags->static_rqs = NULL; tags->static_rqs = NULL;
blk_mq_free_tags(tags); blk_mq_free_tags(tags, flags);
} }
struct blk_mq_tags *blk_mq_alloc_rq_map(struct blk_mq_tag_set *set, struct blk_mq_tags *blk_mq_alloc_rq_map(struct blk_mq_tag_set *set,
unsigned int hctx_idx, unsigned int hctx_idx,
unsigned int nr_tags, unsigned int nr_tags,
unsigned int reserved_tags) unsigned int reserved_tags,
unsigned int flags)
{ {
struct blk_mq_tags *tags; struct blk_mq_tags *tags;
int node; int node;
@ -2318,8 +2325,7 @@ struct blk_mq_tags *blk_mq_alloc_rq_map(struct blk_mq_tag_set *set,
if (node == NUMA_NO_NODE) if (node == NUMA_NO_NODE)
node = set->numa_node; node = set->numa_node;
tags = blk_mq_init_tags(nr_tags, reserved_tags, node, tags = blk_mq_init_tags(nr_tags, reserved_tags, node, flags);
BLK_MQ_FLAG_TO_ALLOC_POLICY(set->flags));
if (!tags) if (!tags)
return NULL; return NULL;
@ -2327,7 +2333,7 @@ struct blk_mq_tags *blk_mq_alloc_rq_map(struct blk_mq_tag_set *set,
GFP_NOIO | __GFP_NOWARN | __GFP_NORETRY, GFP_NOIO | __GFP_NOWARN | __GFP_NORETRY,
node); node);
if (!tags->rqs) { if (!tags->rqs) {
blk_mq_free_tags(tags); blk_mq_free_tags(tags, flags);
return NULL; return NULL;
} }
@ -2336,7 +2342,7 @@ struct blk_mq_tags *blk_mq_alloc_rq_map(struct blk_mq_tag_set *set,
node); node);
if (!tags->static_rqs) { if (!tags->static_rqs) {
kfree(tags->rqs); kfree(tags->rqs);
blk_mq_free_tags(tags); blk_mq_free_tags(tags, flags);
return NULL; return NULL;
} }
@ -2660,6 +2666,7 @@ blk_mq_alloc_hctx(struct request_queue *q, struct blk_mq_tag_set *set,
goto free_hctx; goto free_hctx;
atomic_set(&hctx->nr_active, 0); atomic_set(&hctx->nr_active, 0);
atomic_set(&hctx->elevator_queued, 0);
if (node == NUMA_NO_NODE) if (node == NUMA_NO_NODE)
node = set->numa_node; node = set->numa_node;
hctx->numa_node = node; hctx->numa_node = node;
@ -2668,7 +2675,7 @@ blk_mq_alloc_hctx(struct request_queue *q, struct blk_mq_tag_set *set,
spin_lock_init(&hctx->lock); spin_lock_init(&hctx->lock);
INIT_LIST_HEAD(&hctx->dispatch); INIT_LIST_HEAD(&hctx->dispatch);
hctx->queue = q; hctx->queue = q;
hctx->flags = set->flags & ~BLK_MQ_F_TAG_SHARED; hctx->flags = set->flags & ~BLK_MQ_F_TAG_QUEUE_SHARED;
INIT_LIST_HEAD(&hctx->hctx_list); INIT_LIST_HEAD(&hctx->hctx_list);
@ -2745,10 +2752,11 @@ static void blk_mq_init_cpu_queues(struct request_queue *q,
static bool __blk_mq_alloc_map_and_request(struct blk_mq_tag_set *set, static bool __blk_mq_alloc_map_and_request(struct blk_mq_tag_set *set,
int hctx_idx) int hctx_idx)
{ {
unsigned int flags = set->flags;
int ret = 0; int ret = 0;
set->tags[hctx_idx] = blk_mq_alloc_rq_map(set, hctx_idx, set->tags[hctx_idx] = blk_mq_alloc_rq_map(set, hctx_idx,
set->queue_depth, set->reserved_tags); set->queue_depth, set->reserved_tags, flags);
if (!set->tags[hctx_idx]) if (!set->tags[hctx_idx])
return false; return false;
@ -2757,7 +2765,7 @@ static bool __blk_mq_alloc_map_and_request(struct blk_mq_tag_set *set,
if (!ret) if (!ret)
return true; return true;
blk_mq_free_rq_map(set->tags[hctx_idx]); blk_mq_free_rq_map(set->tags[hctx_idx], flags);
set->tags[hctx_idx] = NULL; set->tags[hctx_idx] = NULL;
return false; return false;
} }
@ -2765,9 +2773,11 @@ static bool __blk_mq_alloc_map_and_request(struct blk_mq_tag_set *set,
static void blk_mq_free_map_and_requests(struct blk_mq_tag_set *set, static void blk_mq_free_map_and_requests(struct blk_mq_tag_set *set,
unsigned int hctx_idx) unsigned int hctx_idx)
{ {
unsigned int flags = set->flags;
if (set->tags && set->tags[hctx_idx]) { if (set->tags && set->tags[hctx_idx]) {
blk_mq_free_rqs(set, set->tags[hctx_idx], hctx_idx); blk_mq_free_rqs(set, set->tags[hctx_idx], hctx_idx);
blk_mq_free_rq_map(set->tags[hctx_idx]); blk_mq_free_rq_map(set->tags[hctx_idx], flags);
set->tags[hctx_idx] = NULL; set->tags[hctx_idx] = NULL;
} }
} }
@ -2885,13 +2895,13 @@ static void queue_set_hctx_shared(struct request_queue *q, bool shared)
queue_for_each_hw_ctx(q, hctx, i) { queue_for_each_hw_ctx(q, hctx, i) {
if (shared) if (shared)
hctx->flags |= BLK_MQ_F_TAG_SHARED; hctx->flags |= BLK_MQ_F_TAG_QUEUE_SHARED;
else else
hctx->flags &= ~BLK_MQ_F_TAG_SHARED; hctx->flags &= ~BLK_MQ_F_TAG_QUEUE_SHARED;
} }
} }
static void blk_mq_update_tag_set_depth(struct blk_mq_tag_set *set, static void blk_mq_update_tag_set_shared(struct blk_mq_tag_set *set,
bool shared) bool shared)
{ {
struct request_queue *q; struct request_queue *q;
@ -2913,9 +2923,9 @@ static void blk_mq_del_queue_tag_set(struct request_queue *q)
list_del(&q->tag_set_list); list_del(&q->tag_set_list);
if (list_is_singular(&set->tag_list)) { if (list_is_singular(&set->tag_list)) {
/* just transitioned to unshared */ /* just transitioned to unshared */
set->flags &= ~BLK_MQ_F_TAG_SHARED; set->flags &= ~BLK_MQ_F_TAG_QUEUE_SHARED;
/* update existing queue */ /* update existing queue */
blk_mq_update_tag_set_depth(set, false); blk_mq_update_tag_set_shared(set, false);
} }
mutex_unlock(&set->tag_list_lock); mutex_unlock(&set->tag_list_lock);
INIT_LIST_HEAD(&q->tag_set_list); INIT_LIST_HEAD(&q->tag_set_list);
@ -2930,12 +2940,12 @@ static void blk_mq_add_queue_tag_set(struct blk_mq_tag_set *set,
* Check to see if we're transitioning to shared (from 1 to 2 queues). * Check to see if we're transitioning to shared (from 1 to 2 queues).
*/ */
if (!list_empty(&set->tag_list) && if (!list_empty(&set->tag_list) &&
!(set->flags & BLK_MQ_F_TAG_SHARED)) { !(set->flags & BLK_MQ_F_TAG_QUEUE_SHARED)) {
set->flags |= BLK_MQ_F_TAG_SHARED; set->flags |= BLK_MQ_F_TAG_QUEUE_SHARED;
/* update existing queue */ /* update existing queue */
blk_mq_update_tag_set_depth(set, true); blk_mq_update_tag_set_shared(set, true);
} }
if (set->flags & BLK_MQ_F_TAG_SHARED) if (set->flags & BLK_MQ_F_TAG_QUEUE_SHARED)
queue_set_hctx_shared(q, true); queue_set_hctx_shared(q, true);
list_add_tail(&q->tag_set_list, &set->tag_list); list_add_tail(&q->tag_set_list, &set->tag_list);
@ -3256,9 +3266,11 @@ static int __blk_mq_alloc_rq_maps(struct blk_mq_tag_set *set)
{ {
int i; int i;
for (i = 0; i < set->nr_hw_queues; i++) for (i = 0; i < set->nr_hw_queues; i++) {
if (!__blk_mq_alloc_map_and_request(set, i)) if (!__blk_mq_alloc_map_and_request(set, i))
goto out_unwind; goto out_unwind;
cond_resched();
}
return 0; return 0;
@ -3438,11 +3450,23 @@ int blk_mq_alloc_tag_set(struct blk_mq_tag_set *set)
if (ret) if (ret)
goto out_free_mq_map; goto out_free_mq_map;
if (blk_mq_is_sbitmap_shared(set->flags)) {
atomic_set(&set->active_queues_shared_sbitmap, 0);
if (blk_mq_init_shared_sbitmap(set, set->flags)) {
ret = -ENOMEM;
goto out_free_mq_rq_maps;
}
}
mutex_init(&set->tag_list_lock); mutex_init(&set->tag_list_lock);
INIT_LIST_HEAD(&set->tag_list); INIT_LIST_HEAD(&set->tag_list);
return 0; return 0;
out_free_mq_rq_maps:
for (i = 0; i < set->nr_hw_queues; i++)
blk_mq_free_map_and_requests(set, i);
out_free_mq_map: out_free_mq_map:
for (i = 0; i < set->nr_maps; i++) { for (i = 0; i < set->nr_maps; i++) {
kfree(set->map[i].mq_map); kfree(set->map[i].mq_map);
@ -3461,6 +3485,9 @@ void blk_mq_free_tag_set(struct blk_mq_tag_set *set)
for (i = 0; i < set->nr_hw_queues; i++) for (i = 0; i < set->nr_hw_queues; i++)
blk_mq_free_map_and_requests(set, i); blk_mq_free_map_and_requests(set, i);
if (blk_mq_is_sbitmap_shared(set->flags))
blk_mq_exit_shared_sbitmap(set);
for (j = 0; j < set->nr_maps; j++) { for (j = 0; j < set->nr_maps; j++) {
kfree(set->map[j].mq_map); kfree(set->map[j].mq_map);
set->map[j].mq_map = NULL; set->map[j].mq_map = NULL;
@ -3497,6 +3524,8 @@ int blk_mq_update_nr_requests(struct request_queue *q, unsigned int nr)
if (!hctx->sched_tags) { if (!hctx->sched_tags) {
ret = blk_mq_tag_update_depth(hctx, &hctx->tags, nr, ret = blk_mq_tag_update_depth(hctx, &hctx->tags, nr,
false); false);
if (!ret && blk_mq_is_sbitmap_shared(set->flags))
blk_mq_tag_resize_shared_sbitmap(set, nr);
} else { } else {
ret = blk_mq_tag_update_depth(hctx, &hctx->sched_tags, ret = blk_mq_tag_update_depth(hctx, &hctx->sched_tags,
nr, true); nr, true);

block/blk-mq.h  View File

@ -53,11 +53,12 @@ struct request *blk_mq_dequeue_from_ctx(struct blk_mq_hw_ctx *hctx,
*/ */
void blk_mq_free_rqs(struct blk_mq_tag_set *set, struct blk_mq_tags *tags, void blk_mq_free_rqs(struct blk_mq_tag_set *set, struct blk_mq_tags *tags,
unsigned int hctx_idx); unsigned int hctx_idx);
void blk_mq_free_rq_map(struct blk_mq_tags *tags); void blk_mq_free_rq_map(struct blk_mq_tags *tags, unsigned int flags);
struct blk_mq_tags *blk_mq_alloc_rq_map(struct blk_mq_tag_set *set, struct blk_mq_tags *blk_mq_alloc_rq_map(struct blk_mq_tag_set *set,
unsigned int hctx_idx, unsigned int hctx_idx,
unsigned int nr_tags, unsigned int nr_tags,
unsigned int reserved_tags); unsigned int reserved_tags,
unsigned int flags);
int blk_mq_alloc_rqs(struct blk_mq_tag_set *set, struct blk_mq_tags *tags, int blk_mq_alloc_rqs(struct blk_mq_tag_set *set, struct blk_mq_tags *tags,
unsigned int hctx_idx, unsigned int depth); unsigned int hctx_idx, unsigned int depth);
@ -158,6 +159,11 @@ struct blk_mq_alloc_data {
struct blk_mq_hw_ctx *hctx; struct blk_mq_hw_ctx *hctx;
}; };
static inline bool blk_mq_is_sbitmap_shared(unsigned int flags)
{
return flags & BLK_MQ_F_TAG_HCTX_SHARED;
}
static inline struct blk_mq_tags *blk_mq_tags_from_data(struct blk_mq_alloc_data *data) static inline struct blk_mq_tags *blk_mq_tags_from_data(struct blk_mq_alloc_data *data)
{ {
if (data->q->elevator) if (data->q->elevator)
@ -193,6 +199,28 @@ static inline bool blk_mq_get_dispatch_budget(struct request_queue *q)
return true; return true;
} }
static inline void __blk_mq_inc_active_requests(struct blk_mq_hw_ctx *hctx)
{
if (blk_mq_is_sbitmap_shared(hctx->flags))
atomic_inc(&hctx->queue->nr_active_requests_shared_sbitmap);
else
atomic_inc(&hctx->nr_active);
}
static inline void __blk_mq_dec_active_requests(struct blk_mq_hw_ctx *hctx)
{
if (blk_mq_is_sbitmap_shared(hctx->flags))
atomic_dec(&hctx->queue->nr_active_requests_shared_sbitmap);
else
atomic_dec(&hctx->nr_active);
}
static inline int __blk_mq_active_requests(struct blk_mq_hw_ctx *hctx)
{
if (blk_mq_is_sbitmap_shared(hctx->flags))
return atomic_read(&hctx->queue->nr_active_requests_shared_sbitmap);
return atomic_read(&hctx->nr_active);
}
static inline void __blk_mq_put_driver_tag(struct blk_mq_hw_ctx *hctx, static inline void __blk_mq_put_driver_tag(struct blk_mq_hw_ctx *hctx,
struct request *rq) struct request *rq)
{ {
@ -201,7 +229,7 @@ static inline void __blk_mq_put_driver_tag(struct blk_mq_hw_ctx *hctx,
if (rq->rq_flags & RQF_MQ_INFLIGHT) { if (rq->rq_flags & RQF_MQ_INFLIGHT) {
rq->rq_flags &= ~RQF_MQ_INFLIGHT; rq->rq_flags &= ~RQF_MQ_INFLIGHT;
atomic_dec(&hctx->nr_active); __blk_mq_dec_active_requests(hctx);
} }
} }
@ -253,4 +281,46 @@ static inline struct blk_plug *blk_mq_plug(struct request_queue *q,
return NULL; return NULL;
} }
/*
* For shared tag users, we track the number of currently active users
* and attempt to provide a fair share of the tag depth for each of them.
*/
static inline bool hctx_may_queue(struct blk_mq_hw_ctx *hctx,
struct sbitmap_queue *bt)
{
unsigned int depth, users;
if (!hctx || !(hctx->flags & BLK_MQ_F_TAG_QUEUE_SHARED))
return true;
/*
* Don't try dividing an ant
*/
if (bt->sb.depth == 1)
return true;
if (blk_mq_is_sbitmap_shared(hctx->flags)) {
struct request_queue *q = hctx->queue;
struct blk_mq_tag_set *set = q->tag_set;
if (!test_bit(BLK_MQ_S_TAG_ACTIVE, &q->queue_flags))
return true;
users = atomic_read(&set->active_queues_shared_sbitmap);
} else {
if (!test_bit(BLK_MQ_S_TAG_ACTIVE, &hctx->state))
return true;
users = atomic_read(&hctx->tags->active_queues);
}
if (!users)
return true;
/*
* Allow at least some tags
*/
depth = max((bt->sb.depth + users - 1) / users, 4U);
return __blk_mq_active_requests(hctx) < depth;
}
#endif #endif
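
The fair-share helper moved into blk-mq.h splits the bitmap depth across the active queues (queue-wide for a shared sbitmap, per-hctx otherwise) and never hands a queue fewer than four tags. A standalone rendering of that arithmetic:

#include <stdio.h>

/* depth = max(ceil(bt_depth / users), 4), as in hctx_may_queue() above. */
static unsigned int fair_share(unsigned int bt_depth, unsigned int users)
{
	unsigned int depth = (bt_depth + users - 1) / users;
	return depth > 4 ? depth : 4;
}

int main(void)
{
	printf("%u\n", fair_share(256, 3));  /* 86: ceil(256 / 3) */
	printf("%u\n", fair_share(32, 16));  /* 4: floor of four tags per queue */
	return 0;
}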

block/blk-settings.c  View File

@ -172,15 +172,13 @@ EXPORT_SYMBOL(blk_queue_max_hw_sectors);
* *
* Description: * Description:
* If a driver doesn't want IOs to cross a given chunk size, it can set * If a driver doesn't want IOs to cross a given chunk size, it can set
* this limit and prevent merging across chunks. Note that the chunk size * this limit and prevent merging across chunks. Note that the block layer
* must currently be a power-of-2 in sectors. Also note that the block * must accept a page worth of data at any offset. So if the crossing of
* layer must accept a page worth of data at any offset. So if the * chunks is a hard limitation in the driver, it must still be prepared
* crossing of chunks is a hard limitation in the driver, it must still be * to split single page bios.
* prepared to split single page bios.
**/ **/
void blk_queue_chunk_sectors(struct request_queue *q, unsigned int chunk_sectors) void blk_queue_chunk_sectors(struct request_queue *q, unsigned int chunk_sectors)
{ {
BUG_ON(!is_power_of_2(chunk_sectors));
q->limits.chunk_sectors = chunk_sectors; q->limits.chunk_sectors = chunk_sectors;
} }
EXPORT_SYMBOL(blk_queue_chunk_sectors); EXPORT_SYMBOL(blk_queue_chunk_sectors);
@ -374,6 +372,19 @@ void blk_queue_alignment_offset(struct request_queue *q, unsigned int offset)
} }
EXPORT_SYMBOL(blk_queue_alignment_offset); EXPORT_SYMBOL(blk_queue_alignment_offset);
void blk_queue_update_readahead(struct request_queue *q)
{
/*
* For read-ahead of large files to be effective, we need to read ahead
* at least twice the optimal I/O size.
*/
q->backing_dev_info->ra_pages =
max(queue_io_opt(q) * 2 / PAGE_SIZE, VM_READAHEAD_PAGES);
q->backing_dev_info->io_pages =
queue_max_sectors(q) >> (PAGE_SHIFT - 9);
}
EXPORT_SYMBOL_GPL(blk_queue_update_readahead);
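
blk_queue_update_readahead() recomputes the read-ahead window as twice the optimal I/O size, floored at the VM default. A standalone example of the sizing, assuming 4 KiB pages and the usual 128 KiB VM_READAHEAD_PAGES default:

#include <stdio.h>

#define PAGE_SIZE           4096u
#define VM_READAHEAD_PAGES  (128u * 1024 / PAGE_SIZE)   /* assumed: 32 pages */

static unsigned int ra_pages(unsigned int io_opt_bytes)
{
	unsigned int twice_opt = io_opt_bytes * 2 / PAGE_SIZE;
	return twice_opt > VM_READAHEAD_PAGES ? twice_opt : VM_READAHEAD_PAGES;
}

int main(void)
{
	/* Array advertising a 512 KiB optimal I/O size: 256 pages (1 MiB). */
	printf("%u\n", ra_pages(512 * 1024));
	/* No io_opt advertised: fall back to 32 pages (128 KiB). */
	printf("%u\n", ra_pages(0));
	return 0;
}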
/** /**
* blk_limits_io_min - set minimum request size for a device * blk_limits_io_min - set minimum request size for a device
* @limits: the queue limits * @limits: the queue limits
@ -452,6 +463,8 @@ EXPORT_SYMBOL(blk_limits_io_opt);
void blk_queue_io_opt(struct request_queue *q, unsigned int opt) void blk_queue_io_opt(struct request_queue *q, unsigned int opt)
{ {
blk_limits_io_opt(&q->limits, opt); blk_limits_io_opt(&q->limits, opt);
q->backing_dev_info->ra_pages =
max(queue_io_opt(q) * 2 / PAGE_SIZE, VM_READAHEAD_PAGES);
} }
EXPORT_SYMBOL(blk_queue_io_opt); EXPORT_SYMBOL(blk_queue_io_opt);
@ -534,6 +547,7 @@ int blk_stack_limits(struct queue_limits *t, struct queue_limits *b,
t->io_min = max(t->io_min, b->io_min); t->io_min = max(t->io_min, b->io_min);
t->io_opt = lcm_not_zero(t->io_opt, b->io_opt); t->io_opt = lcm_not_zero(t->io_opt, b->io_opt);
t->chunk_sectors = lcm_not_zero(t->chunk_sectors, b->chunk_sectors);
/* Physical block size a multiple of the logical block size? */ /* Physical block size a multiple of the logical block size? */
if (t->physical_block_size & (t->logical_block_size - 1)) { if (t->physical_block_size & (t->logical_block_size - 1)) {
@ -556,6 +570,13 @@ int blk_stack_limits(struct queue_limits *t, struct queue_limits *b,
ret = -1; ret = -1;
} }
/* chunk_sectors a multiple of the physical block size? */
if ((t->chunk_sectors << 9) & (t->physical_block_size - 1)) {
t->chunk_sectors = 0;
t->misaligned = 1;
ret = -1;
}
t->raid_partial_stripes_expensive = t->raid_partial_stripes_expensive =
max(t->raid_partial_stripes_expensive, max(t->raid_partial_stripes_expensive,
b->raid_partial_stripes_expensive); b->raid_partial_stripes_expensive);
@ -594,10 +615,6 @@ int blk_stack_limits(struct queue_limits *t, struct queue_limits *b,
t->discard_granularity; t->discard_granularity;
} }
if (b->chunk_sectors)
t->chunk_sectors = min_not_zero(t->chunk_sectors,
b->chunk_sectors);
t->zoned = max(t->zoned, b->zoned); t->zoned = max(t->zoned, b->zoned);
return ret; return ret;
} }
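
blk_stack_limits() now combines the chunk_sectors of stacked devices with lcm_not_zero() instead of taking the smaller value, and drops the limit entirely when the result is not a multiple of the physical block size (the power-of-two requirement went away with the BUG_ON above). A small standalone example under assumed device limits:

#include <stdio.h>

static unsigned int gcd(unsigned int a, unsigned int b)
{
	while (b) { unsigned int t = a % b; a = b; b = t; }
	return a;
}

static unsigned int lcm_not_zero(unsigned int a, unsigned int b)
{
	if (!a) return b;
	if (!b) return a;
	return a / gcd(a, b) * b;
}

int main(void)
{
	/* Two stacked devices advertising 256 KiB and 384 KiB chunks, in 512 B sectors. */
	unsigned int top = 512, bottom = 768;
	unsigned int chunk = lcm_not_zero(top, bottom);   /* 1536 sectors = 768 KiB */
	unsigned int phys_block = 4096;                   /* bytes */

	if ((chunk << 9) & (phys_block - 1))
		chunk = 0;                                /* misaligned: drop the limit */

	printf("%u\n", chunk);   /* 1536: 768 KiB is 4 KiB aligned, so it is kept */
	return 0;
}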
@ -629,8 +646,7 @@ void disk_stack_limits(struct gendisk *disk, struct block_device *bdev,
top, bottom); top, bottom);
} }
t->backing_dev_info->io_pages = blk_queue_update_readahead(disk->queue);
t->limits.max_sectors >> (PAGE_SHIFT - 9);
} }
EXPORT_SYMBOL(disk_stack_limits); EXPORT_SYMBOL(disk_stack_limits);

block/blk-sysfs.c  View File

@ -260,14 +260,14 @@ static ssize_t queue_max_hw_sectors_show(struct request_queue *q, char *page)
#define QUEUE_SYSFS_BIT_FNS(name, flag, neg) \ #define QUEUE_SYSFS_BIT_FNS(name, flag, neg) \
static ssize_t \ static ssize_t \
queue_show_##name(struct request_queue *q, char *page) \ queue_##name##_show(struct request_queue *q, char *page) \
{ \ { \
int bit; \ int bit; \
bit = test_bit(QUEUE_FLAG_##flag, &q->queue_flags); \ bit = test_bit(QUEUE_FLAG_##flag, &q->queue_flags); \
return queue_var_show(neg ? !bit : bit, page); \ return queue_var_show(neg ? !bit : bit, page); \
} \ } \
static ssize_t \ static ssize_t \
queue_store_##name(struct request_queue *q, const char *page, size_t count) \ queue_##name##_store(struct request_queue *q, const char *page, size_t count) \
{ \ { \
unsigned long val; \ unsigned long val; \
ssize_t ret; \ ssize_t ret; \
@ -287,6 +287,7 @@ queue_store_##name(struct request_queue *q, const char *page, size_t count) \
QUEUE_SYSFS_BIT_FNS(nonrot, NONROT, 1); QUEUE_SYSFS_BIT_FNS(nonrot, NONROT, 1);
QUEUE_SYSFS_BIT_FNS(random, ADD_RANDOM, 0); QUEUE_SYSFS_BIT_FNS(random, ADD_RANDOM, 0);
QUEUE_SYSFS_BIT_FNS(iostats, IO_STAT, 0); QUEUE_SYSFS_BIT_FNS(iostats, IO_STAT, 0);
QUEUE_SYSFS_BIT_FNS(stable_writes, STABLE_WRITES, 0);
#undef QUEUE_SYSFS_BIT_FNS #undef QUEUE_SYSFS_BIT_FNS
static ssize_t queue_zoned_show(struct request_queue *q, char *page) static ssize_t queue_zoned_show(struct request_queue *q, char *page)
@ -547,218 +548,73 @@ static ssize_t queue_dax_show(struct request_queue *q, char *page)
return queue_var_show(blk_queue_dax(q), page); return queue_var_show(blk_queue_dax(q), page);
} }
static struct queue_sysfs_entry queue_requests_entry = { #define QUEUE_RO_ENTRY(_prefix, _name) \
.attr = {.name = "nr_requests", .mode = 0644 }, static struct queue_sysfs_entry _prefix##_entry = { \
.show = queue_requests_show, .attr = { .name = _name, .mode = 0444 }, \
.store = queue_requests_store, .show = _prefix##_show, \
}; };
static struct queue_sysfs_entry queue_ra_entry = { #define QUEUE_RW_ENTRY(_prefix, _name) \
.attr = {.name = "read_ahead_kb", .mode = 0644 }, static struct queue_sysfs_entry _prefix##_entry = { \
.show = queue_ra_show, .attr = { .name = _name, .mode = 0644 }, \
.store = queue_ra_store, .show = _prefix##_show, \
.store = _prefix##_store, \
}; };
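
The two helper macros above replace the hand-rolled queue_sysfs_entry definitions being deleted on the left: QUEUE_RW_ENTRY(queue_ra, "read_ahead_kb") pastes the prefix onto _show/_store/_entry and produces the same queue_ra_entry structure that used to be spelled out. A compilable toy showing the same token-pasting pattern (simplified entry type, show-only, not the kernel sysfs machinery):

#include <stdio.h>

struct attr_entry {
	const char *name;
	const char *(*show)(void);
};

/* Same token-pasting trick as QUEUE_RO_ENTRY/QUEUE_RW_ENTRY above. */
#define RO_ENTRY(_prefix, _name)			\
static struct attr_entry _prefix##_entry = {		\
	.name = _name,					\
	.show = _prefix##_show,				\
}

static const char *queue_ra_show(void)
{
	return "128";
}

RO_ENTRY(queue_ra, "read_ahead_kb");	/* defines queue_ra_entry */

int main(void)
{
	printf("%s = %s\n", queue_ra_entry.name, queue_ra_entry.show());
	return 0;
}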
static struct queue_sysfs_entry queue_max_sectors_entry = { QUEUE_RW_ENTRY(queue_requests, "nr_requests");
.attr = {.name = "max_sectors_kb", .mode = 0644 }, QUEUE_RW_ENTRY(queue_ra, "read_ahead_kb");
.show = queue_max_sectors_show, QUEUE_RW_ENTRY(queue_max_sectors, "max_sectors_kb");
.store = queue_max_sectors_store, QUEUE_RO_ENTRY(queue_max_hw_sectors, "max_hw_sectors_kb");
}; QUEUE_RO_ENTRY(queue_max_segments, "max_segments");
QUEUE_RO_ENTRY(queue_max_integrity_segments, "max_integrity_segments");
QUEUE_RO_ENTRY(queue_max_segment_size, "max_segment_size");
QUEUE_RW_ENTRY(elv_iosched, "scheduler");
static struct queue_sysfs_entry queue_max_hw_sectors_entry = { QUEUE_RO_ENTRY(queue_logical_block_size, "logical_block_size");
.attr = {.name = "max_hw_sectors_kb", .mode = 0444 }, QUEUE_RO_ENTRY(queue_physical_block_size, "physical_block_size");
.show = queue_max_hw_sectors_show, QUEUE_RO_ENTRY(queue_chunk_sectors, "chunk_sectors");
}; QUEUE_RO_ENTRY(queue_io_min, "minimum_io_size");
QUEUE_RO_ENTRY(queue_io_opt, "optimal_io_size");
static struct queue_sysfs_entry queue_max_segments_entry = { QUEUE_RO_ENTRY(queue_max_discard_segments, "max_discard_segments");
.attr = {.name = "max_segments", .mode = 0444 }, QUEUE_RO_ENTRY(queue_discard_granularity, "discard_granularity");
.show = queue_max_segments_show, QUEUE_RO_ENTRY(queue_discard_max_hw, "discard_max_hw_bytes");
}; QUEUE_RW_ENTRY(queue_discard_max, "discard_max_bytes");
QUEUE_RO_ENTRY(queue_discard_zeroes_data, "discard_zeroes_data");
static struct queue_sysfs_entry queue_max_discard_segments_entry = { QUEUE_RO_ENTRY(queue_write_same_max, "write_same_max_bytes");
.attr = {.name = "max_discard_segments", .mode = 0444 }, QUEUE_RO_ENTRY(queue_write_zeroes_max, "write_zeroes_max_bytes");
.show = queue_max_discard_segments_show, QUEUE_RO_ENTRY(queue_zone_append_max, "zone_append_max_bytes");
};
static struct queue_sysfs_entry queue_max_integrity_segments_entry = { QUEUE_RO_ENTRY(queue_zoned, "zoned");
.attr = {.name = "max_integrity_segments", .mode = 0444 }, QUEUE_RO_ENTRY(queue_nr_zones, "nr_zones");
.show = queue_max_integrity_segments_show, QUEUE_RO_ENTRY(queue_max_open_zones, "max_open_zones");
}; QUEUE_RO_ENTRY(queue_max_active_zones, "max_active_zones");
static struct queue_sysfs_entry queue_max_segment_size_entry = { QUEUE_RW_ENTRY(queue_nomerges, "nomerges");
.attr = {.name = "max_segment_size", .mode = 0444 }, QUEUE_RW_ENTRY(queue_rq_affinity, "rq_affinity");
.show = queue_max_segment_size_show, QUEUE_RW_ENTRY(queue_poll, "io_poll");
}; QUEUE_RW_ENTRY(queue_poll_delay, "io_poll_delay");
QUEUE_RW_ENTRY(queue_wc, "write_cache");
QUEUE_RO_ENTRY(queue_fua, "fua");
QUEUE_RO_ENTRY(queue_dax, "dax");
QUEUE_RW_ENTRY(queue_io_timeout, "io_timeout");
QUEUE_RW_ENTRY(queue_wb_lat, "wbt_lat_usec");
static struct queue_sysfs_entry queue_iosched_entry = { #ifdef CONFIG_BLK_DEV_THROTTLING_LOW
.attr = {.name = "scheduler", .mode = 0644 }, QUEUE_RW_ENTRY(blk_throtl_sample_time, "throttle_sample_time");
.show = elv_iosched_show, #endif
.store = elv_iosched_store,
};
/* legacy alias for logical_block_size: */
static struct queue_sysfs_entry queue_hw_sector_size_entry = { static struct queue_sysfs_entry queue_hw_sector_size_entry = {
.attr = {.name = "hw_sector_size", .mode = 0444 }, .attr = {.name = "hw_sector_size", .mode = 0444 },
.show = queue_logical_block_size_show, .show = queue_logical_block_size_show,
}; };
static struct queue_sysfs_entry queue_logical_block_size_entry = { QUEUE_RW_ENTRY(queue_nonrot, "rotational");
.attr = {.name = "logical_block_size", .mode = 0444 }, QUEUE_RW_ENTRY(queue_iostats, "iostats");
.show = queue_logical_block_size_show, QUEUE_RW_ENTRY(queue_random, "add_random");
}; QUEUE_RW_ENTRY(queue_stable_writes, "stable_writes");
static struct queue_sysfs_entry queue_physical_block_size_entry = {
.attr = {.name = "physical_block_size", .mode = 0444 },
.show = queue_physical_block_size_show,
};
static struct queue_sysfs_entry queue_chunk_sectors_entry = {
.attr = {.name = "chunk_sectors", .mode = 0444 },
.show = queue_chunk_sectors_show,
};
static struct queue_sysfs_entry queue_io_min_entry = {
.attr = {.name = "minimum_io_size", .mode = 0444 },
.show = queue_io_min_show,
};
static struct queue_sysfs_entry queue_io_opt_entry = {
.attr = {.name = "optimal_io_size", .mode = 0444 },
.show = queue_io_opt_show,
};
static struct queue_sysfs_entry queue_discard_granularity_entry = {
.attr = {.name = "discard_granularity", .mode = 0444 },
.show = queue_discard_granularity_show,
};
static struct queue_sysfs_entry queue_discard_max_hw_entry = {
.attr = {.name = "discard_max_hw_bytes", .mode = 0444 },
.show = queue_discard_max_hw_show,
};
static struct queue_sysfs_entry queue_discard_max_entry = {
.attr = {.name = "discard_max_bytes", .mode = 0644 },
.show = queue_discard_max_show,
.store = queue_discard_max_store,
};
static struct queue_sysfs_entry queue_discard_zeroes_data_entry = {
.attr = {.name = "discard_zeroes_data", .mode = 0444 },
.show = queue_discard_zeroes_data_show,
};
static struct queue_sysfs_entry queue_write_same_max_entry = {
.attr = {.name = "write_same_max_bytes", .mode = 0444 },
.show = queue_write_same_max_show,
};
static struct queue_sysfs_entry queue_write_zeroes_max_entry = {
.attr = {.name = "write_zeroes_max_bytes", .mode = 0444 },
.show = queue_write_zeroes_max_show,
};
static struct queue_sysfs_entry queue_zone_append_max_entry = {
.attr = {.name = "zone_append_max_bytes", .mode = 0444 },
.show = queue_zone_append_max_show,
};
static struct queue_sysfs_entry queue_nonrot_entry = {
.attr = {.name = "rotational", .mode = 0644 },
.show = queue_show_nonrot,
.store = queue_store_nonrot,
};
static struct queue_sysfs_entry queue_zoned_entry = {
.attr = {.name = "zoned", .mode = 0444 },
.show = queue_zoned_show,
};
static struct queue_sysfs_entry queue_nr_zones_entry = {
.attr = {.name = "nr_zones", .mode = 0444 },
.show = queue_nr_zones_show,
};
static struct queue_sysfs_entry queue_max_open_zones_entry = {
.attr = {.name = "max_open_zones", .mode = 0444 },
.show = queue_max_open_zones_show,
};
static struct queue_sysfs_entry queue_max_active_zones_entry = {
.attr = {.name = "max_active_zones", .mode = 0444 },
.show = queue_max_active_zones_show,
};
static struct queue_sysfs_entry queue_nomerges_entry = {
.attr = {.name = "nomerges", .mode = 0644 },
.show = queue_nomerges_show,
.store = queue_nomerges_store,
};
static struct queue_sysfs_entry queue_rq_affinity_entry = {
.attr = {.name = "rq_affinity", .mode = 0644 },
.show = queue_rq_affinity_show,
.store = queue_rq_affinity_store,
};
static struct queue_sysfs_entry queue_iostats_entry = {
.attr = {.name = "iostats", .mode = 0644 },
.show = queue_show_iostats,
.store = queue_store_iostats,
};
static struct queue_sysfs_entry queue_random_entry = {
.attr = {.name = "add_random", .mode = 0644 },
.show = queue_show_random,
.store = queue_store_random,
};
static struct queue_sysfs_entry queue_poll_entry = {
.attr = {.name = "io_poll", .mode = 0644 },
.show = queue_poll_show,
.store = queue_poll_store,
};
static struct queue_sysfs_entry queue_poll_delay_entry = {
.attr = {.name = "io_poll_delay", .mode = 0644 },
.show = queue_poll_delay_show,
.store = queue_poll_delay_store,
};
static struct queue_sysfs_entry queue_wc_entry = {
.attr = {.name = "write_cache", .mode = 0644 },
.show = queue_wc_show,
.store = queue_wc_store,
};
static struct queue_sysfs_entry queue_fua_entry = {
.attr = {.name = "fua", .mode = 0444 },
.show = queue_fua_show,
};
static struct queue_sysfs_entry queue_dax_entry = {
.attr = {.name = "dax", .mode = 0444 },
.show = queue_dax_show,
};
static struct queue_sysfs_entry queue_io_timeout_entry = {
.attr = {.name = "io_timeout", .mode = 0644 },
.show = queue_io_timeout_show,
.store = queue_io_timeout_store,
};
static struct queue_sysfs_entry queue_wb_lat_entry = {
.attr = {.name = "wbt_lat_usec", .mode = 0644 },
.show = queue_wb_lat_show,
.store = queue_wb_lat_store,
};
#ifdef CONFIG_BLK_DEV_THROTTLING_LOW
static struct queue_sysfs_entry throtl_sample_time_entry = {
.attr = {.name = "throttle_sample_time", .mode = 0644 },
.show = blk_throtl_sample_time_show,
.store = blk_throtl_sample_time_store,
};
#endif
static struct attribute *queue_attrs[] = { static struct attribute *queue_attrs[] = {
&queue_requests_entry.attr, &queue_requests_entry.attr,
@ -769,7 +625,7 @@ static struct attribute *queue_attrs[] = {
&queue_max_discard_segments_entry.attr, &queue_max_discard_segments_entry.attr,
&queue_max_integrity_segments_entry.attr, &queue_max_integrity_segments_entry.attr,
&queue_max_segment_size_entry.attr, &queue_max_segment_size_entry.attr,
&queue_iosched_entry.attr, &elv_iosched_entry.attr,
&queue_hw_sector_size_entry.attr, &queue_hw_sector_size_entry.attr,
&queue_logical_block_size_entry.attr, &queue_logical_block_size_entry.attr,
&queue_physical_block_size_entry.attr, &queue_physical_block_size_entry.attr,
@ -791,6 +647,7 @@ static struct attribute *queue_attrs[] = {
&queue_nomerges_entry.attr, &queue_nomerges_entry.attr,
&queue_rq_affinity_entry.attr, &queue_rq_affinity_entry.attr,
&queue_iostats_entry.attr, &queue_iostats_entry.attr,
&queue_stable_writes_entry.attr,
&queue_random_entry.attr, &queue_random_entry.attr,
&queue_poll_entry.attr, &queue_poll_entry.attr,
&queue_wc_entry.attr, &queue_wc_entry.attr,
@ -800,7 +657,7 @@ static struct attribute *queue_attrs[] = {
&queue_poll_delay_entry.attr, &queue_poll_delay_entry.attr,
&queue_io_timeout_entry.attr, &queue_io_timeout_entry.attr,
#ifdef CONFIG_BLK_DEV_THROTTLING_LOW #ifdef CONFIG_BLK_DEV_THROTTLING_LOW
&throtl_sample_time_entry.attr, &blk_throtl_sample_time_entry.attr,
#endif #endif
NULL, NULL,
}; };
@ -883,7 +740,6 @@ static void blk_exit_queue(struct request_queue *q)
if (q->elevator) { if (q->elevator) {
ioc_clear_queue(q); ioc_clear_queue(q);
__elevator_exit(q, q->elevator); __elevator_exit(q, q->elevator);
q->elevator = NULL;
} }
/* /*
@ -934,9 +790,16 @@ static void blk_release_queue(struct kobject *kobj)
blk_free_queue_stats(q->stats); blk_free_queue_stats(q->stats);
if (queue_is_mq(q)) if (queue_is_mq(q)) {
struct blk_mq_hw_ctx *hctx;
int i;
cancel_delayed_work_sync(&q->requeue_work); cancel_delayed_work_sync(&q->requeue_work);
queue_for_each_hw_ctx(q, hctx, i)
cancel_delayed_work_sync(&hctx->run_work);
}
blk_exit_queue(q); blk_exit_queue(q);
blk_queue_free_zone_bitmaps(q); blk_queue_free_zone_bitmaps(q);
@ -977,7 +840,6 @@ int blk_register_queue(struct gendisk *disk)
int ret; int ret;
struct device *dev = disk_to_dev(disk); struct device *dev = disk_to_dev(disk);
struct request_queue *q = disk->queue; struct request_queue *q = disk->queue;
bool has_elevator = false;
if (WARN_ON(!q)) if (WARN_ON(!q))
return -ENXIO; return -ENXIO;
@ -1000,6 +862,8 @@ int blk_register_queue(struct gendisk *disk)
percpu_ref_switch_to_percpu(&q->q_usage_counter); percpu_ref_switch_to_percpu(&q->q_usage_counter);
} }
blk_queue_update_readahead(q);
ret = blk_trace_init_sysfs(dev); ret = blk_trace_init_sysfs(dev);
if (ret) if (ret)
return ret; return ret;
@ -1041,7 +905,6 @@ int blk_register_queue(struct gendisk *disk)
kobject_put(&dev->kobj); kobject_put(&dev->kobj);
return ret; return ret;
} }
has_elevator = true;
} }
blk_queue_flag_set(QUEUE_FLAG_REGISTERED, q); blk_queue_flag_set(QUEUE_FLAG_REGISTERED, q);
@ -1050,7 +913,7 @@ int blk_register_queue(struct gendisk *disk)
/* Now everything is ready and send out KOBJ_ADD uevent */ /* Now everything is ready and send out KOBJ_ADD uevent */
kobject_uevent(&q->kobj, KOBJ_ADD); kobject_uevent(&q->kobj, KOBJ_ADD);
if (has_elevator) if (q->elevator)
kobject_uevent(&q->elevator->kobj, KOBJ_ADD); kobject_uevent(&q->elevator->kobj, KOBJ_ADD);
mutex_unlock(&q->sysfs_lock); mutex_unlock(&q->sysfs_lock);

block/blk-throttle.c  View File

@ -15,10 +15,10 @@
#include "blk-cgroup-rwstat.h" #include "blk-cgroup-rwstat.h"
/* Max dispatch from a group in 1 round */ /* Max dispatch from a group in 1 round */
static int throtl_grp_quantum = 8; #define THROTL_GRP_QUANTUM 8
/* Total max dispatch from all groups in one round */ /* Total max dispatch from all groups in one round */
static int throtl_quantum = 32; #define THROTL_QUANTUM 32
/* Throttling is performed over a slice and after that slice is renewed */ /* Throttling is performed over a slice and after that slice is renewed */
#define DFL_THROTL_SLICE_HD (HZ / 10) #define DFL_THROTL_SLICE_HD (HZ / 10)
@ -150,7 +150,7 @@ struct throtl_grp {
/* user configured IOPS limits */ /* user configured IOPS limits */
unsigned int iops_conf[2][LIMIT_CNT]; unsigned int iops_conf[2][LIMIT_CNT];
/* Number of bytes disptached in current slice */ /* Number of bytes dispatched in current slice */
uint64_t bytes_disp[2]; uint64_t bytes_disp[2];
/* Number of bio's dispatched in current slice */ /* Number of bio's dispatched in current slice */
unsigned int io_disp[2]; unsigned int io_disp[2];
@ -423,12 +423,13 @@ static void throtl_qnode_add_bio(struct bio *bio, struct throtl_qnode *qn,
*/ */
static struct bio *throtl_peek_queued(struct list_head *queued) static struct bio *throtl_peek_queued(struct list_head *queued)
{ {
struct throtl_qnode *qn = list_first_entry(queued, struct throtl_qnode, node); struct throtl_qnode *qn;
struct bio *bio; struct bio *bio;
if (list_empty(queued)) if (list_empty(queued))
return NULL; return NULL;
qn = list_first_entry(queued, struct throtl_qnode, node);
bio = bio_list_peek(&qn->bios); bio = bio_list_peek(&qn->bios);
WARN_ON_ONCE(!bio); WARN_ON_ONCE(!bio);
return bio; return bio;
@ -451,12 +452,13 @@ static struct bio *throtl_peek_queued(struct list_head *queued)
static struct bio *throtl_pop_queued(struct list_head *queued, static struct bio *throtl_pop_queued(struct list_head *queued,
struct throtl_grp **tg_to_put) struct throtl_grp **tg_to_put)
{ {
struct throtl_qnode *qn = list_first_entry(queued, struct throtl_qnode, node); struct throtl_qnode *qn;
struct bio *bio; struct bio *bio;
if (list_empty(queued)) if (list_empty(queued))
return NULL; return NULL;
qn = list_first_entry(queued, struct throtl_qnode, node);
bio = bio_list_pop(&qn->bios); bio = bio_list_pop(&qn->bios);
WARN_ON_ONCE(!bio); WARN_ON_ONCE(!bio);
@ -636,9 +638,6 @@ static struct throtl_grp *
throtl_rb_first(struct throtl_service_queue *parent_sq) throtl_rb_first(struct throtl_service_queue *parent_sq)
{ {
struct rb_node *n; struct rb_node *n;
/* Service tree is empty */
if (!parent_sq->nr_pending)
return NULL;
n = rb_first_cached(&parent_sq->pending_tree); n = rb_first_cached(&parent_sq->pending_tree);
WARN_ON_ONCE(!n); WARN_ON_ONCE(!n);
@ -692,29 +691,21 @@ static void tg_service_queue_add(struct throtl_grp *tg)
leftmost); leftmost);
} }
static void __throtl_enqueue_tg(struct throtl_grp *tg) static void throtl_enqueue_tg(struct throtl_grp *tg)
{ {
if (!(tg->flags & THROTL_TG_PENDING)) {
tg_service_queue_add(tg); tg_service_queue_add(tg);
tg->flags |= THROTL_TG_PENDING; tg->flags |= THROTL_TG_PENDING;
tg->service_queue.parent_sq->nr_pending++; tg->service_queue.parent_sq->nr_pending++;
} }
static void throtl_enqueue_tg(struct throtl_grp *tg)
{
if (!(tg->flags & THROTL_TG_PENDING))
__throtl_enqueue_tg(tg);
}
static void __throtl_dequeue_tg(struct throtl_grp *tg)
{
throtl_rb_erase(&tg->rb_node, tg->service_queue.parent_sq);
tg->flags &= ~THROTL_TG_PENDING;
} }
static void throtl_dequeue_tg(struct throtl_grp *tg) static void throtl_dequeue_tg(struct throtl_grp *tg)
{ {
if (tg->flags & THROTL_TG_PENDING) if (tg->flags & THROTL_TG_PENDING) {
__throtl_dequeue_tg(tg); throtl_rb_erase(&tg->rb_node, tg->service_queue.parent_sq);
tg->flags &= ~THROTL_TG_PENDING;
}
} }
/* Call with queue lock held */ /* Call with queue lock held */
@ -817,7 +808,7 @@ static inline void throtl_set_slice_end(struct throtl_grp *tg, bool rw,
static inline void throtl_extend_slice(struct throtl_grp *tg, bool rw, static inline void throtl_extend_slice(struct throtl_grp *tg, bool rw,
unsigned long jiffy_end) unsigned long jiffy_end)
{ {
tg->slice_end[rw] = roundup(jiffy_end, tg->td->throtl_slice); throtl_set_slice_end(tg, rw, jiffy_end);
throtl_log(&tg->service_queue, throtl_log(&tg->service_queue,
"[%c] extend slice start=%lu end=%lu jiffies=%lu", "[%c] extend slice start=%lu end=%lu jiffies=%lu",
rw == READ ? 'R' : 'W', tg->slice_start[rw], rw == READ ? 'R' : 'W', tg->slice_start[rw],
@ -852,7 +843,7 @@ static inline void throtl_trim_slice(struct throtl_grp *tg, bool rw)
/* /*
* A bio has been dispatched. Also adjust slice_end. It might happen * A bio has been dispatched. Also adjust slice_end. It might happen
* that initially cgroup limit was very low resulting in high * that initially cgroup limit was very low resulting in high
* slice_end, but later limit was bumped up and bio was dispached * slice_end, but later limit was bumped up and bio was dispatched
* sooner, then we need to reduce slice_end. A high bogus slice_end * sooner, then we need to reduce slice_end. A high bogus slice_end
* is bad because it does not allow new slice to start. * is bad because it does not allow new slice to start.
*/ */
@ -894,13 +885,19 @@ static inline void throtl_trim_slice(struct throtl_grp *tg, bool rw)
} }
static bool tg_with_in_iops_limit(struct throtl_grp *tg, struct bio *bio, static bool tg_with_in_iops_limit(struct throtl_grp *tg, struct bio *bio,
unsigned long *wait) u32 iops_limit, unsigned long *wait)
{ {
bool rw = bio_data_dir(bio); bool rw = bio_data_dir(bio);
unsigned int io_allowed; unsigned int io_allowed;
unsigned long jiffy_elapsed, jiffy_wait, jiffy_elapsed_rnd; unsigned long jiffy_elapsed, jiffy_wait, jiffy_elapsed_rnd;
u64 tmp; u64 tmp;
if (iops_limit == UINT_MAX) {
if (wait)
*wait = 0;
return true;
}
jiffy_elapsed = jiffies - tg->slice_start[rw]; jiffy_elapsed = jiffies - tg->slice_start[rw];
/* Round up to the next throttle slice, wait time must be nonzero */ /* Round up to the next throttle slice, wait time must be nonzero */
@ -913,7 +910,7 @@ static bool tg_with_in_iops_limit(struct throtl_grp *tg, struct bio *bio,
* have been trimmed. * have been trimmed.
*/ */
tmp = (u64)tg_iops_limit(tg, rw) * jiffy_elapsed_rnd; tmp = (u64)iops_limit * jiffy_elapsed_rnd;
do_div(tmp, HZ); do_div(tmp, HZ);
if (tmp > UINT_MAX) if (tmp > UINT_MAX)
@ -936,13 +933,19 @@ static bool tg_with_in_iops_limit(struct throtl_grp *tg, struct bio *bio,
} }
static bool tg_with_in_bps_limit(struct throtl_grp *tg, struct bio *bio, static bool tg_with_in_bps_limit(struct throtl_grp *tg, struct bio *bio,
unsigned long *wait) u64 bps_limit, unsigned long *wait)
{ {
bool rw = bio_data_dir(bio); bool rw = bio_data_dir(bio);
u64 bytes_allowed, extra_bytes, tmp; u64 bytes_allowed, extra_bytes, tmp;
unsigned long jiffy_elapsed, jiffy_wait, jiffy_elapsed_rnd; unsigned long jiffy_elapsed, jiffy_wait, jiffy_elapsed_rnd;
unsigned int bio_size = throtl_bio_data_size(bio); unsigned int bio_size = throtl_bio_data_size(bio);
if (bps_limit == U64_MAX) {
if (wait)
*wait = 0;
return true;
}
jiffy_elapsed = jiffy_elapsed_rnd = jiffies - tg->slice_start[rw]; jiffy_elapsed = jiffy_elapsed_rnd = jiffies - tg->slice_start[rw];
/* Slice has just started. Consider one slice interval */ /* Slice has just started. Consider one slice interval */
@ -951,7 +954,7 @@ static bool tg_with_in_bps_limit(struct throtl_grp *tg, struct bio *bio,
jiffy_elapsed_rnd = roundup(jiffy_elapsed_rnd, tg->td->throtl_slice); jiffy_elapsed_rnd = roundup(jiffy_elapsed_rnd, tg->td->throtl_slice);
tmp = tg_bps_limit(tg, rw) * jiffy_elapsed_rnd; tmp = bps_limit * jiffy_elapsed_rnd;
do_div(tmp, HZ); do_div(tmp, HZ);
bytes_allowed = tmp; bytes_allowed = tmp;
@ -963,7 +966,7 @@ static bool tg_with_in_bps_limit(struct throtl_grp *tg, struct bio *bio,
/* Calc approx time to dispatch */ /* Calc approx time to dispatch */
extra_bytes = tg->bytes_disp[rw] + bio_size - bytes_allowed; extra_bytes = tg->bytes_disp[rw] + bio_size - bytes_allowed;
jiffy_wait = div64_u64(extra_bytes * HZ, tg_bps_limit(tg, rw)); jiffy_wait = div64_u64(extra_bytes * HZ, bps_limit);
if (!jiffy_wait) if (!jiffy_wait)
jiffy_wait = 1; jiffy_wait = 1;
@ -987,6 +990,8 @@ static bool tg_may_dispatch(struct throtl_grp *tg, struct bio *bio,
{ {
bool rw = bio_data_dir(bio); bool rw = bio_data_dir(bio);
unsigned long bps_wait = 0, iops_wait = 0, max_wait = 0; unsigned long bps_wait = 0, iops_wait = 0, max_wait = 0;
u64 bps_limit = tg_bps_limit(tg, rw);
u32 iops_limit = tg_iops_limit(tg, rw);
/* /*
* Currently whole state machine of group depends on first bio * Currently whole state machine of group depends on first bio
@ -998,8 +1003,7 @@ static bool tg_may_dispatch(struct throtl_grp *tg, struct bio *bio,
bio != throtl_peek_queued(&tg->service_queue.queued[rw])); bio != throtl_peek_queued(&tg->service_queue.queued[rw]));
/* If tg->bps = -1, then BW is unlimited */ /* If tg->bps = -1, then BW is unlimited */
if (tg_bps_limit(tg, rw) == U64_MAX && if (bps_limit == U64_MAX && iops_limit == UINT_MAX) {
tg_iops_limit(tg, rw) == UINT_MAX) {
if (wait) if (wait)
*wait = 0; *wait = 0;
return true; return true;
@ -1021,8 +1025,8 @@ static bool tg_may_dispatch(struct throtl_grp *tg, struct bio *bio,
jiffies + tg->td->throtl_slice); jiffies + tg->td->throtl_slice);
} }
if (tg_with_in_bps_limit(tg, bio, &bps_wait) && if (tg_with_in_bps_limit(tg, bio, bps_limit, &bps_wait) &&
tg_with_in_iops_limit(tg, bio, &iops_wait)) { tg_with_in_iops_limit(tg, bio, iops_limit, &iops_wait)) {
if (wait) if (wait)
*wait = 0; *wait = 0;
return true; return true;
@ -1082,7 +1086,7 @@ static void throtl_add_bio_tg(struct bio *bio, struct throtl_qnode *qn,
* If @tg doesn't currently have any bios queued in the same * If @tg doesn't currently have any bios queued in the same
* direction, queueing @bio can change when @tg should be * direction, queueing @bio can change when @tg should be
* dispatched. Mark that @tg was empty. This is automatically * dispatched. Mark that @tg was empty. This is automatically
* cleaered on the next tg_update_disptime(). * cleared on the next tg_update_disptime().
*/ */
if (!sq->nr_queued[rw]) if (!sq->nr_queued[rw])
tg->flags |= THROTL_TG_WAS_EMPTY; tg->flags |= THROTL_TG_WAS_EMPTY;
@ -1175,8 +1179,8 @@ static int throtl_dispatch_tg(struct throtl_grp *tg)
{ {
struct throtl_service_queue *sq = &tg->service_queue; struct throtl_service_queue *sq = &tg->service_queue;
unsigned int nr_reads = 0, nr_writes = 0; unsigned int nr_reads = 0, nr_writes = 0;
unsigned int max_nr_reads = throtl_grp_quantum*3/4; unsigned int max_nr_reads = THROTL_GRP_QUANTUM * 3 / 4;
unsigned int max_nr_writes = throtl_grp_quantum - max_nr_reads; unsigned int max_nr_writes = THROTL_GRP_QUANTUM - max_nr_reads;
struct bio *bio; struct bio *bio;
/* Try to dispatch 75% READS and 25% WRITES */ /* Try to dispatch 75% READS and 25% WRITES */
@ -1209,9 +1213,13 @@ static int throtl_select_dispatch(struct throtl_service_queue *parent_sq)
unsigned int nr_disp = 0; unsigned int nr_disp = 0;
while (1) { while (1) {
struct throtl_grp *tg = throtl_rb_first(parent_sq); struct throtl_grp *tg;
struct throtl_service_queue *sq; struct throtl_service_queue *sq;
if (!parent_sq->nr_pending)
break;
tg = throtl_rb_first(parent_sq);
if (!tg) if (!tg)
break; break;
@ -1226,7 +1234,7 @@ static int throtl_select_dispatch(struct throtl_service_queue *parent_sq)
if (sq->nr_queued[0] || sq->nr_queued[1]) if (sq->nr_queued[0] || sq->nr_queued[1])
tg_update_disptime(tg); tg_update_disptime(tg);
if (nr_disp >= throtl_quantum) if (nr_disp >= THROTL_QUANTUM)
break; break;
} }
@ -1303,7 +1311,7 @@ static void throtl_pending_timer_fn(struct timer_list *t)
} }
} }
} else { } else {
/* reached the top-level, queue issueing */ /* reached the top-level, queue issuing */
queue_work(kthrotld_workqueue, &td->dispatch_work); queue_work(kthrotld_workqueue, &td->dispatch_work);
} }
out_unlock: out_unlock:
@ -1314,8 +1322,8 @@ static void throtl_pending_timer_fn(struct timer_list *t)
* blk_throtl_dispatch_work_fn - work function for throtl_data->dispatch_work * blk_throtl_dispatch_work_fn - work function for throtl_data->dispatch_work
* @work: work item being executed * @work: work item being executed
* *
* This function is queued for execution when bio's reach the bio_lists[] * This function is queued for execution when bios reach the bio_lists[]
* of throtl_data->service_queue. Those bio's are ready and issued by this * of throtl_data->service_queue. Those bios are ready and issued by this
* function. * function.
*/ */
static void blk_throtl_dispatch_work_fn(struct work_struct *work) static void blk_throtl_dispatch_work_fn(struct work_struct *work)
@ -1428,8 +1436,8 @@ static void tg_conf_updated(struct throtl_grp *tg, bool global)
* that a group's limit are dropped suddenly and we don't want to * that a group's limit are dropped suddenly and we don't want to
* account recently dispatched IO with new low rate. * account recently dispatched IO with new low rate.
*/ */
throtl_start_new_slice(tg, 0); throtl_start_new_slice(tg, READ);
throtl_start_new_slice(tg, 1); throtl_start_new_slice(tg, WRITE);
if (tg->flags & THROTL_TG_PENDING) { if (tg->flags & THROTL_TG_PENDING) {
tg_update_disptime(tg); tg_update_disptime(tg);
@ -1674,13 +1682,13 @@ static ssize_t tg_set_limit(struct kernfs_open_file *of,
goto out_finish; goto out_finish;
ret = -EINVAL; ret = -EINVAL;
if (!strcmp(tok, "rbps")) if (!strcmp(tok, "rbps") && val > 1)
v[0] = val; v[0] = val;
else if (!strcmp(tok, "wbps")) else if (!strcmp(tok, "wbps") && val > 1)
v[1] = val; v[1] = val;
else if (!strcmp(tok, "riops")) else if (!strcmp(tok, "riops") && val > 1)
v[2] = min_t(u64, val, UINT_MAX); v[2] = min_t(u64, val, UINT_MAX);
else if (!strcmp(tok, "wiops")) else if (!strcmp(tok, "wiops") && val > 1)
v[3] = min_t(u64, val, UINT_MAX); v[3] = min_t(u64, val, UINT_MAX);
else if (off == LIMIT_LOW && !strcmp(tok, "idle")) else if (off == LIMIT_LOW && !strcmp(tok, "idle"))
idle_time = val; idle_time = val;
@ -1957,7 +1965,7 @@ static void throtl_upgrade_state(struct throtl_data *td)
queue_work(kthrotld_workqueue, &td->dispatch_work); queue_work(kthrotld_workqueue, &td->dispatch_work);
} }
static void throtl_downgrade_state(struct throtl_data *td, int new) static void throtl_downgrade_state(struct throtl_data *td)
{ {
td->scale /= 2; td->scale /= 2;
@ -1967,7 +1975,7 @@ static void throtl_downgrade_state(struct throtl_data *td, int new)
return; return;
} }
td->limit_index = new; td->limit_index = LIMIT_LOW;
td->low_downgrade_time = jiffies; td->low_downgrade_time = jiffies;
} }
@ -2054,7 +2062,7 @@ static void throtl_downgrade_check(struct throtl_grp *tg)
* cgroups * cgroups
*/ */
if (throtl_hierarchy_can_downgrade(tg)) if (throtl_hierarchy_can_downgrade(tg))
throtl_downgrade_state(tg->td, LIMIT_LOW); throtl_downgrade_state(tg->td);
tg->last_bytes_disp[READ] = 0; tg->last_bytes_disp[READ] = 0;
tg->last_bytes_disp[WRITE] = 0; tg->last_bytes_disp[WRITE] = 0;
@ -2064,10 +2072,14 @@ static void throtl_downgrade_check(struct throtl_grp *tg)
static void blk_throtl_update_idletime(struct throtl_grp *tg) static void blk_throtl_update_idletime(struct throtl_grp *tg)
{ {
unsigned long now = ktime_get_ns() >> 10; unsigned long now;
unsigned long last_finish_time = tg->last_finish_time; unsigned long last_finish_time = tg->last_finish_time;
if (now <= last_finish_time || last_finish_time == 0 || if (last_finish_time == 0)
return;
now = ktime_get_ns() >> 10;
if (now <= last_finish_time ||
last_finish_time == tg->checked_last_finish_time) last_finish_time == tg->checked_last_finish_time)
return; return;
@ -2083,7 +2095,7 @@ static void throtl_update_latency_buckets(struct throtl_data *td)
unsigned long last_latency[2] = { 0 }; unsigned long last_latency[2] = { 0 };
unsigned long latency[2]; unsigned long latency[2];
if (!blk_queue_nonrot(td->queue)) if (!blk_queue_nonrot(td->queue) || !td->limit_valid[LIMIT_LOW])
return; return;
if (time_before(jiffies, td->last_calculate_time + HZ)) if (time_before(jiffies, td->last_calculate_time + HZ))
return; return;
@ -2230,7 +2242,7 @@ bool blk_throtl_bio(struct bio *bio)
/* /*
* @bio passed through this layer without being throttled. * @bio passed through this layer without being throttled.
* Climb up the ladder. If we''re already at the top, it * Climb up the ladder. If we're already at the top, it
* can be executed directly. * can be executed directly.
*/ */
qn = &tg->qnode_on_parent[rw]; qn = &tg->qnode_on_parent[rw];
@ -2321,6 +2333,8 @@ void blk_throtl_bio_endio(struct bio *bio)
if (!blkg) if (!blkg)
return; return;
tg = blkg_to_tg(blkg); tg = blkg_to_tg(blkg);
if (!tg->td->limit_valid[LIMIT_LOW])
return;
finish_time_ns = ktime_get_ns(); finish_time_ns = ktime_get_ns();
tg->last_finish_time = finish_time_ns >> 10; tg->last_finish_time = finish_time_ns >> 10;


@ -114,6 +114,11 @@ static inline bool bio_integrity_endio(struct bio *bio)
return true; return true;
} }
bool blk_integrity_merge_rq(struct request_queue *, struct request *,
struct request *);
bool blk_integrity_merge_bio(struct request_queue *, struct request *,
struct bio *);
static inline bool integrity_req_gap_back_merge(struct request *req, static inline bool integrity_req_gap_back_merge(struct request *req,
struct bio *next) struct bio *next)
{ {
@ -137,6 +142,16 @@ static inline bool integrity_req_gap_front_merge(struct request *req,
void blk_integrity_add(struct gendisk *); void blk_integrity_add(struct gendisk *);
void blk_integrity_del(struct gendisk *); void blk_integrity_del(struct gendisk *);
#else /* CONFIG_BLK_DEV_INTEGRITY */ #else /* CONFIG_BLK_DEV_INTEGRITY */
static inline bool blk_integrity_merge_rq(struct request_queue *rq,
struct request *r1, struct request *r2)
{
return true;
}
static inline bool blk_integrity_merge_bio(struct request_queue *rq,
struct request *r, struct bio *b)
{
return true;
}
static inline bool integrity_req_gap_back_merge(struct request *req, static inline bool integrity_req_gap_back_merge(struct request *req,
struct bio *next) struct bio *next)
{ {
@ -169,14 +184,10 @@ static inline void blk_integrity_del(struct gendisk *disk)
unsigned long blk_rq_timeout(unsigned long timeout); unsigned long blk_rq_timeout(unsigned long timeout);
void blk_add_timer(struct request *req); void blk_add_timer(struct request *req);
bool bio_attempt_front_merge(struct request *req, struct bio *bio,
unsigned int nr_segs);
bool bio_attempt_back_merge(struct request *req, struct bio *bio,
unsigned int nr_segs);
bool bio_attempt_discard_merge(struct request_queue *q, struct request *req,
struct bio *bio);
bool blk_attempt_plug_merge(struct request_queue *q, struct bio *bio, bool blk_attempt_plug_merge(struct request_queue *q, struct bio *bio,
unsigned int nr_segs, struct request **same_queue_rq); unsigned int nr_segs, struct request **same_queue_rq);
bool blk_bio_list_merge(struct request_queue *q, struct list_head *list,
struct bio *bio, unsigned int nr_segs);
void blk_account_io_start(struct request *req); void blk_account_io_start(struct request *req);
void blk_account_io_done(struct request *req, u64 now); void blk_account_io_done(struct request *req, u64 now);
@ -223,10 +234,6 @@ ssize_t part_timeout_store(struct device *, struct device_attribute *,
void __blk_queue_split(struct bio **bio, unsigned int *nr_segs); void __blk_queue_split(struct bio **bio, unsigned int *nr_segs);
int ll_back_merge_fn(struct request *req, struct bio *bio, int ll_back_merge_fn(struct request *req, struct bio *bio,
unsigned int nr_segs); unsigned int nr_segs);
int ll_front_merge_fn(struct request *req, struct bio *bio,
unsigned int nr_segs);
struct request *attempt_back_merge(struct request_queue *q, struct request *rq);
struct request *attempt_front_merge(struct request_queue *q, struct request *rq);
int blk_attempt_req_merge(struct request_queue *q, struct request *rq, int blk_attempt_req_merge(struct request_queue *q, struct request *rq,
struct request *next); struct request *next);
unsigned int blk_recalc_rq_segments(struct request *rq); unsigned int blk_recalc_rq_segments(struct request *rq);
@ -350,7 +357,7 @@ char *disk_name(struct gendisk *hd, int partno, char *buf);
#define ADDPART_FLAG_NONE 0 #define ADDPART_FLAG_NONE 0
#define ADDPART_FLAG_RAID 1 #define ADDPART_FLAG_RAID 1
#define ADDPART_FLAG_WHOLEDISK 2 #define ADDPART_FLAG_WHOLEDISK 2
void delete_partition(struct gendisk *disk, struct hd_struct *part); void delete_partition(struct hd_struct *part);
int bdev_add_partition(struct block_device *bdev, int partno, int bdev_add_partition(struct block_device *bdev, int partno,
sector_t start, sector_t length); sector_t start, sector_t length);
int bdev_del_partition(struct block_device *bdev, int partno); int bdev_del_partition(struct block_device *bdev, int partno);


@ -267,22 +267,21 @@ static struct bio *bounce_clone_bio(struct bio *bio_src, gfp_t gfp_mask,
break; break;
} }
bio_crypt_clone(bio, bio_src, gfp_mask); if (bio_crypt_clone(bio, bio_src, gfp_mask) < 0)
goto err_put;
if (bio_integrity(bio_src)) { if (bio_integrity(bio_src) &&
int ret; bio_integrity_clone(bio, bio_src, gfp_mask) < 0)
goto err_put;
ret = bio_integrity_clone(bio, bio_src, gfp_mask);
if (ret < 0) {
bio_put(bio);
return NULL;
}
}
bio_clone_blkg_association(bio, bio_src); bio_clone_blkg_association(bio, bio_src);
blkcg_bio_issue_init(bio); blkcg_bio_issue_init(bio);
return bio; return bio;
err_put:
bio_put(bio);
return NULL;
} }
static void __blk_queue_bounce(struct request_queue *q, struct bio **bio_orig, static void __blk_queue_bounce(struct request_queue *q, struct bio **bio_orig,


@ -207,7 +207,7 @@ static int bsg_map_buffer(struct bsg_buffer *buf, struct request *req)
BUG_ON(!req->nr_phys_segments); BUG_ON(!req->nr_phys_segments);
buf->sg_list = kzalloc(sz, GFP_KERNEL); buf->sg_list = kmalloc(sz, GFP_KERNEL);
if (!buf->sg_list) if (!buf->sg_list)
return -ENOMEM; return -ENOMEM;
sg_init_table(buf->sg_list, req->nr_phys_segments); sg_init_table(buf->sg_list, req->nr_phys_segments);


@ -191,7 +191,6 @@ static void elevator_release(struct kobject *kobj)
void __elevator_exit(struct request_queue *q, struct elevator_queue *e) void __elevator_exit(struct request_queue *q, struct elevator_queue *e)
{ {
mutex_lock(&e->sysfs_lock); mutex_lock(&e->sysfs_lock);
if (e->type->ops.exit_sched)
blk_mq_exit_sched(q, e); blk_mq_exit_sched(q, e);
mutex_unlock(&e->sysfs_lock); mutex_unlock(&e->sysfs_lock);
@ -480,16 +479,13 @@ static struct kobj_type elv_ktype = {
.release = elevator_release, .release = elevator_release,
}; };
/*
* elv_register_queue is called from either blk_register_queue or
* elevator_switch, elevator switch is prevented from being happen
* in the two paths, so it is safe to not hold q->sysfs_lock.
*/
int elv_register_queue(struct request_queue *q, bool uevent) int elv_register_queue(struct request_queue *q, bool uevent)
{ {
struct elevator_queue *e = q->elevator; struct elevator_queue *e = q->elevator;
int error; int error;
lockdep_assert_held(&q->sysfs_lock);
error = kobject_add(&e->kobj, &q->kobj, "%s", "iosched"); error = kobject_add(&e->kobj, &q->kobj, "%s", "iosched");
if (!error) { if (!error) {
struct elv_fs_entry *attr = e->type->elevator_attrs; struct elv_fs_entry *attr = e->type->elevator_attrs;
@ -508,13 +504,10 @@ int elv_register_queue(struct request_queue *q, bool uevent)
return error; return error;
} }
/*
* elv_unregister_queue is called from either blk_unregister_queue or
* elevator_switch, elevator switch is prevented from being happen
* in the two paths, so it is safe to not hold q->sysfs_lock.
*/
void elv_unregister_queue(struct request_queue *q) void elv_unregister_queue(struct request_queue *q)
{ {
lockdep_assert_held(&q->sysfs_lock);
if (q) { if (q) {
struct elevator_queue *e = q->elevator; struct elevator_queue *e = q->elevator;
@ -616,7 +609,7 @@ int elevator_switch_mq(struct request_queue *q,
static inline bool elv_support_iosched(struct request_queue *q) static inline bool elv_support_iosched(struct request_queue *q)
{ {
if (!q->mq_ops || if (!queue_is_mq(q) ||
(q->tag_set && (q->tag_set->flags & BLK_MQ_F_NO_SCHED))) (q->tag_set && (q->tag_set->flags & BLK_MQ_F_NO_SCHED)))
return false; return false;
return true; return true;
@ -673,7 +666,7 @@ void elevator_init_mq(struct request_queue *q)
if (!elv_support_iosched(q)) if (!elv_support_iosched(q))
return; return;
WARN_ON_ONCE(test_bit(QUEUE_FLAG_REGISTERED, &q->queue_flags)); WARN_ON_ONCE(blk_queue_registered(q));
if (unlikely(q->elevator)) if (unlikely(q->elevator))
return; return;
@ -764,7 +757,7 @@ ssize_t elv_iosched_store(struct request_queue *q, const char *name,
{ {
int ret; int ret;
if (!queue_is_mq(q) || !elv_support_iosched(q)) if (!elv_support_iosched(q))
return count; return count;
ret = __elevator_change(q, name); ret = __elevator_change(q, name);


@ -50,14 +50,13 @@ static void disk_release_events(struct gendisk *disk);
* zero and will not be set to zero * zero and will not be set to zero
*/ */
void set_capacity_revalidate_and_notify(struct gendisk *disk, sector_t size, void set_capacity_revalidate_and_notify(struct gendisk *disk, sector_t size,
bool revalidate) bool update_bdev)
{ {
sector_t capacity = get_capacity(disk); sector_t capacity = get_capacity(disk);
set_capacity(disk, size); set_capacity(disk, size);
if (update_bdev)
if (revalidate) revalidate_disk_size(disk, true);
revalidate_disk(disk);
if (capacity != size && capacity != 0 && size != 0) { if (capacity != size && capacity != 0 && size != 0) {
char *envp[] = { "RESIZE=1", NULL }; char *envp[] = { "RESIZE=1", NULL };
@ -86,7 +85,7 @@ char *disk_name(struct gendisk *hd, int partno, char *buf)
const char *bdevname(struct block_device *bdev, char *buf) const char *bdevname(struct block_device *bdev, char *buf)
{ {
return disk_name(bdev->bd_disk, bdev->bd_part->partno, buf); return disk_name(bdev->bd_disk, bdev->bd_partno, buf);
} }
EXPORT_SYMBOL(bdevname); EXPORT_SYMBOL(bdevname);
@ -110,8 +109,7 @@ static void part_stat_read_all(struct hd_struct *part, struct disk_stats *stat)
} }
} }
static unsigned int part_in_flight(struct request_queue *q, static unsigned int part_in_flight(struct hd_struct *part)
struct hd_struct *part)
{ {
unsigned int inflight = 0; unsigned int inflight = 0;
int cpu; int cpu;
@ -126,8 +124,7 @@ static unsigned int part_in_flight(struct request_queue *q,
return inflight; return inflight;
} }
static void part_in_flight_rw(struct request_queue *q, struct hd_struct *part, static void part_in_flight_rw(struct hd_struct *part, unsigned int inflight[2])
unsigned int inflight[2])
{ {
int cpu; int cpu;
@ -676,11 +673,23 @@ static int exact_lock(dev_t devt, void *data)
return 0; return 0;
} }
static void disk_scan_partitions(struct gendisk *disk)
{
struct block_device *bdev;
if (!get_capacity(disk) || !disk_part_scan_enabled(disk))
return;
set_bit(GD_NEED_PART_SCAN, &disk->state);
bdev = blkdev_get_by_dev(disk_devt(disk), FMODE_READ, NULL);
if (!IS_ERR(bdev))
blkdev_put(bdev, FMODE_READ);
}
static void register_disk(struct device *parent, struct gendisk *disk, static void register_disk(struct device *parent, struct gendisk *disk,
const struct attribute_group **groups) const struct attribute_group **groups)
{ {
struct device *ddev = disk_to_dev(disk); struct device *ddev = disk_to_dev(disk);
struct block_device *bdev;
struct disk_part_iter piter; struct disk_part_iter piter;
struct hd_struct *part; struct hd_struct *part;
int err; int err;
@ -722,25 +731,8 @@ static void register_disk(struct device *parent, struct gendisk *disk,
return; return;
} }
/* No minors to use for partitions */ disk_scan_partitions(disk);
if (!disk_part_scan_enabled(disk))
goto exit;
/* No such device (e.g., media were just removed) */
if (!get_capacity(disk))
goto exit;
bdev = bdget_disk(disk, 0);
if (!bdev)
goto exit;
bdev->bd_invalidated = 1;
err = blkdev_get(bdev, FMODE_READ, NULL);
if (err < 0)
goto exit;
blkdev_put(bdev, FMODE_READ);
exit:
/* announce disk after possible partitions are created */ /* announce disk after possible partitions are created */
dev_set_uevent_suppress(ddev, 0); dev_set_uevent_suppress(ddev, 0);
kobject_uevent(&ddev->kobj, KOBJ_ADD); kobject_uevent(&ddev->kobj, KOBJ_ADD);
@ -913,7 +905,7 @@ void del_gendisk(struct gendisk *disk)
DISK_PITER_INCL_EMPTY | DISK_PITER_REVERSE); DISK_PITER_INCL_EMPTY | DISK_PITER_REVERSE);
while ((part = disk_part_iter_next(&piter))) { while ((part = disk_part_iter_next(&piter))) {
invalidate_partition(disk, part->partno); invalidate_partition(disk, part->partno);
delete_partition(disk, part); delete_partition(part);
} }
disk_part_iter_exit(&piter); disk_part_iter_exit(&piter);
@ -1056,7 +1048,7 @@ struct block_device *bdget_disk(struct gendisk *disk, int partno)
part = disk_get_part(disk, partno); part = disk_get_part(disk, partno);
if (part) if (part)
bdev = bdget(part_devt(part)); bdev = bdget_part(part);
disk_put_part(part); disk_put_part(part);
return bdev; return bdev;
@ -1301,7 +1293,7 @@ ssize_t part_stat_show(struct device *dev,
if (queue_is_mq(q)) if (queue_is_mq(q))
inflight = blk_mq_in_flight(q, p); inflight = blk_mq_in_flight(q, p);
else else
inflight = part_in_flight(q, p); inflight = part_in_flight(p);
return sprintf(buf, return sprintf(buf,
"%8lu %8lu %8llu %8u " "%8lu %8lu %8llu %8u "
@ -1343,7 +1335,7 @@ ssize_t part_inflight_show(struct device *dev, struct device_attribute *attr,
if (queue_is_mq(q)) if (queue_is_mq(q))
blk_mq_in_flight_rw(q, p, inflight); blk_mq_in_flight_rw(q, p, inflight);
else else
part_in_flight_rw(q, p, inflight); part_in_flight_rw(p, inflight);
return sprintf(buf, "%8u %8u\n", inflight[0], inflight[1]); return sprintf(buf, "%8u %8u\n", inflight[0], inflight[1]);
} }
@ -1623,7 +1615,7 @@ static int diskstats_show(struct seq_file *seqf, void *v)
if (queue_is_mq(gp->queue)) if (queue_is_mq(gp->queue))
inflight = blk_mq_in_flight(gp->queue, hd); inflight = blk_mq_in_flight(gp->queue, hd);
else else
inflight = part_in_flight(gp->queue, hd); inflight = part_in_flight(hd);
seq_printf(seqf, "%4d %7d %s " seq_printf(seqf, "%4d %7d %s "
"%lu %lu %lu %u " "%lu %lu %lu %u "
@ -1729,19 +1721,20 @@ struct gendisk *__alloc_disk_node(int minors, int node_id)
} }
disk = kzalloc_node(sizeof(struct gendisk), GFP_KERNEL, node_id); disk = kzalloc_node(sizeof(struct gendisk), GFP_KERNEL, node_id);
if (disk) { if (!disk)
disk->part0.dkstats = alloc_percpu(struct disk_stats);
if (!disk->part0.dkstats) {
kfree(disk);
return NULL; return NULL;
}
disk->part0.dkstats = alloc_percpu(struct disk_stats);
if (!disk->part0.dkstats)
goto out_free_disk;
init_rwsem(&disk->lookup_sem); init_rwsem(&disk->lookup_sem);
disk->node_id = node_id; disk->node_id = node_id;
if (disk_expand_part_tbl(disk, 0)) { if (disk_expand_part_tbl(disk, 0)) {
free_percpu(disk->part0.dkstats); free_percpu(disk->part0.dkstats);
kfree(disk); goto out_free_disk;
return NULL;
} }
ptbl = rcu_dereference_protected(disk->part_tbl, 1); ptbl = rcu_dereference_protected(disk->part_tbl, 1);
rcu_assign_pointer(ptbl->part[0], &disk->part0); rcu_assign_pointer(ptbl->part[0], &disk->part0);
@ -1755,19 +1748,21 @@ struct gendisk *__alloc_disk_node(int minors, int node_id)
* converted to make use of bd_mutex and sequence counters. * converted to make use of bd_mutex and sequence counters.
*/ */
hd_sects_seq_init(&disk->part0); hd_sects_seq_init(&disk->part0);
if (hd_ref_init(&disk->part0)) { if (hd_ref_init(&disk->part0))
hd_free_part(&disk->part0); goto out_free_part0;
kfree(disk);
return NULL;
}
disk->minors = minors; disk->minors = minors;
rand_initialize_disk(disk); rand_initialize_disk(disk);
disk_to_dev(disk)->class = &block_class; disk_to_dev(disk)->class = &block_class;
disk_to_dev(disk)->type = &disk_type; disk_to_dev(disk)->type = &disk_type;
device_initialize(disk_to_dev(disk)); device_initialize(disk_to_dev(disk));
}
return disk; return disk;
out_free_part0:
hd_free_part(&disk->part0);
out_free_disk:
kfree(disk);
return NULL;
} }
EXPORT_SYMBOL(__alloc_disk_node); EXPORT_SYMBOL(__alloc_disk_node);
@ -2052,7 +2047,7 @@ void disk_flush_events(struct gendisk *disk, unsigned int mask)
* CONTEXT: * CONTEXT:
* Might sleep. * Might sleep.
*/ */
unsigned int disk_clear_events(struct gendisk *disk, unsigned int mask) static unsigned int disk_clear_events(struct gendisk *disk, unsigned int mask)
{ {
struct disk_events *ev = disk->ev; struct disk_events *ev = disk->ev;
unsigned int pending; unsigned int pending;
@ -2090,6 +2085,33 @@ unsigned int disk_clear_events(struct gendisk *disk, unsigned int mask)
return pending; return pending;
} }
/**
* bdev_check_media_change - check if a removable media has been changed
* @bdev: block device to check
*
* Check whether a removable media has been changed, and attempt to free all
* dentries and inodes and invalidates all block device page cache entries in
* that case.
*
* Returns %true if the block device changed, or %false if not.
*/
bool bdev_check_media_change(struct block_device *bdev)
{
unsigned int events;
events = disk_clear_events(bdev->bd_disk, DISK_EVENT_MEDIA_CHANGE |
DISK_EVENT_EJECT_REQUEST);
if (!(events & DISK_EVENT_MEDIA_CHANGE))
return false;
if (__invalidate_device(bdev, true))
pr_warn("VFS: busy inodes on changed media %s\n",
bdev->bd_disk->disk_name);
set_bit(GD_NEED_PART_SCAN, &bdev->bd_disk->state);
return true;
}
EXPORT_SYMBOL(bdev_check_media_change);
/* /*
* Separate this part out so that a different pointer for clearing_ptr can be * Separate this part out so that a different pointer for clearing_ptr can be
* passed in for disk_clear_events. * passed in for disk_clear_events.


@ -23,7 +23,7 @@ static int blkpg_do_ioctl(struct block_device *bdev,
return -EACCES; return -EACCES;
if (copy_from_user(&p, upart, sizeof(struct blkpg_partition))) if (copy_from_user(&p, upart, sizeof(struct blkpg_partition)))
return -EFAULT; return -EFAULT;
if (bdev != bdev->bd_contains) if (bdev_is_partition(bdev))
return -EINVAL; return -EINVAL;
if (p.pno <= 0) if (p.pno <= 0)
@ -94,7 +94,7 @@ static int blkdev_reread_part(struct block_device *bdev)
{ {
int ret; int ret;
if (!disk_part_scan_enabled(bdev->bd_disk) || bdev != bdev->bd_contains) if (!disk_part_scan_enabled(bdev->bd_disk) || bdev_is_partition(bdev))
return -EINVAL; return -EINVAL;
if (!capable(CAP_SYS_ADMIN)) if (!capable(CAP_SYS_ADMIN))
return -EACCES; return -EACCES;
@ -112,8 +112,7 @@ static int blk_ioctl_discard(struct block_device *bdev, fmode_t mode,
uint64_t range[2]; uint64_t range[2];
uint64_t start, len; uint64_t start, len;
struct request_queue *q = bdev_get_queue(bdev); struct request_queue *q = bdev_get_queue(bdev);
struct address_space *mapping = bdev->bd_inode->i_mapping; int err;
if (!(mode & FMODE_WRITE)) if (!(mode & FMODE_WRITE))
return -EBADF; return -EBADF;
@ -134,7 +133,11 @@ static int blk_ioctl_discard(struct block_device *bdev, fmode_t mode,
if (start + len > i_size_read(bdev->bd_inode)) if (start + len > i_size_read(bdev->bd_inode))
return -EINVAL; return -EINVAL;
truncate_inode_pages_range(mapping, start, start + len - 1);
err = truncate_bdev_range(bdev, mode, start, start + len - 1);
if (err)
return err;
return blkdev_issue_discard(bdev, start >> 9, len >> 9, return blkdev_issue_discard(bdev, start >> 9, len >> 9,
GFP_KERNEL, flags); GFP_KERNEL, flags);
} }
@ -143,8 +146,8 @@ static int blk_ioctl_zeroout(struct block_device *bdev, fmode_t mode,
unsigned long arg) unsigned long arg)
{ {
uint64_t range[2]; uint64_t range[2];
struct address_space *mapping;
uint64_t start, end, len; uint64_t start, end, len;
int err;
if (!(mode & FMODE_WRITE)) if (!(mode & FMODE_WRITE))
return -EBADF; return -EBADF;
@ -166,8 +169,9 @@ static int blk_ioctl_zeroout(struct block_device *bdev, fmode_t mode,
return -EINVAL; return -EINVAL;
/* Invalidate the page cache, including dirty pages */ /* Invalidate the page cache, including dirty pages */
mapping = bdev->bd_inode->i_mapping; err = truncate_bdev_range(bdev, mode, start, end);
truncate_inode_pages_range(mapping, start, end); if (err)
return err;
return blkdev_issue_zeroout(bdev, start >> 9, len >> 9, GFP_KERNEL, return blkdev_issue_zeroout(bdev, start >> 9, len >> 9, GFP_KERNEL,
BLKDEV_ZERO_NOUNMAP); BLKDEV_ZERO_NOUNMAP);
@ -474,15 +478,14 @@ static int blkdev_bszset(struct block_device *bdev, fmode_t mode,
if (get_user(n, argp)) if (get_user(n, argp))
return -EFAULT; return -EFAULT;
if (!(mode & FMODE_EXCL)) { if (mode & FMODE_EXCL)
bdgrab(bdev); return set_blocksize(bdev, n);
if (blkdev_get(bdev, mode | FMODE_EXCL, &bdev) < 0)
return -EBUSY;
}
if (IS_ERR(blkdev_get_by_dev(bdev->bd_dev, mode | FMODE_EXCL, &bdev)))
return -EBUSY;
ret = set_blocksize(bdev, n); ret = set_blocksize(bdev, n);
if (!(mode & FMODE_EXCL))
blkdev_put(bdev, mode | FMODE_EXCL); blkdev_put(bdev, mode | FMODE_EXCL);
return ret; return ret;
} }


@ -69,7 +69,7 @@ int ioprio_check_cap(int ioprio)
switch (class) { switch (class) {
case IOPRIO_CLASS_RT: case IOPRIO_CLASS_RT:
if (!capable(CAP_SYS_ADMIN)) if (!capable(CAP_SYS_NICE) && !capable(CAP_SYS_ADMIN))
return -EPERM; return -EPERM;
fallthrough; fallthrough;
/* rt has prio field too */ /* rt has prio field too */


@ -359,7 +359,7 @@ static unsigned int kyber_sched_tags_shift(struct request_queue *q)
* All of the hardware queues have the same depth, so we can just grab * All of the hardware queues have the same depth, so we can just grab
* the shift of the first one. * the shift of the first one.
*/ */
return q->queue_hw_ctx[0]->sched_tags->bitmap_tags.sb.shift; return q->queue_hw_ctx[0]->sched_tags->bitmap_tags->sb.shift;
} }
static struct kyber_queue_data *kyber_queue_data_alloc(struct request_queue *q) static struct kyber_queue_data *kyber_queue_data_alloc(struct request_queue *q)
@ -502,7 +502,7 @@ static int kyber_init_hctx(struct blk_mq_hw_ctx *hctx, unsigned int hctx_idx)
khd->batching = 0; khd->batching = 0;
hctx->sched_data = khd; hctx->sched_data = khd;
sbitmap_queue_min_shallow_depth(&hctx->sched_tags->bitmap_tags, sbitmap_queue_min_shallow_depth(hctx->sched_tags->bitmap_tags,
kqd->async_depth); kqd->async_depth);
return 0; return 0;
@ -573,7 +573,7 @@ static bool kyber_bio_merge(struct blk_mq_hw_ctx *hctx, struct bio *bio,
bool merged; bool merged;
spin_lock(&kcq->lock); spin_lock(&kcq->lock);
merged = blk_mq_bio_list_merge(hctx->queue, rq_list, bio, nr_segs); merged = blk_bio_list_merge(hctx->queue, rq_list, bio, nr_segs);
spin_unlock(&kcq->lock); spin_unlock(&kcq->lock);
return merged; return merged;


@ -386,6 +386,8 @@ static struct request *dd_dispatch_request(struct blk_mq_hw_ctx *hctx)
spin_lock(&dd->lock); spin_lock(&dd->lock);
rq = __dd_dispatch_request(dd); rq = __dd_dispatch_request(dd);
spin_unlock(&dd->lock); spin_unlock(&dd->lock);
if (rq)
atomic_dec(&rq->mq_hctx->elevator_queued);
return rq; return rq;
} }
@ -533,6 +535,7 @@ static void dd_insert_requests(struct blk_mq_hw_ctx *hctx,
rq = list_first_entry(list, struct request, queuelist); rq = list_first_entry(list, struct request, queuelist);
list_del_init(&rq->queuelist); list_del_init(&rq->queuelist);
dd_insert_request(hctx, rq, at_head); dd_insert_request(hctx, rq, at_head);
atomic_inc(&hctx->elevator_queued);
} }
spin_unlock(&dd->lock); spin_unlock(&dd->lock);
} }
@ -579,6 +582,9 @@ static bool dd_has_work(struct blk_mq_hw_ctx *hctx)
{ {
struct deadline_data *dd = hctx->queue->elevator->elevator_data; struct deadline_data *dd = hctx->queue->elevator->elevator_data;
if (!atomic_read(&hctx->elevator_queued))
return false;
return !list_empty_careful(&dd->dispatch) || return !list_empty_careful(&dd->dispatch) ||
!list_empty_careful(&dd->fifo_list[0]) || !list_empty_careful(&dd->fifo_list[0]) ||
!list_empty_careful(&dd->fifo_list[1]); !list_empty_careful(&dd->fifo_list[1]);


@ -199,14 +199,20 @@ static ssize_t part_alignment_offset_show(struct device *dev,
struct device_attribute *attr, char *buf) struct device_attribute *attr, char *buf)
{ {
struct hd_struct *p = dev_to_part(dev); struct hd_struct *p = dev_to_part(dev);
return sprintf(buf, "%llu\n", (unsigned long long)p->alignment_offset);
return sprintf(buf, "%u\n",
queue_limit_alignment_offset(&part_to_disk(p)->queue->limits,
p->start_sect));
} }
static ssize_t part_discard_alignment_show(struct device *dev, static ssize_t part_discard_alignment_show(struct device *dev,
struct device_attribute *attr, char *buf) struct device_attribute *attr, char *buf)
{ {
struct hd_struct *p = dev_to_part(dev); struct hd_struct *p = dev_to_part(dev);
return sprintf(buf, "%u\n", p->discard_alignment);
return sprintf(buf, "%u\n",
queue_limit_discard_alignment(&part_to_disk(p)->queue->limits,
p->start_sect));
} }
static DEVICE_ATTR(partition, 0444, part_partition_show, NULL); static DEVICE_ATTR(partition, 0444, part_partition_show, NULL);
@ -318,8 +324,9 @@ int hd_ref_init(struct hd_struct *part)
* Must be called either with bd_mutex held, before a disk can be opened or * Must be called either with bd_mutex held, before a disk can be opened or
* after all disk users are gone. * after all disk users are gone.
*/ */
void delete_partition(struct gendisk *disk, struct hd_struct *part) void delete_partition(struct hd_struct *part)
{ {
struct gendisk *disk = part_to_disk(part);
struct disk_part_tbl *ptbl = struct disk_part_tbl *ptbl =
rcu_dereference_protected(disk->part_tbl, 1); rcu_dereference_protected(disk->part_tbl, 1);
@ -327,7 +334,7 @@ void delete_partition(struct gendisk *disk, struct hd_struct *part)
* ->part_tbl is referenced in this part's release handler, so * ->part_tbl is referenced in this part's release handler, so
* we have to hold the disk device * we have to hold the disk device
*/ */
get_device(disk_to_dev(part_to_disk(part))); get_device(disk_to_dev(disk));
rcu_assign_pointer(ptbl->part[part->partno], NULL); rcu_assign_pointer(ptbl->part[part->partno], NULL);
kobject_put(part->holder_dir); kobject_put(part->holder_dir);
device_del(part_to_dev(part)); device_del(part_to_dev(part));
@ -405,10 +412,6 @@ static struct hd_struct *add_partition(struct gendisk *disk, int partno,
pdev = part_to_dev(p); pdev = part_to_dev(p);
p->start_sect = start; p->start_sect = start;
p->alignment_offset =
queue_limit_alignment_offset(&disk->queue->limits, start);
p->discard_alignment =
queue_limit_discard_alignment(&disk->queue->limits, start);
p->nr_sects = len; p->nr_sects = len;
p->partno = partno; p->partno = partno;
p->policy = get_disk_ro(disk); p->policy = get_disk_ro(disk);
@ -554,7 +557,7 @@ int bdev_del_partition(struct block_device *bdev, int partno)
sync_blockdev(bdevp); sync_blockdev(bdevp);
invalidate_bdev(bdevp); invalidate_bdev(bdevp);
delete_partition(bdev->bd_disk, part); delete_partition(part);
ret = 0; ret = 0;
out_unlock: out_unlock:
mutex_unlock(&bdev->bd_mutex); mutex_unlock(&bdev->bd_mutex);
@ -577,7 +580,7 @@ int bdev_resize_partition(struct block_device *bdev, int partno,
return -ENXIO; return -ENXIO;
ret = -ENOMEM; ret = -ENOMEM;
bdevp = bdget(part_devt(part)); bdevp = bdget_part(part);
if (!bdevp) if (!bdevp)
goto out_put_part; goto out_put_part;
@ -592,8 +595,8 @@ int bdev_resize_partition(struct block_device *bdev, int partno,
if (partition_overlaps(bdev->bd_disk, start, length, partno)) if (partition_overlaps(bdev->bd_disk, start, length, partno))
goto out_unlock; goto out_unlock;
part_nr_sects_write(part, (sector_t)length); part_nr_sects_write(part, length);
i_size_write(bdevp->bd_inode, length << SECTOR_SHIFT); bd_set_nr_sectors(bdevp, length);
ret = 0; ret = 0;
out_unlock: out_unlock:
@ -634,7 +637,7 @@ int blk_drop_partitions(struct block_device *bdev)
disk_part_iter_init(&piter, bdev->bd_disk, DISK_PITER_INCL_EMPTY); disk_part_iter_init(&piter, bdev->bd_disk, DISK_PITER_INCL_EMPTY);
while ((part = disk_part_iter_next(&piter))) while ((part = disk_part_iter_next(&piter)))
delete_partition(bdev->bd_disk, part); delete_partition(part);
disk_part_iter_exit(&piter); disk_part_iter_exit(&piter);
return 0; return 0;


@ -37,8 +37,6 @@ const unsigned char scsi_command_size_tbl[8] =
}; };
EXPORT_SYMBOL(scsi_command_size_tbl); EXPORT_SYMBOL(scsi_command_size_tbl);
#include <scsi/sg.h>
static int sg_get_version(int __user *p) static int sg_get_version(int __user *p)
{ {
static const int sg_version_num = 30527; static const int sg_version_num = 30527;
@ -847,7 +845,7 @@ EXPORT_SYMBOL(scsi_cmd_ioctl);
int scsi_verify_blk_ioctl(struct block_device *bd, unsigned int cmd) int scsi_verify_blk_ioctl(struct block_device *bd, unsigned int cmd)
{ {
if (bd && bd == bd->bd_contains) if (bd && !bdev_is_partition(bd))
return 0; return 0;
if (capable(CAP_SYS_RAWIO)) if (capable(CAP_SYS_RAWIO))


@ -1670,7 +1670,7 @@ static int floppy_open(struct block_device *bdev, fmode_t mode)
} }
if (mode & (FMODE_READ|FMODE_WRITE)) { if (mode & (FMODE_READ|FMODE_WRITE)) {
check_disk_change(bdev); bdev_check_media_change(bdev);
if (mode & FMODE_WRITE) { if (mode & FMODE_WRITE) {
int wrprot; int wrprot;


@ -347,7 +347,6 @@ aoeblk_gdalloc(void *vp)
mempool_t *mp; mempool_t *mp;
struct request_queue *q; struct request_queue *q;
struct blk_mq_tag_set *set; struct blk_mq_tag_set *set;
enum { KB = 1024, MB = KB * KB, READ_AHEAD = 2 * MB, };
ulong flags; ulong flags;
int late = 0; int late = 0;
int err; int err;
@ -407,7 +406,7 @@ aoeblk_gdalloc(void *vp)
WARN_ON(d->gd); WARN_ON(d->gd);
WARN_ON(d->flags & DEVFL_UP); WARN_ON(d->flags & DEVFL_UP);
blk_queue_max_hw_sectors(q, BLK_DEF_MAX_SECTORS); blk_queue_max_hw_sectors(q, BLK_DEF_MAX_SECTORS);
q->backing_dev_info->ra_pages = READ_AHEAD / PAGE_SIZE; blk_queue_io_opt(q, SZ_2M);
d->bufpool = mp; d->bufpool = mp;
d->blkq = gd->queue = q; d->blkq = gd->queue = q;
q->queuedata = d; q->queuedata = d;


@ -900,9 +900,7 @@ aoecmd_sleepwork(struct work_struct *work)
ssize = get_capacity(d->gd); ssize = get_capacity(d->gd);
bd = bdget_disk(d->gd, 0); bd = bdget_disk(d->gd, 0);
if (bd) { if (bd) {
inode_lock(bd->bd_inode); bd_set_nr_sectors(bd, ssize);
i_size_write(bd->bd_inode, (loff_t)ssize<<9);
inode_unlock(bd->bd_inode);
bdput(bd); bdput(bd);
} }
spin_lock_irq(&d->lock); spin_lock_irq(&d->lock);


@ -1732,7 +1732,8 @@ static int fd_locked_ioctl(struct block_device *bdev, fmode_t mode,
/* invalidate the buffer track to force a reread */ /* invalidate the buffer track to force a reread */
BufferDrive = -1; BufferDrive = -1;
set_bit(drive, &fake_change); set_bit(drive, &fake_change);
check_disk_change(bdev); if (bdev_check_media_change(bdev))
floppy_revalidate(bdev->bd_disk);
return 0; return 0;
default: default:
return -EINVAL; return -EINVAL;
@ -1909,7 +1910,8 @@ static int floppy_open(struct block_device *bdev, fmode_t mode)
return 0; return 0;
if (mode & (FMODE_READ|FMODE_WRITE)) { if (mode & (FMODE_READ|FMODE_WRITE)) {
check_disk_change(bdev); if (bdev_check_media_change(bdev))
floppy_revalidate(bdev->bd_disk);
if (mode & FMODE_WRITE) { if (mode & FMODE_WRITE) {
if (p->wpstat) { if (p->wpstat) {
if (p->ref < 0) if (p->ref < 0)
@ -1953,7 +1955,6 @@ static const struct block_device_operations floppy_fops = {
.release = floppy_release, .release = floppy_release,
.ioctl = fd_ioctl, .ioctl = fd_ioctl,
.check_events = floppy_check_events, .check_events = floppy_check_events,
.revalidate_disk= floppy_revalidate,
}; };
static const struct blk_mq_ops ataflop_mq_ops = { static const struct blk_mq_ops ataflop_mq_ops = {


@ -403,7 +403,6 @@ static struct brd_device *brd_alloc(int i)
disk->flags = GENHD_FL_EXT_DEVT; disk->flags = GENHD_FL_EXT_DEVT;
sprintf(disk->disk_name, "ram%d", i); sprintf(disk->disk_name, "ram%d", i);
set_capacity(disk, rd_size * 2); set_capacity(disk, rd_size * 2);
brd->brd_queue->backing_dev_info->capabilities |= BDI_CAP_SYNCHRONOUS_IO;
/* Tell the block layer that this is not a rotational device */ /* Tell the block layer that this is not a rotational device */
blk_queue_flag_set(QUEUE_FLAG_NONROT, brd->brd_queue); blk_queue_flag_set(QUEUE_FLAG_NONROT, brd->brd_queue);


@ -865,7 +865,7 @@ int __drbd_change_sync(struct drbd_device *device, sector_t sector, int size,
if (!get_ldev(device)) if (!get_ldev(device))
return 0; /* no disk, no metadata, no bitmap to manipulate bits in */ return 0; /* no disk, no metadata, no bitmap to manipulate bits in */
nr_sectors = drbd_get_capacity(device->this_bdev); nr_sectors = get_capacity(device->vdisk);
esector = sector + (size >> 9) - 1; esector = sector + (size >> 9) - 1;
if (!expect(sector < nr_sectors)) if (!expect(sector < nr_sectors))


@ -841,7 +841,6 @@ struct drbd_device {
sector_t p_size; /* partner's disk size */ sector_t p_size; /* partner's disk size */
struct request_queue *rq_queue; struct request_queue *rq_queue;
struct block_device *this_bdev;
struct gendisk *vdisk; struct gendisk *vdisk;
unsigned long last_reattach_jif; unsigned long last_reattach_jif;


@ -984,7 +984,10 @@ int drbd_send_sizes(struct drbd_peer_device *peer_device, int trigger_reply, enu
p->d_size = cpu_to_be64(d_size); p->d_size = cpu_to_be64(d_size);
p->u_size = cpu_to_be64(u_size); p->u_size = cpu_to_be64(u_size);
p->c_size = cpu_to_be64(trigger_reply ? 0 : drbd_get_capacity(device->this_bdev)); if (trigger_reply)
p->c_size = 0;
else
p->c_size = cpu_to_be64(get_capacity(device->vdisk));
p->max_bio_size = cpu_to_be32(max_bio_size); p->max_bio_size = cpu_to_be32(max_bio_size);
p->queue_order_type = cpu_to_be16(q_order_type); p->queue_order_type = cpu_to_be16(q_order_type);
p->dds_flags = cpu_to_be16(flags); p->dds_flags = cpu_to_be16(flags);
@ -2029,17 +2032,13 @@ void drbd_init_set_defaults(struct drbd_device *device)
device->local_max_bio_size = DRBD_MAX_BIO_SIZE_SAFE; device->local_max_bio_size = DRBD_MAX_BIO_SIZE_SAFE;
} }
static void _drbd_set_my_capacity(struct drbd_device *device, sector_t size)
{
/* set_capacity(device->this_bdev->bd_disk, size); */
set_capacity(device->vdisk, size);
device->this_bdev->bd_inode->i_size = (loff_t)size << 9;
}
void drbd_set_my_capacity(struct drbd_device *device, sector_t size) void drbd_set_my_capacity(struct drbd_device *device, sector_t size)
{ {
char ppb[10]; char ppb[10];
_drbd_set_my_capacity(device, size);
set_capacity(device->vdisk, size);
revalidate_disk_size(device->vdisk, false);
drbd_info(device, "size = %s (%llu KB)\n", drbd_info(device, "size = %s (%llu KB)\n",
ppsize(ppb, size>>1), (unsigned long long)size>>1); ppsize(ppb, size>>1), (unsigned long long)size>>1);
} }
@ -2069,7 +2068,8 @@ void drbd_device_cleanup(struct drbd_device *device)
} }
D_ASSERT(device, first_peer_device(device)->connection->net_conf == NULL); D_ASSERT(device, first_peer_device(device)->connection->net_conf == NULL);
_drbd_set_my_capacity(device, 0); set_capacity(device->vdisk, 0);
revalidate_disk_size(device->vdisk, false);
if (device->bitmap) { if (device->bitmap) {
/* maybe never allocated. */ /* maybe never allocated. */
drbd_bm_resize(device, 0, 1); drbd_bm_resize(device, 0, 1);
@ -2236,9 +2236,6 @@ void drbd_destroy_device(struct kref *kref)
/* cleanup stuff that may have been allocated during /* cleanup stuff that may have been allocated during
* device (re-)configuration or state changes */ * device (re-)configuration or state changes */
if (device->this_bdev)
bdput(device->this_bdev);
drbd_backing_dev_free(device, device->ldev); drbd_backing_dev_free(device, device->ldev);
device->ldev = NULL; device->ldev = NULL;
@ -2765,10 +2762,6 @@ enum drbd_ret_code drbd_create_device(struct drbd_config_context *adm_ctx, unsig
sprintf(disk->disk_name, "drbd%d", minor); sprintf(disk->disk_name, "drbd%d", minor);
disk->private_data = device; disk->private_data = device;
device->this_bdev = bdget(MKDEV(DRBD_MAJOR, minor));
/* we have no partitions. we contain only ourselves. */
device->this_bdev->bd_contains = device->this_bdev;
blk_queue_write_cache(q, true, true); blk_queue_write_cache(q, true, true);
/* Setting the max_hw_sectors to an odd value of 8kibyte here /* Setting the max_hw_sectors to an odd value of 8kibyte here
This triggers a max_bio_size message upon first attach or connect */ This triggers a max_bio_size message upon first attach or connect */
@ -3044,7 +3037,7 @@ void drbd_md_write(struct drbd_device *device, void *b)
memset(buffer, 0, sizeof(*buffer)); memset(buffer, 0, sizeof(*buffer));
buffer->la_size_sect = cpu_to_be64(drbd_get_capacity(device->this_bdev)); buffer->la_size_sect = cpu_to_be64(get_capacity(device->vdisk));
for (i = UI_CURRENT; i < UI_SIZE; i++) for (i = UI_CURRENT; i < UI_SIZE; i++)
buffer->uuid[i] = cpu_to_be64(device->ldev->md.uuid[i]); buffer->uuid[i] = cpu_to_be64(device->ldev->md.uuid[i]);
buffer->flags = cpu_to_be32(device->ldev->md.flags); buffer->flags = cpu_to_be32(device->ldev->md.flags);
@ -3102,7 +3095,7 @@ void drbd_md_sync(struct drbd_device *device)
/* Update device->ldev->md.la_size_sect, /* Update device->ldev->md.la_size_sect,
* since we updated it on metadata. */ * since we updated it on metadata. */
device->ldev->md.la_size_sect = drbd_get_capacity(device->this_bdev); device->ldev->md.la_size_sect = get_capacity(device->vdisk);
drbd_md_put_buffer(device); drbd_md_put_buffer(device);
out: out:


@ -996,7 +996,7 @@ drbd_determine_dev_size(struct drbd_device *device, enum dds_flags flags, struct
goto err_out; goto err_out;
} }
if (drbd_get_capacity(device->this_bdev) != size || if (get_capacity(device->vdisk) != size ||
drbd_bm_capacity(device) != size) { drbd_bm_capacity(device) != size) {
int err; int err;
err = drbd_bm_resize(device, size, !(flags & DDSF_NO_RESYNC)); err = drbd_bm_resize(device, size, !(flags & DDSF_NO_RESYNC));
@ -1362,15 +1362,7 @@ static void drbd_setup_queue_param(struct drbd_device *device, struct drbd_backi
if (b) { if (b) {
blk_stack_limits(&q->limits, &b->limits, 0); blk_stack_limits(&q->limits, &b->limits, 0);
blk_queue_update_readahead(q);
if (q->backing_dev_info->ra_pages !=
b->backing_dev_info->ra_pages) {
drbd_info(device, "Adjusting my ra_pages to backing device's (%lu -> %lu)\n",
q->backing_dev_info->ra_pages,
b->backing_dev_info->ra_pages);
q->backing_dev_info->ra_pages =
b->backing_dev_info->ra_pages;
}
} }
fixup_discard_if_not_supported(q); fixup_discard_if_not_supported(q);
fixup_write_zeroes(device, q); fixup_write_zeroes(device, q);
@ -1941,8 +1933,7 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info)
/* Make sure the new disk is big enough /* Make sure the new disk is big enough
* (we may currently be R_PRIMARY with no local disk...) */ * (we may currently be R_PRIMARY with no local disk...) */
if (drbd_get_max_capacity(nbc) < if (drbd_get_max_capacity(nbc) < get_capacity(device->vdisk)) {
drbd_get_capacity(device->this_bdev)) {
retcode = ERR_DISK_TOO_SMALL; retcode = ERR_DISK_TOO_SMALL;
goto fail; goto fail;
} }
@ -3370,7 +3361,6 @@ static void device_to_statistics(struct device_statistics *s,
if (get_ldev(device)) { if (get_ldev(device)) {
struct drbd_md *md = &device->ldev->md; struct drbd_md *md = &device->ldev->md;
u64 *history_uuids = (u64 *)s->history_uuids; u64 *history_uuids = (u64 *)s->history_uuids;
struct request_queue *q;
int n; int n;
spin_lock_irq(&md->uuid_lock); spin_lock_irq(&md->uuid_lock);
@ -3384,14 +3374,9 @@ static void device_to_statistics(struct device_statistics *s,
spin_unlock_irq(&md->uuid_lock); spin_unlock_irq(&md->uuid_lock);
s->dev_disk_flags = md->flags; s->dev_disk_flags = md->flags;
q = bdev_get_queue(device->ldev->backing_bdev);
s->dev_lower_blocked =
bdi_congested(q->backing_dev_info,
(1 << WB_async_congested) |
(1 << WB_sync_congested));
put_ldev(device); put_ldev(device);
} }
s->dev_size = drbd_get_capacity(device->this_bdev); s->dev_size = get_capacity(device->vdisk);
s->dev_read = device->read_cnt; s->dev_read = device->read_cnt;
s->dev_write = device->writ_cnt; s->dev_write = device->writ_cnt;
s->dev_al_writes = device->al_writ_cnt; s->dev_al_writes = device->al_writ_cnt;
@ -3831,8 +3816,7 @@ static int nla_put_status_info(struct sk_buff *skb, struct drbd_device *device,
if (nla_put_u32(skb, T_sib_reason, sib ? sib->sib_reason : SIB_GET_STATUS_REPLY) || if (nla_put_u32(skb, T_sib_reason, sib ? sib->sib_reason : SIB_GET_STATUS_REPLY) ||
nla_put_u32(skb, T_current_state, device->state.i) || nla_put_u32(skb, T_current_state, device->state.i) ||
nla_put_u64_0pad(skb, T_ed_uuid, device->ed_uuid) || nla_put_u64_0pad(skb, T_ed_uuid, device->ed_uuid) ||
nla_put_u64_0pad(skb, T_capacity, nla_put_u64_0pad(skb, T_capacity, get_capacity(device->vdisk)) ||
drbd_get_capacity(device->this_bdev)) ||
nla_put_u64_0pad(skb, T_send_cnt, device->send_cnt) || nla_put_u64_0pad(skb, T_send_cnt, device->send_cnt) ||
nla_put_u64_0pad(skb, T_recv_cnt, device->recv_cnt) || nla_put_u64_0pad(skb, T_recv_cnt, device->recv_cnt) ||
nla_put_u64_0pad(skb, T_read_cnt, device->read_cnt) || nla_put_u64_0pad(skb, T_read_cnt, device->read_cnt) ||


@ -1860,7 +1860,7 @@ read_in_block(struct drbd_peer_device *peer_device, u64 id, sector_t sector,
struct packet_info *pi) __must_hold(local) struct packet_info *pi) __must_hold(local)
{ {
struct drbd_device *device = peer_device->device; struct drbd_device *device = peer_device->device;
const sector_t capacity = drbd_get_capacity(device->this_bdev); const sector_t capacity = get_capacity(device->vdisk);
struct drbd_peer_request *peer_req; struct drbd_peer_request *peer_req;
struct page *page; struct page *page;
int digest_size, err; int digest_size, err;
@ -2789,7 +2789,7 @@ bool drbd_rs_should_slow_down(struct drbd_device *device, sector_t sector,
bool drbd_rs_c_min_rate_throttle(struct drbd_device *device) bool drbd_rs_c_min_rate_throttle(struct drbd_device *device)
{ {
struct gendisk *disk = device->ldev->backing_bdev->bd_contains->bd_disk; struct gendisk *disk = device->ldev->backing_bdev->bd_disk;
unsigned long db, dt, dbdt; unsigned long db, dt, dbdt;
unsigned int c_min_rate; unsigned int c_min_rate;
int curr_events; int curr_events;
@ -2849,7 +2849,7 @@ static int receive_DataRequest(struct drbd_connection *connection, struct packet
if (!peer_device) if (!peer_device)
return -EIO; return -EIO;
device = peer_device->device; device = peer_device->device;
capacity = drbd_get_capacity(device->this_bdev); capacity = get_capacity(device->vdisk);
sector = be64_to_cpu(p->sector); sector = be64_to_cpu(p->sector);
size = be32_to_cpu(p->blksize); size = be32_to_cpu(p->blksize);
@ -4117,7 +4117,7 @@ static int receive_sizes(struct drbd_connection *connection, struct packet_info
if (!peer_device) if (!peer_device)
return config_unknown_volume(connection, pi); return config_unknown_volume(connection, pi);
device = peer_device->device; device = peer_device->device;
cur_size = drbd_get_capacity(device->this_bdev); cur_size = get_capacity(device->vdisk);
p_size = be64_to_cpu(p->d_size); p_size = be64_to_cpu(p->d_size);
p_usize = be64_to_cpu(p->u_size); p_usize = be64_to_cpu(p->u_size);
@ -4252,8 +4252,8 @@ static int receive_sizes(struct drbd_connection *connection, struct packet_info
} }
if (device->state.conn > C_WF_REPORT_PARAMS) { if (device->state.conn > C_WF_REPORT_PARAMS) {
if (be64_to_cpu(p->c_size) != if (be64_to_cpu(p->c_size) != get_capacity(device->vdisk) ||
drbd_get_capacity(device->this_bdev) || ldsc) { ldsc) {
/* we have different sizes, probably peer /* we have different sizes, probably peer
* needs to know my new size... */ * needs to know my new size... */
drbd_send_sizes(peer_device, 0, ddsf); drbd_send_sizes(peer_device, 0, ddsf);


@ -888,7 +888,7 @@ static bool drbd_may_do_local_read(struct drbd_device *device, sector_t sector,
if (device->state.disk != D_INCONSISTENT) if (device->state.disk != D_INCONSISTENT)
return false; return false;
esector = sector + (size >> 9) - 1; esector = sector + (size >> 9) - 1;
nr_sectors = drbd_get_capacity(device->this_bdev); nr_sectors = get_capacity(device->vdisk);
D_ASSERT(device, sector < nr_sectors); D_ASSERT(device, sector < nr_sectors);
D_ASSERT(device, esector < nr_sectors); D_ASSERT(device, esector < nr_sectors);


@ -591,7 +591,7 @@ static int make_resync_request(struct drbd_device *const device, int cancel)
struct drbd_connection *const connection = peer_device ? peer_device->connection : NULL; struct drbd_connection *const connection = peer_device ? peer_device->connection : NULL;
unsigned long bit; unsigned long bit;
sector_t sector; sector_t sector;
const sector_t capacity = drbd_get_capacity(device->this_bdev); const sector_t capacity = get_capacity(device->vdisk);
int max_bio_size; int max_bio_size;
int number, rollback_i, size; int number, rollback_i, size;
int align, requeue = 0; int align, requeue = 0;
@ -769,7 +769,7 @@ static int make_ov_request(struct drbd_device *device, int cancel)
{ {
int number, i, size; int number, i, size;
sector_t sector; sector_t sector;
const sector_t capacity = drbd_get_capacity(device->this_bdev); const sector_t capacity = get_capacity(device->vdisk);
bool stop_sector_reached = false; bool stop_sector_reached = false;
if (unlikely(cancel)) if (unlikely(cancel))
@ -1672,7 +1672,7 @@ void drbd_resync_after_changed(struct drbd_device *device)
void drbd_rs_controller_reset(struct drbd_device *device) void drbd_rs_controller_reset(struct drbd_device *device)
{ {
struct gendisk *disk = device->ldev->backing_bdev->bd_contains->bd_disk; struct gendisk *disk = device->ldev->backing_bdev->bd_disk;
struct fifo_buffer *plan; struct fifo_buffer *plan;
atomic_set(&device->rs_sect_in, 0); atomic_set(&device->rs_sect_in, 0);


@ -561,6 +561,7 @@ static void floppy_release_irq_and_dma(void);
* output_byte is automatically disabled when reset is set. * output_byte is automatically disabled when reset is set.
*/ */
static void reset_fdc(void); static void reset_fdc(void);
static int floppy_revalidate(struct gendisk *disk);
/* /*
* These are global variables, as that's the easiest way to give * These are global variables, as that's the easiest way to give
@ -3275,7 +3276,8 @@ static int invalidate_drive(struct block_device *bdev)
/* invalidate the buffer track to force a reread */ /* invalidate the buffer track to force a reread */
set_bit((long)bdev->bd_disk->private_data, &fake_change); set_bit((long)bdev->bd_disk->private_data, &fake_change);
process_fd_request(); process_fd_request();
check_disk_change(bdev); if (bdev_check_media_change(bdev))
floppy_revalidate(bdev->bd_disk);
return 0; return 0;
} }
@ -4123,7 +4125,8 @@ static int floppy_open(struct block_device *bdev, fmode_t mode)
drive_state[drive].last_checked = 0; drive_state[drive].last_checked = 0;
clear_bit(FD_OPEN_SHOULD_FAIL_BIT, clear_bit(FD_OPEN_SHOULD_FAIL_BIT,
&drive_state[drive].flags); &drive_state[drive].flags);
check_disk_change(bdev); if (bdev_check_media_change(bdev))
floppy_revalidate(bdev->bd_disk);
if (test_bit(FD_DISK_CHANGED_BIT, &drive_state[drive].flags)) if (test_bit(FD_DISK_CHANGED_BIT, &drive_state[drive].flags))
goto out; goto out;
if (test_bit(FD_OPEN_SHOULD_FAIL_BIT, &drive_state[drive].flags)) if (test_bit(FD_OPEN_SHOULD_FAIL_BIT, &drive_state[drive].flags))
@ -4291,7 +4294,6 @@ static const struct block_device_operations floppy_fops = {
.ioctl = fd_ioctl, .ioctl = fd_ioctl,
.getgeo = fd_getgeo, .getgeo = fd_getgeo,
.check_events = floppy_check_events, .check_events = floppy_check_events,
.revalidate_disk = floppy_revalidate,
#ifdef CONFIG_COMPAT #ifdef CONFIG_COMPAT
.compat_ioctl = fd_compat_ioctl, .compat_ioctl = fd_compat_ioctl,
#endif #endif
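
The pattern replacing check_disk_change() in the hunks above is: ask the core whether a media-change event was pending and, if so, run the driver's own revalidation, since ->revalidate_disk is no longer invoked on this path. A minimal sketch, assuming a hypothetical my_open()/my_revalidate() pair:

#include <linux/blkdev.h>

/* Sketch only: my_revalidate() stands in for the driver's revalidation hook. */
static void my_revalidate(struct gendisk *disk);

static int my_open(struct block_device *bdev, fmode_t mode)
{
	if (bdev_check_media_change(bdev))	/* true if a media change was pending */
		my_revalidate(bdev->bd_disk);	/* re-read geometry, capacity, ... */
	return 0;
}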


@ -253,7 +253,7 @@ static void loop_set_size(struct loop_device *lo, loff_t size)
{ {
struct block_device *bdev = lo->lo_device; struct block_device *bdev = lo->lo_device;
bd_set_size(bdev, size << SECTOR_SHIFT); bd_set_nr_sectors(bdev, size);
set_capacity_revalidate_and_notify(lo->lo_disk, size, false); set_capacity_revalidate_and_notify(lo->lo_disk, size, false);
} }
@ -1251,7 +1251,7 @@ static int __loop_clr_fd(struct loop_device *lo, bool release)
set_capacity(lo->lo_disk, 0); set_capacity(lo->lo_disk, 0);
loop_sysfs_exit(lo); loop_sysfs_exit(lo);
if (bdev) { if (bdev) {
bd_set_size(bdev, 0); bd_set_nr_sectors(bdev, 0);
/* let user-space know about this change */ /* let user-space know about this change */
kobject_uevent(&disk_to_dev(bdev->bd_disk)->kobj, KOBJ_CHANGE); kobject_uevent(&disk_to_dev(bdev->bd_disk)->kobj, KOBJ_CHANGE);
} }


@ -300,6 +300,7 @@ static void nbd_size_update(struct nbd_device *nbd)
{ {
struct nbd_config *config = nbd->config; struct nbd_config *config = nbd->config;
struct block_device *bdev = bdget_disk(nbd->disk, 0); struct block_device *bdev = bdget_disk(nbd->disk, 0);
sector_t nr_sectors = config->bytesize >> 9;
if (config->flags & NBD_FLAG_SEND_TRIM) { if (config->flags & NBD_FLAG_SEND_TRIM) {
nbd->disk->queue->limits.discard_granularity = config->blksize; nbd->disk->queue->limits.discard_granularity = config->blksize;
@ -308,13 +309,13 @@ static void nbd_size_update(struct nbd_device *nbd)
} }
blk_queue_logical_block_size(nbd->disk->queue, config->blksize); blk_queue_logical_block_size(nbd->disk->queue, config->blksize);
blk_queue_physical_block_size(nbd->disk->queue, config->blksize); blk_queue_physical_block_size(nbd->disk->queue, config->blksize);
set_capacity(nbd->disk, config->bytesize >> 9); set_capacity(nbd->disk, nr_sectors);
if (bdev) { if (bdev) {
if (bdev->bd_disk) { if (bdev->bd_disk) {
bd_set_size(bdev, config->bytesize); bd_set_nr_sectors(bdev, nr_sectors);
set_blocksize(bdev, config->blksize); set_blocksize(bdev, config->blksize);
} else } else
bdev->bd_invalidated = 1; set_bit(GD_NEED_PART_SCAN, &nbd->disk->state);
bdput(bdev); bdput(bdev);
} }
kobject_uevent(&nbd_to_dev(nbd)->kobj, KOBJ_CHANGE); kobject_uevent(&nbd_to_dev(nbd)->kobj, KOBJ_CHANGE);
@ -1138,7 +1139,7 @@ static void nbd_bdev_reset(struct block_device *bdev)
{ {
if (bdev->bd_openers > 1) if (bdev->bd_openers > 1)
return; return;
bd_set_size(bdev, 0); bd_set_nr_sectors(bdev, 0);
} }
static void nbd_parse_flags(struct nbd_device *nbd) static void nbd_parse_flags(struct nbd_device *nbd)
@ -1321,7 +1322,7 @@ static int nbd_start_device_ioctl(struct nbd_device *nbd, struct block_device *b
return ret; return ret;
if (max_part) if (max_part)
bdev->bd_invalidated = 1; set_bit(GD_NEED_PART_SCAN, &nbd->disk->state);
mutex_unlock(&nbd->config_lock); mutex_unlock(&nbd->config_lock);
ret = wait_event_interruptible(config->recv_wq, ret = wait_event_interruptible(config->recv_wq,
atomic_read(&config->recv_threads) == 0); atomic_read(&config->recv_threads) == 0);
@ -1499,9 +1500,9 @@ static int nbd_open(struct block_device *bdev, fmode_t mode)
refcount_set(&nbd->config_refs, 1); refcount_set(&nbd->config_refs, 1);
refcount_inc(&nbd->refs); refcount_inc(&nbd->refs);
mutex_unlock(&nbd->config_lock); mutex_unlock(&nbd->config_lock);
bdev->bd_invalidated = 1; set_bit(GD_NEED_PART_SCAN, &bdev->bd_disk->state);
} else if (nbd_disconnected(nbd->config)) { } else if (nbd_disconnected(nbd->config)) {
bdev->bd_invalidated = 1; set_bit(GD_NEED_PART_SCAN, &bdev->bd_disk->state);
} }
out: out:
mutex_unlock(&nbd_index_mutex); mutex_unlock(&nbd_index_mutex);
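
Two related conversions appear above: bd_set_nr_sectors() takes a sector count where bd_set_size() took bytes, and a deferred partition rescan is requested by setting GD_NEED_PART_SCAN on the gendisk instead of writing bdev->bd_invalidated. A rough sketch of a resize path, where disk, bdev and bytesize are placeholders:

/* Sketch: propagate a new device size; 'disk', 'bdev' and 'bytesize' are placeholders. */
sector_t nr_sectors = bytesize >> SECTOR_SHIFT;

set_capacity(disk, nr_sectors);
bd_set_nr_sectors(bdev, nr_sectors);		/* sectors, not bytes */
set_bit(GD_NEED_PART_SCAN, &disk->state);	/* rescan partitions on next open */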


@ -233,7 +233,7 @@ static int pcd_block_open(struct block_device *bdev, fmode_t mode)
struct pcd_unit *cd = bdev->bd_disk->private_data; struct pcd_unit *cd = bdev->bd_disk->private_data;
int ret; int ret;
check_disk_change(bdev); bdev_check_media_change(bdev);
mutex_lock(&pcd_mutex); mutex_lock(&pcd_mutex);
ret = cdrom_open(&cd->info, bdev, mode); ret = cdrom_open(&cd->info, bdev, mode);


@ -1082,65 +1082,6 @@ static void pkt_put_packet_data(struct pktcdvd_device *pd, struct packet_data *p
} }
} }
/*
* recover a failed write, query for relocation if possible
*
* returns 1 if recovery is possible, or 0 if not
*
*/
static int pkt_start_recovery(struct packet_data *pkt)
{
/*
* FIXME. We need help from the file system to implement
* recovery handling.
*/
return 0;
#if 0
struct request *rq = pkt->rq;
struct pktcdvd_device *pd = rq->rq_disk->private_data;
struct block_device *pkt_bdev;
struct super_block *sb = NULL;
unsigned long old_block, new_block;
sector_t new_sector;
pkt_bdev = bdget(kdev_t_to_nr(pd->pkt_dev));
if (pkt_bdev) {
sb = get_super(pkt_bdev);
bdput(pkt_bdev);
}
if (!sb)
return 0;
if (!sb->s_op->relocate_blocks)
goto out;
old_block = pkt->sector / (CD_FRAMESIZE >> 9);
if (sb->s_op->relocate_blocks(sb, old_block, &new_block))
goto out;
new_sector = new_block * (CD_FRAMESIZE >> 9);
pkt->sector = new_sector;
bio_reset(pkt->bio);
bio_set_dev(pkt->bio, pd->bdev);
bio_set_op_attrs(pkt->bio, REQ_OP_WRITE, 0);
pkt->bio->bi_iter.bi_sector = new_sector;
pkt->bio->bi_iter.bi_size = pkt->frames * CD_FRAMESIZE;
pkt->bio->bi_vcnt = pkt->frames;
pkt->bio->bi_end_io = pkt_end_io_packet_write;
pkt->bio->bi_private = pkt;
drop_super(sb);
return 1;
out:
drop_super(sb);
return 0;
#endif
}
static inline void pkt_set_state(struct packet_data *pkt, enum packet_data_state state) static inline void pkt_set_state(struct packet_data *pkt, enum packet_data_state state)
{ {
#if PACKET_DEBUG > 1 #if PACKET_DEBUG > 1
@ -1357,12 +1298,8 @@ static void pkt_run_state_machine(struct pktcdvd_device *pd, struct packet_data
break; break;
case PACKET_RECOVERY_STATE: case PACKET_RECOVERY_STATE:
if (pkt_start_recovery(pkt)) {
pkt_start_write(pd, pkt);
} else {
pkt_dbg(2, pd, "No recovery possible\n"); pkt_dbg(2, pd, "No recovery possible\n");
pkt_set_state(pkt, PACKET_FINISHED_STATE); pkt_set_state(pkt, PACKET_FINISHED_STATE);
}
break; break;
case PACKET_FINISHED_STATE: case PACKET_FINISHED_STATE:
@ -2173,16 +2110,18 @@ static int pkt_open_dev(struct pktcdvd_device *pd, fmode_t write)
int ret; int ret;
long lba; long lba;
struct request_queue *q; struct request_queue *q;
struct block_device *bdev;
/* /*
* We need to re-open the cdrom device without O_NONBLOCK to be able * We need to re-open the cdrom device without O_NONBLOCK to be able
* to read/write from/to it. It is already opened in O_NONBLOCK mode * to read/write from/to it. It is already opened in O_NONBLOCK mode
* so bdget() can't fail. * so open should not fail.
*/ */
bdget(pd->bdev->bd_dev); bdev = blkdev_get_by_dev(pd->bdev->bd_dev, FMODE_READ | FMODE_EXCL, pd);
ret = blkdev_get(pd->bdev, FMODE_READ | FMODE_EXCL, pd); if (IS_ERR(bdev)) {
if (ret) ret = PTR_ERR(bdev);
goto out; goto out;
}
ret = pkt_get_last_written(pd, &lba); ret = pkt_get_last_written(pd, &lba);
if (ret) { if (ret) {
@ -2192,7 +2131,7 @@ static int pkt_open_dev(struct pktcdvd_device *pd, fmode_t write)
set_capacity(pd->disk, lba << 2); set_capacity(pd->disk, lba << 2);
set_capacity(pd->bdev->bd_disk, lba << 2); set_capacity(pd->bdev->bd_disk, lba << 2);
bd_set_size(pd->bdev, (loff_t)lba << 11); bd_set_nr_sectors(pd->bdev, lba << 2);
q = bdev_get_queue(pd->bdev); q = bdev_get_queue(pd->bdev);
if (write) { if (write) {
@ -2226,7 +2165,7 @@ static int pkt_open_dev(struct pktcdvd_device *pd, fmode_t write)
return 0; return 0;
out_putdev: out_putdev:
blkdev_put(pd->bdev, FMODE_READ | FMODE_EXCL); blkdev_put(bdev, FMODE_READ | FMODE_EXCL);
out: out:
return ret; return ret;
} }
@ -2563,7 +2502,6 @@ static int pkt_seq_show(struct seq_file *m, void *p)
static int pkt_new_dev(struct pktcdvd_device *pd, dev_t dev) static int pkt_new_dev(struct pktcdvd_device *pd, dev_t dev)
{ {
int i; int i;
int ret = 0;
char b[BDEVNAME_SIZE]; char b[BDEVNAME_SIZE];
struct block_device *bdev; struct block_device *bdev;
@ -2586,12 +2524,9 @@ static int pkt_new_dev(struct pktcdvd_device *pd, dev_t dev)
} }
} }
bdev = bdget(dev); bdev = blkdev_get_by_dev(dev, FMODE_READ | FMODE_NDELAY, NULL);
if (!bdev) if (IS_ERR(bdev))
return -ENOMEM; return PTR_ERR(bdev);
ret = blkdev_get(bdev, FMODE_READ | FMODE_NDELAY, NULL);
if (ret)
return ret;
if (!blk_queue_scsi_passthrough(bdev_get_queue(bdev))) { if (!blk_queue_scsi_passthrough(bdev_get_queue(bdev))) {
blkdev_put(bdev, FMODE_READ | FMODE_NDELAY); blkdev_put(bdev, FMODE_READ | FMODE_NDELAY);
return -EINVAL; return -EINVAL;
@ -2609,7 +2544,6 @@ static int pkt_new_dev(struct pktcdvd_device *pd, dev_t dev)
pd->cdrw.thread = kthread_run(kcdrwd, pd, "%s", pd->name); pd->cdrw.thread = kthread_run(kcdrwd, pd, "%s", pd->name);
if (IS_ERR(pd->cdrw.thread)) { if (IS_ERR(pd->cdrw.thread)) {
pkt_err(pd, "can't start kernel thread\n"); pkt_err(pd, "can't start kernel thread\n");
ret = -ENOMEM;
goto out_mem; goto out_mem;
} }
@ -2621,7 +2555,7 @@ static int pkt_new_dev(struct pktcdvd_device *pd, dev_t dev)
blkdev_put(bdev, FMODE_READ | FMODE_NDELAY); blkdev_put(bdev, FMODE_READ | FMODE_NDELAY);
/* This is safe: open() is still holding a reference. */ /* This is safe: open() is still holding a reference. */
module_put(THIS_MODULE); module_put(THIS_MODULE);
return ret; return -ENOMEM;
} }
static int pkt_ioctl(struct block_device *bdev, fmode_t mode, unsigned int cmd, unsigned long arg) static int pkt_ioctl(struct block_device *bdev, fmode_t mode, unsigned int cmd, unsigned long arg)
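
The bdget() + blkdev_get() pairs above collapse into blkdev_get_by_dev(), which looks up and opens the device in one step and reports failure through ERR_PTR(). Roughly, with dev and holder as placeholders:

/* Sketch: open a block device by dev_t; 'dev' and 'holder' are placeholders. */
struct block_device *bdev;

bdev = blkdev_get_by_dev(dev, FMODE_READ | FMODE_EXCL, holder);
if (IS_ERR(bdev))
	return PTR_ERR(bdev);
/* ... use bdev ... */
blkdev_put(bdev, FMODE_READ | FMODE_EXCL);	/* mode must match the get */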


@ -4921,7 +4921,7 @@ static void rbd_dev_update_size(struct rbd_device *rbd_dev)
size = (sector_t)rbd_dev->mapping.size / SECTOR_SIZE; size = (sector_t)rbd_dev->mapping.size / SECTOR_SIZE;
dout("setting size to %llu sectors", (unsigned long long)size); dout("setting size to %llu sectors", (unsigned long long)size);
set_capacity(rbd_dev->disk, size); set_capacity(rbd_dev->disk, size);
revalidate_disk(rbd_dev->disk); revalidate_disk_size(rbd_dev->disk, true);
} }
} }
@ -5022,7 +5022,7 @@ static int rbd_init_disk(struct rbd_device *rbd_dev)
} }
if (!ceph_test_opt(rbd_dev->rbd_client->client, NOCRC)) if (!ceph_test_opt(rbd_dev->rbd_client->client, NOCRC))
q->backing_dev_info->capabilities |= BDI_CAP_STABLE_WRITES; blk_queue_flag_set(QUEUE_FLAG_STABLE_WRITES, q);
/* /*
* disk_release() expects a queue ref from add_disk() and will * disk_release() expects a queue ref from add_disk() and will


@ -102,18 +102,12 @@ static int rnbd_clt_set_dev_attr(struct rnbd_clt_dev *dev,
static int rnbd_clt_change_capacity(struct rnbd_clt_dev *dev, static int rnbd_clt_change_capacity(struct rnbd_clt_dev *dev,
size_t new_nsectors) size_t new_nsectors)
{ {
int err = 0;
rnbd_clt_info(dev, "Device size changed from %zu to %zu sectors\n", rnbd_clt_info(dev, "Device size changed from %zu to %zu sectors\n",
dev->nsectors, new_nsectors); dev->nsectors, new_nsectors);
dev->nsectors = new_nsectors; dev->nsectors = new_nsectors;
set_capacity(dev->gd, dev->nsectors); set_capacity(dev->gd, dev->nsectors);
err = revalidate_disk(dev->gd); revalidate_disk_size(dev->gd, true);
if (err) return 0;
rnbd_clt_err(dev,
"Failed to change device size from %zu to %zu, err: %d\n",
dev->nsectors, new_nsectors, err);
return err;
} }
static int process_msg_open_rsp(struct rnbd_clt_dev *dev, static int process_msg_open_rsp(struct rnbd_clt_dev *dev,
@ -1180,7 +1174,7 @@ static int setup_mq_tags(struct rnbd_clt_session *sess)
tag_set->queue_depth = sess->queue_depth; tag_set->queue_depth = sess->queue_depth;
tag_set->numa_node = NUMA_NO_NODE; tag_set->numa_node = NUMA_NO_NODE;
tag_set->flags = BLK_MQ_F_SHOULD_MERGE | tag_set->flags = BLK_MQ_F_SHOULD_MERGE |
BLK_MQ_F_TAG_SHARED; BLK_MQ_F_TAG_QUEUE_SHARED;
tag_set->cmd_size = sizeof(struct rnbd_iu); tag_set->cmd_size = sizeof(struct rnbd_iu);
tag_set->nr_hw_queues = num_online_cpus(); tag_set->nr_hw_queues = num_online_cpus();
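
The capacity-change path at the top of this file shows the general replacement for revalidate_disk(): set the new capacity, then let the core resync the block device inode size. revalidate_disk_size() returns void, so the old error propagation simply goes away; the boolean appears to control whether the size change is logged. Sketch, with gd and nsectors as placeholders:

/* Sketch: 'gd' and 'nsectors' are placeholders for the driver's disk and new size. */
set_capacity(gd, nsectors);
revalidate_disk_size(gd, true);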


@ -638,7 +638,8 @@ static int floppy_open(struct block_device *bdev, fmode_t mode)
return 0; return 0;
if (mode & (FMODE_READ|FMODE_WRITE)) { if (mode & (FMODE_READ|FMODE_WRITE)) {
check_disk_change(bdev); if (bdev_check_media_change(bdev) && fs->disk_in)
fs->ejected = 0;
if ((mode & FMODE_WRITE) && fs->write_protected) { if ((mode & FMODE_WRITE) && fs->write_protected) {
err = -EROFS; err = -EROFS;
goto out; goto out;
@ -735,24 +736,6 @@ static unsigned int floppy_check_events(struct gendisk *disk,
return fs->ejected ? DISK_EVENT_MEDIA_CHANGE : 0; return fs->ejected ? DISK_EVENT_MEDIA_CHANGE : 0;
} }
static int floppy_revalidate(struct gendisk *disk)
{
struct floppy_state *fs = disk->private_data;
struct swim __iomem *base = fs->swd->base;
swim_drive(base, fs->location);
if (fs->ejected)
setup_medium(fs);
if (!fs->disk_in)
swim_motor(base, OFF);
else
fs->ejected = 0;
return !fs->disk_in;
}
static const struct block_device_operations floppy_fops = { static const struct block_device_operations floppy_fops = {
.owner = THIS_MODULE, .owner = THIS_MODULE,
.open = floppy_unlocked_open, .open = floppy_unlocked_open,
@ -760,7 +743,6 @@ static const struct block_device_operations floppy_fops = {
.ioctl = floppy_ioctl, .ioctl = floppy_ioctl,
.getgeo = floppy_getgeo, .getgeo = floppy_getgeo,
.check_events = floppy_check_events, .check_events = floppy_check_events,
.revalidate_disk = floppy_revalidate,
}; };
static struct kobject *floppy_find(dev_t dev, int *part, void *data) static struct kobject *floppy_find(dev_t dev, int *part, void *data)


@ -945,7 +945,8 @@ static int floppy_open(struct block_device *bdev, fmode_t mode)
if (err == 0 && (mode & FMODE_NDELAY) == 0 if (err == 0 && (mode & FMODE_NDELAY) == 0
&& (mode & (FMODE_READ|FMODE_WRITE))) { && (mode & (FMODE_READ|FMODE_WRITE))) {
check_disk_change(bdev); if (bdev_check_media_change(bdev))
floppy_revalidate(bdev->bd_disk);
if (fs->ejected) if (fs->ejected)
err = -ENXIO; err = -ENXIO;
} }
@ -1055,7 +1056,6 @@ static const struct block_device_operations floppy_fops = {
.release = floppy_release, .release = floppy_release,
.ioctl = floppy_ioctl, .ioctl = floppy_ioctl,
.check_events = floppy_check_events, .check_events = floppy_check_events,
.revalidate_disk= floppy_revalidate,
}; };
static const struct blk_mq_ops swim3_mq_ops = { static const struct blk_mq_ops swim3_mq_ops = {


@ -598,7 +598,7 @@ static void virtblk_update_cache_mode(struct virtio_device *vdev)
struct virtio_blk *vblk = vdev->priv; struct virtio_blk *vblk = vdev->priv;
blk_queue_write_cache(vblk->disk->queue, writeback, false); blk_queue_write_cache(vblk->disk->queue, writeback, false);
revalidate_disk(vblk->disk); revalidate_disk_size(vblk->disk, true);
} }
static const char *const virtblk_cache_types[] = { static const char *const virtblk_cache_types[] = {
@ -646,7 +646,7 @@ static struct attribute *virtblk_attrs[] = {
static umode_t virtblk_attrs_are_visible(struct kobject *kobj, static umode_t virtblk_attrs_are_visible(struct kobject *kobj,
struct attribute *a, int n) struct attribute *a, int n)
{ {
struct device *dev = container_of(kobj, struct device, kobj); struct device *dev = kobj_to_dev(kobj);
struct gendisk *disk = dev_to_disk(dev); struct gendisk *disk = dev_to_disk(dev);
struct virtio_blk *vblk = disk->private_data; struct virtio_blk *vblk = disk->private_data;
struct virtio_device *vdev = vblk->vdev; struct virtio_device *vdev = vblk->vdev;


@ -888,14 +888,10 @@ static unsigned int ace_check_events(struct gendisk *gd, unsigned int clearing)
return ace->media_change ? DISK_EVENT_MEDIA_CHANGE : 0; return ace->media_change ? DISK_EVENT_MEDIA_CHANGE : 0;
} }
static int ace_revalidate_disk(struct gendisk *gd) static void ace_media_changed(struct ace_device *ace)
{ {
struct ace_device *ace = gd->private_data;
unsigned long flags; unsigned long flags;
dev_dbg(ace->dev, "ace_revalidate_disk()\n");
if (ace->media_change) {
dev_dbg(ace->dev, "requesting cf id and scheduling tasklet\n"); dev_dbg(ace->dev, "requesting cf id and scheduling tasklet\n");
spin_lock_irqsave(&ace->lock, flags); spin_lock_irqsave(&ace->lock, flags);
@ -904,10 +900,8 @@ static int ace_revalidate_disk(struct gendisk *gd)
tasklet_schedule(&ace->fsm_tasklet); tasklet_schedule(&ace->fsm_tasklet);
wait_for_completion(&ace->id_completion); wait_for_completion(&ace->id_completion);
}
dev_dbg(ace->dev, "revalidate complete\n"); dev_dbg(ace->dev, "revalidate complete\n");
return ace->id_result;
} }
static int ace_open(struct block_device *bdev, fmode_t mode) static int ace_open(struct block_device *bdev, fmode_t mode)
@ -922,7 +916,8 @@ static int ace_open(struct block_device *bdev, fmode_t mode)
ace->users++; ace->users++;
spin_unlock_irqrestore(&ace->lock, flags); spin_unlock_irqrestore(&ace->lock, flags);
check_disk_change(bdev); if (bdev_check_media_change(bdev) && ace->media_change)
ace_media_changed(ace);
mutex_unlock(&xsysace_mutex); mutex_unlock(&xsysace_mutex);
return 0; return 0;
@ -966,7 +961,6 @@ static const struct block_device_operations ace_fops = {
.open = ace_open, .open = ace_open,
.release = ace_release, .release = ace_release,
.check_events = ace_check_events, .check_events = ace_check_events,
.revalidate_disk = ace_revalidate_disk,
.getgeo = ace_getgeo, .getgeo = ace_getgeo,
}; };
@ -1080,7 +1074,7 @@ static int ace_setup(struct ace_device *ace)
(unsigned long long) ace->physaddr, ace->baseaddr, ace->irq); (unsigned long long) ace->physaddr, ace->baseaddr, ace->irq);
ace->media_change = 1; ace->media_change = 1;
ace_revalidate_disk(ace->gd); ace_media_changed(ace);
/* Make the sysace device 'live' */ /* Make the sysace device 'live' */
add_disk(ace->gd); add_disk(ace->gd);


@ -52,6 +52,9 @@ static unsigned int num_devices = 1;
*/ */
static size_t huge_class_size; static size_t huge_class_size;
static const struct block_device_operations zram_devops;
static const struct block_device_operations zram_wb_devops;
static void zram_free_page(struct zram *zram, size_t index); static void zram_free_page(struct zram *zram, size_t index);
static int zram_bvec_read(struct zram *zram, struct bio_vec *bvec, static int zram_bvec_read(struct zram *zram, struct bio_vec *bvec,
u32 index, int offset, struct bio *bio); u32 index, int offset, struct bio *bio);
@ -408,8 +411,7 @@ static void reset_bdev(struct zram *zram)
zram->backing_dev = NULL; zram->backing_dev = NULL;
zram->old_block_size = 0; zram->old_block_size = 0;
zram->bdev = NULL; zram->bdev = NULL;
zram->disk->queue->backing_dev_info->capabilities |= zram->disk->fops = &zram_devops;
BDI_CAP_SYNCHRONOUS_IO;
kvfree(zram->bitmap); kvfree(zram->bitmap);
zram->bitmap = NULL; zram->bitmap = NULL;
} }
@ -491,9 +493,10 @@ static ssize_t backing_dev_store(struct device *dev,
goto out; goto out;
} }
bdev = bdgrab(I_BDEV(inode)); bdev = blkdev_get_by_dev(inode->i_rdev,
err = blkdev_get(bdev, FMODE_READ | FMODE_WRITE | FMODE_EXCL, zram); FMODE_READ | FMODE_WRITE | FMODE_EXCL, zram);
if (err < 0) { if (IS_ERR(bdev)) {
err = PTR_ERR(bdev);
bdev = NULL; bdev = NULL;
goto out; goto out;
} }
@ -528,8 +531,7 @@ static ssize_t backing_dev_store(struct device *dev,
* freely but in fact, IO is going on so finally could cause * freely but in fact, IO is going on so finally could cause
* use-after-free when the IO is really done. * use-after-free when the IO is really done.
*/ */
zram->disk->queue->backing_dev_info->capabilities &= zram->disk->fops = &zram_wb_devops;
~BDI_CAP_SYNCHRONOUS_IO;
up_write(&zram->init_lock); up_write(&zram->init_lock);
pr_info("setup backing device %s\n", file_name); pr_info("setup backing device %s\n", file_name);
@ -1739,7 +1741,7 @@ static ssize_t disksize_store(struct device *dev,
zram->disksize = disksize; zram->disksize = disksize;
set_capacity(zram->disk, zram->disksize >> SECTOR_SHIFT); set_capacity(zram->disk, zram->disksize >> SECTOR_SHIFT);
revalidate_disk(zram->disk); revalidate_disk_size(zram->disk, true);
up_write(&zram->init_lock); up_write(&zram->init_lock);
return len; return len;
@ -1786,7 +1788,7 @@ static ssize_t reset_store(struct device *dev,
/* Make sure all the pending I/O are finished */ /* Make sure all the pending I/O are finished */
fsync_bdev(bdev); fsync_bdev(bdev);
zram_reset_device(zram); zram_reset_device(zram);
revalidate_disk(zram->disk); revalidate_disk_size(zram->disk, true);
bdput(bdev); bdput(bdev);
mutex_lock(&bdev->bd_mutex); mutex_lock(&bdev->bd_mutex);
@ -1819,6 +1821,13 @@ static const struct block_device_operations zram_devops = {
.owner = THIS_MODULE .owner = THIS_MODULE
}; };
static const struct block_device_operations zram_wb_devops = {
.open = zram_open,
.submit_bio = zram_submit_bio,
.swap_slot_free_notify = zram_slot_free_notify,
.owner = THIS_MODULE
};
static DEVICE_ATTR_WO(compact); static DEVICE_ATTR_WO(compact);
static DEVICE_ATTR_RW(disksize); static DEVICE_ATTR_RW(disksize);
static DEVICE_ATTR_RO(initstate); static DEVICE_ATTR_RO(initstate);
@ -1946,8 +1955,7 @@ static int zram_add(void)
if (ZRAM_LOGICAL_BLOCK_SIZE == PAGE_SIZE) if (ZRAM_LOGICAL_BLOCK_SIZE == PAGE_SIZE)
blk_queue_max_write_zeroes_sectors(zram->disk->queue, UINT_MAX); blk_queue_max_write_zeroes_sectors(zram->disk->queue, UINT_MAX);
zram->disk->queue->backing_dev_info->capabilities |= blk_queue_flag_set(QUEUE_FLAG_STABLE_WRITES, zram->disk->queue);
(BDI_CAP_STABLE_WRITES | BDI_CAP_SYNCHRONOUS_IO);
device_add_disk(NULL, zram->disk, zram_disk_attr_groups); device_add_disk(NULL, zram->disk, zram_disk_attr_groups);
strlcpy(zram->compressor, default_compressor, sizeof(zram->compressor)); strlcpy(zram->compressor, default_compressor, sizeof(zram->compressor));
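
BDI_CAP_STABLE_WRITES becomes a request_queue flag handled with the standard queue-flag helpers, as the zram and dm hunks in this series show. For some queue q (placeholder):

/* Sketch: request stable pages for writeback on queue 'q'. */
blk_queue_flag_set(QUEUE_FLAG_STABLE_WRITES, q);
/* ... and to drop the requirement again: */
blk_queue_flag_clear(QUEUE_FLAG_STABLE_WRITES, q);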


@ -479,7 +479,7 @@ static int gdrom_bdops_open(struct block_device *bdev, fmode_t mode)
{ {
int ret; int ret;
check_disk_change(bdev); bdev_check_media_change(bdev);
mutex_lock(&gdrom_mutex); mutex_lock(&gdrom_mutex);
ret = cdrom_open(gd.cd_info, bdev, mode); ret = cdrom_open(gd.cd_info, bdev, mode);


@ -28,7 +28,8 @@
#include <linux/uaccess.h> #include <linux/uaccess.h>
struct raw_device_data { struct raw_device_data {
struct block_device *binding; dev_t binding;
struct block_device *bdev;
int inuse; int inuse;
}; };
@ -63,19 +64,25 @@ static int raw_open(struct inode *inode, struct file *filp)
return 0; return 0;
} }
pr_warn_ratelimited(
"process %s (pid %d) is using the deprecated raw device\n"
"support will be removed in Linux 5.14.\n",
current->comm, current->pid);
mutex_lock(&raw_mutex); mutex_lock(&raw_mutex);
/* /*
* All we need to do on open is check that the device is bound. * All we need to do on open is check that the device is bound.
*/ */
bdev = raw_devices[minor].binding;
err = -ENODEV; err = -ENODEV;
if (!bdev) if (!raw_devices[minor].binding)
goto out; goto out;
bdgrab(bdev); bdev = blkdev_get_by_dev(raw_devices[minor].binding,
err = blkdev_get(bdev, filp->f_mode | FMODE_EXCL, raw_open); filp->f_mode | FMODE_EXCL, raw_open);
if (err) if (IS_ERR(bdev)) {
err = PTR_ERR(bdev);
goto out; goto out;
}
err = set_blocksize(bdev, bdev_logical_block_size(bdev)); err = set_blocksize(bdev, bdev_logical_block_size(bdev));
if (err) if (err)
goto out1; goto out1;
@ -85,6 +92,7 @@ static int raw_open(struct inode *inode, struct file *filp)
file_inode(filp)->i_mapping = file_inode(filp)->i_mapping =
bdev->bd_inode->i_mapping; bdev->bd_inode->i_mapping;
filp->private_data = bdev; filp->private_data = bdev;
raw_devices[minor].bdev = bdev;
mutex_unlock(&raw_mutex); mutex_unlock(&raw_mutex);
return 0; return 0;
@ -105,7 +113,7 @@ static int raw_release(struct inode *inode, struct file *filp)
struct block_device *bdev; struct block_device *bdev;
mutex_lock(&raw_mutex); mutex_lock(&raw_mutex);
bdev = raw_devices[minor].binding; bdev = raw_devices[minor].bdev;
if (--raw_devices[minor].inuse == 0) if (--raw_devices[minor].inuse == 0)
/* Here inode->i_mapping == bdev->bd_inode->i_mapping */ /* Here inode->i_mapping == bdev->bd_inode->i_mapping */
inode->i_mapping = &inode->i_data; inode->i_mapping = &inode->i_data;
@ -128,6 +136,7 @@ raw_ioctl(struct file *filp, unsigned int command, unsigned long arg)
static int bind_set(int number, u64 major, u64 minor) static int bind_set(int number, u64 major, u64 minor)
{ {
dev_t dev = MKDEV(major, minor); dev_t dev = MKDEV(major, minor);
dev_t raw = MKDEV(RAW_MAJOR, number);
struct raw_device_data *rawdev; struct raw_device_data *rawdev;
int err = 0; int err = 0;
@ -161,25 +170,17 @@ static int bind_set(int number, u64 major, u64 minor)
mutex_unlock(&raw_mutex); mutex_unlock(&raw_mutex);
return -EBUSY; return -EBUSY;
} }
if (rawdev->binding) { if (rawdev->binding)
bdput(rawdev->binding);
module_put(THIS_MODULE); module_put(THIS_MODULE);
}
rawdev->binding = dev;
if (!dev) { if (!dev) {
/* unbind */ /* unbind */
rawdev->binding = NULL; device_destroy(raw_class, raw);
device_destroy(raw_class, MKDEV(RAW_MAJOR, number));
} else { } else {
rawdev->binding = bdget(dev);
if (rawdev->binding == NULL) {
err = -ENOMEM;
} else {
dev_t raw = MKDEV(RAW_MAJOR, number);
__module_get(THIS_MODULE); __module_get(THIS_MODULE);
device_destroy(raw_class, raw); device_destroy(raw_class, raw);
device_create(raw_class, NULL, raw, NULL, device_create(raw_class, NULL, raw, NULL, "raw%d", number);
"raw%d", number);
}
} }
mutex_unlock(&raw_mutex); mutex_unlock(&raw_mutex);
return err; return err;
@ -187,18 +188,9 @@ static int bind_set(int number, u64 major, u64 minor)
static int bind_get(int number, dev_t *dev) static int bind_get(int number, dev_t *dev)
{ {
struct raw_device_data *rawdev;
struct block_device *bdev;
if (number <= 0 || number >= max_raw_minors) if (number <= 0 || number >= max_raw_minors)
return -EINVAL; return -EINVAL;
*dev = raw_devices[number].binding;
rawdev = &raw_devices[number];
mutex_lock(&raw_mutex);
bdev = rawdev->binding;
*dev = bdev ? bdev->bd_dev : 0;
mutex_unlock(&raw_mutex);
return 0; return 0;
} }


@ -1611,7 +1611,11 @@ static int idecd_open(struct block_device *bdev, fmode_t mode)
struct cdrom_info *info; struct cdrom_info *info;
int rc = -ENXIO; int rc = -ENXIO;
check_disk_change(bdev); if (bdev_check_media_change(bdev)) {
info = ide_drv_g(bdev->bd_disk, cdrom_info);
ide_cd_read_toc(info->drive);
}
mutex_lock(&ide_cd_mutex); mutex_lock(&ide_cd_mutex);
info = ide_cd_get(bdev->bd_disk); info = ide_cd_get(bdev->bd_disk);
@ -1753,15 +1757,6 @@ static unsigned int idecd_check_events(struct gendisk *disk,
return cdrom_check_events(&info->devinfo, clearing); return cdrom_check_events(&info->devinfo, clearing);
} }
static int idecd_revalidate_disk(struct gendisk *disk)
{
struct cdrom_info *info = ide_drv_g(disk, cdrom_info);
ide_cd_read_toc(info->drive);
return 0;
}
static const struct block_device_operations idecd_ops = { static const struct block_device_operations idecd_ops = {
.owner = THIS_MODULE, .owner = THIS_MODULE,
.open = idecd_open, .open = idecd_open,
@ -1770,7 +1765,6 @@ static const struct block_device_operations idecd_ops = {
.compat_ioctl = IS_ENABLED(CONFIG_COMPAT) ? .compat_ioctl = IS_ENABLED(CONFIG_COMPAT) ?
idecd_compat_ioctl : NULL, idecd_compat_ioctl : NULL,
.check_events = idecd_check_events, .check_events = idecd_check_events,
.revalidate_disk = idecd_revalidate_disk
}; };
/* module options */ /* module options */


@ -739,12 +739,9 @@ static void ide_disk_setup(ide_drive_t *drive)
set_wcache(drive, 1); set_wcache(drive, 1);
if ((drive->dev_flags & IDE_DFLAG_LBA) == 0 && if ((drive->dev_flags & IDE_DFLAG_LBA) == 0 &&
(drive->head == 0 || drive->head > 16)) { (drive->head == 0 || drive->head > 16))
printk(KERN_ERR "%s: invalid geometry: %d physical heads?\n", printk(KERN_ERR "%s: invalid geometry: %d physical heads?\n",
drive->name, drive->head); drive->name, drive->head);
drive->dev_flags &= ~IDE_DFLAG_ATTACH;
} else
drive->dev_flags |= IDE_DFLAG_ATTACH;
} }
static void ide_disk_flush(ide_drive_t *drive) static void ide_disk_flush(ide_drive_t *drive)


@ -516,8 +516,6 @@ static void ide_floppy_setup(ide_drive_t *drive)
(void) ide_floppy_get_capacity(drive); (void) ide_floppy_get_capacity(drive);
ide_proc_register_driver(drive, floppy->driver); ide_proc_register_driver(drive, floppy->driver);
drive->dev_flags |= IDE_DFLAG_ATTACH;
} }
static void ide_floppy_flush(ide_drive_t *drive) static void ide_floppy_flush(ide_drive_t *drive)


@ -225,8 +225,12 @@ static int ide_gd_open(struct block_device *bdev, fmode_t mode)
* and the door_lock is irrelevant at this point. * and the door_lock is irrelevant at this point.
*/ */
drive->disk_ops->set_doorlock(drive, disk, 1); drive->disk_ops->set_doorlock(drive, disk, 1);
drive->dev_flags |= IDE_DFLAG_MEDIA_CHANGED; if (__invalidate_device(bdev, true))
check_disk_change(bdev); pr_warn("VFS: busy inodes on changed media %s\n",
bdev->bd_disk->disk_name);
drive->disk_ops->get_capacity(drive);
set_capacity(disk, ide_gd_capacity(drive));
set_bit(GD_NEED_PART_SCAN, &disk->state);
} else if (drive->dev_flags & IDE_DFLAG_FORMAT_IN_PROGRESS) { } else if (drive->dev_flags & IDE_DFLAG_FORMAT_IN_PROGRESS) {
ret = -EBUSY; ret = -EBUSY;
goto out_put_idkp; goto out_put_idkp;
@ -284,32 +288,6 @@ static int ide_gd_getgeo(struct block_device *bdev, struct hd_geometry *geo)
return 0; return 0;
} }
static unsigned int ide_gd_check_events(struct gendisk *disk,
unsigned int clearing)
{
struct ide_disk_obj *idkp = ide_drv_g(disk, ide_disk_obj);
ide_drive_t *drive = idkp->drive;
bool ret;
/* do not scan partitions twice if this is a removable device */
if (drive->dev_flags & IDE_DFLAG_ATTACH) {
drive->dev_flags &= ~IDE_DFLAG_ATTACH;
return 0;
}
/*
* The following is used to force revalidation on the first open on
* removeable devices, and never gets reported to userland as
* DISK_EVENT_FLAG_UEVENT isn't set in genhd->event_flags.
* This is intended as removable ide disk can't really detect
* MEDIA_CHANGE events.
*/
ret = drive->dev_flags & IDE_DFLAG_MEDIA_CHANGED;
drive->dev_flags &= ~IDE_DFLAG_MEDIA_CHANGED;
return ret ? DISK_EVENT_MEDIA_CHANGE : 0;
}
static void ide_gd_unlock_native_capacity(struct gendisk *disk) static void ide_gd_unlock_native_capacity(struct gendisk *disk)
{ {
struct ide_disk_obj *idkp = ide_drv_g(disk, ide_disk_obj); struct ide_disk_obj *idkp = ide_drv_g(disk, ide_disk_obj);
@ -320,18 +298,6 @@ static void ide_gd_unlock_native_capacity(struct gendisk *disk)
disk_ops->unlock_native_capacity(drive); disk_ops->unlock_native_capacity(drive);
} }
static int ide_gd_revalidate_disk(struct gendisk *disk)
{
struct ide_disk_obj *idkp = ide_drv_g(disk, ide_disk_obj);
ide_drive_t *drive = idkp->drive;
if (ide_gd_check_events(disk, 0))
drive->disk_ops->get_capacity(drive);
set_capacity(disk, ide_gd_capacity(drive));
return 0;
}
static int ide_gd_ioctl(struct block_device *bdev, fmode_t mode, static int ide_gd_ioctl(struct block_device *bdev, fmode_t mode,
unsigned int cmd, unsigned long arg) unsigned int cmd, unsigned long arg)
{ {
@ -364,9 +330,7 @@ static const struct block_device_operations ide_gd_ops = {
.compat_ioctl = ide_gd_compat_ioctl, .compat_ioctl = ide_gd_compat_ioctl,
#endif #endif
.getgeo = ide_gd_getgeo, .getgeo = ide_gd_getgeo,
.check_events = ide_gd_check_events,
.unlock_native_capacity = ide_gd_unlock_native_capacity, .unlock_native_capacity = ide_gd_unlock_native_capacity,
.revalidate_disk = ide_gd_revalidate_disk
}; };
static int ide_gd_probe(ide_drive_t *drive) static int ide_gd_probe(ide_drive_t *drive)


@ -49,7 +49,7 @@ int ide_setting_ioctl(ide_drive_t *drive, struct block_device *bdev,
return err >= 0 ? put_user_long(err, arg) : err; return err >= 0 ? put_user_long(err, arg) : err;
set_val: set_val:
if (bdev != bdev->bd_contains) if (bdev_is_partition(bdev))
err = -EINVAL; err = -EINVAL;
else { else {
if (!capable(CAP_SYS_ADMIN)) if (!capable(CAP_SYS_ADMIN))
@ -257,7 +257,7 @@ int generic_ide_ioctl(ide_drive_t *drive, struct block_device *bdev,
switch (cmd) { switch (cmd) {
case HDIO_OBSOLETE_IDENTITY: case HDIO_OBSOLETE_IDENTITY:
case HDIO_GET_IDENTITY: case HDIO_GET_IDENTITY:
if (bdev != bdev->bd_contains) if (bdev_is_partition(bdev))
return -EINVAL; return -EINVAL;
return ide_get_identity_ioctl(drive, cmd, argp); return ide_get_identity_ioctl(drive, cmd, argp);
case HDIO_GET_NICE: case HDIO_GET_NICE:


@ -499,7 +499,7 @@ static int rvt_check_refs(struct rvt_mregion *mr, const char *t)
rvt_pr_err(rdi, rvt_pr_err(rdi,
"%s timeout mr %p pd %p lkey %x refcount %ld\n", "%s timeout mr %p pd %p lkey %x refcount %ld\n",
t, mr, mr->pd, mr->lkey, t, mr, mr->pd, mr->lkey,
atomic_long_read(&mr->refcount.count)); atomic_long_read(&mr->refcount.data->count));
rvt_get_mr(mr); rvt_get_mr(mr);
return -EBUSY; return -EBUSY;
} }


@ -475,6 +475,7 @@ struct search {
unsigned int read_dirty_data:1; unsigned int read_dirty_data:1;
unsigned int cache_missed:1; unsigned int cache_missed:1;
struct hd_struct *part;
unsigned long start_time; unsigned long start_time;
struct btree_op op; struct btree_op op;
@ -669,7 +670,7 @@ static void bio_complete(struct search *s)
{ {
if (s->orig_bio) { if (s->orig_bio) {
/* Count on bcache device */ /* Count on bcache device */
disk_end_io_acct(s->d->disk, bio_op(s->orig_bio), s->start_time); part_end_io_acct(s->part, s->orig_bio, s->start_time);
trace_bcache_request_end(s->d, s->orig_bio); trace_bcache_request_end(s->d, s->orig_bio);
s->orig_bio->bi_status = s->iop.status; s->orig_bio->bi_status = s->iop.status;
@ -731,7 +732,7 @@ static inline struct search *search_alloc(struct bio *bio,
s->write = op_is_write(bio_op(bio)); s->write = op_is_write(bio_op(bio));
s->read_dirty_data = 0; s->read_dirty_data = 0;
/* Count on the bcache device */ /* Count on the bcache device */
s->start_time = disk_start_io_acct(d->disk, bio_sectors(bio), bio_op(bio)); s->start_time = part_start_io_acct(d->disk, &s->part, bio);
s->iop.c = d->c; s->iop.c = d->c;
s->iop.bio = NULL; s->iop.bio = NULL;
s->iop.inode = d->id; s->iop.inode = d->id;
@ -1072,6 +1073,7 @@ struct detached_dev_io_private {
unsigned long start_time; unsigned long start_time;
bio_end_io_t *bi_end_io; bio_end_io_t *bi_end_io;
void *bi_private; void *bi_private;
struct hd_struct *part;
}; };
static void detached_dev_end_io(struct bio *bio) static void detached_dev_end_io(struct bio *bio)
@ -1083,7 +1085,7 @@ static void detached_dev_end_io(struct bio *bio)
bio->bi_private = ddip->bi_private; bio->bi_private = ddip->bi_private;
/* Count on the bcache device */ /* Count on the bcache device */
disk_end_io_acct(ddip->d->disk, bio_op(bio), ddip->start_time); part_end_io_acct(ddip->part, bio, ddip->start_time);
if (bio->bi_status) { if (bio->bi_status) {
struct cached_dev *dc = container_of(ddip->d, struct cached_dev *dc = container_of(ddip->d,
@ -1109,7 +1111,7 @@ static void detached_dev_do_request(struct bcache_device *d, struct bio *bio)
ddip = kzalloc(sizeof(struct detached_dev_io_private), GFP_NOIO); ddip = kzalloc(sizeof(struct detached_dev_io_private), GFP_NOIO);
ddip->d = d; ddip->d = d;
/* Count on the bcache device */ /* Count on the bcache device */
ddip->start_time = disk_start_io_acct(d->disk, bio_sectors(bio), bio_op(bio)); ddip->start_time = part_start_io_acct(d->disk, &ddip->part, bio);
ddip->bi_end_io = bio->bi_end_io; ddip->bi_end_io = bio->bi_end_io;
ddip->bi_private = bio->bi_private; ddip->bi_private = bio->bi_private;
bio->bi_end_io = detached_dev_end_io; bio->bi_end_io = detached_dev_end_io;
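
Stacking drivers that account I/O against their own disk now use the part_* helpers seen above; part_start_io_acct() also resolves and pins the hd_struct so completion can be charged to the right partition. In outline, with disk and bio as placeholders:

/* Sketch: per-bio accounting in a stacking driver; 'disk' and 'bio' are placeholders. */
struct hd_struct *part;
unsigned long start_time;

start_time = part_start_io_acct(disk, &part, bio);
/* ... process and complete the bio ... */
part_end_io_acct(part, bio, start_time);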


@ -1427,9 +1427,8 @@ static int cached_dev_init(struct cached_dev *dc, unsigned int block_size)
if (ret) if (ret)
return ret; return ret;
dc->disk.disk->queue->backing_dev_info->ra_pages = blk_queue_io_opt(dc->disk.disk->queue,
max(dc->disk.disk->queue->backing_dev_info->ra_pages, max(queue_io_opt(dc->disk.disk->queue), queue_io_opt(q)));
q->backing_dev_info->ra_pages);
atomic_set(&dc->io_errors, 0); atomic_set(&dc->io_errors, 0);
dc->io_disable = false; dc->io_disable = false;


@ -228,10 +228,11 @@ static struct target_type linear_target = {
.name = "linear", .name = "linear",
.version = {1, 4, 0}, .version = {1, 4, 0},
#ifdef CONFIG_BLK_DEV_ZONED #ifdef CONFIG_BLK_DEV_ZONED
.features = DM_TARGET_PASSES_INTEGRITY | DM_TARGET_ZONED_HM, .features = DM_TARGET_PASSES_INTEGRITY | DM_TARGET_NOWAIT |
DM_TARGET_ZONED_HM,
.report_zones = linear_report_zones, .report_zones = linear_report_zones,
#else #else
.features = DM_TARGET_PASSES_INTEGRITY, .features = DM_TARGET_PASSES_INTEGRITY | DM_TARGET_NOWAIT,
#endif #endif
.module = THIS_MODULE, .module = THIS_MODULE,
.ctr = linear_ctr, .ctr = linear_ctr,


@ -701,7 +701,7 @@ static void rs_set_capacity(struct raid_set *rs)
struct gendisk *gendisk = dm_disk(dm_table_get_md(rs->ti->table)); struct gendisk *gendisk = dm_disk(dm_table_get_md(rs->ti->table));
set_capacity(gendisk, rs->md.array_sectors); set_capacity(gendisk, rs->md.array_sectors);
revalidate_disk(gendisk); revalidate_disk_size(gendisk, true);
} }
/* /*


@ -907,7 +907,7 @@ static int device_is_rq_stackable(struct dm_target *ti, struct dm_dev *dev,
struct request_queue *q = bdev_get_queue(bdev); struct request_queue *q = bdev_get_queue(bdev);
/* request-based cannot stack on partitions! */ /* request-based cannot stack on partitions! */
if (bdev != bdev->bd_contains) if (bdev_is_partition(bdev))
return false; return false;
return queue_is_mq(q); return queue_is_mq(q);
@ -1752,6 +1752,33 @@ static bool dm_table_supports_write_zeroes(struct dm_table *t)
return true; return true;
} }
static int device_not_nowait_capable(struct dm_target *ti, struct dm_dev *dev,
sector_t start, sector_t len, void *data)
{
struct request_queue *q = bdev_get_queue(dev->bdev);
return q && !blk_queue_nowait(q);
}
static bool dm_table_supports_nowait(struct dm_table *t)
{
struct dm_target *ti;
unsigned i = 0;
while (i < dm_table_get_num_targets(t)) {
ti = dm_table_get_target(t, i++);
if (!dm_target_supports_nowait(ti->type))
return false;
if (!ti->type->iterate_devices ||
ti->type->iterate_devices(ti, device_not_nowait_capable, NULL))
return false;
}
return true;
}
static int device_not_discard_capable(struct dm_target *ti, struct dm_dev *dev, static int device_not_discard_capable(struct dm_target *ti, struct dm_dev *dev,
sector_t start, sector_t len, void *data) sector_t start, sector_t len, void *data)
{ {
@ -1819,7 +1846,7 @@ static int device_requires_stable_pages(struct dm_target *ti,
{ {
struct request_queue *q = bdev_get_queue(dev->bdev); struct request_queue *q = bdev_get_queue(dev->bdev);
return q && bdi_cap_stable_pages_required(q->backing_dev_info); return q && blk_queue_stable_writes(q);
} }
/* /*
@ -1854,6 +1881,11 @@ void dm_table_set_restrictions(struct dm_table *t, struct request_queue *q,
*/ */
q->limits = *limits; q->limits = *limits;
if (dm_table_supports_nowait(t))
blk_queue_flag_set(QUEUE_FLAG_NOWAIT, q);
else
blk_queue_flag_clear(QUEUE_FLAG_NOWAIT, q);
if (!dm_table_supports_discards(t)) { if (!dm_table_supports_discards(t)) {
blk_queue_flag_clear(QUEUE_FLAG_DISCARD, q); blk_queue_flag_clear(QUEUE_FLAG_DISCARD, q);
/* Must also clear discard limits... */ /* Must also clear discard limits... */
@ -1904,9 +1936,9 @@ void dm_table_set_restrictions(struct dm_table *t, struct request_queue *q,
* because they do their own checksumming. * because they do their own checksumming.
*/ */
if (dm_table_requires_stable_pages(t)) if (dm_table_requires_stable_pages(t))
q->backing_dev_info->capabilities |= BDI_CAP_STABLE_WRITES; blk_queue_flag_set(QUEUE_FLAG_STABLE_WRITES, q);
else else
q->backing_dev_info->capabilities &= ~BDI_CAP_STABLE_WRITES; blk_queue_flag_clear(QUEUE_FLAG_STABLE_WRITES, q);
/* /*
* Determine whether or not this queue's I/O timings contribute * Determine whether or not this queue's I/O timings contribute
@ -1929,8 +1961,7 @@ void dm_table_set_restrictions(struct dm_table *t, struct request_queue *q,
} }
#endif #endif
/* Allow reads to exceed readahead limits */ blk_queue_update_readahead(q);
q->backing_dev_info->io_pages = limits->max_sectors >> (PAGE_SHIFT - 9);
} }
unsigned int dm_table_get_num_targets(struct dm_table *t) unsigned int dm_table_get_num_targets(struct dm_table *t)


@ -1327,14 +1327,15 @@ static int clone_bio(struct dm_target_io *tio, struct bio *bio,
sector_t sector, unsigned len) sector_t sector, unsigned len)
{ {
struct bio *clone = &tio->clone; struct bio *clone = &tio->clone;
int r;
__bio_clone_fast(clone, bio); __bio_clone_fast(clone, bio);
bio_crypt_clone(clone, bio, GFP_NOIO); r = bio_crypt_clone(clone, bio, GFP_NOIO);
if (r < 0)
return r;
if (bio_integrity(bio)) { if (bio_integrity(bio)) {
int r;
if (unlikely(!dm_target_has_integrity(tio->ti->type) && if (unlikely(!dm_target_has_integrity(tio->ti->type) &&
!dm_target_passes_integrity(tio->ti->type))) { !dm_target_passes_integrity(tio->ti->type))) {
DMWARN("%s: the target %s doesn't support integrity data.", DMWARN("%s: the target %s doesn't support integrity data.",
@ -1786,7 +1787,9 @@ static blk_qc_t dm_submit_bio(struct bio *bio)
if (unlikely(test_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags))) { if (unlikely(test_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags))) {
dm_put_live_table(md, srcu_idx); dm_put_live_table(md, srcu_idx);
if (!(bio->bi_opf & REQ_RAHEAD)) if (bio->bi_opf & REQ_NOWAIT)
bio_wouldblock_error(bio);
else if (!(bio->bi_opf & REQ_RAHEAD))
queue_io(md, bio); queue_io(md, bio);
else else
bio_io_error(bio); bio_io_error(bio);
@ -2081,18 +2084,6 @@ static void event_callback(void *context)
dm_issue_global_event(); dm_issue_global_event();
} }
/*
* Protected by md->suspend_lock obtained by dm_swap_table().
*/
static void __set_size(struct mapped_device *md, sector_t size)
{
lockdep_assert_held(&md->suspend_lock);
set_capacity(md->disk, size);
i_size_write(md->bdev->bd_inode, (loff_t)size << SECTOR_SHIFT);
}
/* /*
* Returns old map, which caller must destroy. * Returns old map, which caller must destroy.
*/ */
@ -2115,7 +2106,8 @@ static struct dm_table *__bind(struct mapped_device *md, struct dm_table *t,
if (size != dm_get_size(md)) if (size != dm_get_size(md))
memset(&md->geometry, 0, sizeof(md->geometry)); memset(&md->geometry, 0, sizeof(md->geometry));
__set_size(md, size); set_capacity(md->disk, size);
bd_set_nr_sectors(md->bdev, size);
dm_table_event_callback(t, event_callback, md); dm_table_event_callback(t, event_callback, md);
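
On the submission side, the no-wait support added here means a REQ_NOWAIT bio sent to a capable table fails fast instead of being queued: bio_wouldblock_error() completes it with BLK_STS_AGAIN. A caller opting in looks roughly like this, with bio as a placeholder:

/* Sketch: non-blocking submission; the caller must handle BLK_STS_AGAIN. */
bio->bi_opf |= REQ_NOWAIT;
submit_bio(bio);	/* may complete with BLK_STS_AGAIN instead of sleeping */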


@ -582,7 +582,7 @@ static int process_recvd_msg(struct mddev *mddev, struct cluster_msg *msg)
break; break;
case CHANGE_CAPACITY: case CHANGE_CAPACITY:
set_capacity(mddev->gendisk, mddev->array_sectors); set_capacity(mddev->gendisk, mddev->array_sectors);
revalidate_disk(mddev->gendisk); revalidate_disk_size(mddev->gendisk, true);
break; break;
case RESYNCING: case RESYNCING:
set_bit(MD_RESYNCING_REMOTE, &mddev->recovery); set_bit(MD_RESYNCING_REMOTE, &mddev->recovery);
@ -1296,12 +1296,12 @@ static void update_size(struct mddev *mddev, sector_t old_dev_sectors)
pr_err("%s:%d: failed to send CHANGE_CAPACITY msg\n", pr_err("%s:%d: failed to send CHANGE_CAPACITY msg\n",
__func__, __LINE__); __func__, __LINE__);
set_capacity(mddev->gendisk, mddev->array_sectors); set_capacity(mddev->gendisk, mddev->array_sectors);
revalidate_disk(mddev->gendisk); revalidate_disk_size(mddev->gendisk, true);
} else { } else {
/* revert to previous sectors */ /* revert to previous sectors */
ret = mddev->pers->resize(mddev, old_dev_sectors); ret = mddev->pers->resize(mddev, old_dev_sectors);
if (!ret) if (!ret)
revalidate_disk(mddev->gendisk); revalidate_disk_size(mddev->gendisk, true);
ret = __sendmsg(cinfo, &cmsg); ret = __sendmsg(cinfo, &cmsg);
if (ret) if (ret)
pr_err("%s:%d: failed to send METADATA_UPDATED msg\n", pr_err("%s:%d: failed to send METADATA_UPDATED msg\n",


@ -202,7 +202,7 @@ static int linear_add(struct mddev *mddev, struct md_rdev *rdev)
md_set_array_sectors(mddev, linear_size(mddev, 0, 0)); md_set_array_sectors(mddev, linear_size(mddev, 0, 0));
set_capacity(mddev->gendisk, mddev->array_sectors); set_capacity(mddev->gendisk, mddev->array_sectors);
mddev_resume(mddev); mddev_resume(mddev);
revalidate_disk(mddev->gendisk); revalidate_disk_size(mddev->gendisk, true);
kfree_rcu(oldconf, rcu); kfree_rcu(oldconf, rcu);
return 0; return 0;
} }


@ -464,6 +464,7 @@ struct md_io {
bio_end_io_t *orig_bi_end_io; bio_end_io_t *orig_bi_end_io;
void *orig_bi_private; void *orig_bi_private;
unsigned long start_time; unsigned long start_time;
struct hd_struct *part;
}; };
static void md_end_io(struct bio *bio) static void md_end_io(struct bio *bio)
@ -471,7 +472,7 @@ static void md_end_io(struct bio *bio)
struct md_io *md_io = bio->bi_private; struct md_io *md_io = bio->bi_private;
struct mddev *mddev = md_io->mddev; struct mddev *mddev = md_io->mddev;
disk_end_io_acct(mddev->gendisk, bio_op(bio), md_io->start_time); part_end_io_acct(md_io->part, bio, md_io->start_time);
bio->bi_end_io = md_io->orig_bi_end_io; bio->bi_end_io = md_io->orig_bi_end_io;
bio->bi_private = md_io->orig_bi_private; bio->bi_private = md_io->orig_bi_private;
@ -517,9 +518,8 @@ static blk_qc_t md_submit_bio(struct bio *bio)
bio->bi_end_io = md_end_io; bio->bi_end_io = md_end_io;
bio->bi_private = md_io; bio->bi_private = md_io;
md_io->start_time = disk_start_io_acct(mddev->gendisk, md_io->start_time = part_start_io_acct(mddev->gendisk,
bio_sectors(bio), &md_io->part, bio);
bio_op(bio));
} }
/* bio could be mergeable after passing to underlayer */ /* bio could be mergeable after passing to underlayer */
@ -2322,8 +2322,7 @@ static int match_mddev_units(struct mddev *mddev1, struct mddev *mddev2)
test_bit(Journal, &rdev2->flags) || test_bit(Journal, &rdev2->flags) ||
rdev2->raid_disk == -1) rdev2->raid_disk == -1)
continue; continue;
if (rdev->bdev->bd_contains == if (rdev->bdev->bd_disk == rdev2->bdev->bd_disk) {
rdev2->bdev->bd_contains) {
rcu_read_unlock(); rcu_read_unlock();
return 1; return 1;
} }
@ -5358,7 +5357,7 @@ array_size_store(struct mddev *mddev, const char *buf, size_t len)
mddev->array_sectors = sectors; mddev->array_sectors = sectors;
if (mddev->pers) { if (mddev->pers) {
set_capacity(mddev->gendisk, mddev->array_sectors); set_capacity(mddev->gendisk, mddev->array_sectors);
revalidate_disk(mddev->gendisk); revalidate_disk_size(mddev->gendisk, true);
} }
} }
mddev_unlock(mddev); mddev_unlock(mddev);
@ -5944,8 +5943,8 @@ int md_run(struct mddev *mddev)
rdev_for_each(rdev, mddev) rdev_for_each(rdev, mddev)
rdev_for_each(rdev2, mddev) { rdev_for_each(rdev2, mddev) {
if (rdev < rdev2 && if (rdev < rdev2 &&
rdev->bdev->bd_contains == rdev->bdev->bd_disk ==
rdev2->bdev->bd_contains) { rdev2->bdev->bd_disk) {
pr_warn("%s: WARNING: %s appears to be on the same physical disk as %s.\n", pr_warn("%s: WARNING: %s appears to be on the same physical disk as %s.\n",
mdname(mddev), mdname(mddev),
bdevname(rdev->bdev,b), bdevname(rdev->bdev,b),
@ -6109,7 +6108,7 @@ int do_md_run(struct mddev *mddev)
md_wakeup_thread(mddev->sync_thread); /* possibly kick off a reshape */ md_wakeup_thread(mddev->sync_thread); /* possibly kick off a reshape */
set_capacity(mddev->gendisk, mddev->array_sectors); set_capacity(mddev->gendisk, mddev->array_sectors);
revalidate_disk(mddev->gendisk); revalidate_disk_size(mddev->gendisk, true);
clear_bit(MD_NOT_READY, &mddev->flags); clear_bit(MD_NOT_READY, &mddev->flags);
mddev->changed = 1; mddev->changed = 1;
kobject_uevent(&disk_to_dev(mddev->gendisk)->kobj, KOBJ_CHANGE); kobject_uevent(&disk_to_dev(mddev->gendisk)->kobj, KOBJ_CHANGE);
@ -6427,7 +6426,7 @@ static int do_md_stop(struct mddev *mddev, int mode,
set_capacity(disk, 0); set_capacity(disk, 0);
mutex_unlock(&mddev->open_mutex); mutex_unlock(&mddev->open_mutex);
mddev->changed = 1; mddev->changed = 1;
revalidate_disk(disk); revalidate_disk_size(disk, true);
if (mddev->ro) if (mddev->ro)
mddev->ro = 0; mddev->ro = 0;
@ -7259,7 +7258,7 @@ static int update_size(struct mddev *mddev, sector_t num_sectors)
md_cluster_ops->update_size(mddev, old_dev_sectors); md_cluster_ops->update_size(mddev, old_dev_sectors);
else if (mddev->queue) { else if (mddev->queue) {
set_capacity(mddev->gendisk, mddev->array_sectors); set_capacity(mddev->gendisk, mddev->array_sectors);
revalidate_disk(mddev->gendisk); revalidate_disk_size(mddev->gendisk, true);
} }
} }
return rv; return rv;
@ -7848,7 +7847,7 @@ static int md_open(struct block_device *bdev, fmode_t mode)
atomic_inc(&mddev->openers); atomic_inc(&mddev->openers);
mutex_unlock(&mddev->open_mutex); mutex_unlock(&mddev->open_mutex);
check_disk_change(bdev); bdev_check_media_change(bdev);
out: out:
if (err) if (err)
mddev_put(mddev); mddev_put(mddev);
@ -8445,7 +8444,7 @@ static int is_mddev_idle(struct mddev *mddev, int init)
idle = 1; idle = 1;
rcu_read_lock(); rcu_read_lock();
rdev_for_each_rcu(rdev, mddev) { rdev_for_each_rcu(rdev, mddev) {
struct gendisk *disk = rdev->bdev->bd_contains->bd_disk; struct gendisk *disk = rdev->bdev->bd_disk;
curr_events = (int)part_stat_read_accum(&disk->part0, sectors) - curr_events = (int)part_stat_read_accum(&disk->part0, sectors) -
atomic_read(&disk->sync_io); atomic_read(&disk->sync_io);
/* sync IO will cause sync_io to increase before the disk_stats /* sync IO will cause sync_io to increase before the disk_stats
@ -9018,7 +9017,7 @@ void md_do_sync(struct md_thread *thread)
mddev_unlock(mddev); mddev_unlock(mddev);
if (!mddev_is_clustered(mddev)) { if (!mddev_is_clustered(mddev)) {
set_capacity(mddev->gendisk, mddev->array_sectors); set_capacity(mddev->gendisk, mddev->array_sectors);
revalidate_disk(mddev->gendisk); revalidate_disk_size(mddev->gendisk, true);
} }
} }


@ -397,7 +397,7 @@ struct mddev {
* These locks are separate due to conflicting interactions * These locks are separate due to conflicting interactions
* with bdev->bd_mutex. * with bdev->bd_mutex.
* Lock ordering is: * Lock ordering is:
* reconfig_mutex -> bd_mutex : e.g. do_md_run -> revalidate_disk * reconfig_mutex -> bd_mutex
* bd_mutex -> open_mutex: e.g. __blkdev_get -> md_open * bd_mutex -> open_mutex: e.g. __blkdev_get -> md_open
*/ */
struct mutex open_mutex; struct mutex open_mutex;
@ -551,7 +551,7 @@ extern void mddev_unlock(struct mddev *mddev);
static inline void md_sync_acct(struct block_device *bdev, unsigned long nr_sectors) static inline void md_sync_acct(struct block_device *bdev, unsigned long nr_sectors)
{ {
atomic_add(nr_sectors, &bdev->bd_contains->bd_disk->sync_io); atomic_add(nr_sectors, &bdev->bd_disk->sync_io);
} }
static inline void md_sync_acct_bio(struct bio *bio, unsigned long nr_sectors) static inline void md_sync_acct_bio(struct bio *bio, unsigned long nr_sectors)


@ -410,22 +410,6 @@ static int raid0_run(struct mddev *mddev)
mdname(mddev), mdname(mddev),
(unsigned long long)mddev->array_sectors); (unsigned long long)mddev->array_sectors);
if (mddev->queue) {
/* calculate the max read-ahead size.
* For read-ahead of large files to be effective, we need to
* readahead at least twice a whole stripe. i.e. number of devices
* multiplied by chunk size times 2.
* If an individual device has an ra_pages greater than the
* chunk size, then we will not drive that device as hard as it
* wants. We consider this a configuration error: a larger
* chunksize should be used in that case.
*/
int stripe = mddev->raid_disks *
(mddev->chunk_sectors << 9) / PAGE_SIZE;
if (mddev->queue->backing_dev_info->ra_pages < 2* stripe)
mddev->queue->backing_dev_info->ra_pages = 2* stripe;
}
dump_zones(mddev); dump_zones(mddev);
ret = md_integrity_register(mddev); ret = md_integrity_register(mddev);


@ -3703,10 +3703,20 @@ static struct r10conf *setup_conf(struct mddev *mddev)
return ERR_PTR(err); return ERR_PTR(err);
} }
static void raid10_set_io_opt(struct r10conf *conf)
{
int raid_disks = conf->geo.raid_disks;
if (!(conf->geo.raid_disks % conf->geo.near_copies))
raid_disks /= conf->geo.near_copies;
blk_queue_io_opt(conf->mddev->queue, (conf->mddev->chunk_sectors << 9) *
raid_disks);
}
static int raid10_run(struct mddev *mddev) static int raid10_run(struct mddev *mddev)
{ {
struct r10conf *conf; struct r10conf *conf;
int i, disk_idx, chunk_size; int i, disk_idx;
struct raid10_info *disk; struct raid10_info *disk;
struct md_rdev *rdev; struct md_rdev *rdev;
sector_t size; sector_t size;
@ -3742,18 +3752,13 @@ static int raid10_run(struct mddev *mddev)
mddev->thread = conf->thread; mddev->thread = conf->thread;
conf->thread = NULL; conf->thread = NULL;
chunk_size = mddev->chunk_sectors << 9;
if (mddev->queue) { if (mddev->queue) {
blk_queue_max_discard_sectors(mddev->queue, blk_queue_max_discard_sectors(mddev->queue,
mddev->chunk_sectors); mddev->chunk_sectors);
blk_queue_max_write_same_sectors(mddev->queue, 0); blk_queue_max_write_same_sectors(mddev->queue, 0);
blk_queue_max_write_zeroes_sectors(mddev->queue, 0); blk_queue_max_write_zeroes_sectors(mddev->queue, 0);
blk_queue_io_min(mddev->queue, chunk_size); blk_queue_io_min(mddev->queue, mddev->chunk_sectors << 9);
if (conf->geo.raid_disks % conf->geo.near_copies) raid10_set_io_opt(conf);
blk_queue_io_opt(mddev->queue, chunk_size * conf->geo.raid_disks);
else
blk_queue_io_opt(mddev->queue, chunk_size *
(conf->geo.raid_disks / conf->geo.near_copies));
} }
rdev_for_each(rdev, mddev) { rdev_for_each(rdev, mddev) {
@ -3868,19 +3873,6 @@ static int raid10_run(struct mddev *mddev)
mddev->resync_max_sectors = size; mddev->resync_max_sectors = size;
set_bit(MD_FAILFAST_SUPPORTED, &mddev->flags); set_bit(MD_FAILFAST_SUPPORTED, &mddev->flags);
if (mddev->queue) {
int stripe = conf->geo.raid_disks *
((mddev->chunk_sectors << 9) / PAGE_SIZE);
/* Calculate max read-ahead size.
* We need to readahead at least twice a whole stripe....
* maybe...
*/
stripe /= conf->geo.near_copies;
if (mddev->queue->backing_dev_info->ra_pages < 2 * stripe)
mddev->queue->backing_dev_info->ra_pages = 2 * stripe;
}
if (md_integrity_register(mddev)) if (md_integrity_register(mddev))
goto out_free_conf; goto out_free_conf;
@ -4718,16 +4710,8 @@ static void end_reshape(struct r10conf *conf)
conf->reshape_safe = MaxSector; conf->reshape_safe = MaxSector;
spin_unlock_irq(&conf->device_lock); spin_unlock_irq(&conf->device_lock);
/* read-ahead size must cover two whole stripes, which is if (conf->mddev->queue)
* 2 * (datadisks) * chunksize where 'n' is the number of raid devices raid10_set_io_opt(conf);
*/
if (conf->mddev->queue) {
int stripe = conf->geo.raid_disks *
((conf->mddev->chunk_sectors << 9) / PAGE_SIZE);
stripe /= conf->geo.near_copies;
if (conf->mddev->queue->backing_dev_info->ra_pages < 2 * stripe)
conf->mddev->queue->backing_dev_info->ra_pages = 2 * stripe;
}
conf->fullsync = 0; conf->fullsync = 0;
} }
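
Rather than hand-tuning backing_dev_info->ra_pages to cover two stripes, the MD personalities now describe their stripe geometry through the queue limits and let the core derive read-ahead from them. The shape of it, with q, chunk_bytes and data_disks as placeholders:

/* Sketch: advertise chunk and stripe sizes; all names are placeholders. */
blk_queue_io_min(q, chunk_bytes);		/* one chunk */
blk_queue_io_opt(q, chunk_bytes * data_disks);	/* one full stripe */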


@@ -6638,14 +6638,14 @@ raid5_store_skip_copy(struct mddev *mddev, const char *page, size_t len)
 	if (!conf)
 		err = -ENODEV;
 	else if (new != conf->skip_copy) {
+		struct request_queue *q = mddev->queue;
+
 		mddev_suspend(mddev);
 		conf->skip_copy = new;
 		if (new)
-			mddev->queue->backing_dev_info->capabilities |=
-				BDI_CAP_STABLE_WRITES;
+			blk_queue_flag_set(QUEUE_FLAG_STABLE_WRITES, q);
 		else
-			mddev->queue->backing_dev_info->capabilities &=
-				~BDI_CAP_STABLE_WRITES;
+			blk_queue_flag_clear(QUEUE_FLAG_STABLE_WRITES, q);
 		mddev_resume(mddev);
 	}
 	mddev_unlock(mddev);
@@ -7232,6 +7232,12 @@ static int only_parity(int raid_disk, int algo, int raid_disks, int max_degraded
 	return 0;
 }
 
+static void raid5_set_io_opt(struct r5conf *conf)
+{
+	blk_queue_io_opt(conf->mddev->queue, (conf->chunk_sectors << 9) *
+			 (conf->raid_disks - conf->max_degraded));
+}
+
 static int raid5_run(struct mddev *mddev)
 {
 	struct r5conf *conf;
@@ -7516,13 +7522,10 @@ static int raid5_run(struct mddev *mddev)
 		int data_disks = conf->previous_raid_disks - conf->max_degraded;
 		int stripe = data_disks *
 			((mddev->chunk_sectors << 9) / PAGE_SIZE);
-		if (mddev->queue->backing_dev_info->ra_pages < 2 * stripe)
-			mddev->queue->backing_dev_info->ra_pages = 2 * stripe;
 
 		chunk_size = mddev->chunk_sectors << 9;
 		blk_queue_io_min(mddev->queue, chunk_size);
-		blk_queue_io_opt(mddev->queue, chunk_size *
-			(conf->raid_disks - conf->max_degraded));
+		raid5_set_io_opt(conf);
 		mddev->queue->limits.raid_partial_stripes_expensive = 1;
 		/*
 		 * We can only discard a whole stripe. It doesn't make sense to
@@ -8106,16 +8109,8 @@ static void end_reshape(struct r5conf *conf)
 		spin_unlock_irq(&conf->device_lock);
 		wake_up(&conf->wait_for_overlap);
 
-		/* read-ahead size must cover two whole stripes, which is
-		 * 2 * (datadisks) * chunksize where 'n' is the number of raid devices
-		 */
-		if (conf->mddev->queue) {
-			int data_disks = conf->raid_disks - conf->max_degraded;
-			int stripe = data_disks * ((conf->chunk_sectors << 9)
-						   / PAGE_SIZE);
-			if (conf->mddev->queue->backing_dev_info->ra_pages < 2 * stripe)
-				conf->mddev->queue->backing_dev_info->ra_pages = 2 * stripe;
-		}
+		if (conf->mddev->queue)
+			raid5_set_io_opt(conf);
 	}
 }
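
The md hunks above stop poking backing_dev_info->ra_pages from the drivers and instead publish the stripe geometry through blk_queue_io_min()/blk_queue_io_opt(), leaving read-ahead sizing to the block layer. A minimal sketch of that pattern for a hypothetical striped driver follows; struct my_stripe_dev and its fields are illustrative, only the blk_queue_* helpers are from this series (blk_queue_update_readahead() appears in the nvme hunk further down):

#include <linux/blkdev.h>

/* Hypothetical striped device; data_disks and chunk_sectors are assumptions. */
struct my_stripe_dev {
        struct request_queue *queue;
        unsigned int data_disks;
        unsigned int chunk_sectors;
};

static void my_stripe_dev_set_io_limits(struct my_stripe_dev *dev)
{
        unsigned int chunk_bytes = dev->chunk_sectors << 9;

        /* Smallest efficient I/O is one chunk, optimal is one full stripe. */
        blk_queue_io_min(dev->queue, chunk_bytes);
        blk_queue_io_opt(dev->queue, chunk_bytes * dev->data_disks);

        /* Let the block layer derive ra_pages/io_pages from the new limits. */
        blk_queue_update_readahead(dev->queue);
}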

@@ -723,7 +723,7 @@ static int mmc_blk_check_blkdev(struct block_device *bdev)
 	 * whole block device, not on a partition.  This prevents overspray
 	 * between sibling partitions.
 	 */
-	if ((!capable(CAP_SYS_RAWIO)) || (bdev != bdev->bd_contains))
+	if (!capable(CAP_SYS_RAWIO) || bdev_is_partition(bdev))
 		return -EPERM;
 	return 0;
 }
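
bdev_is_partition() replaces the old bdev != bdev->bd_contains idiom for telling a partition from the whole device. A hedged sketch of the same check in a made-up ioctl helper (my_ioctl_check_whole_disk is not a function from this series):

#include <linux/blkdev.h>
#include <linux/capability.h>
#include <linux/errno.h>

static int my_ioctl_check_whole_disk(struct block_device *bdev)
{
        /* Privileged, whole-device access only; reject partitions. */
        if (!capable(CAP_SYS_RAWIO) || bdev_is_partition(bdev))
                return -EPERM;
        return 0;
}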

@@ -472,8 +472,7 @@ int mmc_init_queue(struct mmc_queue *mq, struct mmc_card *card)
 	}
 
 	if (mmc_host_is_spi(host) && host->use_spi_crc)
-		mq->queue->backing_dev_info->capabilities |=
-			BDI_CAP_STABLE_WRITES;
+		blk_queue_flag_set(QUEUE_FLAG_STABLE_WRITES, mq->queue);
 
 	mq->queue->queuedata = mq;
 	blk_queue_rq_timeout(mq->queue, 60 * HZ);
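
Stable-writes handling moves from BDI_CAP_STABLE_WRITES on the backing_dev_info to the QUEUE_FLAG_STABLE_WRITES request_queue flag, set with blk_queue_flag_set() and tested with blk_queue_stable_writes() (see the nvme multipath hunk below). A small sketch, assuming a driver that checksums pages while they are in flight; my_configure_queue is an invented name:

#include <linux/blkdev.h>

static void my_configure_queue(struct request_queue *q, bool csum_inflight_pages)
{
        /* Pages under writeback must not change while the device checksums them. */
        if (csum_inflight_pages)
                blk_queue_flag_set(QUEUE_FLAG_STABLE_WRITES, q);
        else
                blk_queue_flag_clear(QUEUE_FLAG_STABLE_WRITES, q);
}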

@@ -2196,6 +2196,8 @@ static struct backing_dev_info * __init mtd_bdi_init(char *name)
 	bdi = bdi_alloc(NUMA_NO_NODE);
 	if (!bdi)
 		return ERR_PTR(-ENOMEM);
+	bdi->ra_pages = 0;
+	bdi->io_pages = 0;
 
 	/*
 	 * We put '-0' suffix to the name to get the same name format as we

@@ -226,7 +226,6 @@ static int nsblk_rw_bytes(struct nd_namespace_common *ndns,
 static const struct block_device_operations nd_blk_fops = {
 	.owner = THIS_MODULE,
 	.submit_bio = nd_blk_submit_bio,
-	.revalidate_disk = nvdimm_revalidate_disk,
 };
 
 static void nd_blk_release_queue(void *q)
@@ -284,7 +283,7 @@ static int nsblk_attach_disk(struct nd_namespace_blk *nsblk)
 	set_capacity(disk, available_disk_size >> SECTOR_SHIFT);
 	device_add_disk(dev, disk, NULL);
-	revalidate_disk(disk);
+	nvdimm_check_and_set_ro(disk);
 	return 0;
 }

@@ -1513,7 +1513,6 @@ static const struct block_device_operations btt_fops = {
 	.submit_bio = btt_submit_bio,
 	.rw_page = btt_rw_page,
 	.getgeo = btt_getgeo,
-	.revalidate_disk = nvdimm_revalidate_disk,
 };
 
 static int btt_blk_init(struct btt *btt)
@@ -1538,8 +1537,6 @@ static int btt_blk_init(struct btt *btt)
 	btt->btt_disk->private_data = btt;
 	btt->btt_disk->queue = btt->btt_queue;
 	btt->btt_disk->flags = GENHD_FL_EXT_DEVT;
-	btt->btt_disk->queue->backing_dev_info->capabilities |=
-		BDI_CAP_SYNCHRONOUS_IO;
 
 	blk_queue_logical_block_size(btt->btt_queue, btt->sector_size);
 	blk_queue_max_hw_sectors(btt->btt_queue, UINT_MAX);
@@ -1558,7 +1555,7 @@ static int btt_blk_init(struct btt *btt)
 	set_capacity(btt->btt_disk, btt->nlba * btt->sector_size >> 9);
 	device_add_disk(&btt->nd_btt->dev, btt->btt_disk, NULL);
 	btt->nd_btt->size = btt->nlba * (u64)btt->sector_size;
-	revalidate_disk(btt->btt_disk);
+	nvdimm_check_and_set_ro(btt->btt_disk);
 
 	return 0;
 }

@@ -628,7 +628,7 @@ int __nd_driver_register(struct nd_device_driver *nd_drv, struct module *owner,
 }
 EXPORT_SYMBOL(__nd_driver_register);
 
-int nvdimm_revalidate_disk(struct gendisk *disk)
+void nvdimm_check_and_set_ro(struct gendisk *disk)
 {
 	struct device *dev = disk_to_dev(disk)->parent;
 	struct nd_region *nd_region = to_nd_region(dev->parent);
@@ -639,16 +639,13 @@ int nvdimm_revalidate_disk(struct gendisk *disk)
 	 * read-only if the disk is already read-only.
 	 */
 	if (disk_ro || nd_region->ro == disk_ro)
-		return 0;
+		return;
 
 	dev_info(dev, "%s read-only, marking %s read-only\n",
 			dev_name(&nd_region->dev), disk->disk_name);
 	set_disk_ro(disk, 1);
-
-	return 0;
 }
-EXPORT_SYMBOL(nvdimm_revalidate_disk);
+EXPORT_SYMBOL(nvdimm_check_and_set_ro);
 
 static ssize_t modalias_show(struct device *dev, struct device_attribute *attr,
 		char *buf)

@@ -361,7 +361,7 @@ u64 nd_region_interleave_set_altcookie(struct nd_region *nd_region);
 void nvdimm_bus_lock(struct device *dev);
 void nvdimm_bus_unlock(struct device *dev);
 bool is_nvdimm_bus_locked(struct device *dev);
-int nvdimm_revalidate_disk(struct gendisk *disk);
+void nvdimm_check_and_set_ro(struct gendisk *disk);
 void nvdimm_drvdata_release(struct kref *kref);
 void put_ndd(struct nvdimm_drvdata *ndd);
 int nd_label_reserve_dpa(struct nvdimm_drvdata *ndd);

@@ -281,7 +281,6 @@ static const struct block_device_operations pmem_fops = {
 	.owner = THIS_MODULE,
 	.submit_bio = pmem_submit_bio,
 	.rw_page = pmem_rw_page,
-	.revalidate_disk = nvdimm_revalidate_disk,
 };
 
 static int pmem_dax_zero_page_range(struct dax_device *dax_dev, pgoff_t pgoff,
@@ -476,7 +475,6 @@ static int pmem_attach_disk(struct device *dev,
 	disk->queue = q;
 	disk->flags = GENHD_FL_EXT_DEVT;
 	disk->private_data = pmem;
-	disk->queue->backing_dev_info->capabilities |= BDI_CAP_SYNCHRONOUS_IO;
 	nvdimm_namespace_disk_name(ndns, disk->disk_name);
 	set_capacity(disk, (pmem->size - pmem->pfn_pad - pmem->data_offset)
 			/ 512);
@@ -501,7 +499,7 @@ static int pmem_attach_disk(struct device *dev,
 	if (devm_add_action_or_reset(dev, pmem_release_disk, pmem))
 		return -ENOMEM;
-	revalidate_disk(disk);
+	nvdimm_check_and_set_ro(disk);
 	pmem->bb_state = sysfs_get_dirent(disk_to_dev(disk)->kobj.sd,
 					  "badblocks");

@@ -94,21 +94,34 @@ static void nvme_put_subsystem(struct nvme_subsystem *subsys);
 static void nvme_remove_invalid_namespaces(struct nvme_ctrl *ctrl,
 					   unsigned nsid);
 
+static void nvme_update_bdev_size(struct gendisk *disk)
+{
+	struct block_device *bdev = bdget_disk(disk, 0);
+
+	if (bdev) {
+		bd_set_nr_sectors(bdev, get_capacity(disk));
+		bdput(bdev);
+	}
+}
+
+/*
+ * Prepare a queue for teardown.
+ *
+ * This must forcibly unquiesce queues to avoid blocking dispatch, and only set
+ * the capacity to 0 after that to avoid blocking dispatchers that may be
+ * holding bd_butex.  This will end buffered writers dirtying pages that can't
+ * be synced.
+ */
 static void nvme_set_queue_dying(struct nvme_ns *ns)
 {
-	/*
-	 * Revalidating a dead namespace sets capacity to 0. This will end
-	 * buffered writers dirtying pages that can't be synced.
-	 */
 	if (test_and_set_bit(NVME_NS_DEAD, &ns->flags))
 		return;
+
 	blk_set_queue_dying(ns->queue);
-	/* Forcibly unquiesce queues to avoid blocking dispatch */
 	blk_mq_unquiesce_queue(ns->queue);
+
+	set_capacity(ns->disk, 0);
+	nvme_update_bdev_size(ns->disk);
 }
 
 static void nvme_queue_scan(struct nvme_ctrl *ctrl)
@@ -2134,7 +2147,8 @@ static int __nvme_revalidate_disk(struct gendisk *disk, struct nvme_id_ns *id)
 		nvme_update_disk_info(ns->head->disk, ns, id);
 		blk_stack_limits(&ns->head->disk->queue->limits,
 				 &ns->queue->limits, 0);
-		nvme_mpath_update_disk_size(ns->head->disk);
+		blk_queue_update_readahead(ns->head->disk->queue);
+		nvme_update_bdev_size(ns->head->disk);
 	}
 #endif
 	return 0;
@@ -2339,7 +2353,6 @@ static const struct block_device_operations nvme_fops = {
 	.open = nvme_open,
 	.release = nvme_release,
 	.getgeo = nvme_getgeo,
-	.revalidate_disk = nvme_revalidate_disk,
 	.report_zones = nvme_report_zones,
 	.pr_ops = &nvme_pr_ops,
 };
@@ -3929,8 +3942,7 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid)
 		goto out_free_ns;
 
 	if (ctrl->opts && ctrl->opts->data_digest)
-		ns->queue->backing_dev_info->capabilities
-			|= BDI_CAP_STABLE_WRITES;
+		blk_queue_flag_set(QUEUE_FLAG_STABLE_WRITES, ns->queue);
 
 	blk_queue_flag_set(QUEUE_FLAG_NONROT, ns->queue);
 	if (ctrl->ops->flags & NVME_F_PCI_P2PDMA)
@@ -4056,14 +4068,19 @@ static void nvme_ns_remove_by_nsid(struct nvme_ctrl *ctrl, u32 nsid)
 static void nvme_validate_ns(struct nvme_ctrl *ctrl, unsigned nsid)
 {
 	struct nvme_ns *ns;
+	int ret;
 
 	ns = nvme_find_get_ns(ctrl, nsid);
-	if (ns) {
-		if (revalidate_disk(ns->disk))
-			nvme_ns_remove(ns);
-		nvme_put_ns(ns);
-	} else
+	if (!ns) {
 		nvme_alloc_ns(ctrl, nsid);
+		return;
+	}
+
+	ret = nvme_revalidate_disk(ns->disk);
+	revalidate_disk_size(ns->disk, ret == 0);
+	if (ret)
+		nvme_ns_remove(ns);
+	nvme_put_ns(ns);
 }
 
 static void nvme_remove_invalid_namespaces(struct nvme_ctrl *ctrl,
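
With ->revalidate_disk gone from block_device_operations, callers invoke the driver's own revalidation directly and then sync the reported size with revalidate_disk_size(), as nvme_validate_ns() does above and sd_rescan() does further down. A sketch under those assumptions; my_driver_revalidate() is a stand-in for the driver-specific routine:

#include <linux/genhd.h>

/* Placeholder for the driver's own revalidation (e.g. sd_revalidate_disk()). */
static int my_driver_revalidate(struct gendisk *disk)
{
        return 0;
}

static void my_rescan(struct gendisk *disk)
{
        int ret = my_driver_revalidate(disk);

        /* Propagate the (possibly new) capacity; log a message only on success. */
        revalidate_disk_size(disk, ret == 0);
}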

@@ -673,13 +673,9 @@ void nvme_mpath_add_disk(struct nvme_ns *ns, struct nvme_id_ns *id)
 		nvme_mpath_set_live(ns);
 	}
 
-	if (bdi_cap_stable_pages_required(ns->queue->backing_dev_info)) {
-		struct gendisk *disk = ns->head->disk;
-
-		if (disk)
-			disk->queue->backing_dev_info->capabilities |=
-					BDI_CAP_STABLE_WRITES;
-	}
+	if (blk_queue_stable_writes(ns->queue) && ns->head->disk)
+		blk_queue_flag_set(QUEUE_FLAG_STABLE_WRITES,
+				   ns->head->disk->queue);
 }
 
 void nvme_mpath_remove_disk(struct nvme_ns_head *head)

@@ -682,16 +682,6 @@ static inline void nvme_trace_bio_complete(struct request *req,
 		trace_block_bio_complete(ns->head->disk->queue, req->bio);
 }
 
-static inline void nvme_mpath_update_disk_size(struct gendisk *disk)
-{
-	struct block_device *bdev = bdget_disk(disk, 0);
-
-	if (bdev) {
-		bd_set_size(bdev, get_capacity(disk) << SECTOR_SHIFT);
-		bdput(bdev);
-	}
-}
-
 extern struct device_attribute dev_attr_ana_grpid;
 extern struct device_attribute dev_attr_ana_state;
 extern struct device_attribute subsys_attr_iopolicy;
@@ -766,9 +756,6 @@ static inline void nvme_mpath_wait_freeze(struct nvme_subsystem *subsys)
 static inline void nvme_mpath_start_freeze(struct nvme_subsystem *subsys)
 {
 }
-static inline void nvme_mpath_update_disk_size(struct gendisk *disk)
-{
-}
 #endif /* CONFIG_NVME_MULTIPATH */
 
 #ifdef CONFIG_BLK_DEV_ZONED

@@ -101,18 +101,11 @@ int dasd_scan_partitions(struct dasd_block *block)
 	struct block_device *bdev;
 	int rc;
 
-	bdev = bdget_disk(block->gdp, 0);
-	if (!bdev) {
-		DBF_DEV_EVENT(DBF_ERR, block->base, "%s",
-			      "scan partitions error, bdget returned NULL");
-		return -ENODEV;
-	}
-
-	rc = blkdev_get(bdev, FMODE_READ, NULL);
-	if (rc < 0) {
+	bdev = blkdev_get_by_dev(disk_devt(block->gdp), FMODE_READ, NULL);
+	if (IS_ERR(bdev)) {
 		DBF_DEV_EVENT(DBF_ERR, block->base,
-			      "scan partitions error, blkdev_get returned %d",
-			      rc);
+			      "scan partitions error, blkdev_get returned %ld",
+			      PTR_ERR(bdev));
 		return -ENODEV;
 	}
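
The bdget_disk() + blkdev_get() pair collapses into blkdev_get_by_dev(), which returns an ERR_PTR() on failure and is balanced by blkdev_put(). A minimal sketch, assuming read-only access to the whole device; my_scan_whole_disk is an invented caller:

#include <linux/blkdev.h>
#include <linux/fs.h>
#include <linux/err.h>

static int my_scan_whole_disk(struct gendisk *disk)
{
        struct block_device *bdev;

        bdev = blkdev_get_by_dev(disk_devt(disk), FMODE_READ, NULL);
        if (IS_ERR(bdev))
                return PTR_ERR(bdev);

        /* ... inspect the opened whole-device bdev here ... */

        blkdev_put(bdev, FMODE_READ);
        return 0;
}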

@@ -55,10 +55,7 @@ dasd_ioctl_enable(struct block_device *bdev)
 	dasd_enable_device(base);
 	/* Formatting the dasd device can change the capacity. */
-	mutex_lock(&bdev->bd_mutex);
-	i_size_write(bdev->bd_inode,
-		     (loff_t)get_capacity(base->block->gdp) << 9);
-	mutex_unlock(&bdev->bd_mutex);
+	bd_set_nr_sectors(bdev, get_capacity(base->block->gdp));
 	dasd_put_device(base);
 	return 0;
 }
@@ -91,9 +88,7 @@ dasd_ioctl_disable(struct block_device *bdev)
 	 * Set i_size to zero, since read, write, etc. check against this
 	 * value.
 	 */
-	mutex_lock(&bdev->bd_mutex);
-	i_size_write(bdev->bd_inode, 0);
-	mutex_unlock(&bdev->bd_mutex);
+	bd_set_nr_sectors(bdev, 0);
 	dasd_put_device(base);
 	return 0;
 }
@@ -282,7 +277,7 @@ dasd_ioctl_format(struct block_device *bdev, void __user *argp)
 		dasd_put_device(base);
 		return -EFAULT;
 	}
-	if (bdev != bdev->bd_contains) {
+	if (bdev_is_partition(bdev)) {
 		pr_warn("%s: The specified DASD is a partition and cannot be formatted\n",
 			dev_name(&base->cdev->dev));
 		dasd_put_device(base);
@@ -309,7 +304,7 @@ static int dasd_ioctl_check_format(struct block_device *bdev, void __user *argp)
 	base = dasd_device_from_gendisk(bdev->bd_disk);
 	if (!base)
 		return -ENODEV;
-	if (bdev != bdev->bd_contains) {
+	if (bdev_is_partition(bdev)) {
 		pr_warn("%s: The specified DASD is a partition and cannot be checked\n",
 			dev_name(&base->cdev->dev));
 		rc = -EINVAL;
@@ -367,7 +362,7 @@ static int dasd_ioctl_release_space(struct block_device *bdev, void __user *argp
 		rc = -EROFS;
 		goto out_err;
 	}
-	if (bdev != bdev->bd_contains) {
+	if (bdev_is_partition(bdev)) {
 		pr_warn("%s: The specified DASD is a partition and tracks cannot be released\n",
 			dev_name(&base->cdev->dev));
 		rc = -EINVAL;
@@ -545,7 +540,7 @@ dasd_ioctl_set_ro(struct block_device *bdev, void __user *argp)
 	if (!capable(CAP_SYS_ADMIN))
 		return -EACCES;
 
-	if (bdev != bdev->bd_contains)
+	if (bdev_is_partition(bdev))
 		// ro setting is not allowed for partitions
 		return -EINVAL;
 	if (get_user(intval, (int __user *)argp))
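
When a driver changes capacity outside the normal revalidation path, the new bd_set_nr_sectors() helper replaces the open-coded bd_mutex/i_size_write() sequence seen on the removed lines above. A hedged sketch; my_apply_new_capacity and its arguments are illustrative, not part of this series:

#include <linux/blkdev.h>
#include <linux/genhd.h>

static void my_apply_new_capacity(struct gendisk *disk, struct block_device *bdev,
                                  sector_t new_sectors)
{
        /* Update the gendisk first, then mirror the size into the bdev inode. */
        set_capacity(disk, new_sectors);
        bd_set_nr_sectors(bdev, new_sectors);
}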

@@ -970,8 +970,8 @@ static int iscsi_sw_tcp_slave_configure(struct scsi_device *sdev)
 	struct iscsi_conn *conn = session->leadconn;
 
 	if (conn->datadgst_en)
-		sdev->request_queue->backing_dev_info->capabilities
-			|= BDI_CAP_STABLE_WRITES;
+		blk_queue_flag_set(QUEUE_FLAG_STABLE_WRITES,
+				   sdev->request_queue);
 	blk_queue_dma_alignment(sdev->request_queue, 0);
 	return 0;
 }

@@ -217,7 +217,7 @@ cache_type_store(struct device *dev, struct device_attribute *attr,
 		sd_print_sense_hdr(sdkp, &sshdr);
 		return -EINVAL;
 	}
-	revalidate_disk(sdkp->disk);
+	sd_revalidate_disk(sdkp->disk);
 	return count;
 }
@@ -1381,8 +1381,10 @@ static int sd_open(struct block_device *bdev, fmode_t mode)
 	if (!scsi_block_when_processing_errors(sdev))
 		goto error_out;
 
-	if (sdev->removable || sdkp->write_prot)
-		check_disk_change(bdev);
+	if (sdev->removable || sdkp->write_prot) {
+		if (bdev_check_media_change(bdev))
+			sd_revalidate_disk(bdev->bd_disk);
+	}
 
 	/*
 	 * If the drive is empty, just let the open fail.
@@ -1706,8 +1708,10 @@ static int sd_sync_cache(struct scsi_disk *sdkp, struct scsi_sense_hdr *sshdr)
 static void sd_rescan(struct device *dev)
 {
 	struct scsi_disk *sdkp = dev_get_drvdata(dev);
+	int ret;
 
-	revalidate_disk(sdkp->disk);
+	ret = sd_revalidate_disk(sdkp->disk);
+	revalidate_disk_size(sdkp->disk, ret == 0);
 }
 
 static int sd_ioctl(struct block_device *bdev, fmode_t mode,
@@ -1841,7 +1845,6 @@ static const struct block_device_operations sd_fops = {
 	.compat_ioctl = sd_compat_ioctl,
 #endif
 	.check_events = sd_check_events,
-	.revalidate_disk = sd_revalidate_disk,
 	.unlock_native_capacity = sd_unlock_native_capacity,
 	.report_zones = sd_zbc_report_zones,
 	.pr_ops = &sd_pr_ops,

@@ -517,6 +517,17 @@ static blk_status_t sr_init_command(struct scsi_cmnd *SCpnt)
 	return ret;
 }
 
+static void sr_revalidate_disk(struct scsi_cd *cd)
+{
+	struct scsi_sense_hdr sshdr;
+
+	/* if the unit is not ready, nothing more to do */
+	if (scsi_test_unit_ready(cd->device, SR_TIMEOUT, MAX_RETRIES, &sshdr))
+		return;
+	sr_cd_check(&cd->cdi);
+	get_sectorsize(cd);
+}
+
 static int sr_block_open(struct block_device *bdev, fmode_t mode)
 {
 	struct scsi_cd *cd;
@@ -529,7 +540,8 @@ static int sr_block_open(struct block_device *bdev, fmode_t mode)
 	sdev = cd->device;
 	scsi_autopm_get_device(sdev);
-	check_disk_change(bdev);
+	if (bdev_check_media_change(bdev))
+		sr_revalidate_disk(cd);
 
 	mutex_lock(&cd->lock);
 	ret = cdrom_open(&cd->cdi, bdev, mode);
@@ -658,26 +670,6 @@ static unsigned int sr_block_check_events(struct gendisk *disk,
 	return ret;
 }
 
-static int sr_block_revalidate_disk(struct gendisk *disk)
-{
-	struct scsi_sense_hdr sshdr;
-	struct scsi_cd *cd;
-
-	cd = scsi_cd_get(disk);
-	if (!cd)
-		return -ENXIO;
-
-	/* if the unit is not ready, nothing more to do */
-	if (scsi_test_unit_ready(cd->device, SR_TIMEOUT, MAX_RETRIES, &sshdr))
-		goto out;
-
-	sr_cd_check(&cd->cdi);
-	get_sectorsize(cd);
-out:
-	scsi_cd_put(cd);
-	return 0;
-}
-
 static const struct block_device_operations sr_bdops =
 {
 	.owner		= THIS_MODULE,
@@ -688,7 +680,6 @@ static const struct block_device_operations sr_bdops =
 	.compat_ioctl = sr_block_compat_ioctl,
 #endif
 	.check_events = sr_block_check_events,
-	.revalidate_disk = sr_block_revalidate_disk,
 };
 
 static int sr_open(struct cdrom_device_info *cdi, int purpose)
@@ -802,6 +793,7 @@ static int sr_probe(struct device *dev)
 	dev_set_drvdata(dev, cd);
 	disk->flags |= GENHD_FL_REMOVABLE;
+	sr_revalidate_disk(cd);
 	device_add_disk(&sdev->sdev_gendev, disk, NULL);
 
 	sdev_printk(KERN_DEBUG, sdev,
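
check_disk_change() in ->open is replaced by bdev_check_media_change(), which only reports whether a media-change event was pending and leaves revalidation to the caller, as the sd and sr hunks above show. A sketch of the resulting open path for a removable-media driver; both function names are invented:

#include <linux/blkdev.h>
#include <linux/genhd.h>

/* Placeholder for the driver's media re-probe (e.g. sr_revalidate_disk()). */
static void my_media_revalidate(struct gendisk *disk)
{
}

static int my_open(struct block_device *bdev, fmode_t mode)
{
        /* True if a media change was signalled since the last check. */
        if (bdev_check_media_change(bdev))
                my_media_revalidate(bdev->bd_disk);
        return 0;
}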

@@ -611,9 +611,8 @@ static ssize_t iblock_show_configfs_dev_params(struct se_device *dev, char *b)
 	bl += sprintf(b + bl, "        ");
 	if (bd) {
 		bl += sprintf(b + bl, "Major: %d Minor: %d  %s\n",
-			MAJOR(bd->bd_dev), MINOR(bd->bd_dev), (!bd->bd_contains) ?
-			"" : (bd->bd_holder == ib_dev) ?
-			"CLAIMED: IBLOCK" : "CLAIMED: OS");
+			MAJOR(bd->bd_dev), MINOR(bd->bd_dev),
+			"CLAIMED: IBLOCK");
 	} else {
 		bl += sprintf(b + bl, "Major: 0 Minor: 0\n");
 	}

@@ -625,7 +625,7 @@ static void v9fs_mmap_vm_close(struct vm_area_struct *vma)
 	inode = file_inode(vma->vm_file);
 
-	if (!mapping_cap_writeback_dirty(inode->i_mapping))
+	if (!mapping_can_writeback(inode->i_mapping))
 		wbc.nr_to_write = 0;
 
 	might_sleep();
