diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h
index 8f43a366b82c..2dbffb3b5485 100644
--- a/drivers/block/drbd/drbd_int.h
+++ b/drivers/block/drbd/drbd_int.h
@@ -1326,8 +1326,16 @@ struct bm_extent {
 #endif
 #endif
 
-#define HT_SHIFT 8
-#define DRBD_MAX_BIO_SIZE (1U<<(9+HT_SHIFT))
+/* BIO_MAX_SIZE is 256 * PAGE_CACHE_SIZE,
+ * so for a typical PAGE_CACHE_SIZE of 4k, that is (1<<20) bytes.
+ * Since we may live in a mixed-platform cluster,
+ * we limit ourselves to a platform-agnostic constant here for now.
+ * A follow-up commit may allow even bigger BIO sizes,
+ * once we have thought that through. */
+#define DRBD_MAX_BIO_SIZE (1 << 20)
+#if DRBD_MAX_BIO_SIZE > BIO_MAX_SIZE
+#error Architecture not supported: DRBD_MAX_BIO_SIZE > BIO_MAX_SIZE
+#endif
 #define DRBD_MAX_BIO_SIZE_SAFE (1 << 12) /* Works always = 4k */
 
 #define DRBD_MAX_SIZE_H80_PACKET (1 << 15) /* The old header only allows packets up to 32KiB data */
@@ -2231,20 +2239,20 @@ static inline bool may_inc_ap_bio(struct drbd_conf *mdev)
 	return true;
 }
 
-static inline bool inc_ap_bio_cond(struct drbd_conf *mdev, int count)
+static inline bool inc_ap_bio_cond(struct drbd_conf *mdev)
 {
 	bool rv = false;
 
 	spin_lock_irq(&mdev->tconn->req_lock);
 	rv = may_inc_ap_bio(mdev);
 	if (rv)
-		atomic_add(count, &mdev->ap_bio_cnt);
+		atomic_inc(&mdev->ap_bio_cnt);
 	spin_unlock_irq(&mdev->tconn->req_lock);
 
 	return rv;
 }
 
-static inline void inc_ap_bio(struct drbd_conf *mdev, int count)
+static inline void inc_ap_bio(struct drbd_conf *mdev)
 {
 	/* we wait here
 	 *    as long as the device is suspended
@@ -2254,7 +2262,7 @@ static inline void inc_ap_bio(struct drbd_conf *mdev, int count)
 	 * to avoid races with the reconnect code,
 	 * we need to atomic_inc within the spinlock. */
 
-	wait_event(mdev->misc_wait, inc_ap_bio_cond(mdev, count));
+	wait_event(mdev->misc_wait, inc_ap_bio_cond(mdev));
 }
 
 static inline void dec_ap_bio(struct drbd_conf *mdev)
diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c
index 4406d829800f..6e0e3bb33167 100644
--- a/drivers/block/drbd/drbd_req.c
+++ b/drivers/block/drbd/drbd_req.c
@@ -998,7 +998,6 @@ int __drbd_make_request(struct drbd_conf *mdev, struct bio *bio, unsigned long s
 
 int drbd_make_request(struct request_queue *q, struct bio *bio)
 {
-	unsigned int s_enr, e_enr;
 	struct drbd_conf *mdev = (struct drbd_conf *) q->queuedata;
 	unsigned long start_time;
 
@@ -1010,93 +1009,30 @@ int drbd_make_request(struct request_queue *q, struct bio *bio)
 	D_ASSERT(bio->bi_size > 0);
 	D_ASSERT(IS_ALIGNED(bio->bi_size, 512));
 
-	/* to make some things easier, force alignment of requests within the
-	 * granularity of our hash tables */
-	s_enr = bio->bi_sector >> HT_SHIFT;
-	e_enr = (bio->bi_sector+(bio->bi_size>>9)-1) >> HT_SHIFT;
-
-	if (likely(s_enr == e_enr)) {
-		inc_ap_bio(mdev, 1);
-		return __drbd_make_request(mdev, bio, start_time);
-	}
-
-	/* can this bio be split generically?
-	 * Maybe add our own split-arbitrary-bios function. */
-	if (bio->bi_vcnt != 1 || bio->bi_idx != 0 || bio->bi_size > DRBD_MAX_BIO_SIZE) {
-		/* rather error out here than BUG in bio_split */
-		dev_err(DEV, "bio would need to, but cannot, be split: "
-		    "(vcnt=%u,idx=%u,size=%u,sector=%llu)\n",
-		    bio->bi_vcnt, bio->bi_idx, bio->bi_size,
-		    (unsigned long long)bio->bi_sector);
-		bio_endio(bio, -EINVAL);
-	} else {
-		/* This bio crosses some boundary, so we have to split it. */
-		struct bio_pair *bp;
-		/* works for the "do not cross hash slot boundaries" case
-		 * e.g. sector 262269, size 4096
-		 * s_enr = 262269 >> 6 = 4097
-		 * e_enr = (262269+8-1) >> 6 = 4098
-		 * HT_SHIFT = 6
-		 * sps = 64, mask = 63
-		 * first_sectors = 64 - (262269 & 63) = 3
-		 */
-		const sector_t sect = bio->bi_sector;
-		const int sps = 1 << HT_SHIFT; /* sectors per slot */
-		const int mask = sps - 1;
-		const sector_t first_sectors = sps - (sect & mask);
-		bp = bio_split(bio, first_sectors);
-
-		/* we need to get a "reference count" (ap_bio_cnt)
-		 * to avoid races with the disconnect/reconnect/suspend code.
-		 * In case we need to split the bio here, we need to get three references
-		 * atomically, otherwise we might deadlock when trying to submit the
-		 * second one! */
-		inc_ap_bio(mdev, 3);
-
-		D_ASSERT(e_enr == s_enr + 1);
-
-		while (__drbd_make_request(mdev, &bp->bio1, start_time))
-			inc_ap_bio(mdev, 1);
-
-		while (__drbd_make_request(mdev, &bp->bio2, start_time))
-			inc_ap_bio(mdev, 1);
-
-		dec_ap_bio(mdev);
-
-		bio_pair_release(bp);
-	}
-	return 0;
+	inc_ap_bio(mdev);
+	return __drbd_make_request(mdev, bio, start_time);
 }
 
-/* This is called by bio_add_page(). With this function we reduce
- * the number of BIOs that span over multiple DRBD_MAX_BIO_SIZEs
- * units (was AL_EXTENTs).
+/* This is called by bio_add_page().
  *
- * we do the calculation within the lower 32bit of the byte offsets,
- * since we don't care for actual offset, but only check whether it
- * would cross "activity log extent" boundaries.
+ * q->max_hw_sectors and other global limits are already enforced there.
+ *
+ * We need to call down to our lower level device,
+ * in case it has special restrictions.
+ *
+ * We also may need to enforce configured max-bio-bvecs limits.
  *
  * As long as the BIO is empty we have to allow at least one bvec,
- * regardless of size and offset. so the resulting bio may still
- * cross extent boundaries. those are dealt with (bio_split) in
- * drbd_make_request.
+ * regardless of size and offset, so no need to ask lower levels.
  */
 int drbd_merge_bvec(struct request_queue *q, struct bvec_merge_data *bvm, struct bio_vec *bvec)
 {
 	struct drbd_conf *mdev = (struct drbd_conf *) q->queuedata;
-	unsigned int bio_offset =
-		(unsigned int)bvm->bi_sector << 9; /* 32 bit */
 	unsigned int bio_size = bvm->bi_size;
-	int limit, backing_limit;
+	int limit = DRBD_MAX_BIO_SIZE;
+	int backing_limit;
 
-	limit = DRBD_MAX_BIO_SIZE
-	      - ((bio_offset & (DRBD_MAX_BIO_SIZE-1)) + bio_size);
-	if (limit < 0)
-		limit = 0;
-	if (bio_size == 0) {
-		if (limit <= bvec->bv_len)
-			limit = bvec->bv_len;
-	} else if (limit && get_ldev(mdev)) {
+	if (bio_size && get_ldev(mdev)) {
 		struct request_queue * const b =
 			mdev->ldev->backing_bdev->bd_disk->queue;
 		if (b->merge_bvec_fn) {
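
For reference, the hash-slot arithmetic that the removed splitting path relied on can be checked in isolation. Below is a minimal userspace sketch (not kernel code) that reproduces the worked example from the deleted comment: sector 262269, size 4096, using the comment's illustrative HT_SHIFT of 6 (the driver itself used HT_SHIFT 8).

/* Userspace sketch of the deleted drbd_make_request() split arithmetic. */
#include <stdio.h>
#include <stdint.h>

#define HT_SHIFT 6	/* illustrative value from the deleted comment */

int main(void)
{
	uint64_t sector = 262269;		/* bio start, in 512-byte sectors */
	uint32_t size = 4096;			/* bio size in bytes */
	uint32_t nr_sectors = size >> 9;	/* 4096 / 512 = 8 sectors */

	uint64_t s_enr = sector >> HT_SHIFT;				/* first slot: 4097 */
	uint64_t e_enr = (sector + nr_sectors - 1) >> HT_SHIFT;	/* last slot: 4098 */

	uint32_t sps = 1u << HT_SHIFT;		/* sectors per slot: 64 */
	uint32_t mask = sps - 1;		/* 63 */
	uint64_t first_sectors = sps - (sector & mask);	/* 64 - 61 = 3 */

	/* s_enr != e_enr here, so the old code had to bio_split() after 3 sectors. */
	printf("s_enr=%llu e_enr=%llu first_sectors=%llu\n",
	       (unsigned long long)s_enr,
	       (unsigned long long)e_enr,
	       (unsigned long long)first_sectors);
	return 0;
}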
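The drbd_merge_bvec() change is easiest to see with concrete numbers: the old code capped each bio at the next DRBD_MAX_BIO_SIZE-aligned boundary (so bios never crossed a hash slot), while the new code starts from a flat DRBD_MAX_BIO_SIZE and only defers to the backing device's merge_bvec_fn. A minimal userspace sketch, with hypothetical helper names old_limit()/new_limit() and illustrative values:

#include <stdio.h>
#include <stdint.h>

#define DRBD_MAX_BIO_SIZE (1 << 20)

/* Old behaviour: allow only up to the next DRBD_MAX_BIO_SIZE-aligned boundary. */
static int old_limit(uint64_t bi_sector, uint32_t bio_size)
{
	unsigned int bio_offset = (unsigned int)bi_sector << 9;	/* low 32 bits */
	int limit = DRBD_MAX_BIO_SIZE -
		((bio_offset & (DRBD_MAX_BIO_SIZE - 1)) + bio_size);
	return limit < 0 ? 0 : limit;
}

/* New behaviour: flat per-bio cap; backing-device limits are applied separately. */
static int new_limit(void)
{
	return DRBD_MAX_BIO_SIZE;
}

int main(void)
{
	/* An empty bio starting 4 KiB before a 1 MiB boundary. */
	uint64_t sector = 2040;	/* byte offset 1044480 = 1 MiB - 4 KiB */
	uint32_t bio_size = 0;

	printf("old limit: %d bytes\n", old_limit(sector, bio_size));	/* 4096 */
	printf("new limit: %d bytes\n", new_limit());			/* 1048576 */
	return 0;
}

The sketch omits the old code's empty-bio special case (it always admitted at least one bvec, which is exactly why the resulting bio could still cross a slot boundary and need bio_split() in drbd_make_request()).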