block: Add discard flag to blkdev_issue_zeroout() function

blkdev_issue_zeroout() will zero a given block range. This is done by
way of explicit writing, thus provisioning or allocating the blocks on
disk.

There are use cases where the desired behavior is to zero the blocks but
deprovision them if possible. The blocks must deterministically contain
zeroes when they are subsequently read back.

This patch adds a flag to blkdev_issue_zeroout() that provides this
variant. If the discard flag is set and a block device guarantees
discard_zeroes_data we will use REQ_DISCARD to clear the block range. If
the device does not support discard_zeroes_data or if the discard
request fails we will fall back to first REQ_WRITE_SAME and then a
regular REQ_WRITE.

Also update the callers of blkdev_issue_zeroout() to reflect the new
flag and make sb_issue_zeroout() prefer the discard approach.
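
For illustration, a minimal kernel-side sketch of the new calling
convention; example_zero_region() and its constants are hypothetical,
only blkdev_issue_zeroout() and its discard flag come from this patch:

#include <linux/blkdev.h>

/*
 * Hypothetical caller: zero a 1 MiB range starting at sector 2048,
 * preferring deprovisioning. With discard=true the block layer tries
 * REQ_DISCARD first (when discard_zeroes_data is guaranteed), then
 * WRITE SAME, then regular WRITEs.
 */
static int example_zero_region(struct block_device *bdev)
{
	sector_t start = 2048;			/* in 512-byte sectors */
	sector_t nr_sects = (1024 * 1024) >> 9;	/* 1 MiB */

	return blkdev_issue_zeroout(bdev, start, nr_sects, GFP_KERNEL, true);
}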

Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jens Axboe <axboe@fb.com>
Authored by Martin K. Petersen on 2015-01-20 20:06:30 -05:00; committed by Jens Axboe
parent c6ce194325
commit d93ba7a5a9
4 changed files with 30 additions and 8 deletions

block/blk-lib.c

@@ -283,23 +283,45 @@ static int __blkdev_issue_zeroout(struct block_device *bdev, sector_t sector,
  * @sector:	start sector
  * @nr_sects:	number of sectors to write
  * @gfp_mask:	memory allocation flags (for bio_alloc)
+ * @discard:	whether to discard the block range
  *
  * Description:
- *  Generate and issue number of bios with zerofiled pages.
+ *  Zero-fill a block range. If the discard flag is set and the block
+ *  device guarantees that subsequent READ operations to the block range
+ *  in question will return zeroes, the blocks will be discarded. Should
+ *  the discard request fail, if the discard flag is not set, or if
+ *  discard_zeroes_data is not supported, this function will resort to
+ *  zeroing the blocks manually, thus provisioning (allocating,
+ *  anchoring) them. If the block device supports the WRITE SAME command
+ *  blkdev_issue_zeroout() will use it to optimize the process of
+ *  clearing the block range. Otherwise the zeroing will be performed
+ *  using regular WRITE calls.
  */
 int blkdev_issue_zeroout(struct block_device *bdev, sector_t sector,
-			 sector_t nr_sects, gfp_t gfp_mask)
+			 sector_t nr_sects, gfp_t gfp_mask, bool discard)
 {
+	struct request_queue *q = bdev_get_queue(bdev);
+	unsigned char bdn[BDEVNAME_SIZE];
+
+	if (discard && blk_queue_discard(q) && q->limits.discard_zeroes_data) {
+		if (!blkdev_issue_discard(bdev, sector, nr_sects, gfp_mask, 0))
+			return 0;
+
+		bdevname(bdev, bdn);
+		pr_warn("%s: DISCARD failed. Manually zeroing.\n", bdn);
+	}
+
 	if (bdev_write_same(bdev)) {
-		unsigned char bdn[BDEVNAME_SIZE];
-
 		if (!blkdev_issue_write_same(bdev, sector, nr_sects, gfp_mask,
 					     ZERO_PAGE(0)))
 			return 0;
 
 		bdevname(bdev, bdn);
-		pr_err("%s: WRITE SAME failed. Manually zeroing.\n", bdn);
+		pr_warn("%s: WRITE SAME failed. Manually zeroing.\n", bdn);
 	}
 
 	return __blkdev_issue_zeroout(bdev, sector, nr_sects, gfp_mask);
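
The predicate guarding the new discard branch is visible above. As a
hedged aside, a caller that wanted to know up front whether the discard
path can possibly be taken could test the same queue limits;
example_can_discard_zero() is a hypothetical name, the checks themselves
are the ones the patch adds:

#include <linux/blkdev.h>

/* The same test blkdev_issue_zeroout() now performs internally. */
static bool example_can_discard_zero(struct block_device *bdev)
{
	struct request_queue *q = bdev_get_queue(bdev);

	return blk_queue_discard(q) && q->limits.discard_zeroes_data;
}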

block/ioctl.c

@@ -198,7 +198,7 @@ static int blk_ioctl_zeroout(struct block_device *bdev, uint64_t start,
 	if (start + len > (i_size_read(bdev->bd_inode) >> 9))
 		return -EINVAL;
 
-	return blkdev_issue_zeroout(bdev, start, len, GFP_KERNEL);
+	return blkdev_issue_zeroout(bdev, start, len, GFP_KERNEL, false);
 }
 
 static int put_ushort(unsigned long arg, unsigned short val)
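
blk_ioctl_zeroout() backs the BLKZEROOUT ioctl, which now explicitly
requests the non-discard variant, keeping the blocks provisioned. A
minimal userspace sketch of that ioctl (the device path is a
placeholder; BLKZEROOUT destroys data in the given range):

#include <fcntl.h>
#include <stdint.h>
#include <stdio.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <linux/fs.h>	/* BLKZEROOUT */

int main(void)
{
	uint64_t range[2] = { 0, 1 << 20 };	/* byte offset, byte length */
	int fd = open("/dev/sdX", O_WRONLY);	/* placeholder device */

	if (fd < 0)
		return 1;
	if (ioctl(fd, BLKZEROOUT, &range))
		perror("BLKZEROOUT");
	close(fd);
	return 0;
}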

drivers/block/drbd/drbd_receiver.c

@@ -1388,7 +1388,7 @@ int drbd_submit_peer_request(struct drbd_device *device,
 		list_add_tail(&peer_req->w.list, &device->active_ee);
 		spin_unlock_irq(&device->resource->req_lock);
 		if (blkdev_issue_zeroout(device->ldev->backing_bdev,
-			sector, data_size >> 9, GFP_NOIO))
+			sector, data_size >> 9, GFP_NOIO, false))
 			peer_req->flags |= EE_WAS_ERROR;
 		drbd_endio_write_sec_final(peer_req);
 		return 0;

include/linux/blkdev.h

@@ -1162,7 +1162,7 @@ extern int blkdev_issue_discard(struct block_device *bdev, sector_t sector,
 extern int blkdev_issue_write_same(struct block_device *bdev, sector_t sector,
 		sector_t nr_sects, gfp_t gfp_mask, struct page *page);
 extern int blkdev_issue_zeroout(struct block_device *bdev, sector_t sector,
-		sector_t nr_sects, gfp_t gfp_mask);
+		sector_t nr_sects, gfp_t gfp_mask, bool discard);
 static inline int sb_issue_discard(struct super_block *sb, sector_t block,
 		sector_t nr_blocks, gfp_t gfp_mask, unsigned long flags)
 {
@@ -1176,7 +1176,7 @@ static inline int sb_issue_zeroout(struct super_block *sb, sector_t block,
 	return blkdev_issue_zeroout(sb->s_bdev,
 				    block << (sb->s_blocksize_bits - 9),
 				    nr_blocks << (sb->s_blocksize_bits - 9),
-				    gfp_mask);
+				    gfp_mask, true);
 }
 
 extern int blk_verify_command(unsigned char *cmd, fmode_t has_write_perm);
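
With sb_issue_zeroout() now passing true, filesystem callers get the
discard-preferring behavior by default. A hedged filesystem-side sketch
(the block numbers and function name are illustrative, not from the
patch):

#include <linux/blkdev.h>

/*
 * Illustrative only: zero 16 filesystem blocks starting at block 100.
 * sb_issue_zeroout() converts blocks to 512-byte sectors and calls
 * blkdev_issue_zeroout() with discard=true, so the range is
 * deprovisioned when the device guarantees discard_zeroes_data.
 */
static int example_fs_zero_blocks(struct super_block *sb)
{
	return sb_issue_zeroout(sb, 100, 16, GFP_NOFS);
}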