Merge branch 'for-3.7/core' of git://git.kernel.dk/linux-block

Pull block IO update from Jens Axboe:
 "Core block IO bits for 3.7.  Not a huge round this time, it contains:

   - First series from Kent cleaning up and generalizing bio allocation
     and freeing.

   - WRITE_SAME support from Martin.

   - Mikulas patches to prevent O_DIRECT crashes when someone changes
     the block size of a device.

   - Make bio_split() work on data-less bio's (like trim/discards).

   - A few other minor fixups."

Fixed up silent semantic mis-merge as per Mikulas Patocka and Andrew
Morton.  It is due to the VM no longer using a prio-tree (see commit
6b2dbba8b6: "mm: replace vma prio_tree with an interval tree").

So make set_blocksize() use mapping_mapped() instead of open-coding the
internal VM knowledge that has changed.

* 'for-3.7/core' of git://git.kernel.dk/linux-block: (26 commits)
  block: makes bio_split support bio without data
  scatterlist: refactor the sg_nents
  scatterlist: add sg_nents
  fs: fix include/percpu-rwsem.h export error
  percpu-rw-semaphore: fix documentation typos
  fs/block_dev.c:1644:5: sparse: symbol 'blkdev_mmap' was not declared
  blockdev: turn a rw semaphore into a percpu rw semaphore
  Fix a crash when block device is read and block size is changed at the same time
  block: fix request_queue->flags initialization
  block: lift the initial queue bypass mode on blk_register_queue() instead of blk_init_allocated_queue()
  block: ioctl to zero block ranges
  block: Make blkdev_issue_zeroout use WRITE SAME
  block: Implement support for WRITE SAME
  block: Consolidate command flag and queue limit checks for merges
  block: Clean up special command handling logic
  block/blk-tag.c: Remove useless kfree
  block: remove the duplicated setting for congestion_threshold
  block: reject invalid queue attribute values
  block: Add bio_clone_bioset(), bio_clone_kmalloc()
  block: Consolidate bio_alloc_bioset(), bio_kmalloc()
  ...
This commit is contained in:
Linus Torvalds 2012-10-11 09:04:23 +09:00
commit ce40be7a82
33 changed files with 771 additions and 465 deletions

View file

@ -606,8 +606,8 @@ struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id)
/*
* A queue starts its life with bypass turned on to avoid
* unnecessary bypass on/off overhead and nasty surprises during
* init. The initial bypass will be finished at the end of
* blk_init_allocated_queue().
* init. The initial bypass will be finished when the queue is
* registered by blk_register_queue().
*/
q->bypass_depth = 1;
__set_bit(QUEUE_FLAG_BYPASS, &q->queue_flags);
@ -694,7 +694,7 @@ blk_init_allocated_queue(struct request_queue *q, request_fn_proc *rfn,
q->request_fn = rfn;
q->prep_rq_fn = NULL;
q->unprep_rq_fn = NULL;
q->queue_flags = QUEUE_FLAG_DEFAULT;
q->queue_flags |= QUEUE_FLAG_DEFAULT;
/* Override internal queue lock with supplied lock pointer */
if (lock)
@ -710,11 +710,6 @@ blk_init_allocated_queue(struct request_queue *q, request_fn_proc *rfn,
/* init elevator */
if (elevator_init(q, NULL))
return NULL;
blk_queue_congestion_threshold(q);
/* all done, end the initial bypass */
blk_queue_bypass_end(q);
return q;
}
EXPORT_SYMBOL(blk_init_allocated_queue);
@ -1657,8 +1652,8 @@ generic_make_request_checks(struct bio *bio)
goto end_io;
}
if (unlikely(!(bio->bi_rw & REQ_DISCARD) &&
nr_sectors > queue_max_hw_sectors(q))) {
if (likely(bio_is_rw(bio) &&
nr_sectors > queue_max_hw_sectors(q))) {
printk(KERN_ERR "bio too big device %s (%u > %u)\n",
bdevname(bio->bi_bdev, b),
bio_sectors(bio),
@ -1699,8 +1694,12 @@ generic_make_request_checks(struct bio *bio)
if ((bio->bi_rw & REQ_DISCARD) &&
(!blk_queue_discard(q) ||
((bio->bi_rw & REQ_SECURE) &&
!blk_queue_secdiscard(q)))) {
((bio->bi_rw & REQ_SECURE) && !blk_queue_secdiscard(q)))) {
err = -EOPNOTSUPP;
goto end_io;
}
if (bio->bi_rw & REQ_WRITE_SAME && !bdev_write_same(bio->bi_bdev)) {
err = -EOPNOTSUPP;
goto end_io;
}
@ -1810,15 +1809,20 @@ EXPORT_SYMBOL(generic_make_request);
*/
void submit_bio(int rw, struct bio *bio)
{
int count = bio_sectors(bio);
bio->bi_rw |= rw;
/*
* If it's a regular read/write or a barrier with data attached,
* go through the normal accounting stuff before submission.
*/
if (bio_has_data(bio) && !(rw & REQ_DISCARD)) {
if (bio_has_data(bio)) {
unsigned int count;
if (unlikely(rw & REQ_WRITE_SAME))
count = bdev_logical_block_size(bio->bi_bdev) >> 9;
else
count = bio_sectors(bio);
if (rw & WRITE) {
count_vm_events(PGPGOUT, count);
} else {
@ -1864,11 +1868,10 @@ EXPORT_SYMBOL(submit_bio);
*/
int blk_rq_check_limits(struct request_queue *q, struct request *rq)
{
if (rq->cmd_flags & REQ_DISCARD)
if (!rq_mergeable(rq))
return 0;
if (blk_rq_sectors(rq) > queue_max_sectors(q) ||
blk_rq_bytes(rq) > queue_max_hw_sectors(q) << 9) {
if (blk_rq_sectors(rq) > blk_queue_get_max_sectors(q, rq->cmd_flags)) {
printk(KERN_ERR "%s: over max size limit.\n", __func__);
return -EIO;
}
@ -2340,7 +2343,7 @@ bool blk_update_request(struct request *req, int error, unsigned int nr_bytes)
req->buffer = bio_data(req->bio);
/* update sector only for requests with clear definition of sector */
if (req->cmd_type == REQ_TYPE_FS || (req->cmd_flags & REQ_DISCARD))
if (req->cmd_type == REQ_TYPE_FS)
req->__sector += total_bytes >> 9;
/* mixed attributes always follow the first bio */
@ -2781,16 +2784,10 @@ int blk_rq_prep_clone(struct request *rq, struct request *rq_src,
blk_rq_init(NULL, rq);
__rq_for_each_bio(bio_src, rq_src) {
bio = bio_alloc_bioset(gfp_mask, bio_src->bi_max_vecs, bs);
bio = bio_clone_bioset(bio_src, gfp_mask, bs);
if (!bio)
goto free_and_out;
__bio_clone(bio, bio_src);
if (bio_integrity(bio_src) &&
bio_integrity_clone(bio, bio_src, gfp_mask, bs))
goto free_and_out;
if (bio_ctr && bio_ctr(bio, bio_src, data))
goto free_and_out;
@ -2807,7 +2804,7 @@ int blk_rq_prep_clone(struct request *rq, struct request *rq_src,
free_and_out:
if (bio)
bio_free(bio, bs);
bio_put(bio);
blk_rq_unprep_clone(rq);
return -ENOMEM;

View file

@ -129,6 +129,80 @@ int blkdev_issue_discard(struct block_device *bdev, sector_t sector,
}
EXPORT_SYMBOL(blkdev_issue_discard);
/**
* blkdev_issue_write_same - queue a write same operation
* @bdev: target blockdev
* @sector: start sector
* @nr_sects: number of sectors to write
* @gfp_mask: memory allocation flags (for bio_alloc)
* @page: page containing data to write
*
* Description:
* Issue a write same request for the sectors in question.
*/
int blkdev_issue_write_same(struct block_device *bdev, sector_t sector,
sector_t nr_sects, gfp_t gfp_mask,
struct page *page)
{
DECLARE_COMPLETION_ONSTACK(wait);
struct request_queue *q = bdev_get_queue(bdev);
unsigned int max_write_same_sectors;
struct bio_batch bb;
struct bio *bio;
int ret = 0;
if (!q)
return -ENXIO;
max_write_same_sectors = q->limits.max_write_same_sectors;
if (max_write_same_sectors == 0)
return -EOPNOTSUPP;
atomic_set(&bb.done, 1);
bb.flags = 1 << BIO_UPTODATE;
bb.wait = &wait;
while (nr_sects) {
bio = bio_alloc(gfp_mask, 1);
if (!bio) {
ret = -ENOMEM;
break;
}
bio->bi_sector = sector;
bio->bi_end_io = bio_batch_end_io;
bio->bi_bdev = bdev;
bio->bi_private = &bb;
bio->bi_vcnt = 1;
bio->bi_io_vec->bv_page = page;
bio->bi_io_vec->bv_offset = 0;
bio->bi_io_vec->bv_len = bdev_logical_block_size(bdev);
if (nr_sects > max_write_same_sectors) {
bio->bi_size = max_write_same_sectors << 9;
nr_sects -= max_write_same_sectors;
sector += max_write_same_sectors;
} else {
bio->bi_size = nr_sects << 9;
nr_sects = 0;
}
atomic_inc(&bb.done);
submit_bio(REQ_WRITE | REQ_WRITE_SAME, bio);
}
/* Wait for bios in-flight */
if (!atomic_dec_and_test(&bb.done))
wait_for_completion(&wait);
if (!test_bit(BIO_UPTODATE, &bb.flags))
ret = -ENOTSUPP;
return ret;
}
EXPORT_SYMBOL(blkdev_issue_write_same);
/**
* blkdev_issue_zeroout - generate number of zero filed write bios
* @bdev: blockdev to issue
@ -140,7 +214,7 @@ EXPORT_SYMBOL(blkdev_issue_discard);
* Generate and issue number of bios with zerofiled pages.
*/
int blkdev_issue_zeroout(struct block_device *bdev, sector_t sector,
int __blkdev_issue_zeroout(struct block_device *bdev, sector_t sector,
sector_t nr_sects, gfp_t gfp_mask)
{
int ret;
@ -190,4 +264,32 @@ int blkdev_issue_zeroout(struct block_device *bdev, sector_t sector,
return ret;
}
/**
* blkdev_issue_zeroout - zero-fill a block range
* @bdev: blockdev to write
* @sector: start sector
* @nr_sects: number of sectors to write
* @gfp_mask: memory allocation flags (for bio_alloc)
*
* Description:
* Generate and issue number of bios with zerofiled pages.
*/
int blkdev_issue_zeroout(struct block_device *bdev, sector_t sector,
sector_t nr_sects, gfp_t gfp_mask)
{
if (bdev_write_same(bdev)) {
unsigned char bdn[BDEVNAME_SIZE];
if (!blkdev_issue_write_same(bdev, sector, nr_sects, gfp_mask,
ZERO_PAGE(0)))
return 0;
bdevname(bdev, bdn);
pr_err("%s: WRITE SAME failed. Manually zeroing.\n", bdn);
}
return __blkdev_issue_zeroout(bdev, sector, nr_sects, gfp_mask);
}
EXPORT_SYMBOL(blkdev_issue_zeroout);

View file

@ -275,14 +275,8 @@ no_merge:
int ll_back_merge_fn(struct request_queue *q, struct request *req,
struct bio *bio)
{
unsigned short max_sectors;
if (unlikely(req->cmd_type == REQ_TYPE_BLOCK_PC))
max_sectors = queue_max_hw_sectors(q);
else
max_sectors = queue_max_sectors(q);
if (blk_rq_sectors(req) + bio_sectors(bio) > max_sectors) {
if (blk_rq_sectors(req) + bio_sectors(bio) >
blk_rq_get_max_sectors(req)) {
req->cmd_flags |= REQ_NOMERGE;
if (req == q->last_merge)
q->last_merge = NULL;
@ -299,15 +293,8 @@ int ll_back_merge_fn(struct request_queue *q, struct request *req,
int ll_front_merge_fn(struct request_queue *q, struct request *req,
struct bio *bio)
{
unsigned short max_sectors;
if (unlikely(req->cmd_type == REQ_TYPE_BLOCK_PC))
max_sectors = queue_max_hw_sectors(q);
else
max_sectors = queue_max_sectors(q);
if (blk_rq_sectors(req) + bio_sectors(bio) > max_sectors) {
if (blk_rq_sectors(req) + bio_sectors(bio) >
blk_rq_get_max_sectors(req)) {
req->cmd_flags |= REQ_NOMERGE;
if (req == q->last_merge)
q->last_merge = NULL;
@ -338,7 +325,8 @@ static int ll_merge_requests_fn(struct request_queue *q, struct request *req,
/*
* Will it become too large?
*/
if ((blk_rq_sectors(req) + blk_rq_sectors(next)) > queue_max_sectors(q))
if ((blk_rq_sectors(req) + blk_rq_sectors(next)) >
blk_rq_get_max_sectors(req))
return 0;
total_phys_segments = req->nr_phys_segments + next->nr_phys_segments;
@ -417,16 +405,7 @@ static int attempt_merge(struct request_queue *q, struct request *req,
if (!rq_mergeable(req) || !rq_mergeable(next))
return 0;
/*
* Don't merge file system requests and discard requests
*/
if ((req->cmd_flags & REQ_DISCARD) != (next->cmd_flags & REQ_DISCARD))
return 0;
/*
* Don't merge discard requests and secure discard requests
*/
if ((req->cmd_flags & REQ_SECURE) != (next->cmd_flags & REQ_SECURE))
if (!blk_check_merge_flags(req->cmd_flags, next->cmd_flags))
return 0;
/*
@ -440,6 +419,10 @@ static int attempt_merge(struct request_queue *q, struct request *req,
|| next->special)
return 0;
if (req->cmd_flags & REQ_WRITE_SAME &&
!blk_write_same_mergeable(req->bio, next->bio))
return 0;
/*
* If we are allowed to merge, then append bio list
* from next to rq and release next. merge_requests_fn
@ -521,15 +504,10 @@ int blk_attempt_req_merge(struct request_queue *q, struct request *rq,
bool blk_rq_merge_ok(struct request *rq, struct bio *bio)
{
if (!rq_mergeable(rq))
if (!rq_mergeable(rq) || !bio_mergeable(bio))
return false;
/* don't merge file system requests and discard requests */
if ((bio->bi_rw & REQ_DISCARD) != (rq->bio->bi_rw & REQ_DISCARD))
return false;
/* don't merge discard requests and secure discard requests */
if ((bio->bi_rw & REQ_SECURE) != (rq->bio->bi_rw & REQ_SECURE))
if (!blk_check_merge_flags(rq->cmd_flags, bio->bi_rw))
return false;
/* different data direction or already started, don't merge */
@ -544,6 +522,11 @@ bool blk_rq_merge_ok(struct request *rq, struct bio *bio)
if (bio_integrity(bio) != blk_integrity_rq(rq))
return false;
/* must be using the same buffer */
if (rq->cmd_flags & REQ_WRITE_SAME &&
!blk_write_same_mergeable(rq->bio, bio))
return false;
return true;
}

View file

@ -113,6 +113,7 @@ void blk_set_default_limits(struct queue_limits *lim)
lim->seg_boundary_mask = BLK_SEG_BOUNDARY_MASK;
lim->max_segment_size = BLK_MAX_SEGMENT_SIZE;
lim->max_sectors = lim->max_hw_sectors = BLK_SAFE_MAX_SECTORS;
lim->max_write_same_sectors = 0;
lim->max_discard_sectors = 0;
lim->discard_granularity = 0;
lim->discard_alignment = 0;
@ -144,6 +145,7 @@ void blk_set_stacking_limits(struct queue_limits *lim)
lim->max_segments = USHRT_MAX;
lim->max_hw_sectors = UINT_MAX;
lim->max_sectors = UINT_MAX;
lim->max_write_same_sectors = UINT_MAX;
}
EXPORT_SYMBOL(blk_set_stacking_limits);
@ -285,6 +287,18 @@ void blk_queue_max_discard_sectors(struct request_queue *q,
}
EXPORT_SYMBOL(blk_queue_max_discard_sectors);
/**
* blk_queue_max_write_same_sectors - set max sectors for a single write same
* @q: the request queue for the device
* @max_write_same_sectors: maximum number of sectors to write per command
**/
void blk_queue_max_write_same_sectors(struct request_queue *q,
unsigned int max_write_same_sectors)
{
q->limits.max_write_same_sectors = max_write_same_sectors;
}
EXPORT_SYMBOL(blk_queue_max_write_same_sectors);
/**
* blk_queue_max_segments - set max hw segments for a request for this queue
* @q: the request queue for the device
@ -510,6 +524,8 @@ int blk_stack_limits(struct queue_limits *t, struct queue_limits *b,
t->max_sectors = min_not_zero(t->max_sectors, b->max_sectors);
t->max_hw_sectors = min_not_zero(t->max_hw_sectors, b->max_hw_sectors);
t->max_write_same_sectors = min(t->max_write_same_sectors,
b->max_write_same_sectors);
t->bounce_pfn = min_not_zero(t->bounce_pfn, b->bounce_pfn);
t->seg_boundary_mask = min_not_zero(t->seg_boundary_mask,

View file

@ -26,9 +26,15 @@ queue_var_show(unsigned long var, char *page)
static ssize_t
queue_var_store(unsigned long *var, const char *page, size_t count)
{
char *p = (char *) page;
int err;
unsigned long v;
err = strict_strtoul(page, 10, &v);
if (err || v > UINT_MAX)
return -EINVAL;
*var = v;
*var = simple_strtoul(p, &p, 10);
return count;
}
@ -48,6 +54,9 @@ queue_requests_store(struct request_queue *q, const char *page, size_t count)
return -EINVAL;
ret = queue_var_store(&nr, page, count);
if (ret < 0)
return ret;
if (nr < BLKDEV_MIN_RQ)
nr = BLKDEV_MIN_RQ;
@ -102,6 +111,9 @@ queue_ra_store(struct request_queue *q, const char *page, size_t count)
unsigned long ra_kb;
ssize_t ret = queue_var_store(&ra_kb, page, count);
if (ret < 0)
return ret;
q->backing_dev_info.ra_pages = ra_kb >> (PAGE_CACHE_SHIFT - 10);
return ret;
@ -168,6 +180,13 @@ static ssize_t queue_discard_zeroes_data_show(struct request_queue *q, char *pag
return queue_var_show(queue_discard_zeroes_data(q), page);
}
static ssize_t queue_write_same_max_show(struct request_queue *q, char *page)
{
return sprintf(page, "%llu\n",
(unsigned long long)q->limits.max_write_same_sectors << 9);
}
static ssize_t
queue_max_sectors_store(struct request_queue *q, const char *page, size_t count)
{
@ -176,6 +195,9 @@ queue_max_sectors_store(struct request_queue *q, const char *page, size_t count)
page_kb = 1 << (PAGE_CACHE_SHIFT - 10);
ssize_t ret = queue_var_store(&max_sectors_kb, page, count);
if (ret < 0)
return ret;
if (max_sectors_kb > max_hw_sectors_kb || max_sectors_kb < page_kb)
return -EINVAL;
@ -236,6 +258,9 @@ static ssize_t queue_nomerges_store(struct request_queue *q, const char *page,
unsigned long nm;
ssize_t ret = queue_var_store(&nm, page, count);
if (ret < 0)
return ret;
spin_lock_irq(q->queue_lock);
queue_flag_clear(QUEUE_FLAG_NOMERGES, q);
queue_flag_clear(QUEUE_FLAG_NOXMERGES, q);
@ -264,6 +289,9 @@ queue_rq_affinity_store(struct request_queue *q, const char *page, size_t count)
unsigned long val;
ret = queue_var_store(&val, page, count);
if (ret < 0)
return ret;
spin_lock_irq(q->queue_lock);
if (val == 2) {
queue_flag_set(QUEUE_FLAG_SAME_COMP, q);
@ -364,6 +392,11 @@ static struct queue_sysfs_entry queue_discard_zeroes_data_entry = {
.show = queue_discard_zeroes_data_show,
};
static struct queue_sysfs_entry queue_write_same_max_entry = {
.attr = {.name = "write_same_max_bytes", .mode = S_IRUGO },
.show = queue_write_same_max_show,
};
static struct queue_sysfs_entry queue_nonrot_entry = {
.attr = {.name = "rotational", .mode = S_IRUGO | S_IWUSR },
.show = queue_show_nonrot,
@ -411,6 +444,7 @@ static struct attribute *default_attrs[] = {
&queue_discard_granularity_entry.attr,
&queue_discard_max_entry.attr,
&queue_discard_zeroes_data_entry.attr,
&queue_write_same_max_entry.attr,
&queue_nonrot_entry.attr,
&queue_nomerges_entry.attr,
&queue_rq_affinity_entry.attr,
@ -527,6 +561,12 @@ int blk_register_queue(struct gendisk *disk)
if (WARN_ON(!q))
return -ENXIO;
/*
* Initialization must be complete by now. Finish the initial
* bypass from queue allocation.
*/
blk_queue_bypass_end(q);
ret = blk_trace_init_sysfs(dev);
if (ret)
return ret;

View file

@ -186,7 +186,8 @@ int blk_queue_init_tags(struct request_queue *q, int depth,
tags = __blk_queue_init_tags(q, depth);
if (!tags)
goto fail;
return -ENOMEM;
} else if (q->queue_tags) {
rc = blk_queue_resize_tags(q, depth);
if (rc)
@ -203,9 +204,6 @@ int blk_queue_init_tags(struct request_queue *q, int depth,
queue_flag_set_unlocked(QUEUE_FLAG_QUEUED, q);
INIT_LIST_HEAD(&q->tag_busy_list);
return 0;
fail:
kfree(tags);
return -ENOMEM;
}
EXPORT_SYMBOL(blk_queue_init_tags);

View file

@ -171,14 +171,13 @@ static inline int queue_congestion_off_threshold(struct request_queue *q)
*
* a) it's attached to a gendisk, and
* b) the queue had IO stats enabled when this request was started, and
* c) it's a file system request or a discard request
* c) it's a file system request
*/
static inline int blk_do_io_stat(struct request *rq)
{
return rq->rq_disk &&
(rq->cmd_flags & REQ_IO_STAT) &&
(rq->cmd_type == REQ_TYPE_FS ||
(rq->cmd_flags & REQ_DISCARD));
(rq->cmd_type == REQ_TYPE_FS);
}
/*

View file

@ -562,8 +562,7 @@ void __elv_add_request(struct request_queue *q, struct request *rq, int where)
if (rq->cmd_flags & REQ_SOFTBARRIER) {
/* barriers are scheduling boundary, update end_sector */
if (rq->cmd_type == REQ_TYPE_FS ||
(rq->cmd_flags & REQ_DISCARD)) {
if (rq->cmd_type == REQ_TYPE_FS) {
q->end_sector = rq_end_sector(rq);
q->boundary_rq = rq;
}
@ -605,8 +604,7 @@ void __elv_add_request(struct request_queue *q, struct request *rq, int where)
if (elv_attempt_insert_merge(q, rq))
break;
case ELEVATOR_INSERT_SORT:
BUG_ON(rq->cmd_type != REQ_TYPE_FS &&
!(rq->cmd_flags & REQ_DISCARD));
BUG_ON(rq->cmd_type != REQ_TYPE_FS);
rq->cmd_flags |= REQ_SORTED;
q->nr_sorted++;
if (rq_mergeable(rq)) {

View file

@ -185,6 +185,22 @@ static int blk_ioctl_discard(struct block_device *bdev, uint64_t start,
return blkdev_issue_discard(bdev, start, len, GFP_KERNEL, flags);
}
static int blk_ioctl_zeroout(struct block_device *bdev, uint64_t start,
uint64_t len)
{
if (start & 511)
return -EINVAL;
if (len & 511)
return -EINVAL;
start >>= 9;
len >>= 9;
if (start + len > (i_size_read(bdev->bd_inode) >> 9))
return -EINVAL;
return blkdev_issue_zeroout(bdev, start, len, GFP_KERNEL);
}
static int put_ushort(unsigned long arg, unsigned short val)
{
return put_user(val, (unsigned short __user *)arg);
@ -300,6 +316,17 @@ int blkdev_ioctl(struct block_device *bdev, fmode_t mode, unsigned cmd,
return blk_ioctl_discard(bdev, range[0], range[1],
cmd == BLKSECDISCARD);
}
case BLKZEROOUT: {
uint64_t range[2];
if (!(mode & FMODE_WRITE))
return -EBADF;
if (copy_from_user(range, (void __user *)arg, sizeof(range)))
return -EFAULT;
return blk_ioctl_zeroout(bdev, range[0], range[1]);
}
case HDIO_GETGEO: {
struct hd_geometry geo;