Merge branch 'for-3.8/core' of git://git.kernel.dk/linux-block

Pull block layer core updates from Jens Axboe:
 "Here are the core block IO bits for 3.8.  The branch contains:

   - The final version of the surprise device removal fixups from Bart.

   - Don't hide EFI partitions under advanced partition types.  It's
     fairly wide spread these days.  This is especially dangerous for
     systems that have both msdos and efi partition tables, where you
     want to keep them in sync.

   - Cleanup of using -1 instead of the proper NUMA_NO_NODE

   - Export control of bdi flusher thread CPU mask and default to using
     the home node (if known) from Jeff.

   - Export unplug tracepoint for MD.

   - Core improvements from Shaohua.  Reinstate the recursive merge, as
     the original bug has been fixed.  Add plugging for discard and also
     fix a problem handling non pow-of-2 discard limits.

  There's a trivial merge in block/blk-exec.c due to a fix that went
  into 3.7-rc at a later point than -rc4 where this is based."

* 'for-3.8/core' of git://git.kernel.dk/linux-block:
  block: export block_unplug tracepoint
  block: add plug for blkdev_issue_discard
  block: discard granularity might not be power of 2
  deadline: Allow 0ms deadline latency, increase the read speed
  partitions: enable EFI/GPT support by default
  bsg: Remove unused function bsg_goose_queue()
  block: Make blk_cleanup_queue() wait until request_fn finished
  block: Avoid scheduling delayed work on a dead queue
  block: Avoid that request_fn is invoked on a dead queue
  block: Let blk_drain_queue() caller obtain the queue lock
  block: Rename queue dead flag
  bdi: add a user-tunable cpu_list for the bdi flusher threads
  block: use NUMA_NO_NODE instead of -1
  block: recursive merge requests
  block CFQ: avoid moving request to different queue
This commit is contained in:
Linus Torvalds 2012-12-17 08:27:23 -08:00
commit 60da5bf47d
19 changed files with 224 additions and 99 deletions

View file

@ -231,7 +231,7 @@ struct blkcg_gq *blkg_lookup_create(struct blkcg *blkcg,
* we shouldn't allow anything to go through for a bypassing queue.
*/
if (unlikely(blk_queue_bypass(q)))
return ERR_PTR(blk_queue_dead(q) ? -EINVAL : -EBUSY);
return ERR_PTR(blk_queue_dying(q) ? -EINVAL : -EBUSY);
return __blkg_lookup_create(blkcg, q, NULL);
}
EXPORT_SYMBOL_GPL(blkg_lookup_create);

View file

@ -40,6 +40,7 @@
EXPORT_TRACEPOINT_SYMBOL_GPL(block_bio_remap);
EXPORT_TRACEPOINT_SYMBOL_GPL(block_rq_remap);
EXPORT_TRACEPOINT_SYMBOL_GPL(block_bio_complete);
EXPORT_TRACEPOINT_SYMBOL_GPL(block_unplug);
DEFINE_IDA(blk_queue_ida);
@ -219,12 +220,13 @@ static void blk_delay_work(struct work_struct *work)
* Description:
* Sometimes queueing needs to be postponed for a little while, to allow
* resources to come back. This function will make sure that queueing is
* restarted around the specified time.
* restarted around the specified time. Queue lock must be held.
*/
void blk_delay_queue(struct request_queue *q, unsigned long msecs)
{
queue_delayed_work(kblockd_workqueue, &q->delay_work,
msecs_to_jiffies(msecs));
if (likely(!blk_queue_dead(q)))
queue_delayed_work(kblockd_workqueue, &q->delay_work,
msecs_to_jiffies(msecs));
}
EXPORT_SYMBOL(blk_delay_queue);
@ -292,6 +294,34 @@ void blk_sync_queue(struct request_queue *q)
}
EXPORT_SYMBOL(blk_sync_queue);
/**
* __blk_run_queue_uncond - run a queue whether or not it has been stopped
* @q: The queue to run
*
* Description:
* Invoke request handling on a queue if there are any pending requests.
* May be used to restart request handling after a request has completed.
* This variant runs the queue whether or not the queue has been
* stopped. Must be called with the queue lock held and interrupts
* disabled. See also @blk_run_queue.
*/
inline void __blk_run_queue_uncond(struct request_queue *q)
{
if (unlikely(blk_queue_dead(q)))
return;
/*
* Some request_fn implementations, e.g. scsi_request_fn(), unlock
* the queue lock internally. As a result multiple threads may be
* running such a request function concurrently. Keep track of the
* number of active request_fn invocations such that blk_drain_queue()
* can wait until all these request_fn calls have finished.
*/
q->request_fn_active++;
q->request_fn(q);
q->request_fn_active--;
}
/**
* __blk_run_queue - run a single device queue
* @q: The queue to run
@ -305,7 +335,7 @@ void __blk_run_queue(struct request_queue *q)
if (unlikely(blk_queue_stopped(q)))
return;
q->request_fn(q);
__blk_run_queue_uncond(q);
}
EXPORT_SYMBOL(__blk_run_queue);
@ -315,11 +345,11 @@ EXPORT_SYMBOL(__blk_run_queue);
*
* Description:
* Tells kblockd to perform the equivalent of @blk_run_queue on behalf
* of us.
* of us. The caller must hold the queue lock.
*/
void blk_run_queue_async(struct request_queue *q)
{
if (likely(!blk_queue_stopped(q)))
if (likely(!blk_queue_stopped(q) && !blk_queue_dead(q)))
mod_delayed_work(kblockd_workqueue, &q->delay_work, 0);
}
EXPORT_SYMBOL(blk_run_queue_async);
@ -349,7 +379,7 @@ void blk_put_queue(struct request_queue *q)
EXPORT_SYMBOL(blk_put_queue);
/**
* blk_drain_queue - drain requests from request_queue
* __blk_drain_queue - drain requests from request_queue
* @q: queue to drain
* @drain_all: whether to drain all requests or only the ones w/ ELVPRIV
*
@ -357,15 +387,17 @@ EXPORT_SYMBOL(blk_put_queue);
* If not, only ELVPRIV requests are drained. The caller is responsible
* for ensuring that no new requests which need to be drained are queued.
*/
void blk_drain_queue(struct request_queue *q, bool drain_all)
static void __blk_drain_queue(struct request_queue *q, bool drain_all)
__releases(q->queue_lock)
__acquires(q->queue_lock)
{
int i;
lockdep_assert_held(q->queue_lock);
while (true) {
bool drain = false;
spin_lock_irq(q->queue_lock);
/*
* The caller might be trying to drain @q before its
* elevator is initialized.
@ -386,6 +418,7 @@ void blk_drain_queue(struct request_queue *q, bool drain_all)
__blk_run_queue(q);
drain |= q->nr_rqs_elvpriv;
drain |= q->request_fn_active;
/*
* Unfortunately, requests are queued at and tracked from
@ -401,11 +434,14 @@ void blk_drain_queue(struct request_queue *q, bool drain_all)
}
}
spin_unlock_irq(q->queue_lock);
if (!drain)
break;
spin_unlock_irq(q->queue_lock);
msleep(10);
spin_lock_irq(q->queue_lock);
}
/*
@ -416,13 +452,9 @@ void blk_drain_queue(struct request_queue *q, bool drain_all)
if (q->request_fn) {
struct request_list *rl;
spin_lock_irq(q->queue_lock);
blk_queue_for_each_rl(rl, q)
for (i = 0; i < ARRAY_SIZE(rl->wait); i++)
wake_up_all(&rl->wait[i]);
spin_unlock_irq(q->queue_lock);
}
}
@ -446,7 +478,10 @@ void blk_queue_bypass_start(struct request_queue *q)
spin_unlock_irq(q->queue_lock);
if (drain) {
blk_drain_queue(q, false);
spin_lock_irq(q->queue_lock);
__blk_drain_queue(q, false);
spin_unlock_irq(q->queue_lock);
/* ensure blk_queue_bypass() is %true inside RCU read lock */
synchronize_rcu();
}
@ -473,20 +508,20 @@ EXPORT_SYMBOL_GPL(blk_queue_bypass_end);
* blk_cleanup_queue - shutdown a request queue
* @q: request queue to shutdown
*
* Mark @q DEAD, drain all pending requests, destroy and put it. All
* future requests will be failed immediately with -ENODEV.
* Mark @q DYING, drain all pending requests, mark @q DEAD, destroy and
* put it. All future requests will be failed immediately with -ENODEV.
*/
void blk_cleanup_queue(struct request_queue *q)
{
spinlock_t *lock = q->queue_lock;
/* mark @q DEAD, no new request or merges will be allowed afterwards */
/* mark @q DYING, no new request or merges will be allowed afterwards */
mutex_lock(&q->sysfs_lock);
queue_flag_set_unlocked(QUEUE_FLAG_DEAD, q);
queue_flag_set_unlocked(QUEUE_FLAG_DYING, q);
spin_lock_irq(lock);
/*
* Dead queue is permanently in bypass mode till released. Note
* A dying queue is permanently in bypass mode till released. Note
* that, unlike blk_queue_bypass_start(), we aren't performing
* synchronize_rcu() after entering bypass mode to avoid the delay
* as some drivers create and destroy a lot of queues while
@ -499,12 +534,18 @@ void blk_cleanup_queue(struct request_queue *q)
queue_flag_set(QUEUE_FLAG_NOMERGES, q);
queue_flag_set(QUEUE_FLAG_NOXMERGES, q);
queue_flag_set(QUEUE_FLAG_DEAD, q);
queue_flag_set(QUEUE_FLAG_DYING, q);
spin_unlock_irq(lock);
mutex_unlock(&q->sysfs_lock);
/* drain all requests queued before DEAD marking */
blk_drain_queue(q, true);
/*
* Drain all requests queued before DYING marking. Set DEAD flag to
* prevent that q->request_fn() gets invoked after draining finished.
*/
spin_lock_irq(lock);
__blk_drain_queue(q, true);
queue_flag_set(QUEUE_FLAG_DEAD, q);
spin_unlock_irq(lock);
/* @q won't process any more request, flush async actions */
del_timer_sync(&q->backing_dev_info.laptop_mode_wb_timer);
@ -549,7 +590,7 @@ void blk_exit_rl(struct request_list *rl)
struct request_queue *blk_alloc_queue(gfp_t gfp_mask)
{
return blk_alloc_queue_node(gfp_mask, -1);
return blk_alloc_queue_node(gfp_mask, NUMA_NO_NODE);
}
EXPORT_SYMBOL(blk_alloc_queue);
@ -660,7 +701,7 @@ EXPORT_SYMBOL(blk_alloc_queue_node);
struct request_queue *blk_init_queue(request_fn_proc *rfn, spinlock_t *lock)
{
return blk_init_queue_node(rfn, lock, -1);
return blk_init_queue_node(rfn, lock, NUMA_NO_NODE);
}
EXPORT_SYMBOL(blk_init_queue);
@ -716,7 +757,7 @@ EXPORT_SYMBOL(blk_init_allocated_queue);
bool blk_get_queue(struct request_queue *q)
{
if (likely(!blk_queue_dead(q))) {
if (likely(!blk_queue_dying(q))) {
__blk_get_queue(q);
return true;
}
@ -870,7 +911,7 @@ static struct request *__get_request(struct request_list *rl, int rw_flags,
const bool is_sync = rw_is_sync(rw_flags) != 0;
int may_queue;
if (unlikely(blk_queue_dead(q)))
if (unlikely(blk_queue_dying(q)))
return NULL;
may_queue = elv_may_queue(q, rw_flags);
@ -1050,7 +1091,7 @@ retry:
if (rq)
return rq;
if (!(gfp_mask & __GFP_WAIT) || unlikely(blk_queue_dead(q))) {
if (!(gfp_mask & __GFP_WAIT) || unlikely(blk_queue_dying(q))) {
blk_put_rl(rl);
return NULL;
}
@ -1910,7 +1951,7 @@ int blk_insert_cloned_request(struct request_queue *q, struct request *rq)
return -EIO;
spin_lock_irqsave(q->queue_lock, flags);
if (unlikely(blk_queue_dead(q))) {
if (unlikely(blk_queue_dying(q))) {
spin_unlock_irqrestore(q->queue_lock, flags);
return -ENODEV;
}
@ -2884,27 +2925,11 @@ static void queue_unplugged(struct request_queue *q, unsigned int depth,
{
trace_block_unplug(q, depth, !from_schedule);
/*
* Don't mess with dead queue.
*/
if (unlikely(blk_queue_dead(q))) {
spin_unlock(q->queue_lock);
return;
}
/*
* If we are punting this to kblockd, then we can safely drop
* the queue_lock before waking kblockd (which needs to take
* this lock).
*/
if (from_schedule) {
spin_unlock(q->queue_lock);
if (from_schedule)
blk_run_queue_async(q);
} else {
else
__blk_run_queue(q);
spin_unlock(q->queue_lock);
}
spin_unlock(q->queue_lock);
}
static void flush_plug_callbacks(struct blk_plug *plug, bool from_schedule)
@ -2996,7 +3021,7 @@ void blk_flush_plug_list(struct blk_plug *plug, bool from_schedule)
/*
* Short-circuit if @q is dead
*/
if (unlikely(blk_queue_dead(q))) {
if (unlikely(blk_queue_dying(q))) {
__blk_end_request_all(rq, -ENODEV);
continue;
}

View file

@ -66,7 +66,7 @@ void blk_execute_rq_nowait(struct request_queue *q, struct gendisk *bd_disk,
spin_lock_irq(q->queue_lock);
if (unlikely(blk_queue_dead(q))) {
if (unlikely(blk_queue_dying(q))) {
rq->errors = -ENXIO;
if (rq->end_io)
rq->end_io(rq, rq->errors);
@ -78,7 +78,7 @@ void blk_execute_rq_nowait(struct request_queue *q, struct gendisk *bd_disk,
__blk_run_queue(q);
/* the queue is stopped so it won't be run */
if (is_pm_resume)
q->request_fn(q);
__blk_run_queue_uncond(q);
spin_unlock_irq(q->queue_lock);
}
EXPORT_SYMBOL_GPL(blk_execute_rq_nowait);

View file

@ -43,11 +43,12 @@ int blkdev_issue_discard(struct block_device *bdev, sector_t sector,
DECLARE_COMPLETION_ONSTACK(wait);
struct request_queue *q = bdev_get_queue(bdev);
int type = REQ_WRITE | REQ_DISCARD;
unsigned int max_discard_sectors;
unsigned int granularity, alignment, mask;
sector_t max_discard_sectors;
sector_t granularity, alignment;
struct bio_batch bb;
struct bio *bio;
int ret = 0;
struct blk_plug plug;
if (!q)
return -ENXIO;
@ -57,15 +58,16 @@ int blkdev_issue_discard(struct block_device *bdev, sector_t sector,
/* Zero-sector (unknown) and one-sector granularities are the same. */
granularity = max(q->limits.discard_granularity >> 9, 1U);
mask = granularity - 1;
alignment = (bdev_discard_alignment(bdev) >> 9) & mask;
alignment = bdev_discard_alignment(bdev) >> 9;
alignment = sector_div(alignment, granularity);
/*
* Ensure that max_discard_sectors is of the proper
* granularity, so that requests stay aligned after a split.
*/
max_discard_sectors = min(q->limits.max_discard_sectors, UINT_MAX >> 9);
max_discard_sectors = round_down(max_discard_sectors, granularity);
sector_div(max_discard_sectors, granularity);
max_discard_sectors *= granularity;
if (unlikely(!max_discard_sectors)) {
/* Avoid infinite loop below. Being cautious never hurts. */
return -EOPNOTSUPP;
@ -81,9 +83,10 @@ int blkdev_issue_discard(struct block_device *bdev, sector_t sector,
bb.flags = 1 << BIO_UPTODATE;
bb.wait = &wait;
blk_start_plug(&plug);
while (nr_sects) {
unsigned int req_sects;
sector_t end_sect;
sector_t end_sect, tmp;
bio = bio_alloc(gfp_mask, 1);
if (!bio) {
@ -98,10 +101,12 @@ int blkdev_issue_discard(struct block_device *bdev, sector_t sector,
* misaligned, stop the discard at the previous aligned sector.
*/
end_sect = sector + req_sects;
if (req_sects < nr_sects && (end_sect & mask) != alignment) {
end_sect =
round_down(end_sect - alignment, granularity)
+ alignment;
tmp = end_sect;
if (req_sects < nr_sects &&
sector_div(tmp, granularity) != alignment) {
end_sect = end_sect - alignment;
sector_div(end_sect, granularity);
end_sect = end_sect * granularity + alignment;
req_sects = end_sect - sector;
}
@ -117,6 +122,7 @@ int blkdev_issue_discard(struct block_device *bdev, sector_t sector,
atomic_inc(&bb.done);
submit_bio(type, bio);
}
blk_finish_plug(&plug);
/* Wait for bios in-flight */
if (!atomic_dec_and_test(&bb.done))

View file

@ -611,7 +611,7 @@ int blk_stack_limits(struct queue_limits *t, struct queue_limits *b,
bottom = b->discard_granularity + alignment;
/* Verify that top and bottom intervals line up */
if (max(top, bottom) & (min(top, bottom) - 1))
if ((max(top, bottom) % min(top, bottom)) != 0)
t->discard_misaligned = 1;
}
@ -619,8 +619,8 @@ int blk_stack_limits(struct queue_limits *t, struct queue_limits *b,
b->max_discard_sectors);
t->discard_granularity = max(t->discard_granularity,
b->discard_granularity);
t->discard_alignment = lcm(t->discard_alignment, alignment) &
(t->discard_granularity - 1);
t->discard_alignment = lcm(t->discard_alignment, alignment) %
t->discard_granularity;
}
return ret;

View file

@ -466,7 +466,7 @@ queue_attr_show(struct kobject *kobj, struct attribute *attr, char *page)
if (!entry->show)
return -EIO;
mutex_lock(&q->sysfs_lock);
if (blk_queue_dead(q)) {
if (blk_queue_dying(q)) {
mutex_unlock(&q->sysfs_lock);
return -ENOENT;
}
@ -488,7 +488,7 @@ queue_attr_store(struct kobject *kobj, struct attribute *attr,
q = container_of(kobj, struct request_queue, kobj);
mutex_lock(&q->sysfs_lock);
if (blk_queue_dead(q)) {
if (blk_queue_dying(q)) {
mutex_unlock(&q->sysfs_lock);
return -ENOENT;
}

View file

@ -302,7 +302,7 @@ static struct throtl_grp *throtl_lookup_create_tg(struct throtl_data *td,
/* if %NULL and @q is alive, fall back to root_tg */
if (!IS_ERR(blkg))
tg = blkg_to_tg(blkg);
else if (!blk_queue_dead(q))
else if (!blk_queue_dying(q))
tg = td_root_tg(td);
}

View file

@ -96,7 +96,7 @@ static inline struct request *__elv_next_request(struct request_queue *q)
q->flush_queue_delayed = 1;
return NULL;
}
if (unlikely(blk_queue_dead(q)) ||
if (unlikely(blk_queue_dying(q)) ||
!q->elevator->type->ops.elevator_dispatch_fn(q, 0))
return NULL;
}
@ -145,6 +145,8 @@ int blk_try_merge(struct request *rq, struct bio *bio);
void blk_queue_congestion_threshold(struct request_queue *q);
void __blk_run_queue_uncond(struct request_queue *q);
int blk_dev_init(void);

View file

@ -151,19 +151,6 @@ failjob_rls_job:
return -ENOMEM;
}
/*
* bsg_goose_queue - restart queue in case it was stopped
* @q: request q to be restarted
*/
void bsg_goose_queue(struct request_queue *q)
{
if (!q)
return;
blk_run_queue_async(q);
}
EXPORT_SYMBOL_GPL(bsg_goose_queue);
/**
* bsg_request_fn - generic handler for bsg requests
* @q: request queue to manage

View file

@ -1973,7 +1973,8 @@ cfq_merged_requests(struct request_queue *q, struct request *rq,
* reposition in fifo if next is older than rq
*/
if (!list_empty(&rq->queuelist) && !list_empty(&next->queuelist) &&
time_before(rq_fifo_time(next), rq_fifo_time(rq))) {
time_before(rq_fifo_time(next), rq_fifo_time(rq)) &&
cfqq == RQ_CFQQ(next)) {
list_move(&rq->queuelist, &next->queuelist);
rq_set_fifo_time(rq, rq_fifo_time(next));
}

View file

@ -230,7 +230,7 @@ static inline int deadline_check_fifo(struct deadline_data *dd, int ddir)
/*
* rq is expired!
*/
if (time_after(jiffies, rq_fifo_time(rq)))
if (time_after_eq(jiffies, rq_fifo_time(rq)))
return 1;
return 0;

View file

@ -458,6 +458,7 @@ static bool elv_attempt_insert_merge(struct request_queue *q,
struct request *rq)
{
struct request *__rq;
bool ret;
if (blk_queue_nomerges(q))
return false;
@ -471,14 +472,21 @@ static bool elv_attempt_insert_merge(struct request_queue *q,
if (blk_queue_noxmerges(q))
return false;
ret = false;
/*
* See if our hash lookup can find a potential backmerge.
*/
__rq = elv_rqhash_find(q, blk_rq_pos(rq));
if (__rq && blk_attempt_req_merge(q, __rq, rq))
return true;
while (1) {
__rq = elv_rqhash_find(q, blk_rq_pos(rq));
if (!__rq || !blk_attempt_req_merge(q, __rq, rq))
break;
return false;
/* The merged request could be merged with others, try again */
ret = true;
rq = __rq;
}
return ret;
}
void elv_merged_request(struct request_queue *q, struct request *rq, int type)

View file

@ -1245,7 +1245,7 @@ EXPORT_SYMBOL(blk_lookup_devt);
struct gendisk *alloc_disk(int minors)
{
return alloc_disk_node(minors, -1);
return alloc_disk_node(minors, NUMA_NO_NODE);
}
EXPORT_SYMBOL(alloc_disk);

View file

@ -234,8 +234,8 @@ config KARMA_PARTITION
uses a proprietary partition table.
config EFI_PARTITION
bool "EFI GUID Partition support"
depends on PARTITION_ADVANCED
bool "EFI GUID Partition support" if PARTITION_ADVANCED
default y
select CRC32
help
Say Y here if you would like to use hard disks under Linux which