From 1bae2175c212b1fc0d4e36705743626edb1addfe Mon Sep 17 00:00:00 2001
From: Johannes Weiner
Date: Thu, 8 Aug 2019 15:03:00 -0400
Subject: [PATCH] BACKPORT: block: annotate refault stalls from IO submission

psi tracks the time tasks wait for refaulting pages to become
uptodate, but it does not track the time spent submitting the IO. The
submission part can be significant if backing storage is contended or
when cgroup throttling (io.latency) is in effect - a lot of time is
spent in submit_bio(). In that case, we underreport memory pressure.

Annotate submit_bio() to account submission time as memory stall when
the bio is reading userspace workingset pages.

Tested-by: Suren Baghdasaryan
Signed-off-by: Johannes Weiner
Signed-off-by: Jens Axboe

(cherry picked from commit b8e24a9300b0836a9d39f6b20746766b3b81f1bd)

Conflicts:
	include/linux/blk_types.h

(1. Manually resolved BIO_WORKINGSET being definition instead of enum.)

Bug: 141131229
Test: boot and run act test suite
Change-Id: I99cef039844e219f1dc8196feead54b6f5fb26bb
Signed-off-by: Suren Baghdasaryan
---
 block/bio.c               |  3 +++
 block/blk-core.c          | 23 ++++++++++++++++++++++-
 include/linux/blk_types.h | 13 +++++++------
 3 files changed, 32 insertions(+), 7 deletions(-)

diff --git a/block/bio.c b/block/bio.c
index 3d757055305f..f3aca34df9e2 100644
--- a/block/bio.c
+++ b/block/bio.c
@@ -803,6 +803,9 @@ void __bio_add_page(struct bio *bio, struct page *page,
 
 	bio->bi_iter.bi_size += len;
 	bio->bi_vcnt++;
+
+	if (!bio_flagged(bio, BIO_WORKINGSET) && unlikely(PageWorkingset(page)))
+		bio_set_flag(bio, BIO_WORKINGSET);
 }
 EXPORT_SYMBOL_GPL(__bio_add_page);
 
diff --git a/block/blk-core.c b/block/blk-core.c
index 074ae9376189..7c9c882cf713 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -35,6 +35,7 @@
 #include
 #include
 #include
+#include <linux/psi.h>
 
 #define CREATE_TRACE_POINTS
 #include
@@ -2538,6 +2539,10 @@ EXPORT_SYMBOL_GPL(direct_make_request);
  */
 blk_qc_t submit_bio(struct bio *bio)
 {
+	bool workingset_read = false;
+	unsigned long pflags;
+	blk_qc_t ret;
+
 	/*
 	 * If it's a regular read/write or a barrier with data attached,
 	 * go through the normal accounting stuff before submission.
@@ -2553,6 +2558,8 @@ blk_qc_t submit_bio(struct bio *bio)
 		if (op_is_write(bio_op(bio))) {
 			count_vm_events(PGPGOUT, count);
 		} else {
+			if (bio_flagged(bio, BIO_WORKINGSET))
+				workingset_read = true;
 			task_io_account_read(bio->bi_iter.bi_size);
 			count_vm_events(PGPGIN, count);
 		}
@@ -2567,7 +2574,21 @@ blk_qc_t submit_bio(struct bio *bio)
 		}
 	}
 
-	return generic_make_request(bio);
+	/*
+	 * If we're reading data that is part of the userspace
+	 * workingset, count submission time as memory stall. When the
+	 * device is congested, or the submitting cgroup IO-throttled,
+	 * submission can be a significant part of overall IO time.
+	 */
+	if (workingset_read)
+		psi_memstall_enter(&pflags);
+
+	ret = generic_make_request(bio);
+
+	if (workingset_read)
+		psi_memstall_leave(&pflags);
+
+	return ret;
 }
 EXPORT_SYMBOL(submit_bio);
 
diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h
index f6dfb30737d8..5318e28e144f 100644
--- a/include/linux/blk_types.h
+++ b/include/linux/blk_types.h
@@ -220,14 +220,15 @@ struct bio {
 #define BIO_BOUNCED	3	/* bio is a bounce bio */
 #define BIO_USER_MAPPED 4	/* contains user pages */
 #define BIO_NULL_MAPPED 5	/* contains invalid user pages */
-#define BIO_QUIET	6	/* Make BIO Quiet */
-#define BIO_CHAIN	7	/* chained bio, ->bi_remaining in effect */
-#define BIO_REFFED	8	/* bio has elevated ->bi_cnt */
-#define BIO_THROTTLED	9	/* This bio has already been subjected to
+#define BIO_WORKINGSET	6	/* contains userspace workingset pages */
+#define BIO_QUIET	7	/* Make BIO Quiet */
+#define BIO_CHAIN	8	/* chained bio, ->bi_remaining in effect */
+#define BIO_REFFED	9	/* bio has elevated ->bi_cnt */
+#define BIO_THROTTLED	10	/* This bio has already been subjected to
 				 * throttling rules. Don't do it again. */
-#define BIO_TRACE_COMPLETION 10	/* bio_endio() should trace the final completion
+#define BIO_TRACE_COMPLETION 11	/* bio_endio() should trace the final completion
 				 * of this bio. */
-#define BIO_QUEUE_ENTERED 11	/* can use blk_queue_enter_live() */
+#define BIO_QUEUE_ENTERED 12	/* can use blk_queue_enter_live() */
 /* See BVEC_POOL_OFFSET below before adding new flags */