backport: block: fix silent corruption in Linux kernel 4.15
reproducer: https://www.spinics.net/lists/linux-block/msg28507.html
ubuntu bugreport: https://bugs.launchpad.net/ubuntu/+source/linux/+bug/1796542

Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
This commit is contained in:
parent
9929833ca3
commit
dbb1ed6d87
4 changed files with 403 additions and 0 deletions
|
@ -0,0 +1,178 @@
|
|||
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
|
||||
From: Christoph Hellwig <hch@lst.de>
|
||||
Date: Tue, 9 Oct 2018 17:04:39 +0100
|
||||
Subject: [PATCH] block: add a lower-level bio_add_page interface
|
||||
|
||||
Buglink: https://bugs.launchpad.net/bugs/1796542
|
||||
|
||||
For the upcoming removal of buffer heads in XFS we need to keep track of
|
||||
the number of outstanding writeback requests per page. For this we need
|
||||
to know if bio_add_page merged a region with the previous bvec or not.
|
||||
Instead of adding additional arguments this refactors bio_add_page to
|
||||
be implemented using three lower level helpers which users like XFS can
|
||||
use directly if they care about the merge decisions.
|
||||
|
||||
Signed-off-by: Christoph Hellwig <hch@lst.de>
|
||||
Reviewed-by: Jens Axboe <axboe@kernel.dk>
|
||||
Reviewed-by: Ming Lei <ming.lei@redhat.com>
|
||||
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
|
||||
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
|
||||
(cherry picked from commit 0aa69fd32a5f766e997ca8ab4723c5a1146efa8b)
|
||||
Signed-off-by: Colin Ian King <colin.king@canonical.com>
|
||||
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
|
||||
---
|
||||
block/bio.c | 98 ++++++++++++++++++++++++++++++++++-------------------
|
||||
include/linux/bio.h | 9 +++++
|
||||
2 files changed, 73 insertions(+), 34 deletions(-)
|
||||
|
||||
diff --git a/block/bio.c b/block/bio.c
|
||||
index 4b48f8eefc4c..2636d15af979 100644
|
||||
--- a/block/bio.c
|
||||
+++ b/block/bio.c
|
||||
@@ -773,7 +773,7 @@ int bio_add_pc_page(struct request_queue *q, struct bio *bio, struct page
|
||||
return 0;
|
||||
}
|
||||
|
||||
- if (bio->bi_vcnt >= bio->bi_max_vecs)
|
||||
+ if (bio_full(bio))
|
||||
return 0;
|
||||
|
||||
/*
|
||||
@@ -821,6 +821,65 @@ int bio_add_pc_page(struct request_queue *q, struct bio *bio, struct page
|
||||
EXPORT_SYMBOL(bio_add_pc_page);
|
||||
|
||||
/**
|
||||
+ * __bio_try_merge_page - try appending data to an existing bvec.
|
||||
+ * @bio: destination bio
|
||||
+ * @page: page to add
|
||||
+ * @len: length of the data to add
|
||||
+ * @off: offset of the data in @page
|
||||
+ *
|
||||
+ * Try to add the data at @page + @off to the last bvec of @bio. This is a
|
||||
+ * useful optimisation for file systems with a block size smaller than the
|
||||
+ * page size.
|
||||
+ *
|
||||
+ * Return %true on success or %false on failure.
|
||||
+ */
|
||||
+bool __bio_try_merge_page(struct bio *bio, struct page *page,
|
||||
+ unsigned int len, unsigned int off)
|
||||
+{
|
||||
+ if (WARN_ON_ONCE(bio_flagged(bio, BIO_CLONED)))
|
||||
+ return false;
|
||||
+
|
||||
+ if (bio->bi_vcnt > 0) {
|
||||
+ struct bio_vec *bv = &bio->bi_io_vec[bio->bi_vcnt - 1];
|
||||
+
|
||||
+ if (page == bv->bv_page && off == bv->bv_offset + bv->bv_len) {
|
||||
+ bv->bv_len += len;
|
||||
+ bio->bi_iter.bi_size += len;
|
||||
+ return true;
|
||||
+ }
|
||||
+ }
|
||||
+ return false;
|
||||
+}
|
||||
+EXPORT_SYMBOL_GPL(__bio_try_merge_page);
|
||||
+
|
||||
+/**
|
||||
+ * __bio_add_page - add page to a bio in a new segment
|
||||
+ * @bio: destination bio
|
||||
+ * @page: page to add
|
||||
+ * @len: length of the data to add
|
||||
+ * @off: offset of the data in @page
|
||||
+ *
|
||||
+ * Add the data at @page + @off to @bio as a new bvec. The caller must ensure
|
||||
+ * that @bio has space for another bvec.
|
||||
+ */
|
||||
+void __bio_add_page(struct bio *bio, struct page *page,
|
||||
+ unsigned int len, unsigned int off)
|
||||
+{
|
||||
+ struct bio_vec *bv = &bio->bi_io_vec[bio->bi_vcnt];
|
||||
+
|
||||
+ WARN_ON_ONCE(bio_flagged(bio, BIO_CLONED));
|
||||
+ WARN_ON_ONCE(bio_full(bio));
|
||||
+
|
||||
+ bv->bv_page = page;
|
||||
+ bv->bv_offset = off;
|
||||
+ bv->bv_len = len;
|
||||
+
|
||||
+ bio->bi_iter.bi_size += len;
|
||||
+ bio->bi_vcnt++;
|
||||
+}
|
||||
+EXPORT_SYMBOL_GPL(__bio_add_page);
|
||||
+
|
||||
+/**
|
||||
* bio_add_page - attempt to add page to bio
|
||||
* @bio: destination bio
|
||||
* @page: page to add
|
||||
@@ -833,40 +892,11 @@ EXPORT_SYMBOL(bio_add_pc_page);
|
||||
int bio_add_page(struct bio *bio, struct page *page,
|
||||
unsigned int len, unsigned int offset)
|
||||
{
|
||||
- struct bio_vec *bv;
|
||||
-
|
||||
- /*
|
||||
- * cloned bio must not modify vec list
|
||||
- */
|
||||
- if (WARN_ON_ONCE(bio_flagged(bio, BIO_CLONED)))
|
||||
- return 0;
|
||||
-
|
||||
- /*
|
||||
- * For filesystems with a blocksize smaller than the pagesize
|
||||
- * we will often be called with the same page as last time and
|
||||
- * a consecutive offset. Optimize this special case.
|
||||
- */
|
||||
- if (bio->bi_vcnt > 0) {
|
||||
- bv = &bio->bi_io_vec[bio->bi_vcnt - 1];
|
||||
-
|
||||
- if (page == bv->bv_page &&
|
||||
- offset == bv->bv_offset + bv->bv_len) {
|
||||
- bv->bv_len += len;
|
||||
- goto done;
|
||||
- }
|
||||
+ if (!__bio_try_merge_page(bio, page, len, offset)) {
|
||||
+ if (bio_full(bio))
|
||||
+ return 0;
|
||||
+ __bio_add_page(bio, page, len, offset);
|
||||
}
|
||||
-
|
||||
- if (bio->bi_vcnt >= bio->bi_max_vecs)
|
||||
- return 0;
|
||||
-
|
||||
- bv = &bio->bi_io_vec[bio->bi_vcnt];
|
||||
- bv->bv_page = page;
|
||||
- bv->bv_len = len;
|
||||
- bv->bv_offset = offset;
|
||||
-
|
||||
- bio->bi_vcnt++;
|
||||
-done:
|
||||
- bio->bi_iter.bi_size += len;
|
||||
return len;
|
||||
}
|
||||
EXPORT_SYMBOL(bio_add_page);
|
||||
diff --git a/include/linux/bio.h b/include/linux/bio.h
|
||||
index a98c6ac575cf..3440870712d4 100644
|
||||
--- a/include/linux/bio.h
|
||||
+++ b/include/linux/bio.h
|
||||
@@ -123,6 +123,11 @@ static inline void *bio_data(struct bio *bio)
|
||||
return NULL;
|
||||
}
|
||||
|
||||
+static inline bool bio_full(struct bio *bio)
|
||||
+{
|
||||
+ return bio->bi_vcnt >= bio->bi_max_vecs;
|
||||
+}
|
||||
+
|
||||
/*
|
||||
* will die
|
||||
*/
|
||||
@@ -447,6 +452,10 @@ void bio_chain(struct bio *, struct bio *);
|
||||
extern int bio_add_page(struct bio *, struct page *, unsigned int,unsigned int);
|
||||
extern int bio_add_pc_page(struct request_queue *, struct bio *, struct page *,
|
||||
unsigned int, unsigned int);
|
||||
+bool __bio_try_merge_page(struct bio *bio, struct page *page,
|
||||
+ unsigned int len, unsigned int off);
|
||||
+void __bio_add_page(struct bio *bio, struct page *page,
|
||||
+ unsigned int len, unsigned int off);
|
||||
int bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter);
|
||||
struct rq_map_data;
|
||||
extern struct bio *bio_map_user_iov(struct request_queue *,
|
|
@ -0,0 +1,77 @@
|
|||
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
|
||||
From: Martin Wilck <mwilck@suse.com>
|
||||
Date: Tue, 9 Oct 2018 17:04:40 +0100
|
||||
Subject: [PATCH] block: bio_iov_iter_get_pages: fix size of last iovec
|
||||
|
||||
Buglink: https://bugs.launchpad.net/bugs/1796542
|
||||
|
||||
If the last page of the bio is not "full", the length of the last
|
||||
vector slot needs to be corrected. This slot has the index
|
||||
(bio->bi_vcnt - 1), but only in bio->bi_io_vec. In the "bv" helper
|
||||
array, which is shifted by the value of bio->bi_vcnt at function
|
||||
invocation, the correct index is (nr_pages - 1).
|
||||
|
||||
v2: improved readability following suggestions from Ming Lei.
|
||||
v3: followed a formatting suggestion from Christoph Hellwig.
|
||||
|
||||
Fixes: 2cefe4dbaadf ("block: add bio_iov_iter_get_pages()")
|
||||
Reviewed-by: Hannes Reinecke <hare@suse.com>
|
||||
Reviewed-by: Ming Lei <ming.lei@redhat.com>
|
||||
Reviewed-by: Jan Kara <jack@suse.cz>
|
||||
Reviewed-by: Christoph Hellwig <hch@lst.de>
|
||||
Signed-off-by: Martin Wilck <mwilck@suse.com>
|
||||
Signed-off-by: Jens Axboe <axboe@kernel.dk>
|
||||
(cherry picked from commit b403ea2404889e1227812fa9657667a1deb9c694)
|
||||
Signed-off-by: Colin Ian King <colin.king@canonical.com>
|
||||
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
|
||||
---
|
||||
block/bio.c | 18 ++++++++----------
|
||||
1 file changed, 8 insertions(+), 10 deletions(-)
|
||||
|
||||
diff --git a/block/bio.c b/block/bio.c
|
||||
index 2636d15af979..d76372a6a5fe 100644
|
||||
--- a/block/bio.c
|
||||
+++ b/block/bio.c
|
||||
@@ -911,16 +911,16 @@ EXPORT_SYMBOL(bio_add_page);
|
||||
*/
|
||||
int bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter)
|
||||
{
|
||||
- unsigned short nr_pages = bio->bi_max_vecs - bio->bi_vcnt;
|
||||
+ unsigned short nr_pages = bio->bi_max_vecs - bio->bi_vcnt, idx;
|
||||
struct bio_vec *bv = bio->bi_io_vec + bio->bi_vcnt;
|
||||
struct page **pages = (struct page **)bv;
|
||||
- size_t offset, diff;
|
||||
+ size_t offset;
|
||||
ssize_t size;
|
||||
|
||||
size = iov_iter_get_pages(iter, pages, LONG_MAX, nr_pages, &offset);
|
||||
if (unlikely(size <= 0))
|
||||
return size ? size : -EFAULT;
|
||||
- nr_pages = (size + offset + PAGE_SIZE - 1) / PAGE_SIZE;
|
||||
+ idx = nr_pages = (size + offset + PAGE_SIZE - 1) / PAGE_SIZE;
|
||||
|
||||
/*
|
||||
* Deep magic below: We need to walk the pinned pages backwards
|
||||
@@ -933,17 +933,15 @@ int bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter)
|
||||
bio->bi_iter.bi_size += size;
|
||||
bio->bi_vcnt += nr_pages;
|
||||
|
||||
- diff = (nr_pages * PAGE_SIZE - offset) - size;
|
||||
- while (nr_pages--) {
|
||||
- bv[nr_pages].bv_page = pages[nr_pages];
|
||||
- bv[nr_pages].bv_len = PAGE_SIZE;
|
||||
- bv[nr_pages].bv_offset = 0;
|
||||
+ while (idx--) {
|
||||
+ bv[idx].bv_page = pages[idx];
|
||||
+ bv[idx].bv_len = PAGE_SIZE;
|
||||
+ bv[idx].bv_offset = 0;
|
||||
}
|
||||
|
||||
bv[0].bv_offset += offset;
|
||||
bv[0].bv_len -= offset;
|
||||
- if (diff)
|
||||
- bv[bio->bi_vcnt - 1].bv_len -= diff;
|
||||
+ bv[nr_pages - 1].bv_len -= nr_pages * PAGE_SIZE - offset - size;
|
||||
|
||||
iov_iter_advance(iter, size);
|
||||
return 0;
|
|
@ -0,0 +1,50 @@
|
|||
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
|
||||
From: Martin Wilck <mwilck@suse.com>
|
||||
Date: Tue, 9 Oct 2018 17:04:41 +0100
|
||||
Subject: [PATCH] blkdev: __blkdev_direct_IO_simple: fix leak in error case
|
||||
|
||||
Buglink: https://bugs.launchpad.net/bugs/1796542
|
||||
|
||||
Fixes: 72ecad22d9f1 ("block: support a full bio worth of IO for simplified bdev direct-io")
|
||||
Reviewed-by: Ming Lei <ming.lei@redhat.com>
|
||||
Reviewed-by: Hannes Reinecke <hare@suse.com>
|
||||
Reviewed-by: Christoph Hellwig <hch@lst.de>
|
||||
Signed-off-by: Martin Wilck <mwilck@suse.com>
|
||||
Signed-off-by: Jens Axboe <axboe@kernel.dk>
|
||||
(cherry picked from commit 9362dd1109f87a9d0a798fbc890cb339c171ed35)
|
||||
Signed-off-by: Colin Ian King <colin.king@canonical.com>
|
||||
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
|
||||
---
|
||||
fs/block_dev.c | 9 +++++----
|
||||
1 file changed, 5 insertions(+), 4 deletions(-)
|
||||
|
||||
diff --git a/fs/block_dev.c b/fs/block_dev.c
|
||||
index 82c823ef06a6..74b4ae9b7ba0 100644
|
||||
--- a/fs/block_dev.c
|
||||
+++ b/fs/block_dev.c
|
||||
@@ -219,7 +219,7 @@ __blkdev_direct_IO_simple(struct kiocb *iocb, struct iov_iter *iter,
|
||||
|
||||
ret = bio_iov_iter_get_pages(&bio, iter);
|
||||
if (unlikely(ret))
|
||||
- return ret;
|
||||
+ goto out;
|
||||
ret = bio.bi_iter.bi_size;
|
||||
|
||||
if (iov_iter_rw(iter) == READ) {
|
||||
@@ -248,12 +248,13 @@ __blkdev_direct_IO_simple(struct kiocb *iocb, struct iov_iter *iter,
|
||||
put_page(bvec->bv_page);
|
||||
}
|
||||
|
||||
- if (vecs != inline_vecs)
|
||||
- kfree(vecs);
|
||||
-
|
||||
if (unlikely(bio.bi_status))
|
||||
ret = blk_status_to_errno(bio.bi_status);
|
||||
|
||||
+out:
|
||||
+ if (vecs != inline_vecs)
|
||||
+ kfree(vecs);
|
||||
+
|
||||
bio_uninit(&bio);
|
||||
|
||||
return ret;
|
|
@ -0,0 +1,98 @@
|
|||
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
|
||||
From: Martin Wilck <mwilck@suse.com>
|
||||
Date: Tue, 9 Oct 2018 17:04:42 +0100
|
||||
Subject: [PATCH] block: bio_iov_iter_get_pages: pin more pages for
|
||||
multi-segment IOs
|
||||
|
||||
Buglink: https://bugs.launchpad.net/bugs/1796542
|
||||
|
||||
bio_iov_iter_get_pages() currently only adds pages for the next non-zero
|
||||
segment from the iov_iter to the bio. That's suboptimal for callers,
|
||||
which typically try to pin as many pages as fit into the bio. This patch
|
||||
converts the current bio_iov_iter_get_pages() into a static helper, and
|
||||
introduces a new helper that allocates as many pages as
|
||||
|
||||
1) fit into the bio,
|
||||
2) are present in the iov_iter,
|
||||
3) and can be pinned by MM.
|
||||
|
||||
Error is returned only if zero pages could be pinned. Because of 3), a
|
||||
zero return value doesn't necessarily mean all pages have been pinned.
|
||||
Callers that have to pin every page in the iov_iter must still call this
|
||||
function in a loop (this is currently the case).
|
||||
|
||||
This change matters most for __blkdev_direct_IO_simple(), which calls
|
||||
bio_iov_iter_get_pages() only once. If it obtains fewer pages than
|
||||
requested, it returns a "short write" or "short read", and
|
||||
__generic_file_write_iter() falls back to buffered writes, which may
|
||||
lead to data corruption.
|
||||
|
||||
Fixes: 72ecad22d9f1 ("block: support a full bio worth of IO for simplified bdev direct-io")
|
||||
Reviewed-by: Christoph Hellwig <hch@lst.de>
|
||||
Signed-off-by: Martin Wilck <mwilck@suse.com>
|
||||
Signed-off-by: Jens Axboe <axboe@kernel.dk>
|
||||
(cherry picked from commit 17d51b10d7773e4618bcac64648f30f12d4078fb)
|
||||
Signed-off-by: Colin Ian King <colin.king@canonical.com>
|
||||
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
|
||||
---
|
||||
block/bio.c | 35 ++++++++++++++++++++++++++++++++---
|
||||
1 file changed, 32 insertions(+), 3 deletions(-)
|
||||
|
||||
diff --git a/block/bio.c b/block/bio.c
|
||||
index d76372a6a5fe..415c65b9c590 100644
|
||||
--- a/block/bio.c
|
||||
+++ b/block/bio.c
|
||||
@@ -902,14 +902,16 @@ int bio_add_page(struct bio *bio, struct page *page,
|
||||
EXPORT_SYMBOL(bio_add_page);
|
||||
|
||||
/**
|
||||
- * bio_iov_iter_get_pages - pin user or kernel pages and add them to a bio
|
||||
+ * __bio_iov_iter_get_pages - pin user or kernel pages and add them to a bio
|
||||
* @bio: bio to add pages to
|
||||
* @iter: iov iterator describing the region to be mapped
|
||||
*
|
||||
- * Pins as many pages from *iter and appends them to @bio's bvec array. The
|
||||
+ * Pins pages from *iter and appends them to @bio's bvec array. The
|
||||
* pages will have to be released using put_page() when done.
|
||||
+ * For multi-segment *iter, this function only adds pages from the
|
||||
+ * next non-empty segment of the iov iterator.
|
||||
*/
|
||||
-int bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter)
|
||||
+static int __bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter)
|
||||
{
|
||||
unsigned short nr_pages = bio->bi_max_vecs - bio->bi_vcnt, idx;
|
||||
struct bio_vec *bv = bio->bi_io_vec + bio->bi_vcnt;
|
||||
@@ -946,6 +948,33 @@ int bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter)
|
||||
iov_iter_advance(iter, size);
|
||||
return 0;
|
||||
}
|
||||
+
|
||||
+/**
|
||||
+ * bio_iov_iter_get_pages - pin user or kernel pages and add them to a bio
|
||||
+ * @bio: bio to add pages to
|
||||
+ * @iter: iov iterator describing the region to be mapped
|
||||
+ *
|
||||
+ * Pins pages from *iter and appends them to @bio's bvec array. The
|
||||
+ * pages will have to be released using put_page() when done.
|
||||
+ * The function tries, but does not guarantee, to pin as many pages as
|
||||
+ * fit into the bio, or are requested in *iter, whichever is smaller.
|
||||
+ * If MM encounters an error pinning the requested pages, it stops.
|
||||
+ * Error is returned only if 0 pages could be pinned.
|
||||
+ */
|
||||
+int bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter)
|
||||
+{
|
||||
+ unsigned short orig_vcnt = bio->bi_vcnt;
|
||||
+
|
||||
+ do {
|
||||
+ int ret = __bio_iov_iter_get_pages(bio, iter);
|
||||
+
|
||||
+ if (unlikely(ret))
|
||||
+ return bio->bi_vcnt > orig_vcnt ? 0 : ret;
|
||||
+
|
||||
+ } while (iov_iter_count(iter) && !bio_full(bio));
|
||||
+
|
||||
+ return 0;
|
||||
+}
|
||||
EXPORT_SYMBOL_GPL(bio_iov_iter_get_pages);
|
||||
|
||||
static void submit_bio_wait_endio(struct bio *bio)
|
Loading…
Reference in a new issue