update to zfs 2.2.0

This commit is contained in:
root 2023-12-19 23:01:30 -05:00
parent eaf1584ae4
commit c409e42c2e
11 changed files with 406 additions and 55 deletions

View file

@ -6,5 +6,6 @@ zfs/0005-Enable-zed-emails.patch
zfs/0006-dont-symlink-zed-scripts.patch zfs/0006-dont-symlink-zed-scripts.patch
zfs/0007-Add-systemd-unit-for-importing-specific-pools.patch zfs/0007-Add-systemd-unit-for-importing-specific-pools.patch
zfs/0008-Patch-move-manpage-arcstat-1-to-arcstat-8.patch zfs/0008-Patch-move-manpage-arcstat-1-to-arcstat-8.patch
zfs/0009-arcstat-Fix-integer-division-with-python3.patch zfs/0009-arc-stat-summary-guard-access-to-l2arc-MFU-MRU-stats.patch
zfs/0010-arc-stat-summary-guard-access-to-l2arc-MFU-MRU-stats.patch zfs/0010-zvol-Remove-broken-blk-mq-optimization.patch
zfs/0011-Revert-zvol-Temporally-disable-blk-mq.patch

View file

@ -13,13 +13,13 @@ Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
1 file changed, 1 insertion(+), 1 deletion(-) 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/etc/systemd/system/zfs-zed.service.in b/etc/systemd/system/zfs-zed.service.in diff --git a/etc/systemd/system/zfs-zed.service.in b/etc/systemd/system/zfs-zed.service.in
index be80025a4..20ce8e632 100644 index be2fc6734..7606604ec 100644
--- a/etc/systemd/system/zfs-zed.service.in --- a/etc/systemd/system/zfs-zed.service.in
+++ b/etc/systemd/system/zfs-zed.service.in +++ b/etc/systemd/system/zfs-zed.service.in
@@ -4,7 +4,7 @@ Documentation=man:zed(8) @@ -5,7 +5,7 @@ ConditionPathIsDirectory=/sys/module/zfs
ConditionPathIsDirectory=/sys/module/zfs
[Service] [Service]
EnvironmentFile=-@initconfdir@/zfs
-ExecStart=@sbindir@/zed -F -ExecStart=@sbindir@/zed -F
+ExecStart=/usr/sbin/zed -F +ExecStart=/usr/sbin/zed -F
Restart=always Restart=always

View file

@ -14,13 +14,13 @@ Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
1 file changed, 1 insertion(+), 1 deletion(-) 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/etc/systemd/system/zfs-import-scan.service.in b/etc/systemd/system/zfs-import-scan.service.in diff --git a/etc/systemd/system/zfs-import-scan.service.in b/etc/systemd/system/zfs-import-scan.service.in
index 598ef501b..e4f3a70c1 100644 index c5dd45d87..1c792edf0 100644
--- a/etc/systemd/system/zfs-import-scan.service.in --- a/etc/systemd/system/zfs-import-scan.service.in
+++ b/etc/systemd/system/zfs-import-scan.service.in +++ b/etc/systemd/system/zfs-import-scan.service.in
@@ -13,7 +13,7 @@ ConditionPathIsDirectory=/sys/module/zfs @@ -14,7 +14,7 @@ ConditionPathIsDirectory=/sys/module/zfs
[Service]
Type=oneshot Type=oneshot
RemainAfterExit=yes RemainAfterExit=yes
EnvironmentFile=-@initconfdir@/zfs
-ExecStart=@sbindir@/zpool import -aN -o cachefile=none $ZPOOL_IMPORT_OPTS -ExecStart=@sbindir@/zpool import -aN -o cachefile=none $ZPOOL_IMPORT_OPTS
+ExecStart=@sbindir@/zpool import -aN -d /dev/disk/by-id -o cachefile=none $ZPOOL_IMPORT_OPTS +ExecStart=@sbindir@/zpool import -aN -d /dev/disk/by-id -o cachefile=none $ZPOOL_IMPORT_OPTS

View file

@ -13,10 +13,10 @@ Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
1 file changed, 1 insertion(+), 1 deletion(-) 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/cmd/zed/zed.d/zed.rc b/cmd/zed/zed.d/zed.rc diff --git a/cmd/zed/zed.d/zed.rc b/cmd/zed/zed.d/zed.rc
index 227b26c26..240d0dbfa 100644 index 78dc1afc7..41d5539ea 100644
--- a/cmd/zed/zed.d/zed.rc --- a/cmd/zed/zed.d/zed.rc
+++ b/cmd/zed/zed.d/zed.rc +++ b/cmd/zed/zed.d/zed.rc
@@ -42,7 +42,7 @@ ZED_EMAIL_ADDR="root" @@ -41,7 +41,7 @@ ZED_EMAIL_ADDR="root"
## ##
# Minimum number of seconds between notifications for a similar event. # Minimum number of seconds between notifications for a similar event.
# #

View file

@ -3,29 +3,44 @@ From: Antonio Russo <antonio.e.russo@gmail.com>
Date: Fri, 20 Mar 2020 17:28:43 +0100 Date: Fri, 20 Mar 2020 17:28:43 +0100
Subject: [PATCH] dont symlink zed scripts Subject: [PATCH] dont symlink zed scripts
(cherry picked and adapted from 5cee380324d74e640d5dd7a360faba3994c8007f [0]) Of the zedlet scripts shipped by upstream, a subset are enabled by
default, by creating symlinks in /etc/zfs/zed.d. These symlinks are
shipped in the zfs-zed package. dpkg, however, does not support
conffile handling of symlinks, and therefore any changes (removals) to
the symlinks are not preserved on package upgrade.
[0] https://salsa.debian.org/zfsonlinux-team/zfs.git To address this policy violation, we:
Signed-off-by: Stoiko Ivanov <s.ivanov@proxmox.com> 1. During package build, create a list of enabled-by-default zedlets,
instead of creating symlinks.
2. On package removal, identify all enabled-by-default zedlets whose
symlinks do not exist (i.e., were removed by the user). This is done
by creating "whiteout" links to /dev/null in their place).
3. On package installation, create links to enabled-by-default zedlets
UNLESS there is already a file there (i.e., abort if there is a
whiteout link).
4. We also clean up broken symlinks to removed zedlets at package
postinst.
Description: track default symlinks, instead of symlinking Description: track default symlinks, instead of symlinking
Forwarded: no need Forwarded: no need
(cherry picked from https://salsa.debian.org/zfsonlinux-team/zfs/-/commit/5cee380324d7)
Signed-off-by: Stoiko Ivanov <s.ivanov@proxmox.com>
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com> Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
--- ---
cmd/zed/zed.d/Makefile.am | 2 +- cmd/zed/zed.d/Makefile.am | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-) 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/cmd/zed/zed.d/Makefile.am b/cmd/zed/zed.d/Makefile.am diff --git a/cmd/zed/zed.d/Makefile.am b/cmd/zed/zed.d/Makefile.am
index 2c8173b3e..ad39292e4 100644 index 812558cf6..f802cf140 100644
--- a/cmd/zed/zed.d/Makefile.am --- a/cmd/zed/zed.d/Makefile.am
+++ b/cmd/zed/zed.d/Makefile.am +++ b/cmd/zed/zed.d/Makefile.am
@@ -49,7 +49,7 @@ install-data-hook: @@ -48,7 +48,7 @@ zed-install-data-hook:
for f in $(zedconfdefaults); do \ set -x; for f in $(zedconfdefaults); do \
test -f "$(DESTDIR)$(zedconfdir)/$${f}" -o \ [ -f "$(DESTDIR)$(zedconfdir)/$${f}" ] ||\
-L "$(DESTDIR)$(zedconfdir)/$${f}" || \ [ -L "$(DESTDIR)$(zedconfdir)/$${f}" ] || \
- ln -s "$(zedexecdir)/$${f}" "$(DESTDIR)$(zedconfdir)"; \ - $(LN_S) "$(zedexecdir)/$${f}" "$(DESTDIR)$(zedconfdir)"; \
+ echo "$${f}" >> "$(DESTDIR)$(zedexecdir)/DEFAULT-ENABLED" ; \ + echo "$${f}" >> "$(DESTDIR)$(zedexecdir)/DEFAULT-ENABLED" ; \
done done
chmod 0600 "$(DESTDIR)$(zedconfdir)/zed.rc"
SHELLCHECKSCRIPTS += $(dist_zedconf_DATA) $(dist_zedexec_SCRIPTS) $(nodist_zedexec_SCRIPTS)

View file

@ -13,16 +13,28 @@ can contain characters which will be escaped by systemd.
Signed-off-by: Stoiko Ivanov <s.ivanov@proxmox.com> Signed-off-by: Stoiko Ivanov <s.ivanov@proxmox.com>
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com> Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
--- ---
etc/systemd/system/50-zfs.preset.in | 1 + etc/Makefile.am | 1 +
etc/systemd/system/Makefile.am | 1 + etc/systemd/system/50-zfs.preset | 1 +
etc/systemd/system/zfs-import@.service.in | 16 ++++++++++++++++ etc/systemd/system/zfs-import@.service.in | 16 ++++++++++++++++
3 files changed, 18 insertions(+) 3 files changed, 18 insertions(+)
create mode 100644 etc/systemd/system/zfs-import@.service.in create mode 100644 etc/systemd/system/zfs-import@.service.in
diff --git a/etc/systemd/system/50-zfs.preset.in b/etc/systemd/system/50-zfs.preset.in diff --git a/etc/Makefile.am b/etc/Makefile.am
index 7187762d3..de131dc87 100644
--- a/etc/Makefile.am
+++ b/etc/Makefile.am
@@ -54,6 +54,7 @@ dist_systemdpreset_DATA = \
systemdunit_DATA = \
%D%/systemd/system/zfs-import-cache.service \
%D%/systemd/system/zfs-import-scan.service \
+ %D%/systemd/system/zfs-import@.service \
%D%/systemd/system/zfs-import.target \
%D%/systemd/system/zfs-mount.service \
%D%/systemd/system/zfs-scrub-monthly@.timer \
diff --git a/etc/systemd/system/50-zfs.preset b/etc/systemd/system/50-zfs.preset
index e4056a92c..030611419 100644 index e4056a92c..030611419 100644
--- a/etc/systemd/system/50-zfs.preset.in --- a/etc/systemd/system/50-zfs.preset
+++ b/etc/systemd/system/50-zfs.preset.in +++ b/etc/systemd/system/50-zfs.preset
@@ -1,6 +1,7 @@ @@ -1,6 +1,7 @@
# ZFS is enabled by default # ZFS is enabled by default
enable zfs-import-cache.service enable zfs-import-cache.service
@ -31,18 +43,6 @@ index e4056a92c..030611419 100644
enable zfs-import.target enable zfs-import.target
enable zfs-mount.service enable zfs-mount.service
enable zfs-share.service enable zfs-share.service
diff --git a/etc/systemd/system/Makefile.am b/etc/systemd/system/Makefile.am
index 35f833de5..af3ae597c 100644
--- a/etc/systemd/system/Makefile.am
+++ b/etc/systemd/system/Makefile.am
@@ -7,6 +7,7 @@ systemdunit_DATA = \
zfs-zed.service \
zfs-import-cache.service \
zfs-import-scan.service \
+ zfs-import@.service \
zfs-mount.service \
zfs-share.service \
zfs-volume-wait.service \
diff --git a/etc/systemd/system/zfs-import@.service.in b/etc/systemd/system/zfs-import@.service.in diff --git a/etc/systemd/system/zfs-import@.service.in b/etc/systemd/system/zfs-import@.service.in
new file mode 100644 new file mode 100644
index 000000000..9b4ee9371 index 000000000..9b4ee9371

View file

@ -15,36 +15,36 @@ Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
rename man/{man1/arcstat.1 => man8/arcstat.8} (99%) rename man/{man1/arcstat.1 => man8/arcstat.8} (99%)
diff --git a/man/Makefile.am b/man/Makefile.am diff --git a/man/Makefile.am b/man/Makefile.am
index 64650c2b9..95a66a62f 100644 index 36c1aede1..94fd96e58 100644
--- a/man/Makefile.am --- a/man/Makefile.am
+++ b/man/Makefile.am +++ b/man/Makefile.am
@@ -8,7 +8,6 @@ dist_man_MANS = \ @@ -2,7 +2,6 @@ dist_noinst_man_MANS = \
man1/ztest.1 \ %D%/man1/cstyle.1
man1/raidz_test.1 \
man1/zvol_wait.1 \ dist_man_MANS = \
- man1/arcstat.1 \ - %D%/man1/arcstat.1 \
\ %D%/man1/raidz_test.1 \
man5/vdev_id.conf.5 \ %D%/man1/test-runner.1 \
\ %D%/man1/zhack.1 \
@@ -22,6 +21,7 @@ dist_man_MANS = \ @@ -22,6 +21,7 @@ dist_man_MANS = \
man7/zpoolconcepts.7 \ %D%/man7/zpoolconcepts.7 \
man7/zpoolprops.7 \ %D%/man7/zpoolprops.7 \
\ \
+ man8/arcstat.8 \ + %D%/man8/arcstat.8 \
man8/fsck.zfs.8 \ %D%/man8/fsck.zfs.8 \
man8/mount.zfs.8 \ %D%/man8/mount.zfs.8 \
man8/vdev_id.8 \ %D%/man8/vdev_id.8 \
diff --git a/man/man1/arcstat.1 b/man/man8/arcstat.8 diff --git a/man/man1/arcstat.1 b/man/man8/arcstat.8
similarity index 99% similarity index 99%
rename from man/man1/arcstat.1 rename from man/man1/arcstat.1
rename to man/man8/arcstat.8 rename to man/man8/arcstat.8
index a69cd8937..dfe9c971b 100644 index 82358fa68..a8fb55498 100644
--- a/man/man1/arcstat.1 --- a/man/man1/arcstat.1
+++ b/man/man8/arcstat.8 +++ b/man/man8/arcstat.8
@@ -13,7 +13,7 @@ @@ -13,7 +13,7 @@
.\" Copyright (c) 2020 by AJ Jordan. All rights reserved. .\" Copyright (c) 2020 by AJ Jordan. All rights reserved.
.\" .\"
.Dd May 26, 2021 .Dd December 23, 2022
-.Dt ARCSTAT 1 -.Dt ARCSTAT 1
+.Dt ARCSTAT 8 +.Dt ARCSTAT 8
.Os .Os

View file

@ -0,0 +1,113 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Thomas Lamprecht <t.lamprecht@proxmox.com>
Date: Wed, 10 Nov 2021 09:29:47 +0100
Subject: [PATCH] arc stat/summary: guard access to l2arc MFU/MRU stats
commit 085321621e79a75bea41c2b6511da6ebfbf2ba0a added printing MFU
and MRU stats for 2.1 user space tools, but those keys are not
available in the 2.0 module. That means it may break the arcstat and
arc_summary tools after upgrade to 2.1 (user space), before a reboot
to the new 2.1 ZFS kernel-module happened, due to python raising a
KeyError on the dict access then.
Move those two keys to a .get accessor with `0` as fallback, as it
should be better to show some possible wrong data for new stat-keys
than throwing an exception.
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
also move l2_mfu_asize l2_mru_asize l2_prefetch_asize
l2_bufc_data_asize l2_bufc_metadata_asize to .get accessor
(these are only present with a cache device in the pool)
Signed-off-by: Stoiko Ivanov <s.ivanov@proxmox.com>
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
---
cmd/arc_summary | 28 ++++++++++++++--------------
cmd/arcstat.in | 14 +++++++-------
2 files changed, 21 insertions(+), 21 deletions(-)
diff --git a/cmd/arc_summary b/cmd/arc_summary
index 426e02070..9de198150 100755
--- a/cmd/arc_summary
+++ b/cmd/arc_summary
@@ -655,13 +655,13 @@ def section_arc(kstats_dict):
prt_i1('L2 cached evictions:', f_bytes(arc_stats['evict_l2_cached']))
prt_i1('L2 eligible evictions:', f_bytes(arc_stats['evict_l2_eligible']))
prt_i2('L2 eligible MFU evictions:',
- f_perc(arc_stats['evict_l2_eligible_mfu'],
+ f_perc(arc_stats.get('evict_l2_eligible_mfu', 0), # 2.0 module compat
arc_stats['evict_l2_eligible']),
- f_bytes(arc_stats['evict_l2_eligible_mfu']))
+ f_bytes(arc_stats.get('evict_l2_eligible_mfu', 0)))
prt_i2('L2 eligible MRU evictions:',
- f_perc(arc_stats['evict_l2_eligible_mru'],
+ f_perc(arc_stats.get('evict_l2_eligible_mru', 0), # 2.0 module compat
arc_stats['evict_l2_eligible']),
- f_bytes(arc_stats['evict_l2_eligible_mru']))
+ f_bytes(arc_stats.get('evict_l2_eligible_mru', 0)))
prt_i1('L2 ineligible evictions:',
f_bytes(arc_stats['evict_l2_ineligible']))
print()
@@ -851,20 +851,20 @@ def section_l2arc(kstats_dict):
f_perc(arc_stats['l2_hdr_size'], arc_stats['l2_size']),
f_bytes(arc_stats['l2_hdr_size']))
prt_i2('MFU allocated size:',
- f_perc(arc_stats['l2_mfu_asize'], arc_stats['l2_asize']),
- f_bytes(arc_stats['l2_mfu_asize']))
+ f_perc(arc_stats.get('l2_mfu_asize', 0), arc_stats['l2_asize']),
+ f_bytes(arc_stats.get('l2_mfu_asize', 0))) # 2.0 module compat
prt_i2('MRU allocated size:',
- f_perc(arc_stats['l2_mru_asize'], arc_stats['l2_asize']),
- f_bytes(arc_stats['l2_mru_asize']))
+ f_perc(arc_stats.get('l2_mru_asize', 0), arc_stats['l2_asize']),
+ f_bytes(arc_stats.get('l2_mru_asize', 0))) # 2.0 module compat
prt_i2('Prefetch allocated size:',
- f_perc(arc_stats['l2_prefetch_asize'], arc_stats['l2_asize']),
- f_bytes(arc_stats['l2_prefetch_asize']))
+ f_perc(arc_stats.get('l2_prefetch_asize', 0), arc_stats['l2_asize']),
+ f_bytes(arc_stats.get('l2_prefetch_asize',0))) # 2.0 module compat
prt_i2('Data (buffer content) allocated size:',
- f_perc(arc_stats['l2_bufc_data_asize'], arc_stats['l2_asize']),
- f_bytes(arc_stats['l2_bufc_data_asize']))
+ f_perc(arc_stats.get('l2_bufc_data_asize', 0), arc_stats['l2_asize']),
+ f_bytes(arc_stats.get('l2_bufc_data_asize', 0))) # 2.0 module compat
prt_i2('Metadata (buffer content) allocated size:',
- f_perc(arc_stats['l2_bufc_metadata_asize'], arc_stats['l2_asize']),
- f_bytes(arc_stats['l2_bufc_metadata_asize']))
+ f_perc(arc_stats.get('l2_bufc_metadata_asize', 0), arc_stats['l2_asize']),
+ f_bytes(arc_stats.get('l2_bufc_metadata_asize', 0))) # 2.0 module compat
print()
prt_1('L2ARC breakdown:', f_hits(l2_access_total))
diff --git a/cmd/arcstat.in b/cmd/arcstat.in
index 8df1c62f7..833348d0e 100755
--- a/cmd/arcstat.in
+++ b/cmd/arcstat.in
@@ -565,8 +565,8 @@ def calculate():
v["el2skip"] = d["evict_l2_skip"] // sint
v["el2cach"] = d["evict_l2_cached"] // sint
v["el2el"] = d["evict_l2_eligible"] // sint
- v["el2mfu"] = d["evict_l2_eligible_mfu"] // sint
- v["el2mru"] = d["evict_l2_eligible_mru"] // sint
+ v["el2mfu"] = d.get("evict_l2_eligible_mfu", 0) // sint
+ v["el2mru"] = d.get("evict_l2_eligible_mru", 0) // sint
v["el2inel"] = d["evict_l2_ineligible"] // sint
v["mtxmis"] = d["mutex_miss"] // sint
@@ -581,11 +581,11 @@ def calculate():
v["l2size"] = cur["l2_size"]
v["l2bytes"] = d["l2_read_bytes"] // sint
- v["l2pref"] = cur["l2_prefetch_asize"]
- v["l2mfu"] = cur["l2_mfu_asize"]
- v["l2mru"] = cur["l2_mru_asize"]
- v["l2data"] = cur["l2_bufc_data_asize"]
- v["l2meta"] = cur["l2_bufc_metadata_asize"]
+ v["l2pref"] = cur.get("l2_prefetch_asize", 0)
+ v["l2mfu"] = cur.get("l2_mfu_asize", 0)
+ v["l2mru"] = cur.get("l2_mru_asize", 0)
+ v["l2data"] = cur.get("l2_bufc_data_asize", 0)
+ v["l2meta"] = cur.get("l2_bufc_metadata_asize", 0)
v["l2pref%"] = 100 * v["l2pref"] // v["l2asize"]
v["l2mfu%"] = 100 * v["l2mfu"] // v["l2asize"]
v["l2mru%"] = 100 * v["l2mru"] // v["l2asize"]

View file

@ -0,0 +1,99 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Tony Hutter <hutter2@llnl.gov>
Date: Mon, 23 Oct 2023 14:45:06 -0700
Subject: [PATCH] zvol: Remove broken blk-mq optimization
This fix removes a dubious optimization in zfs_uiomove_bvec_rq()
that saved the iterator contents of a rq_for_each_segment(). This
optimization allowed restoring the "saved state" from a previous
rq_for_each_segment() call on the same uio so that you wouldn't
need to iterate though each bvec on every zfs_uiomove_bvec_rq() call.
However, if the kernel is manipulating the requests/bios/bvecs under
the covers between zfs_uiomove_bvec_rq() calls, then it could result
in corruption from using the "saved state". This optimization
results in an unbootable system after installing an OS on a zvol
with blk-mq enabled.
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Signed-off-by: Tony Hutter <hutter2@llnl.gov>
Closes #15351
(cherry picked from commit 7c9b6fed16ed5034fd1cdfdaedfad93dc97b1557)
Signed-off-by: Stoiko Ivanov <s.ivanov@proxmox.com>
---
include/os/linux/spl/sys/uio.h | 8 --------
module/os/linux/zfs/zfs_uio.c | 29 -----------------------------
2 files changed, 37 deletions(-)
diff --git a/include/os/linux/spl/sys/uio.h b/include/os/linux/spl/sys/uio.h
index cce097e16..a4b600004 100644
--- a/include/os/linux/spl/sys/uio.h
+++ b/include/os/linux/spl/sys/uio.h
@@ -73,13 +73,6 @@ typedef struct zfs_uio {
size_t uio_skip;
struct request *rq;
-
- /*
- * Used for saving rq_for_each_segment() state between calls
- * to zfs_uiomove_bvec_rq().
- */
- struct req_iterator iter;
- struct bio_vec bv;
} zfs_uio_t;
@@ -138,7 +131,6 @@ zfs_uio_bvec_init(zfs_uio_t *uio, struct bio *bio, struct request *rq)
} else {
uio->uio_bvec = NULL;
uio->uio_iovcnt = 0;
- memset(&uio->iter, 0, sizeof (uio->iter));
}
uio->uio_loffset = io_offset(bio, rq);
diff --git a/module/os/linux/zfs/zfs_uio.c b/module/os/linux/zfs/zfs_uio.c
index 3efd4ab15..c2ed67c43 100644
--- a/module/os/linux/zfs/zfs_uio.c
+++ b/module/os/linux/zfs/zfs_uio.c
@@ -204,22 +204,6 @@ zfs_uiomove_bvec_rq(void *p, size_t n, zfs_uio_rw_t rw, zfs_uio_t *uio)
this_seg_start = orig_loffset;
rq_for_each_segment(bv, rq, iter) {
- if (uio->iter.bio) {
- /*
- * If uio->iter.bio is present, then we know we've saved
- * uio->iter from a previous call to this function, and
- * we can skip ahead in this rq_for_each_segment() loop
- * to where we last left off. That way, we don't need
- * to iterate over tons of segments we've already
- * processed - we can just restore the "saved state".
- */
- iter = uio->iter;
- bv = uio->bv;
- this_seg_start = uio->uio_loffset;
- memset(&uio->iter, 0, sizeof (uio->iter));
- continue;
- }
-
/*
* Lookup what the logical offset of the last byte of this
* segment is.
@@ -260,19 +244,6 @@ zfs_uiomove_bvec_rq(void *p, size_t n, zfs_uio_rw_t rw, zfs_uio_t *uio)
copied = 1; /* We copied some data */
}
- if (n == 0) {
- /*
- * All done copying. Save our 'iter' value to the uio.
- * This allows us to "save our state" and skip ahead in
- * the rq_for_each_segment() loop the next time we call
- * call zfs_uiomove_bvec_rq() on this uio (which we
- * will be doing for any remaining data in the uio).
- */
- uio->iter = iter; /* make a copy of the struct data */
- uio->bv = bv;
- return (0);
- }
-
this_seg_start = this_seg_end + 1;
}

View file

@ -0,0 +1,123 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Tony Hutter <hutter2@llnl.gov>
Date: Mon, 23 Oct 2023 14:39:59 -0700
Subject: [PATCH] Revert "zvol: Temporally disable blk-mq"
This reverts commit aefb6a2bd6c24597cde655e9ce69edd0a4c34357.
aefb6a2bd temporally disabled blk-mq until we could fix a fix for
Signed-off-by: Tony Hutter <hutter2@llnl.gov>
Closes #15439
(cherry picked from commit 05c4710e8958832afc2868102c9535a4f18115be)
Signed-off-by: Stoiko Ivanov <s.ivanov@proxmox.com>
---
man/man4/zfs.4 | 57 ++++++++++++++++++++++++++++
module/os/linux/zfs/zvol_os.c | 12 ++++++
tests/zfs-tests/include/tunables.cfg | 2 +-
3 files changed, 70 insertions(+), 1 deletion(-)
diff --git a/man/man4/zfs.4 b/man/man4/zfs.4
index 71a3e67ee..cfadd79d8 100644
--- a/man/man4/zfs.4
+++ b/man/man4/zfs.4
@@ -2317,6 +2317,63 @@ If
.Sy zvol_threads
to the number of CPUs present or 32 (whichever is greater).
.
+.It Sy zvol_blk_mq_threads Ns = Ns Sy 0 Pq uint
+The number of threads per zvol to use for queuing IO requests.
+This parameter will only appear if your kernel supports
+.Li blk-mq
+and is only read and assigned to a zvol at zvol load time.
+If
+.Sy 0
+(the default) then internally set
+.Sy zvol_blk_mq_threads
+to the number of CPUs present.
+.
+.It Sy zvol_use_blk_mq Ns = Ns Sy 0 Ns | Ns 1 Pq uint
+Set to
+.Sy 1
+to use the
+.Li blk-mq
+API for zvols.
+Set to
+.Sy 0
+(the default) to use the legacy zvol APIs.
+This setting can give better or worse zvol performance depending on
+the workload.
+This parameter will only appear if your kernel supports
+.Li blk-mq
+and is only read and assigned to a zvol at zvol load time.
+.
+.It Sy zvol_blk_mq_blocks_per_thread Ns = Ns Sy 8 Pq uint
+If
+.Sy zvol_use_blk_mq
+is enabled, then process this number of
+.Sy volblocksize Ns -sized blocks per zvol thread.
+This tunable can be use to favor better performance for zvol reads (lower
+values) or writes (higher values).
+If set to
+.Sy 0 ,
+then the zvol layer will process the maximum number of blocks
+per thread that it can.
+This parameter will only appear if your kernel supports
+.Li blk-mq
+and is only applied at each zvol's load time.
+.
+.It Sy zvol_blk_mq_queue_depth Ns = Ns Sy 0 Pq uint
+The queue_depth value for the zvol
+.Li blk-mq
+interface.
+This parameter will only appear if your kernel supports
+.Li blk-mq
+and is only applied at each zvol's load time.
+If
+.Sy 0
+(the default) then use the kernel's default queue depth.
+Values are clamped to the kernel's
+.Dv BLKDEV_MIN_RQ
+and
+.Dv BLKDEV_MAX_RQ Ns / Ns Dv BLKDEV_DEFAULT_RQ
+limits.
+.
.It Sy zvol_volmode Ns = Ns Sy 1 Pq uint
Defines zvol block devices behaviour when
.Sy volmode Ns = Ns Sy default :
diff --git a/module/os/linux/zfs/zvol_os.c b/module/os/linux/zfs/zvol_os.c
index 76521c959..7a95b54bd 100644
--- a/module/os/linux/zfs/zvol_os.c
+++ b/module/os/linux/zfs/zvol_os.c
@@ -1620,6 +1620,18 @@ MODULE_PARM_DESC(zvol_prefetch_bytes, "Prefetch N bytes at zvol start+end");
module_param(zvol_volmode, uint, 0644);
MODULE_PARM_DESC(zvol_volmode, "Default volmode property value");
+#ifdef HAVE_BLK_MQ
+module_param(zvol_blk_mq_queue_depth, uint, 0644);
+MODULE_PARM_DESC(zvol_blk_mq_queue_depth, "Default blk-mq queue depth");
+
+module_param(zvol_use_blk_mq, uint, 0644);
+MODULE_PARM_DESC(zvol_use_blk_mq, "Use the blk-mq API for zvols");
+
+module_param(zvol_blk_mq_blocks_per_thread, uint, 0644);
+MODULE_PARM_DESC(zvol_blk_mq_blocks_per_thread,
+ "Process volblocksize blocks per thread");
+#endif
+
#ifndef HAVE_BLKDEV_GET_ERESTARTSYS
module_param(zvol_open_timeout_ms, uint, 0644);
MODULE_PARM_DESC(zvol_open_timeout_ms, "Timeout for ZVOL open retries");
diff --git a/tests/zfs-tests/include/tunables.cfg b/tests/zfs-tests/include/tunables.cfg
index 8010a9451..80e7bcb3b 100644
--- a/tests/zfs-tests/include/tunables.cfg
+++ b/tests/zfs-tests/include/tunables.cfg
@@ -89,7 +89,7 @@ VDEV_VALIDATE_SKIP vdev.validate_skip vdev_validate_skip
VOL_INHIBIT_DEV UNSUPPORTED zvol_inhibit_dev
VOL_MODE vol.mode zvol_volmode
VOL_RECURSIVE vol.recursive UNSUPPORTED
-VOL_USE_BLK_MQ UNSUPPORTED UNSUPPORTED
+VOL_USE_BLK_MQ UNSUPPORTED zvol_use_blk_mq
XATTR_COMPAT xattr_compat zfs_xattr_compat
ZEVENT_LEN_MAX zevent.len_max zfs_zevent_len_max
ZEVENT_RETAIN_MAX zevent.retain_max zfs_zevent_retain_max

2
zfs

@ -1 +1 @@
Subproject commit 86783d7d92cf7a859464719a917fdff845b9a9e1 Subproject commit 95785196f26e92d82cf4445654ba84e4a9671c57