b8de2740ea
This change adds new patches from Proxmox for the ZFS project.
212 lines
5.8 KiB
Diff
212 lines
5.8 KiB
Diff
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
|
|
From: Brian Behlendorf <behlendorf1@llnl.gov>
|
|
Date: Fri, 17 Dec 2021 09:52:13 -0800
|
|
Subject: [PATCH] Fix zvol_open() lock inversion
|
|
|
|
When restructuring the zvol_open() logic for the Linux 5.13 kernel
|
|
a lock inversion was accidentally introduced. In the updated code
|
|
the spa_namespace_lock is now taken before the zv_suspend_lock
|
|
allowing the following scenario to occur:
|
|
|
|
down_read <=== waiting for zv_suspend_lock
|
|
zvol_open <=== holds spa_namespace_lock
|
|
__blkdev_get
|
|
blkdev_get_by_dev
|
|
blkdev_open
|
|
...
|
|
|
|
mutex_lock <== waiting for spa_namespace_lock
|
|
spa_open_common
|
|
spa_open
|
|
dsl_pool_hold
|
|
dmu_objset_hold_flags
|
|
dmu_objset_hold
|
|
dsl_prop_get
|
|
dsl_prop_get_integer
|
|
zvol_create_minor
|
|
dmu_recv_end
|
|
zfs_ioc_recv_impl <=== holds zv_suspend_lock via zvol_suspend()
|
|
zfs_ioc_recv
|
|
...
|
|
|
|
This commit resolves the issue by moving the acquisition of the
|
|
spa_namespace_lock back to after the zv_suspend_lock which restores
|
|
the original ordering.
|
|
|
|
Additionally, as part of this change the error exit paths were
|
|
simplified where possible.
|
|
|
|
Reviewed-by: Tony Hutter <hutter2@llnl.gov>
|
|
Reviewed-by: Rich Ercolani <rincebrain@gmail.com>
|
|
Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov>
|
|
Closes #12863
|
|
(cherry picked from commit 8a02d01e85556bbe3a1c6947bc11b8ef028d4023)
|
|
Signed-off-by: Stoiko Ivanov <s.ivanov@proxmox.com>
|
|
---
|
|
module/os/linux/zfs/zvol_os.c | 121 ++++++++++++++++------------------
|
|
1 file changed, 58 insertions(+), 63 deletions(-)
|
|
|
|
diff --git a/module/os/linux/zfs/zvol_os.c b/module/os/linux/zfs/zvol_os.c
|
|
index 44caadd58..69479b3f7 100644
|
|
--- a/module/os/linux/zfs/zvol_os.c
|
|
+++ b/module/os/linux/zfs/zvol_os.c
|
|
@@ -496,8 +496,7 @@ zvol_open(struct block_device *bdev, fmode_t flag)
|
|
{
|
|
zvol_state_t *zv;
|
|
int error = 0;
|
|
- boolean_t drop_suspend = B_TRUE;
|
|
- boolean_t drop_namespace = B_FALSE;
|
|
+ boolean_t drop_suspend = B_FALSE;
|
|
#ifndef HAVE_BLKDEV_GET_ERESTARTSYS
|
|
hrtime_t timeout = MSEC2NSEC(zvol_open_timeout_ms);
|
|
hrtime_t start = gethrtime();
|
|
@@ -517,7 +516,36 @@ retry:
|
|
return (SET_ERROR(-ENXIO));
|
|
}
|
|
|
|
- if (zv->zv_open_count == 0 && !mutex_owned(&spa_namespace_lock)) {
|
|
+ mutex_enter(&zv->zv_state_lock);
|
|
+ /*
|
|
+ * Make sure zvol is not suspended during first open
|
|
+ * (hold zv_suspend_lock) and respect proper lock acquisition
|
|
+ * ordering - zv_suspend_lock before zv_state_lock
|
|
+ */
|
|
+ if (zv->zv_open_count == 0) {
|
|
+ if (!rw_tryenter(&zv->zv_suspend_lock, RW_READER)) {
|
|
+ mutex_exit(&zv->zv_state_lock);
|
|
+ rw_enter(&zv->zv_suspend_lock, RW_READER);
|
|
+ mutex_enter(&zv->zv_state_lock);
|
|
+ /* check to see if zv_suspend_lock is needed */
|
|
+ if (zv->zv_open_count != 0) {
|
|
+ rw_exit(&zv->zv_suspend_lock);
|
|
+ } else {
|
|
+ drop_suspend = B_TRUE;
|
|
+ }
|
|
+ } else {
|
|
+ drop_suspend = B_TRUE;
|
|
+ }
|
|
+ }
|
|
+ rw_exit(&zvol_state_lock);
|
|
+
|
|
+ ASSERT(MUTEX_HELD(&zv->zv_state_lock));
|
|
+
|
|
+ if (zv->zv_open_count == 0) {
|
|
+ boolean_t drop_namespace = B_FALSE;
|
|
+
|
|
+ ASSERT(RW_READ_HELD(&zv->zv_suspend_lock));
|
|
+
|
|
/*
|
|
* In all other call paths the spa_namespace_lock is taken
|
|
* before the bdev->bd_mutex lock. However, on open(2)
|
|
@@ -542,84 +570,51 @@ retry:
|
|
* the kernel so the only option is to return the error for
|
|
* the caller to handle it.
|
|
*/
|
|
- if (!mutex_tryenter(&spa_namespace_lock)) {
|
|
- rw_exit(&zvol_state_lock);
|
|
+ if (!mutex_owned(&spa_namespace_lock)) {
|
|
+ if (!mutex_tryenter(&spa_namespace_lock)) {
|
|
+ mutex_exit(&zv->zv_state_lock);
|
|
+ rw_exit(&zv->zv_suspend_lock);
|
|
|
|
#ifdef HAVE_BLKDEV_GET_ERESTARTSYS
|
|
- schedule();
|
|
- return (SET_ERROR(-ERESTARTSYS));
|
|
-#else
|
|
- if ((gethrtime() - start) > timeout)
|
|
+ schedule();
|
|
return (SET_ERROR(-ERESTARTSYS));
|
|
+#else
|
|
+ if ((gethrtime() - start) > timeout)
|
|
+ return (SET_ERROR(-ERESTARTSYS));
|
|
|
|
- schedule_timeout(MSEC_TO_TICK(10));
|
|
- goto retry;
|
|
+ schedule_timeout(MSEC_TO_TICK(10));
|
|
+ goto retry;
|
|
#endif
|
|
- } else {
|
|
- drop_namespace = B_TRUE;
|
|
- }
|
|
- }
|
|
-
|
|
- mutex_enter(&zv->zv_state_lock);
|
|
- /*
|
|
- * make sure zvol is not suspended during first open
|
|
- * (hold zv_suspend_lock) and respect proper lock acquisition
|
|
- * ordering - zv_suspend_lock before zv_state_lock
|
|
- */
|
|
- if (zv->zv_open_count == 0) {
|
|
- if (!rw_tryenter(&zv->zv_suspend_lock, RW_READER)) {
|
|
- mutex_exit(&zv->zv_state_lock);
|
|
- rw_enter(&zv->zv_suspend_lock, RW_READER);
|
|
- mutex_enter(&zv->zv_state_lock);
|
|
- /* check to see if zv_suspend_lock is needed */
|
|
- if (zv->zv_open_count != 0) {
|
|
- rw_exit(&zv->zv_suspend_lock);
|
|
- drop_suspend = B_FALSE;
|
|
+ } else {
|
|
+ drop_namespace = B_TRUE;
|
|
}
|
|
}
|
|
- } else {
|
|
- drop_suspend = B_FALSE;
|
|
- }
|
|
- rw_exit(&zvol_state_lock);
|
|
-
|
|
- ASSERT(MUTEX_HELD(&zv->zv_state_lock));
|
|
|
|
- if (zv->zv_open_count == 0) {
|
|
- ASSERT(RW_READ_HELD(&zv->zv_suspend_lock));
|
|
error = -zvol_first_open(zv, !(flag & FMODE_WRITE));
|
|
- if (error)
|
|
- goto out_mutex;
|
|
- }
|
|
|
|
- if ((flag & FMODE_WRITE) && (zv->zv_flags & ZVOL_RDONLY)) {
|
|
- error = -EROFS;
|
|
- goto out_open_count;
|
|
+ if (drop_namespace)
|
|
+ mutex_exit(&spa_namespace_lock);
|
|
}
|
|
|
|
- zv->zv_open_count++;
|
|
-
|
|
- mutex_exit(&zv->zv_state_lock);
|
|
- if (drop_namespace)
|
|
- mutex_exit(&spa_namespace_lock);
|
|
- if (drop_suspend)
|
|
- rw_exit(&zv->zv_suspend_lock);
|
|
-
|
|
- zfs_check_media_change(bdev);
|
|
-
|
|
- return (0);
|
|
+ if (error == 0) {
|
|
+ if ((flag & FMODE_WRITE) && (zv->zv_flags & ZVOL_RDONLY)) {
|
|
+ if (zv->zv_open_count == 0)
|
|
+ zvol_last_close(zv);
|
|
|
|
-out_open_count:
|
|
- if (zv->zv_open_count == 0)
|
|
- zvol_last_close(zv);
|
|
+ error = SET_ERROR(-EROFS);
|
|
+ } else {
|
|
+ zv->zv_open_count++;
|
|
+ }
|
|
+ }
|
|
|
|
-out_mutex:
|
|
mutex_exit(&zv->zv_state_lock);
|
|
- if (drop_namespace)
|
|
- mutex_exit(&spa_namespace_lock);
|
|
if (drop_suspend)
|
|
rw_exit(&zv->zv_suspend_lock);
|
|
|
|
- return (SET_ERROR(error));
|
|
+ if (error == 0)
|
|
+ zfs_check_media_change(bdev);
|
|
+
|
|
+ return (error);
|
|
}
|
|
|
|
static void
|