Rebase to 5.4.0-126.142 patchset

This commit is contained in:
Antoine Martin 2025-01-20 00:10:57 -05:00
parent 6f98600d96
commit 762dc2095b
Signed by: forge
GPG key ID: D62A472A4AA7D541
23 changed files with 498 additions and 2024 deletions

View file

@ -17,19 +17,28 @@ $KBUILD_BUILD_TIMESTAMP.
Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
---
init/Makefile | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
scripts/mkcompile_h | 10 +++++++---
1 file changed, 7 insertions(+), 3 deletions(-)
diff --git a/init/Makefile b/init/Makefile
index 10b652d33e872658f2d8c8ce3a0b45e9a473fd64..e4dabde27b90c12bd72c6239e15509f9a9448d80 100644
--- a/init/Makefile
+++ b/init/Makefile
@@ -29,7 +29,7 @@ preempt-flag-$(CONFIG_PREEMPT_DYNAMIC) := PREEMPT_DYNAMIC
preempt-flag-$(CONFIG_PREEMPT_RT) := PREEMPT_RT
diff --git a/scripts/mkcompile_h b/scripts/mkcompile_h
index 06c1e9e3bc38..6e7012175600 100755
--- a/scripts/mkcompile_h
+++ b/scripts/mkcompile_h
@@ -34,10 +34,14 @@ else
VERSION=$KBUILD_BUILD_VERSION
fi
build-version = $(or $(KBUILD_BUILD_VERSION), $(build-version-auto))
-build-timestamp = $(or $(KBUILD_BUILD_TIMESTAMP), $(build-timestamp-auto))
+build-timestamp = $(or $(KBUILD_BUILD_VERSION_TIMESTAMP), $(KBUILD_BUILD_TIMESTAMP), $(build-timestamp-auto))
# Maximum length of UTS_VERSION is 64 chars
filechk_uts_version = \
-if [ -z "$KBUILD_BUILD_TIMESTAMP" ]; then
- TIMESTAMP=`date`
+if [ -z "$KBUILD_BUILD_VERSION_TIMESTAMP" ]; then
+ if [ -z "$KBUILD_BUILD_TIMESTAMP" ]; then
+ TIMESTAMP=`date`
+ else
+ TIMESTAMP=$KBUILD_BUILD_TIMESTAMP
+ fi
else
- TIMESTAMP=$KBUILD_BUILD_TIMESTAMP
+ TIMESTAMP=$KBUILD_BUILD_VERSION_TIMESTAMP
fi
if test -z "$KBUILD_BUILD_USER"; then
LINUX_COMPILE_BY=$(whoami | sed 's/\\/\\\\/')

View file

@ -19,10 +19,10 @@ Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
1 file changed, 1 insertion(+), 4 deletions(-)
diff --git a/net/bridge/br_stp_if.c b/net/bridge/br_stp_if.c
index 75204d36d7f9062306dfc66c3c35448e16257215..1fb5ff73ec1ef3bd79960182c87a0ba312b3635d 100644
index d174d3a566aa..885e18c72c87 100644
--- a/net/bridge/br_stp_if.c
+++ b/net/bridge/br_stp_if.c
@@ -265,10 +265,7 @@ bool br_stp_recalculate_bridge_id(struct net_bridge *br)
@@ -256,10 +256,7 @@ bool br_stp_recalculate_bridge_id(struct net_bridge *br)
return false;
list_for_each_entry(p, &br->port_list, list) {

View file

@ -55,10 +55,10 @@ Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
2 files changed, 111 insertions(+)
diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index 5ea6b2d54edaff9b7efa20235de92970cabcf769..e36ab4a38709f697860e785c1eb2e8c44f9f7b64 100644
index 1520dc3ad4d2..d3db42dface6 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -4422,6 +4422,15 @@
@@ -3489,6 +3489,15 @@
Also, it enforces the PCI Local Bus spec
rule that those bits should be 0 in system reset
events (useful for kexec/kdump cases).
@ -75,10 +75,10 @@ index 5ea6b2d54edaff9b7efa20235de92970cabcf769..e36ab4a38709f697860e785c1eb2e8c4
Safety option to keep boot IRQs enabled. This
should never be necessary.
diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c
index bf02ee61a933403deba8ba7063d1732fc3ed540e..113e93b623616d787ad1b4d7619a2921069d587b 100644
index ff07d35046b0..81f720abe39e 100644
--- a/drivers/pci/quirks.c
+++ b/drivers/pci/quirks.c
@@ -300,6 +300,106 @@ static int __init pci_apply_final_quirks(void)
@@ -193,6 +193,106 @@ static int __init pci_apply_final_quirks(void)
}
fs_initcall_sync(pci_apply_final_quirks);
@ -185,7 +185,7 @@ index bf02ee61a933403deba8ba7063d1732fc3ed540e..113e93b623616d787ad1b4d7619a2921
/*
* Decoding should be disabled for a PCI device during BAR sizing to avoid
* conflict. But doing so may cause problems on host bridge and perhaps other
@@ -5121,6 +5221,8 @@ static const struct pci_dev_acs_enabled {
@@ -4948,6 +5048,8 @@ static const struct pci_dev_acs_enabled {
{ PCI_VENDOR_ID_CAVIUM, 0xA060, pci_quirk_mf_endpoint_acs },
/* APM X-Gene */
{ PCI_VENDOR_ID_AMCC, 0xE004, pci_quirk_xgene_acs },

View file

@ -13,10 +13,10 @@ Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 16f0c3566f16141af8f5cfeb5dc6b15838ff6ecc..e232b463912db788345e0d38b3128cbee30948ae 100644
index 98edde13ec17..8344711583bc 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -80,7 +80,7 @@ module_param(halt_poll_ns, uint, 0644);
@@ -76,7 +76,7 @@ module_param(halt_poll_ns, uint, 0644);
EXPORT_SYMBOL_GPL(halt_poll_ns);
/* Default doubles per-vcpu halt_poll_ns. */

View file

@ -0,0 +1,32 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Thomas Lamprecht <t.lamprecht@proxmox.com>
Date: Fri, 7 Jun 2019 21:16:42 +0200
Subject: [PATCH] Revert "KVM: VMX: enable nested virtualization by default"
This reverts commit 1e58e5e59148916fa43444a406335a990783fb78
As we're not yet there, and this effectively breaks live migration
for all VMs using host or +vmx which did not manually enabled nesting
Those which already enabled nesting manually have already breakage,
but that was something to expect. The situation will get better in
the future (probably post qemu 4.1).
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
---
arch/x86/kvm/vmx/vmx.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 435dfebf5867..82c2afe46ebf 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -103,7 +103,7 @@ module_param(enable_apicv, bool, S_IRUGO);
* VMX and be a hypervisor for its own guests. If nested=0, guests may not
* use VMX instructions.
*/
-static bool __read_mostly nested = 1;
+static bool __read_mostly nested = 0;
module_param(nested, bool, S_IRUGO);
static u64 __read_mostly host_xss;

View file

@ -3,26 +3,22 @@ From: Thomas Lamprecht <t.lamprecht@proxmox.com>
Date: Wed, 7 Oct 2020 17:18:28 +0200
Subject: [PATCH] net: core: downgrade unregister_netdevice refcount leak from
emergency to error
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
---
net/core/dev.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/net/core/dev.c b/net/core/dev.c
index 25f20c5cc8f55fca8c726df31d8433025e15ebb4..d0fa7a5768d555fce321533a2d46703d647d7474 100644
index f2c6fc836f9d..8940c12333b0 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -10680,7 +10680,7 @@ static struct net_device *netdev_wait_allrefs_any(struct list_head *list)
if (time_after(jiffies, warning_time +
READ_ONCE(netdev_unregister_timeout_secs) * HZ)) {
list_for_each_entry(dev, list, todo_list) {
- pr_emerg("unregister_netdevice: waiting for %s to become free. Usage count = %d\n",
+ pr_err("unregister_netdevice: waiting for %s to become free. Usage count = %d\n",
dev->name, netdev_refcnt_read(dev));
ref_tracker_dir_print(&dev->refcnt_tracker, 10);
}
@@ -9368,7 +9368,7 @@ static void netdev_wait_allrefs(struct net_device *dev)
refcnt = netdev_refcnt_read(dev);
if (refcnt && time_after(jiffies, warning_time + 10 * HZ)) {
- pr_emerg("unregister_netdevice: waiting for %s to become free. Usage count = %d\n",
+ pr_err("unregister_netdevice: waiting for %s to become free. Usage count = %d\n",
dev->name, refcnt);
warning_time = jiffies;
}

View file

@ -1,30 +0,0 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Thomas Lamprecht <t.lamprecht@proxmox.com>
Date: Tue, 10 Jan 2023 08:52:40 +0100
Subject: [PATCH] Revert "fortify: Do not cast to "unsigned char""
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
This reverts commit 106b7a61c488d2022f44e3531ce33461c7c0685f.
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
---
include/linux/fortify-string.h | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/include/linux/fortify-string.h b/include/linux/fortify-string.h
index 0d99bf11d260a3482bbe46e35c7553c0ccfb8b94..fe04f7f9357506baf21a0c3cc070c37f00a24d5c 100644
--- a/include/linux/fortify-string.h
+++ b/include/linux/fortify-string.h
@@ -62,7 +62,7 @@ void __write_overflow_field(size_t avail, size_t wanted) __compiletime_warning("
#define __compiletime_strlen(p) \
({ \
- char *__p = (char *)(p); \
+ unsigned char *__p = (unsigned char *)(p); \
size_t __ret = SIZE_MAX; \
const size_t __p_size = __member_size(p); \
if (__p_size != SIZE_MAX && \

View file

@ -0,0 +1,122 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Miklos Szeredi <mszeredi@redhat.com>
Date: Thu, 14 May 2020 16:44:23 +0200
Subject: [PATCH] vfs: allow unprivileged whiteout creation
Whiteouts, unlike real device node should not require privileges to create.
The general concern with device nodes is that opening them can have side
effects. The kernel already avoids zero major (see
Documentation/admin-guide/devices.txt). To be on the safe side the patch
explicitly forbids registering a char device with 0/0 number (see
cdev_add()).
This guarantees that a non-O_PATH open on a whiteout will fail with ENODEV;
i.e. it won't have any side effect.
Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
(cherry picked from commit a3c751a50fe6bbe50eb7622a14b18b361804ee0c)
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
---
fs/char_dev.c | 3 +++
fs/namei.c | 21 +++------------------
include/linux/device_cgroup.h | 3 +++
include/linux/fs.h | 6 +++++-
4 files changed, 14 insertions(+), 19 deletions(-)
diff --git a/fs/char_dev.c b/fs/char_dev.c
index c5e6eff5a381..ba0ded7842a7 100644
--- a/fs/char_dev.c
+++ b/fs/char_dev.c
@@ -483,6 +483,9 @@ int cdev_add(struct cdev *p, dev_t dev, unsigned count)
p->dev = dev;
p->count = count;
+ if (WARN_ON(dev == WHITEOUT_DEV))
+ return -EBUSY;
+
error = kobj_map(cdev_map, dev, count, NULL,
exact_match, exact_lock, p);
if (error)
diff --git a/fs/namei.c b/fs/namei.c
index f9c46c7abd80..a5e907558b88 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -3690,12 +3690,14 @@ EXPORT_SYMBOL(user_path_create);
int vfs_mknod(struct inode *dir, struct dentry *dentry, umode_t mode, dev_t dev)
{
+ bool is_whiteout = S_ISCHR(mode) && dev == WHITEOUT_DEV;
int error = may_create(dir, dentry);
if (error)
return error;
- if ((S_ISCHR(mode) || S_ISBLK(mode)) && !capable(CAP_MKNOD))
+ if ((S_ISCHR(mode) || S_ISBLK(mode)) && !is_whiteout &&
+ !capable(CAP_MKNOD))
return -EPERM;
if (!dir->i_op->mknod)
@@ -4530,9 +4532,6 @@ static int do_renameat2(int olddfd, const char __user *oldname, int newdfd,
(flags & RENAME_EXCHANGE))
return -EINVAL;
- if ((flags & RENAME_WHITEOUT) && !capable(CAP_MKNOD))
- return -EPERM;
-
if (flags & RENAME_EXCHANGE)
target_flags = 0;
@@ -4668,20 +4667,6 @@ SYSCALL_DEFINE2(rename, const char __user *, oldname, const char __user *, newna
return do_renameat2(AT_FDCWD, oldname, AT_FDCWD, newname, 0);
}
-int vfs_whiteout(struct inode *dir, struct dentry *dentry)
-{
- int error = may_create(dir, dentry);
- if (error)
- return error;
-
- if (!dir->i_op->mknod)
- return -EPERM;
-
- return dir->i_op->mknod(dir, dentry,
- S_IFCHR | WHITEOUT_MODE, WHITEOUT_DEV);
-}
-EXPORT_SYMBOL(vfs_whiteout);
-
int readlink_copy(char __user *buffer, int buflen, const char *link)
{
int len = PTR_ERR(link);
diff --git a/include/linux/device_cgroup.h b/include/linux/device_cgroup.h
index 8557efe096dc..fc989487c273 100644
--- a/include/linux/device_cgroup.h
+++ b/include/linux/device_cgroup.h
@@ -62,6 +62,9 @@ static inline int devcgroup_inode_mknod(int mode, dev_t dev)
if (!S_ISBLK(mode) && !S_ISCHR(mode))
return 0;
+ if (S_ISCHR(mode) && dev == WHITEOUT_DEV)
+ return 0;
+
if (S_ISBLK(mode))
type = DEVCG_DEV_BLOCK;
else
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 2bd06577c02a..fc22bade5b21 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1719,7 +1719,11 @@ extern int vfs_link(struct dentry *, struct inode *, struct dentry *, struct ino
extern int vfs_rmdir(struct inode *, struct dentry *);
extern int vfs_unlink(struct inode *, struct dentry *, struct inode **);
extern int vfs_rename(struct inode *, struct dentry *, struct inode *, struct dentry *, struct inode **, unsigned int);
-extern int vfs_whiteout(struct inode *, struct dentry *);
+
+static inline int vfs_whiteout(struct inode *dir, struct dentry *dentry)
+{
+ return vfs_mknod(dir, dentry, S_IFCHR | WHITEOUT_MODE, WHITEOUT_DEV);
+}
extern struct dentry *vfs_tmpfile(struct dentry *dentry, umode_t mode,
int open_flag);

View file

@ -0,0 +1,35 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Thu, 30 Jun 2022 16:48:18 -0400
Subject: [PATCH] SUNRPC: Fix READ_PLUS crasher
commit a23dd544debcda4ee4a549ec7de59e85c3c8345c upstream.
Looks like there are still cases when "space_left - frag1bytes" can
legitimately exceed PAGE_SIZE. Ensure that xdr->end always remains
within the current encode buffer.
Reported-by: Bruce Fields <bfields@fieldses.org>
Reported-by: Zorro Lang <zlang@redhat.com>
Link: https://bugzilla.kernel.org/show_bug.cgi?id=216151
Fixes: 6c254bf3b637 ("SUNRPC: Fix the calculation of xdr->end in xdr_get_next_encode_buffer()")
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
---
net/sunrpc/xdr.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/net/sunrpc/xdr.c b/net/sunrpc/xdr.c
index cb8740d15633..daa4165f1179 100644
--- a/net/sunrpc/xdr.c
+++ b/net/sunrpc/xdr.c
@@ -608,7 +608,7 @@ static __be32 *xdr_get_next_encode_buffer(struct xdr_stream *xdr,
*/
xdr->p = (void *)p + frag2bytes;
space_left = xdr->buf->buflen - xdr->buf->len;
- if (space_left - nbytes >= PAGE_SIZE)
+ if (space_left - frag1bytes >= PAGE_SIZE)
xdr->end = (void *)p + PAGE_SIZE;
else
xdr->end = (void *)p + space_left - frag1bytes;

View file

@ -1,135 +0,0 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Thomas Lamprecht <t.lamprecht@proxmox.com>
Date: Fri, 14 Jul 2023 18:10:32 +0200
Subject: [PATCH] kvm: xsave set: mask-out PKRU bit in xfeatures if vCPU has no
support
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Fixes live-migrations & snapshot-rollback of VMs with a restricted
CPU type (e.g., qemu64) from our 5.15 based kernel (default Proxmox
VE 7.4) to the 6.2 (and future newer) of Proxmox VE 8.0.
Previous to ad856280ddea ("x86/kvm/fpu: Limit guest user_xfeatures to
supported bits of XCR0") the PKRU bit of the host could leak into the
state from the guest, which caused trouble when migrating between
hosts with different CPUs, i.e., where the source supported it but
the target did not, causing a general protection fault when the guest
tried to use a pkru related instruction after the migration.
But the fix, while welcome, caused a temporary out-of-sync state when
migrating such a VM from a kernel without the fix to a kernel with
the fix, as it threw of KVM when the CPUID of the guest and most of
the state doesn't report XSAVE and thus any xfeatures, but PKRU and
the related state is set as enabled, causing the vCPU to spin at 100%
without any progress forever.
The fix could be at two sites, either in QEMU or in the kernel, I
choose the kernel as we have all the info there for a targeted
heuristic so that we don't have to adapt QEMU and qemu-server, the
latter even on both sides.
Still, a short summary of the possible fixes and short drawbacks:
* on QEMU-side either
- clear the PKRU state in the migration saved state would be rather
complicated to implement as the vCPU is initialised way before we
have the saved xfeature state available to check what we'd need
to do, plus the user-space only gets a memory blob from ioctl
KVM_GET_XSAVE2 that it passes to KVM_SET_XSAVE ioctl, there are
no ABI guarantees, and while the struct seem stable for 5.15 to
6.5-rc1, that doesn't has to be for future kernels, so off the
table.
- enforce that the CPUID reports PKU support even if it normally
wouldn't. While this works (tested by hard-coding it as POC) it
is a) not really nice and b) needs some interaction from
qemu-server to enable this flag as otherwise we have no good info
to decide when it's OK to do this, which means we need to adapt
both PVE 7 and 8's qemu-server and also pve-qemu, workable but
not optimal
* on Kernel/KVM-side we can hook into the set XSAVE ioctl specific to
the KVM subsystem, which already reduces chance of regression for
all other places. There we have access to the union/struct
definitions of the saved state and thus can savely cast to that.
We also got access to the vCPU's CPUID capabilities, meaning we can
check if the XCR0 (first XSAVE Control Register) reports
that it support the PKRU feature, and if it does *NOT* but the
saved xfeatures register from XSAVE *DOES* report it, we can safely
assume that this combination is due to an migration from an older,
leaky kernel and clear the bit in the xfeature register before
restoring it to the guest vCPU KVM state, avoiding the confusing
situation that made the vCPU spin at 100%.
This should be safe to do, as the guest vCPU CPUID never reported
support for the PKRU feature, and it's also a relatively niche and
newish feature.
If it gains us something we can drop this patch a bit in the future
Proxmox VE 9 major release, but we should ensure that VMs that where
started before PVE 8 cannot be directly live-migrated to the release
that includes that change; so we should rather only drop it if the
maintenance burden is high.
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
---
arch/x86/kvm/cpuid.c | 6 ++++++
arch/x86/kvm/cpuid.h | 3 +++
arch/x86/kvm/x86.c | 13 +++++++++++++
3 files changed, 22 insertions(+)
diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index be2baf851ec7d63a2095247d828f390b9757f905..dc73965aa73b21d26b4cf039336da3ca38e89bc6 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -290,6 +290,12 @@ static u64 cpuid_get_supported_xcr0(struct kvm_cpuid_entry2 *entries, int nent)
return (best->eax | ((u64)best->edx << 32)) & kvm_caps.supported_xcr0;
}
+bool vcpu_supports_xsave_pkru(struct kvm_vcpu *vcpu) {
+ u64 guest_supported_xcr0 = cpuid_get_supported_xcr0(
+ vcpu->arch.cpuid_entries, vcpu->arch.cpuid_nent);
+ return (guest_supported_xcr0 & XFEATURE_MASK_PKRU) != 0;
+}
+
static void __kvm_update_cpuid_runtime(struct kvm_vcpu *vcpu, struct kvm_cpuid_entry2 *entries,
int nent)
{
diff --git a/arch/x86/kvm/cpuid.h b/arch/x86/kvm/cpuid.h
index ad479cfb91bc7bc5d400d2c098536abb4d4babe5..e55eecb2f3646ff7ef63c107c5cc5481fabb8a51 100644
--- a/arch/x86/kvm/cpuid.h
+++ b/arch/x86/kvm/cpuid.h
@@ -32,7 +32,10 @@ int kvm_vcpu_ioctl_get_cpuid2(struct kvm_vcpu *vcpu,
bool kvm_cpuid(struct kvm_vcpu *vcpu, u32 *eax, u32 *ebx,
u32 *ecx, u32 *edx, bool exact_only);
+bool vcpu_supports_xsave_pkru(struct kvm_vcpu *vcpu);
+
void __init kvm_init_xstate_sizes(void);
+
u32 xstate_required_size(u64 xstate_bv, bool compacted);
int cpuid_query_maxphyaddr(struct kvm_vcpu *vcpu);
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 3667ba359e63579eaff36fea92bf19a84e5df592..4d10fc1a9b4114d1e2edf133717f307043560263 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -5633,6 +5633,19 @@ static int kvm_vcpu_ioctl_x86_set_xsave(struct kvm_vcpu *vcpu,
if (fpstate_is_confidential(&vcpu->arch.guest_fpu))
return vcpu->kvm->arch.has_protected_state ? -EINVAL : 0;
+ if (!vcpu_supports_xsave_pkru(vcpu)) {
+ void *buf = guest_xsave->region;
+ union fpregs_state *ustate = buf;
+ if (ustate->xsave.header.xfeatures & XFEATURE_MASK_PKRU) {
+ printk(
+ KERN_NOTICE "clearing PKRU xfeature bit as vCPU from PID %d"
+ " reports no PKRU support - migration from fpu-leaky kernel?",
+ current->pid
+ );
+ ustate->xsave.header.xfeatures &= ~XFEATURE_MASK_PKRU;
+ }
+ }
+
return fpu_copy_uabi_to_guest_fpstate(&vcpu->arch.guest_fpu,
guest_xsave->region,
kvm_caps.supported_xcr0,

View file

@ -0,0 +1,42 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <trond.myklebust@hammerspace.com>
Date: Tue, 2 Aug 2022 15:48:50 -0400
Subject: [PATCH] NFSv4/pnfs: Fix a use-after-free bug in open
commit 2135e5d56278ffdb1c2e6d325dc6b87f669b9dac upstream.
If someone cancels the open RPC call, then we must not try to free
either the open slot or the layoutget operation arguments, since they
are likely still in use by the hung RPC call.
Fixes: 6949493884fe ("NFSv4: Don't hold the layoutget locks across multiple RPC calls")
Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
---
fs/nfs/nfs4proc.c | 11 ++++++-----
1 file changed, 6 insertions(+), 5 deletions(-)
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index eee2d67d3ac9..831a16fec616 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -3041,12 +3041,13 @@ static int _nfs4_open_and_get_state(struct nfs4_opendata *opendata,
}
out:
- if (opendata->lgp) {
- nfs4_lgopen_release(opendata->lgp);
- opendata->lgp = NULL;
- }
- if (!opendata->cancelled)
+ if (!opendata->cancelled) {
+ if (opendata->lgp) {
+ nfs4_lgopen_release(opendata->lgp);
+ opendata->lgp = NULL;
+ }
nfs4_sequence_free_slot(&opendata->o_res.seq_res);
+ }
return ret;
}

View file

@ -1,43 +0,0 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: kiler129 <grzegorz@noflash.pl>
Date: Mon, 18 Sep 2023 15:19:26 +0200
Subject: [PATCH] allow opt-in to allow pass-through on broken hardware..
adapted from https://github.com/kiler129/relax-intel-rmrr , licensed under MIT or GPL 2.0+
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
---
drivers/iommu/intel/iommu.c | 6 +++++-
1 file changed, 5 insertions(+), 1 deletion(-)
diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c
index b857633622ea8550299554e211b84c48196bf902..5cb3b52b350c3d6bd627a29e6000ab10b58fd6e1 100644
--- a/drivers/iommu/intel/iommu.c
+++ b/drivers/iommu/intel/iommu.c
@@ -228,6 +228,7 @@ EXPORT_SYMBOL_GPL(intel_iommu_enabled);
static int dmar_map_ipu = 1;
static int intel_iommu_superpage = 1;
static int iommu_identity_mapping;
+static int intel_relaxable_rmrr = 0;
static int iommu_skip_te_disable;
static int disable_igfx_iommu;
@@ -290,6 +291,9 @@ static int __init intel_iommu_setup(char *str)
} else if (!strncmp(str, "tboot_noforce", 13)) {
pr_info("Intel-IOMMU: not forcing on after tboot. This could expose security risk for tboot\n");
intel_iommu_tboot_noforce = 1;
+ } else if (!strncmp(str, "relax_rmrr", 10)) {
+ pr_info("Intel-IOMMU: assuming all RMRRs are relaxable. This can lead to instability or data loss\n");
+ intel_relaxable_rmrr = 1;
} else {
pr_notice("Unknown option - '%s'\n", str);
}
@@ -2165,7 +2169,7 @@ static bool device_rmrr_is_relaxable(struct device *dev)
return false;
pdev = to_pci_dev(dev);
- if (IS_USB_DEVICE(pdev) || IS_GFX_DEVICE(pdev))
+ if (intel_relaxable_rmrr || IS_USB_DEVICE(pdev) || IS_GFX_DEVICE(pdev))
return true;
else
return false;

View file

@ -1,37 +0,0 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc@google.com>
Date: Wed, 18 Oct 2023 12:41:04 -0700
Subject: [PATCH] KVM: nSVM: Advertise support for flush-by-ASID
Advertise support for FLUSHBYASID when nested SVM is enabled, as KVM can
always emulate flushing TLB entries for a vmcb12 ASID, e.g. by running L2
with a new, fresh ASID in vmcb02. Some modern hypervisors, e.g. VMWare
Workstation 17, require FLUSHBYASID support and will refuse to run if it's
not present.
Punt on proper support, as "Honor L1's request to flush an ASID on nested
VMRUN" is one of the TODO items in the (incomplete) list of issues that
need to be addressed in order for KVM to NOT do a full TLB flush on every
nested SVM transition (see nested_svm_transition_tlb_flush()).
Reported-by: Stefan Sterz <s.sterz@proxmox.com>
Closes: https://lkml.kernel.org/r/b9915c9c-4cf6-051a-2d91-44cc6380f455%40proxmox.com
Signed-off-by: Sean Christopherson <seanjc@google.com>
Signed-off-by: Stefan Sterz <s.sterz@proxmox.com>
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
---
arch/x86/kvm/svm/svm.c | 1 +
1 file changed, 1 insertion(+)
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index 22513133925e0ca5a889ae6105a829af3957778c..1435e5b0a7f604bd0146b7feb5dd06a4516925a1 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -5176,6 +5176,7 @@ static __init void svm_set_cpu_caps(void)
if (nested) {
kvm_cpu_cap_set(X86_FEATURE_SVM);
kvm_cpu_cap_set(X86_FEATURE_VMCBCLEAN);
+ kvm_cpu_cap_set(X86_FEATURE_FLUSHBYASID);
/*
* KVM currently flushes TLBs on *every* nested SVM transition,

View file

@ -0,0 +1,70 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Nilesh Javali <njavali@marvell.com>
Date: Tue, 12 Jul 2022 22:20:36 -0700
Subject: [PATCH] scsi: Revert "scsi: qla2xxx: Fix disk failure to rediscover"
commit 5bc7b01c513a4a9b4cfe306e8d1720cfcfd3b8a3 upstream.
This fixes the regression of NVMe discovery failure during driver load
time.
This reverts commit 6a45c8e137d4e2c72eecf1ac7cf64f2fdfcead99.
Link: https://lore.kernel.org/r/20220713052045.10683-2-njavali@marvell.com
Cc: stable@vger.kernel.org
Reviewed-by: Himanshu Madhani <himanshu.madhani@oracle.com>
Signed-off-by: Nilesh Javali <njavali@marvell.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
---
drivers/scsi/qla2xxx/qla_init.c | 5 ++---
drivers/scsi/qla2xxx/qla_nvme.c | 5 -----
2 files changed, 2 insertions(+), 8 deletions(-)
diff --git a/drivers/scsi/qla2xxx/qla_init.c b/drivers/scsi/qla2xxx/qla_init.c
index 1800eb0aad1c..c662c8af6cc1 100644
--- a/drivers/scsi/qla2xxx/qla_init.c
+++ b/drivers/scsi/qla2xxx/qla_init.c
@@ -5422,8 +5422,6 @@ qla2x00_reg_remote_port(scsi_qla_host_t *vha, fc_port_t *fcport)
if (atomic_read(&fcport->state) == FCS_ONLINE)
return;
- qla2x00_set_fcport_state(fcport, FCS_ONLINE);
-
rport_ids.node_name = wwn_to_u64(fcport->node_name);
rport_ids.port_name = wwn_to_u64(fcport->port_name);
rport_ids.port_id = fcport->d_id.b.domain << 16 |
@@ -5519,7 +5517,6 @@ qla2x00_update_fcport(scsi_qla_host_t *vha, fc_port_t *fcport)
qla2x00_reg_remote_port(vha, fcport);
break;
case MODE_TARGET:
- qla2x00_set_fcport_state(fcport, FCS_ONLINE);
if (!vha->vha_tgt.qla_tgt->tgt_stop &&
!vha->vha_tgt.qla_tgt->tgt_stopped)
qlt_fc_port_added(vha, fcport);
@@ -5534,6 +5531,8 @@ qla2x00_update_fcport(scsi_qla_host_t *vha, fc_port_t *fcport)
break;
}
+ qla2x00_set_fcport_state(fcport, FCS_ONLINE);
+
if (IS_IIDMA_CAPABLE(vha->hw) && vha->hw->flags.gpsc_supported) {
if (fcport->id_changed) {
fcport->id_changed = 0;
diff --git a/drivers/scsi/qla2xxx/qla_nvme.c b/drivers/scsi/qla2xxx/qla_nvme.c
index a15af048cd82..f0de7089e9ae 100644
--- a/drivers/scsi/qla2xxx/qla_nvme.c
+++ b/drivers/scsi/qla2xxx/qla_nvme.c
@@ -36,11 +36,6 @@ int qla_nvme_register_remote(struct scsi_qla_host *vha, struct fc_port *fcport)
(fcport->nvme_flag & NVME_FLAG_REGISTERED))
return 0;
- if (atomic_read(&fcport->state) == FCS_ONLINE)
- return 0;
-
- qla2x00_set_fcport_state(fcport, FCS_ONLINE);
-
fcport->nvme_flag &= ~NVME_FLAG_RESETTING;
memset(&req, 0, sizeof(struct nvme_fc_port_info));

View file

@ -0,0 +1,73 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Li Lingfeng <lilingfeng3@huawei.com>
Date: Fri, 17 Jun 2022 14:25:15 +0800
Subject: [PATCH] ext4: recover csum seed of tmp_inode after migrating to
extents
[ Upstream commit 07ea7a617d6b278fb7acedb5cbe1a81ce2de7d0c ]
When migrating to extents, the checksum seed of temporary inode
need to be replaced by inode's, otherwise the inode checksums
will be incorrect when swapping the inodes data.
However, the temporary inode can not match it's checksum to
itself since it has lost it's own checksum seed.
mkfs.ext4 -F /dev/sdc
mount /dev/sdc /mnt/sdc
xfs_io -fc "pwrite 4k 4k" -c "fsync" /mnt/sdc/testfile
chattr -e /mnt/sdc/testfile
chattr +e /mnt/sdc/testfile
umount /dev/sdc
fsck -fn /dev/sdc
========
...
Pass 1: Checking inodes, blocks, and sizes
Inode 13 passes checks, but checksum does not match inode. Fix? no
...
========
The fix is simple, save the checksum seed of temporary inode, and
recover it after migrating to extents.
Fixes: e81c9302a6c3 ("ext4: set csum seed in tmp inode while migrating to extents")
Signed-off-by: Li Lingfeng <lilingfeng3@huawei.com>
Reviewed-by: Jan Kara <jack@suse.cz>
Link: https://lore.kernel.org/r/20220617062515.2113438-1-lilingfeng3@huawei.com
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
Signed-off-by: Sasha Levin <sashal@kernel.org>
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
---
fs/ext4/migrate.c | 4 +++-
1 file changed, 3 insertions(+), 1 deletion(-)
diff --git a/fs/ext4/migrate.c b/fs/ext4/migrate.c
index c5b2ea1a9372..1faa8e4ffb9d 100644
--- a/fs/ext4/migrate.c
+++ b/fs/ext4/migrate.c
@@ -435,7 +435,7 @@ int ext4_ext_migrate(struct inode *inode)
struct inode *tmp_inode = NULL;
struct migrate_struct lb;
unsigned long max_entries;
- __u32 goal;
+ __u32 goal, tmp_csum_seed;
uid_t owner[2];
/*
@@ -483,6 +483,7 @@ int ext4_ext_migrate(struct inode *inode)
* the migration.
*/
ei = EXT4_I(inode);
+ tmp_csum_seed = EXT4_I(tmp_inode)->i_csum_seed;
EXT4_I(tmp_inode)->i_csum_seed = ei->i_csum_seed;
i_size_write(tmp_inode, i_size_read(inode));
/*
@@ -593,6 +594,7 @@ int ext4_ext_migrate(struct inode *inode)
* the inode is not visible to user space.
*/
tmp_inode->i_blocks = 0;
+ EXT4_I(tmp_inode)->i_csum_seed = tmp_csum_seed;
/* Reset the extent details */
ext4_ext_tree_init(handle, tmp_inode);

View file

@ -1,44 +0,0 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Thomas Lamprecht <t.lamprecht@proxmox.com>
Date: Mon, 6 Nov 2023 10:17:02 +0100
Subject: [PATCH] revert "memfd: improve userspace warnings for missing
exec-related flags".
This warning is telling userspace developers to pass MFD_EXEC and
MFD_NOEXEC_SEAL to memfd_create(). Commit 434ed3350f57 ("memfd: improve
userspace warnings for missing exec-related flags") made the warning more
frequent and visible in the hope that this would accelerate the fixing of
errant userspace.
But the overall effect is to generate far too much dmesg noise.
Fixes: 434ed3350f57 ("memfd: improve userspace warnings for missing exec-related flags")
Reported-by: Damian Tometzki <dtometzki@fedoraproject.org>
Closes: https://lkml.kernel.org/r/ZPFzCSIgZ4QuHsSC@fedora.fritz.box
Cc: Aleksa Sarai <cyphar@cyphar.com>
Cc: Christian Brauner <brauner@kernel.org>
Cc: Daniel Verkamp <dverkamp@chromium.org>
Cc: Jeff Xu <jeffxu@google.com>
Cc: Kees Cook <keescook@chromium.org>
Cc: Shuah Khan <shuah@kernel.org>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
(cherry picked from commit 2562d67b1bdf91c7395b0225d60fdeb26b4bc5a0)
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
---
mm/memfd.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/mm/memfd.c b/mm/memfd.c
index c17c3ea701a17e9f3a652e77ba60ca9c58b0ca8e..63340d874f1e4aa139b3cce8e4fffcffc0106884 100644
--- a/mm/memfd.c
+++ b/mm/memfd.c
@@ -318,7 +318,7 @@ static int check_sysctl_memfd_noexec(unsigned int *flags)
}
if (!(*flags & MFD_NOEXEC_SEAL) && sysctl >= MEMFD_NOEXEC_SCOPE_NOEXEC_ENFORCED) {
- pr_err_ratelimited(
+ pr_warn_once(
"%s[%d]: memfd_create() requires MFD_NOEXEC_SEAL with vm.memfd_noexec=%d\n",
current->comm, task_pid_nr(current), sysctl);
return -EACCES;

View file

@ -1,31 +0,0 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Wolfgang Bumiller <w.bumiller@proxmox.com>
Date: Wed, 10 Apr 2024 13:21:59 +0200
Subject: [PATCH] apparmor: expect msg_namelen=0 for recvmsg calls
When coming from sys_recvmsg, msg->msg_namelen is explicitly set to
zero early on. (see ____sys_recvmsg in net/socket.c)
We still end up in 'map_addr' where the assumption is that addr !=
NULL means addrlen has a valid size.
This is likely not a final fix, it was suggested by jjohansen on irc
to get things going until this is resolved properly.
Signed-off-by: Wolfgang Bumiller <w.bumiller@proxmox.com>
---
security/apparmor/af_inet.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/security/apparmor/af_inet.c b/security/apparmor/af_inet.c
index 57b710054a76582346f37671843f3f8d6e99331c..35f905d9b960f62fa2ecb80b5c1a8e9edecd9b5d 100644
--- a/security/apparmor/af_inet.c
+++ b/security/apparmor/af_inet.c
@@ -766,7 +766,7 @@ int aa_inet_msg_perm(const char *op, u32 request, struct socket *sock,
/* do we need early bailout for !family ... */
return sk_has_perm2(sock->sk, op, request, profile, ad,
map_sock_addr(sock, ADDR_LOCAL, &laddr, &ad),
- map_addr(msg->msg_name, msg->msg_namelen, 0,
+ map_addr(msg->msg_namelen == 0 ? NULL : msg->msg_name, msg->msg_namelen, 0,
ADDR_REMOTE, &raddr, &ad),
profile_remote_perm(profile, sock->sk, request,
&raddr, &laddr.maddr, &ad));

View file

@ -0,0 +1,72 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Chris Wilson <chris.p.wilson@intel.com>
Date: Tue, 12 Jul 2022 16:21:33 +0100
Subject: [PATCH] drm/i915/gt: Serialize TLB invalidates with GT resets
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
[ Upstream commit a1c5a7bf79c1faa5633b918b5c0666545e84c4d1 ]
Avoid trying to invalidate the TLB in the middle of performing an
engine reset, as this may result in the reset timing out. Currently,
the TLB invalidate is only serialised by its own mutex, forgoing the
uncore lock, but we can take the uncore->lock as well to serialise
the mmio access, thereby serialising with the GDRST.
Tested on a NUC5i7RYB, BIOS RYBDWi35.86A.0380.2019.0517.1530 with
i915 selftest/hangcheck.
Cc: stable@vger.kernel.org # v4.4 and upper
Fixes: 7938d61591d3 ("drm/i915: Flush TLBs before releasing backing store")
Reported-by: Mauro Carvalho Chehab <mchehab@kernel.org>
Tested-by: Mauro Carvalho Chehab <mchehab@kernel.org>
Reviewed-by: Mauro Carvalho Chehab <mchehab@kernel.org>
Signed-off-by: Chris Wilson <chris.p.wilson@intel.com>
Cc: Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com>
Reviewed-by: Andi Shyti <andi.shyti@linux.intel.com>
Acked-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab@kernel.org>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/1e59a7c45dd919a530256b9ac721ac6ea86c0677.1657639152.git.mchehab@kernel.org
(cherry picked from commit 33da97894758737895e90c909f16786052680ef4)
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Signed-off-by: Sasha Levin <sashal@kernel.org>
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
---
drivers/gpu/drm/i915/gt/intel_gt.c | 15 ++++++++++++++-
1 file changed, 14 insertions(+), 1 deletion(-)
diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c b/drivers/gpu/drm/i915/gt/intel_gt.c
index c8c070375d29..f6d7f5d307d7 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt.c
@@ -339,6 +339,20 @@ void intel_gt_invalidate_tlbs(struct intel_gt *gt)
mutex_lock(&gt->tlb_invalidate_lock);
intel_uncore_forcewake_get(uncore, FORCEWAKE_ALL);
+ spin_lock_irq(&uncore->lock); /* serialise invalidate with GT reset */
+
+ for_each_engine(engine, gt, id) {
+ struct reg_and_bit rb;
+
+ rb = get_reg_and_bit(engine, regs == gen8_regs, regs, num);
+ if (!i915_mmio_reg_offset(rb.reg))
+ continue;
+
+ intel_uncore_write_fw(uncore, rb.reg, rb.bit);
+ }
+
+ spin_unlock_irq(&uncore->lock);
+
for_each_engine(engine, gt, id) {
/*
* HW architecture suggest typical invalidation time at 40us,
@@ -353,7 +367,6 @@ void intel_gt_invalidate_tlbs(struct intel_gt *gt)
if (!i915_mmio_reg_offset(rb.reg))
continue;
- intel_uncore_write_fw(uncore, rb.reg, rb.bit);
if (__intel_wait_for_register_fw(uncore,
rb.reg, rb.bit, 0,
timeout_us, timeout_ms,

View file

@ -1,28 +0,0 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Christian Ebner <c.ebner@proxmox.com>
Date: Wed, 2 Oct 2024 15:24:31 +0200
Subject: [PATCH] netfs: reset subreq iov iter before tail clean
Make sure the iter is at the correct location when cleaning up tail
bytes for incomplete read subrequests.
Fixes: 92b6cc5d ("netfs: Add iov_iters to (sub)requests to describe various buffers")
Closes: https://bugzilla.kernel.org/show_bug.cgi?id=219237
Signed-off-by: Christian Ebner <c.ebner@proxmox.com>
---
fs/netfs/io.c | 1 +
1 file changed, 1 insertion(+)
diff --git a/fs/netfs/io.c b/fs/netfs/io.c
index d6ada4eba74455aad26273a63247356a3910dc4e..500119285346be28a87698dd6ac66b5e276a6c66 100644
--- a/fs/netfs/io.c
+++ b/fs/netfs/io.c
@@ -528,6 +528,7 @@ void netfs_subreq_terminated(struct netfs_io_subrequest *subreq,
incomplete:
if (test_bit(NETFS_SREQ_CLEAR_TAIL, &subreq->flags)) {
+ netfs_reset_subreq_iter(rreq, subreq);
netfs_clear_unread(subreq);
subreq->transferred = subreq->len;
goto complete;

View file

@ -1,97 +0,0 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Stoiko Ivanov <s.ivanov@proxmox.com>
Date: Mon, 25 Nov 2024 11:10:35 +0100
Subject: [PATCH] Revert "UBUNTU: SAUCE: iommu/intel: disable DMAR for SKL
integrated gfx"
Some of our users use the iGPU for PCI-passthrough on those
platforms, which seems broken with this commit added.
https://forum.proxmox.com/threads/.157266
This reverts both, commit b310f5f58c83 ("UBUNTU: SAUCE: iommu/intel:
disable DMAR for SKL integrated gfx") and also commit 252bf1619fd5
("UBUNTU: SAUCE: iommu/intel: disable DMAR for KBL and CML integrated
gfx").
---
drivers/iommu/intel/iommu.c | 68 -------------------------------------
1 file changed, 68 deletions(-)
diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c
index 5cb3b52b350c3d6bd627a29e6000ab10b58fd6e1..9d2de5cdaeaf5f19d7fea14a21b018033e275ac7 100644
--- a/drivers/iommu/intel/iommu.c
+++ b/drivers/iommu/intel/iommu.c
@@ -4780,74 +4780,6 @@ DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1632, quirk_iommu_igfx);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x163A, quirk_iommu_igfx);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x163D, quirk_iommu_igfx);
-/* SKL */
-DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1906, quirk_iommu_igfx);
-DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1913, quirk_iommu_igfx);
-DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x190E, quirk_iommu_igfx);
-DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1915, quirk_iommu_igfx);
-DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1902, quirk_iommu_igfx);
-DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x190A, quirk_iommu_igfx);
-DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x190B, quirk_iommu_igfx);
-DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1917, quirk_iommu_igfx);
-DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1916, quirk_iommu_igfx);
-DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1921, quirk_iommu_igfx);
-DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x191E, quirk_iommu_igfx);
-DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1912, quirk_iommu_igfx);
-DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x191A, quirk_iommu_igfx);
-DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x191B, quirk_iommu_igfx);
-DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x191D, quirk_iommu_igfx);
-DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1923, quirk_iommu_igfx);
-DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1926, quirk_iommu_igfx);
-DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1927, quirk_iommu_igfx);
-DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x192A, quirk_iommu_igfx);
-DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x192B, quirk_iommu_igfx);
-DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x192D, quirk_iommu_igfx);
-DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1932, quirk_iommu_igfx);
-DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x193A, quirk_iommu_igfx);
-DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x193B, quirk_iommu_igfx);
-DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x193D, quirk_iommu_igfx);
-
-/* KBL */
-DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x5902, quirk_iommu_igfx);
-DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x5906, quirk_iommu_igfx);
-DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x5908, quirk_iommu_igfx);
-DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x590A, quirk_iommu_igfx);
-DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x590B, quirk_iommu_igfx);
-DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x590E, quirk_iommu_igfx);
-DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x5912, quirk_iommu_igfx);
-DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x5913, quirk_iommu_igfx);
-DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x5915, quirk_iommu_igfx);
-DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x5916, quirk_iommu_igfx);
-DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x5917, quirk_iommu_igfx);
-DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x591A, quirk_iommu_igfx);
-DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x591B, quirk_iommu_igfx);
-DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x591D, quirk_iommu_igfx);
-DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x591E, quirk_iommu_igfx);
-DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x5921, quirk_iommu_igfx);
-DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x5923, quirk_iommu_igfx);
-DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x5926, quirk_iommu_igfx);
-DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x5927, quirk_iommu_igfx);
-DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x593B, quirk_iommu_igfx);
-
-/* CML */
-DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x9B21, quirk_iommu_igfx);
-DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x9BA2, quirk_iommu_igfx);
-DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x9BA4, quirk_iommu_igfx);
-DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x9BA5, quirk_iommu_igfx);
-DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x9BA8, quirk_iommu_igfx);
-DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x9BAA, quirk_iommu_igfx);
-DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x9BAC, quirk_iommu_igfx);
-DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x9BC2, quirk_iommu_igfx);
-DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x9BC4, quirk_iommu_igfx);
-DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x9BC5, quirk_iommu_igfx);
-DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x9BC6, quirk_iommu_igfx);
-DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x9BC8, quirk_iommu_igfx);
-DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x9BE6, quirk_iommu_igfx);
-DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x9BF6, quirk_iommu_igfx);
-DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x9B41, quirk_iommu_igfx);
-DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x9BCA, quirk_iommu_igfx);
-DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x9BCC, quirk_iommu_igfx);
-
/* disable IPU dmar support */
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_ANY_ID, quirk_iommu_ipu);

View file

@ -1,78 +0,0 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Xi Ruoyao <xry111@xry111.site>
Date: Wed, 22 May 2024 10:06:24 +0800
Subject: [PATCH] x86/mm: Don't disable PCID when INVLPG has been fixed by
microcode
Per the "Processor Specification Update" documentations referred by
the intel-microcode-20240312 release note, this microcode release has
fixed the issue for all affected models.
So don't disable PCID if the microcode is new enough. The precise
minimum microcode revision fixing the issue was provided by Pawan
Intel.
[ dhansen: comment and changelog tweaks ]
Signed-off-by: Xi Ruoyao <xry111@xry111.site>
Signed-off-by: Dave Hansen <dave.hansen@linux.intel.com>
Acked-by: Pawan Gupta <pawan.kumar.gupta@linux.intel.com>
Link: https://lore.kernel.org/all/168436059559.404.13934972543631851306.tip-bot2@tip-bot2/
Link: https://github.com/intel/Intel-Linux-Processor-Microcode-Data-Files/releases/tag/microcode-20240312
Link: https://cdrdv2.intel.com/v1/dl/getContent/740518 # RPL042, rev. 13
Link: https://cdrdv2.intel.com/v1/dl/getContent/682436 # ADL063, rev. 24
Link: https://lore.kernel.org/all/20240325231300.qrltbzf6twm43ftb@desk/
Link: https://lore.kernel.org/all/20240522020625.69418-1-xry111%40xry111.site
(cherry-picked from f24f669d03f884a6ef95cca84317d0f329e93961)
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
---
arch/x86/mm/init.c | 23 ++++++++++++++---------
1 file changed, 14 insertions(+), 9 deletions(-)
diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c
index eb503f53c3195ca4f299593c0112dab0fb09e7dd..101725c149c4294f22e337845e01c82dfe71cde5 100644
--- a/arch/x86/mm/init.c
+++ b/arch/x86/mm/init.c
@@ -263,28 +263,33 @@ static void __init probe_page_size_mask(void)
}
/*
- * INVLPG may not properly flush Global entries
- * on these CPUs when PCIDs are enabled.
+ * INVLPG may not properly flush Global entries on
+ * these CPUs. New microcode fixes the issue.
*/
static const struct x86_cpu_id invlpg_miss_ids[] = {
- X86_MATCH_VFM(INTEL_ALDERLAKE, 0),
- X86_MATCH_VFM(INTEL_ALDERLAKE_L, 0),
- X86_MATCH_VFM(INTEL_ATOM_GRACEMONT, 0),
- X86_MATCH_VFM(INTEL_RAPTORLAKE, 0),
- X86_MATCH_VFM(INTEL_RAPTORLAKE_P, 0),
- X86_MATCH_VFM(INTEL_RAPTORLAKE_S, 0),
+ X86_MATCH_VFM(INTEL_ALDERLAKE, 0x2e),
+ X86_MATCH_VFM(INTEL_ALDERLAKE_L, 0x42c),
+ X86_MATCH_VFM(INTEL_ATOM_GRACEMONT, 0x11),
+ X86_MATCH_VFM(INTEL_RAPTORLAKE, 0x118),
+ X86_MATCH_VFM(INTEL_RAPTORLAKE_P, 0x4117),
+ X86_MATCH_VFM(INTEL_RAPTORLAKE_S, 0x2e),
{}
};
static void setup_pcid(void)
{
+ const struct x86_cpu_id *invlpg_miss_match;
+
if (!IS_ENABLED(CONFIG_X86_64))
return;
if (!boot_cpu_has(X86_FEATURE_PCID))
return;
- if (x86_match_cpu(invlpg_miss_ids)) {
+ invlpg_miss_match = x86_match_cpu(invlpg_miss_ids);
+
+ if (invlpg_miss_match &&
+ boot_cpu_data.microcode < invlpg_miss_match->driver_data) {
pr_info("Incomplete global flushes, disabling PCID");
setup_clear_cpu_cap(X86_FEATURE_PCID);
return;

View file

@ -1,15 +1,12 @@
pve/0001-Make-mkcompile_h-accept-an-alternate-timestamp-strin.patch
pve/0002-wireless-Add-Debian-wireless-regdb-certificates.patch
pve/0003-bridge-keep-MAC-of-first-assigned-port.patch
pve/0004-pci-Enable-overrides-for-missing-ACS-capabilities-4..patch
pve/0005-kvm-disable-default-dynamic-halt-polling-growth.patch
pve/0002-bridge-keep-MAC-of-first-assigned-port.patch
pve/0003-pci-Enable-overrides-for-missing-ACS-capabilities-4..patch
pve/0004-kvm-disable-default-dynamic-halt-polling-growth.patch
pve/0005-Revert-KVM-VMX-enable-nested-virtualization-by-defau.patch
pve/0006-net-core-downgrade-unregister_netdevice-refcount-lea.patch
pve/0007-Revert-fortify-Do-not-cast-to-unsigned-char.patch
pve/0008-kvm-xsave-set-mask-out-PKRU-bit-in-xfeatures-if-vCPU.patch
pve/0009-allow-opt-in-to-allow-pass-through-on-broken-hardwar.patch
pve/0010-KVM-nSVM-Advertise-support-for-flush-by-ASID.patch
pve/0011-revert-memfd-improve-userspace-warnings-for-missing-.patch
pve/0012-apparmor-expect-msg_namelen-0-for-recvmsg-calls.patch
pve/0013-netfs-reset-subreq-iov-iter-before-tail-clean.patch
pve/0014-Revert-UBUNTU-SAUCE-iommu-intel-disable-DMAR-for-SKL.patch
pve/0015-x86-mm-Don-t-disable-PCID-when-INVLPG-has-been-fixed.patch
pve/0007-vfs-allow-unprivileged-whiteout-creation.patch
pve/0008-SUNRPC-Fix-READ_PLUS-crasher.patch
pve/0009-NFSv4-pnfs-Fix-a-use-after-free-bug-in-open.patch
pve/0010-scsi-Revert-scsi-qla2xxx-Fix-disk-failure-to-redisco.patch
pve/0011-ext4-recover-csum-seed-of-tmp_inode-after-migrating-.patch
pve/0012-drm-i915-gt-Serialize-TLB-invalidates-with-GT-resets.patch