Update patch

This commit is contained in:
Jiang Cuo 2023-04-22 20:57:56 +08:00
parent a1bfe2d372
commit 27d4779a96
25 changed files with 2290 additions and 65 deletions

View file

@ -17,19 +17,28 @@ $KBUILD_BUILD_TIMESTAMP.
Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
---
init/Makefile | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
scripts/mkcompile_h | 10 +++++++---
1 file changed, 7 insertions(+), 3 deletions(-)
diff --git a/init/Makefile b/init/Makefile
index 26de459006c4..3157d9c79901 100644
--- a/init/Makefile
+++ b/init/Makefile
@@ -29,7 +29,7 @@ preempt-flag-$(CONFIG_PREEMPT_DYNAMIC) := PREEMPT_DYNAMIC
preempt-flag-$(CONFIG_PREEMPT_RT) := PREEMPT_RT
diff --git a/scripts/mkcompile_h b/scripts/mkcompile_h
index 6a2a04d92f42..6c9430ee1a09 100755
--- a/scripts/mkcompile_h
+++ b/scripts/mkcompile_h
@@ -22,10 +22,14 @@ else
VERSION=$KBUILD_BUILD_VERSION
fi
build-version = $(or $(KBUILD_BUILD_VERSION), $(build-version-auto))
-build-timestamp = $(or $(KBUILD_BUILD_TIMESTAMP), $(build-timestamp-auto))
+build-timestamp = $(or $(KBUILD_BUILD_VERSION_TIMESTAMP), $(KBUILD_BUILD_TIMESTAMP), $(build-timestamp-auto))
# Maximum length of UTS_VERSION is 64 chars
filechk_uts_version = \
-if [ -z "$KBUILD_BUILD_TIMESTAMP" ]; then
- TIMESTAMP=`date`
+if [ -z "$KBUILD_BUILD_VERSION_TIMESTAMP" ]; then
+ if [ -z "$KBUILD_BUILD_TIMESTAMP" ]; then
+ TIMESTAMP=`date`
+ else
+ TIMESTAMP=$KBUILD_BUILD_TIMESTAMP
+ fi
else
- TIMESTAMP=$KBUILD_BUILD_TIMESTAMP
+ TIMESTAMP=$KBUILD_BUILD_VERSION_TIMESTAMP
fi
if test -z "$KBUILD_BUILD_USER"; then
LINUX_COMPILE_BY=$(whoami | sed 's/\\/\\\\/')

View file

@ -19,7 +19,7 @@ Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
1 file changed, 1 insertion(+), 4 deletions(-)
diff --git a/net/bridge/br_stp_if.c b/net/bridge/br_stp_if.c
index 75204d36d7f9..1fb5ff73ec1e 100644
index ba55851fe132..82675e1ecfb8 100644
--- a/net/bridge/br_stp_if.c
+++ b/net/bridge/br_stp_if.c
@@ -265,10 +265,7 @@ bool br_stp_recalculate_bridge_id(struct net_bridge *br)

View file

@ -55,10 +55,10 @@ Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
2 files changed, 111 insertions(+)
diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index 2e77ecc12692..eae6fdc4c683 100644
index 68e20f4f1ad4..28131c19b1c9 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -4188,6 +4188,15 @@
@@ -3978,6 +3978,15 @@
Also, it enforces the PCI Local Bus spec
rule that those bits should be 0 in system reset
events (useful for kexec/kdump cases).
@ -75,10 +75,10 @@ index 2e77ecc12692..eae6fdc4c683 100644
Safety option to keep boot IRQs enabled. This
should never be necessary.
diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c
index 267e6002e29f..fac76ca1d16a 100644
index 07a187a67982..d0ab14a33378 100644
--- a/drivers/pci/quirks.c
+++ b/drivers/pci/quirks.c
@@ -194,6 +194,106 @@ static int __init pci_apply_final_quirks(void)
@@ -193,6 +193,106 @@ static int __init pci_apply_final_quirks(void)
}
fs_initcall_sync(pci_apply_final_quirks);
@ -185,7 +185,7 @@ index 267e6002e29f..fac76ca1d16a 100644
/*
* Decoding should be disabled for a PCI device during BAR sizing to avoid
* conflict. But doing so may cause problems on host bridge and perhaps other
@@ -4959,6 +5059,8 @@ static const struct pci_dev_acs_enabled {
@@ -4947,6 +5047,8 @@ static const struct pci_dev_acs_enabled {
{ PCI_VENDOR_ID_CAVIUM, 0xA060, pci_quirk_mf_endpoint_acs },
/* APM X-Gene */
{ PCI_VENDOR_ID_AMCC, 0xE004, pci_quirk_xgene_acs },

View file

@ -13,7 +13,7 @@ Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 07aae60288f9..949b7204cf52 100644
index 1cb2530831f4..db159be9d5b8 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -79,7 +79,7 @@ module_param(halt_poll_ns, uint, 0644);

View file

@ -3,26 +3,22 @@ From: Thomas Lamprecht <t.lamprecht@proxmox.com>
Date: Wed, 7 Oct 2020 17:18:28 +0200
Subject: [PATCH] net: core: downgrade unregister_netdevice refcount leak from
emergency to error
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
---
net/core/dev.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/net/core/dev.c b/net/core/dev.c
index fce980d531bd..5079a3851798 100644
index 7fc8ae7f3cd5..27cd902b6fbb 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -10257,7 +10257,7 @@ static struct net_device *netdev_wait_allrefs_any(struct list_head *list)
if (time_after(jiffies, warning_time +
READ_ONCE(netdev_unregister_timeout_secs) * HZ)) {
list_for_each_entry(dev, list, todo_list) {
- pr_emerg("unregister_netdevice: waiting for %s to become free. Usage count = %d\n",
+ pr_err("unregister_netdevice: waiting for %s to become free. Usage count = %d\n",
dev->name, netdev_refcnt_read(dev));
ref_tracker_dir_print(&dev->refcnt_tracker, 10);
}
@@ -10532,7 +10532,7 @@ static void netdev_wait_allrefs(struct net_device *dev)
if (refcnt != 1 &&
time_after(jiffies, warning_time +
netdev_unregister_timeout_secs * HZ)) {
- pr_emerg("unregister_netdevice: waiting for %s to become free. Usage count = %d\n",
+ pr_err("unregister_netdevice: waiting for %s to become free. Usage count = %d\n",
dev->name, refcnt);
warning_time = jiffies;
}

View file

@ -0,0 +1,104 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Thomas Lamprecht <t.lamprecht@proxmox.com>
Date: Mon, 27 Sep 2021 11:28:39 +0200
Subject: [PATCH] Revert "PCI: Coalesce host bridge contiguous apertures"
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
This reverts commit ab20e43b20b60f5cc8e2ea3763ffa388158469ac.
was reverted upstream because of reports similar to
Link: https://bugzilla.proxmox.com/show_bug.cgi?id=3552
Link: https://lore.kernel.org/r/20210709231529.GA3270116@roeck-us.net
Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
---
drivers/pci/probe.c | 50 ++++-----------------------------------------
1 file changed, 4 insertions(+), 46 deletions(-)
diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c
index 103ee42f6717..6b73e9179111 100644
--- a/drivers/pci/probe.c
+++ b/drivers/pci/probe.c
@@ -20,7 +20,6 @@
#include <linux/irqdomain.h>
#include <linux/pm_runtime.h>
#include <linux/bitfield.h>
-#include <linux/list_sort.h>
#include "pci.h"
#define CARDBUS_LATENCY_TIMER 176 /* secondary latency timer */
@@ -881,31 +880,14 @@ static void pci_set_bus_msi_domain(struct pci_bus *bus)
dev_set_msi_domain(&bus->dev, d);
}
-static int res_cmp(void *priv, const struct list_head *a,
- const struct list_head *b)
-{
- struct resource_entry *entry1, *entry2;
-
- entry1 = container_of(a, struct resource_entry, node);
- entry2 = container_of(b, struct resource_entry, node);
-
- if (entry1->res->flags != entry2->res->flags)
- return entry1->res->flags > entry2->res->flags;
-
- if (entry1->offset != entry2->offset)
- return entry1->offset > entry2->offset;
-
- return entry1->res->start > entry2->res->start;
-}
-
static int pci_register_host_bridge(struct pci_host_bridge *bridge)
{
struct device *parent = bridge->dev.parent;
- struct resource_entry *window, *next, *n;
+ struct resource_entry *window, *n;
struct pci_bus *bus, *b;
- resource_size_t offset, next_offset;
+ resource_size_t offset;
LIST_HEAD(resources);
- struct resource *res, *next_res;
+ struct resource *res;
char addr[64], *fmt;
const char *name;
int err;
@@ -988,35 +970,11 @@ static int pci_register_host_bridge(struct pci_host_bridge *bridge)
if (nr_node_ids > 1 && pcibus_to_node(bus) == NUMA_NO_NODE)
dev_warn(&bus->dev, "Unknown NUMA node; performance will be reduced\n");
- /* Sort and coalesce contiguous windows */
- list_sort(NULL, &resources, res_cmp);
- resource_list_for_each_entry_safe(window, n, &resources) {
- if (list_is_last(&window->node, &resources))
- break;
-
- next = list_next_entry(window, node);
- offset = window->offset;
- res = window->res;
- next_offset = next->offset;
- next_res = next->res;
-
- if (res->flags != next_res->flags || offset != next_offset)
- continue;
-
- if (res->end + 1 == next_res->start) {
- next_res->start = res->start;
- res->flags = res->start = res->end = 0;
- }
- }
-
/* Add initial resources to the bus */
resource_list_for_each_entry_safe(window, n, &resources) {
+ list_move_tail(&window->node, &bridge->windows);
offset = window->offset;
res = window->res;
- if (!res->end)
- continue;
-
- list_move_tail(&window->node, &bridge->windows);
if (res->flags & IORESOURCE_BUS)
pci_bus_insert_busn_res(bus, bus->number, res->end);

View file

@ -1,29 +0,0 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Thomas Lamprecht <t.lamprecht@proxmox.com>
Date: Tue, 10 Jan 2023 08:52:40 +0100
Subject: [PATCH] Revert "fortify: Do not cast to "unsigned char""
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
This reverts commit 106b7a61c488d2022f44e3531ce33461c7c0685f.
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
---
include/linux/fortify-string.h | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/include/linux/fortify-string.h b/include/linux/fortify-string.h
index 7cad8bb031e9..acc24887db3e 100644
--- a/include/linux/fortify-string.h
+++ b/include/linux/fortify-string.h
@@ -18,7 +18,7 @@ void __write_overflow_field(size_t avail, size_t wanted) __compiletime_warning("
#define __compiletime_strlen(p) \
({ \
- char *__p = (char *)(p); \
+ unsigned char *__p = (unsigned char *)(p); \
size_t __ret = SIZE_MAX; \
size_t __p_size = __member_size(p); \
if (__p_size != SIZE_MAX && \

View file

@ -0,0 +1,112 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Kai-Heng Feng <kai.heng.feng@canonical.com>
Date: Tue, 13 Jul 2021 20:50:07 +0800
Subject: [PATCH] PCI: Reinstate "PCI: Coalesce host bridge contiguous
apertures"
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Built-in graphics on HP EliteDesk 805 G6 doesn't work because graphics
can't get the BAR it needs:
pci_bus 0000:00: root bus resource [mem 0x10020200000-0x100303fffff window]
pci_bus 0000:00: root bus resource [mem 0x10030400000-0x100401fffff window]
pci 0000:00:08.1: bridge window [mem 0xd2000000-0xd23fffff]
pci 0000:00:08.1: bridge window [mem 0x10030000000-0x100401fffff 64bit pref]
pci 0000:00:08.1: can't claim BAR 15 [mem 0x10030000000-0x100401fffff 64bit pref]: no compatible bridge window
pci 0000:00:08.1: [mem 0x10030000000-0x100401fffff 64bit pref] clipped to [mem 0x10030000000-0x100303fffff 64bit pref]
pci 0000:00:08.1: bridge window [mem 0x10030000000-0x100303fffff 64bit pref]
pci 0000:07:00.0: can't claim BAR 0 [mem 0x10030000000-0x1003fffffff 64bit pref]: no compatible bridge window
pci 0000:07:00.0: can't claim BAR 2 [mem 0x10040000000-0x100401fffff 64bit pref]: no compatible bridge window
However, the root bus has two contiguous apertures that can contain the
child resource requested.
Coalesce contiguous apertures so we can allocate from the entire contiguous
region.
This is the second take of commit 65db04053efe ("PCI: Coalesce host
bridge contiguous apertures"). The original approach sorts the apertures
by address, but that makes NVMe stop working on QEMU ppc:sam460ex:
PCI host bridge to bus 0002:00
pci_bus 0002:00: root bus resource [io 0x0000-0xffff]
pci_bus 0002:00: root bus resource [mem 0xd80000000-0xdffffffff] (bus address [0x80000000-0xffffffff])
pci_bus 0002:00: root bus resource [mem 0xc0ee00000-0xc0eefffff] (bus address [0x00000000-0x000fffff])
After the offending commit:
PCI host bridge to bus 0002:00
pci_bus 0002:00: root bus resource [io 0x0000-0xffff]
pci_bus 0002:00: root bus resource [mem 0xc0ee00000-0xc0eefffff] (bus address [0x00000000-0x000fffff])
pci_bus 0002:00: root bus resource [mem 0xd80000000-0xdffffffff] (bus address [0x80000000-0xffffffff])
Since the apertures on HP EliteDesk 805 G6 are already in ascending
order, doing a precautious sorting is not necessary.
Remove the sorting part to avoid the regression on ppc:sam460ex.
Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=212013
Cc: Guenter Roeck <linux@roeck-us.net>
Suggested-by: Bjorn Helgaas <bhelgaas@google.com>
Signed-off-by: Kai-Heng Feng <kai.heng.feng@canonical.com>
Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
---
drivers/pci/probe.c | 31 +++++++++++++++++++++++++++----
1 file changed, 27 insertions(+), 4 deletions(-)
diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c
index 6b73e9179111..50eef5b211f4 100644
--- a/drivers/pci/probe.c
+++ b/drivers/pci/probe.c
@@ -883,11 +883,11 @@ static void pci_set_bus_msi_domain(struct pci_bus *bus)
static int pci_register_host_bridge(struct pci_host_bridge *bridge)
{
struct device *parent = bridge->dev.parent;
- struct resource_entry *window, *n;
+ struct resource_entry *window, *next, *n;
struct pci_bus *bus, *b;
- resource_size_t offset;
+ resource_size_t offset, next_offset;
LIST_HEAD(resources);
- struct resource *res;
+ struct resource *res, *next_res;
char addr[64], *fmt;
const char *name;
int err;
@@ -970,11 +970,34 @@ static int pci_register_host_bridge(struct pci_host_bridge *bridge)
if (nr_node_ids > 1 && pcibus_to_node(bus) == NUMA_NO_NODE)
dev_warn(&bus->dev, "Unknown NUMA node; performance will be reduced\n");
+ /* Coalesce contiguous windows */
+ resource_list_for_each_entry_safe(window, n, &resources) {
+ if (list_is_last(&window->node, &resources))
+ break;
+
+ next = list_next_entry(window, node);
+ offset = window->offset;
+ res = window->res;
+ next_offset = next->offset;
+ next_res = next->res;
+
+ if (res->flags != next_res->flags || offset != next_offset)
+ continue;
+
+ if (res->end + 1 == next_res->start) {
+ next_res->start = res->start;
+ res->flags = res->start = res->end = 0;
+ }
+ }
+
/* Add initial resources to the bus */
resource_list_for_each_entry_safe(window, n, &resources) {
- list_move_tail(&window->node, &bridge->windows);
offset = window->offset;
res = window->res;
+ if (!res->end)
+ continue;
+
+ list_move_tail(&window->node, &bridge->windows);
if (res->flags & IORESOURCE_BUS)
pci_bus_insert_busn_res(bus, bus->number, res->end);

View file

@ -0,0 +1,28 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Fabian=20Gr=C3=BCnbichler?= <f.gruenbichler@proxmox.com>
Date: Thu, 14 Sep 2017 11:09:58 +0200
Subject: [PATCH] do not generate split BTF type info per default
This reverts commit a8ed1a0607cfa5478ff6009539f44790c4d0956d.
It breaks ZFS sometimes:
https://github.com/openzfs/zfs/issues/12301#issuecomment-873303739
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
---
lib/Kconfig.debug | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index dbbd243c865f..406d781fa9ff 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -331,7 +331,7 @@ config PAHOLE_HAS_SPLIT_BTF
def_bool PAHOLE_VERSION >= 119
config DEBUG_INFO_BTF_MODULES
- def_bool y
+ def_bool n
depends on DEBUG_INFO_BTF && MODULES && PAHOLE_HAS_SPLIT_BTF
help
Generate compact split BTF type information for kernel modules.

View file

@ -0,0 +1,147 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Wolfgang Bumiller <w.bumiller@proxmox.com>
Date: Tue, 11 Jan 2022 09:31:59 +0100
Subject: [PATCH] blk-cgroup: always terminate io.stat lines
With the removal of seq_get_buf in blkcg_print_one_stat, we
cannot make adding the newline conditional on there being
relevant stats because the name was already written out
unconditionally.
Otherwise we may end up with multiple device names in one
line which is confusing and doesn't follow the nested-keyed
file format.
Signed-off-by: Wolfgang Bumiller <w.bumiller@proxmox.com>
Fixes: 252c651a4c85 ("blk-cgroup: stop using seq_get_buf")
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
---
block/blk-cgroup.c | 9 ++-------
block/blk-iocost.c | 5 ++---
block/blk-iolatency.c | 8 +++-----
include/linux/blk-cgroup.h | 2 +-
4 files changed, 8 insertions(+), 16 deletions(-)
diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c
index 3ee4c1217b63..0e15a9345966 100644
--- a/block/blk-cgroup.c
+++ b/block/blk-cgroup.c
@@ -887,7 +887,6 @@ static void blkcg_print_one_stat(struct blkcg_gq *blkg, struct seq_file *s)
{
struct blkg_iostat_set *bis = &blkg->iostat;
u64 rbytes, wbytes, rios, wios, dbytes, dios;
- bool has_stats = false;
const char *dname;
unsigned seq;
int i;
@@ -913,14 +912,12 @@ static void blkcg_print_one_stat(struct blkcg_gq *blkg, struct seq_file *s)
} while (u64_stats_fetch_retry(&bis->sync, seq));
if (rbytes || wbytes || rios || wios) {
- has_stats = true;
seq_printf(s, "rbytes=%llu wbytes=%llu rios=%llu wios=%llu dbytes=%llu dios=%llu",
rbytes, wbytes, rios, wios,
dbytes, dios);
}
if (blkcg_debug_stats && atomic_read(&blkg->use_delay)) {
- has_stats = true;
seq_printf(s, " use_delay=%d delay_nsec=%llu",
atomic_read(&blkg->use_delay),
atomic64_read(&blkg->delay_nsec));
@@ -932,12 +929,10 @@ static void blkcg_print_one_stat(struct blkcg_gq *blkg, struct seq_file *s)
if (!blkg->pd[i] || !pol->pd_stat_fn)
continue;
- if (pol->pd_stat_fn(blkg->pd[i], s))
- has_stats = true;
+ pol->pd_stat_fn(blkg->pd[i], s);
}
- if (has_stats)
- seq_printf(s, "\n");
+ seq_puts(s, "\n");
}
static int blkcg_print_stat(struct seq_file *sf, void *v)
diff --git a/block/blk-iocost.c b/block/blk-iocost.c
index bd7e9ffa5d40..66d68390e882 100644
--- a/block/blk-iocost.c
+++ b/block/blk-iocost.c
@@ -3016,13 +3016,13 @@ static void ioc_pd_free(struct blkg_policy_data *pd)
kfree(iocg);
}
-static bool ioc_pd_stat(struct blkg_policy_data *pd, struct seq_file *s)
+static void ioc_pd_stat(struct blkg_policy_data *pd, struct seq_file *s)
{
struct ioc_gq *iocg = pd_to_iocg(pd);
struct ioc *ioc = iocg->ioc;
if (!ioc->enabled)
- return false;
+ return;
if (iocg->level == 0) {
unsigned vp10k = DIV64_U64_ROUND_CLOSEST(
@@ -3038,7 +3038,6 @@ static bool ioc_pd_stat(struct blkg_policy_data *pd, struct seq_file *s)
iocg->last_stat.wait_us,
iocg->last_stat.indebt_us,
iocg->last_stat.indelay_us);
- return true;
}
static u64 ioc_weight_prfill(struct seq_file *sf, struct blkg_policy_data *pd,
diff --git a/block/blk-iolatency.c b/block/blk-iolatency.c
index bdef8395af6e..a5f13743a280 100644
--- a/block/blk-iolatency.c
+++ b/block/blk-iolatency.c
@@ -906,7 +906,7 @@ static int iolatency_print_limit(struct seq_file *sf, void *v)
return 0;
}
-static bool iolatency_ssd_stat(struct iolatency_grp *iolat, struct seq_file *s)
+static void iolatency_ssd_stat(struct iolatency_grp *iolat, struct seq_file *s)
{
struct latency_stat stat;
int cpu;
@@ -929,17 +929,16 @@ static bool iolatency_ssd_stat(struct iolatency_grp *iolat, struct seq_file *s)
(unsigned long long)stat.ps.missed,
(unsigned long long)stat.ps.total,
iolat->rq_depth.max_depth);
- return true;
}
-static bool iolatency_pd_stat(struct blkg_policy_data *pd, struct seq_file *s)
+static void iolatency_pd_stat(struct blkg_policy_data *pd, struct seq_file *s)
{
struct iolatency_grp *iolat = pd_to_lat(pd);
unsigned long long avg_lat;
unsigned long long cur_win;
if (!blkcg_debug_stats)
- return false;
+ return;
if (iolat->ssd)
return iolatency_ssd_stat(iolat, s);
@@ -952,7 +951,6 @@ static bool iolatency_pd_stat(struct blkg_policy_data *pd, struct seq_file *s)
else
seq_printf(s, " depth=%u avg_lat=%llu win=%llu",
iolat->rq_depth.max_depth, avg_lat, cur_win);
- return true;
}
static struct blkg_policy_data *iolatency_pd_alloc(gfp_t gfp,
diff --git a/include/linux/blk-cgroup.h b/include/linux/blk-cgroup.h
index bc5c04d711bb..618359e3beca 100644
--- a/include/linux/blk-cgroup.h
+++ b/include/linux/blk-cgroup.h
@@ -153,7 +153,7 @@ typedef void (blkcg_pol_online_pd_fn)(struct blkg_policy_data *pd);
typedef void (blkcg_pol_offline_pd_fn)(struct blkg_policy_data *pd);
typedef void (blkcg_pol_free_pd_fn)(struct blkg_policy_data *pd);
typedef void (blkcg_pol_reset_pd_stats_fn)(struct blkg_policy_data *pd);
-typedef bool (blkcg_pol_stat_pd_fn)(struct blkg_policy_data *pd,
+typedef void (blkcg_pol_stat_pd_fn)(struct blkg_policy_data *pd,
struct seq_file *s);
struct blkcg_policy {

View file

@ -0,0 +1,34 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Javier Martinez Canillas <javierm@redhat.com>
Date: Tue, 25 Jan 2022 10:12:19 +0100
Subject: [PATCH] drivers/firmware: Don't mark as busy the simple-framebuffer
IO resource
The sysfb_create_simplefb() function requests a IO memory resource for the
simple-framebuffer platform device, but it also marks it as busy which can
lead to drivers requesting the same memory resource to fail.
Let's drop the IORESOURCE_BUSY flag and let drivers to request it as busy
instead.
Signed-off-by: Javier Martinez Canillas <javierm@redhat.com>
Reviewed-by: Zack Rusin <zackr@vmware.com>
Reviewed-by: Thomas Zimmermann <tzimmermann@suse.de>
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
---
drivers/firmware/sysfb_simplefb.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/firmware/sysfb_simplefb.c b/drivers/firmware/sysfb_simplefb.c
index fd4fa923088a..ca907f7e76c6 100644
--- a/drivers/firmware/sysfb_simplefb.c
+++ b/drivers/firmware/sysfb_simplefb.c
@@ -99,7 +99,7 @@ __init struct platform_device *sysfb_create_simplefb(const struct screen_info *s
/* setup IORESOURCE_MEM as framebuffer memory */
memset(&res, 0, sizeof(res));
- res.flags = IORESOURCE_MEM | IORESOURCE_BUSY;
+ res.flags = IORESOURCE_MEM;
res.name = simplefb_resname;
res.start = base;
res.end = res.start + length - 1;

View file

@ -0,0 +1,63 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Thomas Zimmermann <tzimmermann@suse.de>
Date: Tue, 25 Jan 2022 10:12:20 +0100
Subject: [PATCH] drm/simpledrm: Request memory region in driver
Requesting the framebuffer memory in simpledrm marks the memory
range as busy. This used to be done by the firmware sysfb code,
but the driver is the correct place.
v2:
* use I/O memory if request_mem_region() fails (Jocelyn)
Signed-off-by: Thomas Zimmermann <tzimmermann@suse.de>
Reviewed-by: Javier Martinez Canillas <javierm@redhat.com>
Reviewed-by: Jocelyn Falempe <jfalempe@redhat.com>
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
---
drivers/gpu/drm/tiny/simpledrm.c | 22 +++++++++++++++++-----
1 file changed, 17 insertions(+), 5 deletions(-)
diff --git a/drivers/gpu/drm/tiny/simpledrm.c b/drivers/gpu/drm/tiny/simpledrm.c
index f3c2c173ca4b..da58c0d0f44d 100644
--- a/drivers/gpu/drm/tiny/simpledrm.c
+++ b/drivers/gpu/drm/tiny/simpledrm.c
@@ -525,21 +525,33 @@ static int simpledrm_device_init_mm(struct simpledrm_device *sdev)
{
struct drm_device *dev = &sdev->dev;
struct platform_device *pdev = sdev->pdev;
- struct resource *mem;
+ struct resource *res, *mem;
void __iomem *screen_base;
int ret;
- mem = platform_get_resource(pdev, IORESOURCE_MEM, 0);
- if (!mem)
+ res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+ if (!res)
return -EINVAL;
- ret = devm_aperture_acquire_from_firmware(dev, mem->start, resource_size(mem));
+ ret = devm_aperture_acquire_from_firmware(dev, res->start, resource_size(res));
if (ret) {
drm_err(dev, "could not acquire memory range %pr: error %d\n",
- mem, ret);
+ res, ret);
return ret;
}
+ mem = devm_request_mem_region(&pdev->dev, res->start, resource_size(res),
+ sdev->dev.driver->name);
+ if (!mem) {
+ /*
+ * We cannot make this fatal. Sometimes this comes from magic
+ * spaces our resource handlers simply don't know about. Use
+ * the I/O-memory resource as-is and try to map that instead.
+ */
+ drm_warn(dev, "could not acquire memory region %pr\n", res);
+ mem = res;
+ }
+
screen_base = devm_ioremap_wc(&pdev->dev, mem->start,
resource_size(mem));
if (!screen_base)

View file

@ -0,0 +1,148 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Thomas Zimmermann <tzimmermann@suse.de>
Date: Tue, 25 Jan 2022 10:12:21 +0100
Subject: [PATCH] fbdev/simplefb: Request memory region in driver
Requesting the framebuffer memory in simpledrm marks the memory
range as busy. This used to be done by the firmware sysfb code,
but the driver is the correct place.
v2:
* store memory region in struct for later cleanup (Javier)
Signed-off-by: Thomas Zimmermann <tzimmermann@suse.de>
Reviewed-by: Javier Martinez Canillas <javierm@redhat.com>
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
---
drivers/video/fbdev/simplefb.c | 65 +++++++++++++++++++++++-----------
1 file changed, 45 insertions(+), 20 deletions(-)
diff --git a/drivers/video/fbdev/simplefb.c b/drivers/video/fbdev/simplefb.c
index a2e3a4690025..8acfb12abfee 100644
--- a/drivers/video/fbdev/simplefb.c
+++ b/drivers/video/fbdev/simplefb.c
@@ -66,7 +66,21 @@ static int simplefb_setcolreg(u_int regno, u_int red, u_int green, u_int blue,
return 0;
}
-struct simplefb_par;
+struct simplefb_par {
+ u32 palette[PSEUDO_PALETTE_SIZE];
+ struct resource *mem;
+#if defined CONFIG_OF && defined CONFIG_COMMON_CLK
+ bool clks_enabled;
+ unsigned int clk_count;
+ struct clk **clks;
+#endif
+#if defined CONFIG_OF && defined CONFIG_REGULATOR
+ bool regulators_enabled;
+ u32 regulator_count;
+ struct regulator **regulators;
+#endif
+};
+
static void simplefb_clocks_destroy(struct simplefb_par *par);
static void simplefb_regulators_destroy(struct simplefb_par *par);
@@ -76,12 +90,18 @@ static void simplefb_regulators_destroy(struct simplefb_par *par);
*/
static void simplefb_destroy(struct fb_info *info)
{
+ struct simplefb_par *par = info->par;
+ struct resource *mem = par->mem;
+
simplefb_regulators_destroy(info->par);
simplefb_clocks_destroy(info->par);
if (info->screen_base)
iounmap(info->screen_base);
framebuffer_release(info);
+
+ if (mem)
+ release_mem_region(mem->start, resource_size(mem));
}
static const struct fb_ops simplefb_ops = {
@@ -175,20 +195,6 @@ static int simplefb_parse_pd(struct platform_device *pdev,
return 0;
}
-struct simplefb_par {
- u32 palette[PSEUDO_PALETTE_SIZE];
-#if defined CONFIG_OF && defined CONFIG_COMMON_CLK
- bool clks_enabled;
- unsigned int clk_count;
- struct clk **clks;
-#endif
-#if defined CONFIG_OF && defined CONFIG_REGULATOR
- bool regulators_enabled;
- u32 regulator_count;
- struct regulator **regulators;
-#endif
-};
-
#if defined CONFIG_OF && defined CONFIG_COMMON_CLK
/*
* Clock handling code.
@@ -411,7 +417,7 @@ static int simplefb_probe(struct platform_device *pdev)
struct simplefb_params params;
struct fb_info *info;
struct simplefb_par *par;
- struct resource *mem;
+ struct resource *res, *mem;
/*
* Generic drivers must not be registered if a framebuffer exists.
@@ -436,15 +442,28 @@ static int simplefb_probe(struct platform_device *pdev)
if (ret)
return ret;
- mem = platform_get_resource(pdev, IORESOURCE_MEM, 0);
- if (!mem) {
+ res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+ if (!res) {
dev_err(&pdev->dev, "No memory resource\n");
return -EINVAL;
}
+ mem = request_mem_region(res->start, resource_size(res), "simplefb");
+ if (!mem) {
+ /*
+ * We cannot make this fatal. Sometimes this comes from magic
+ * spaces our resource handlers simply don't know about. Use
+ * the I/O-memory resource as-is and try to map that instead.
+ */
+ dev_warn(&pdev->dev, "simplefb: cannot reserve video memory at %pR\n", res);
+ mem = res;
+ }
+
info = framebuffer_alloc(sizeof(struct simplefb_par), &pdev->dev);
- if (!info)
- return -ENOMEM;
+ if (!info) {
+ ret = -ENOMEM;
+ goto error_release_mem_region;
+ }
platform_set_drvdata(pdev, info);
par = info->par;
@@ -501,6 +520,9 @@ static int simplefb_probe(struct platform_device *pdev)
info->var.xres, info->var.yres,
info->var.bits_per_pixel, info->fix.line_length);
+ if (mem != res)
+ par->mem = mem; /* release in clean-up handler */
+
ret = register_framebuffer(info);
if (ret < 0) {
dev_err(&pdev->dev, "Unable to register simplefb: %d\n", ret);
@@ -519,6 +541,9 @@ static int simplefb_probe(struct platform_device *pdev)
iounmap(info->screen_base);
error_fb_release:
framebuffer_release(info);
+error_release_mem_region:
+ if (mem != res)
+ release_mem_region(mem->start, resource_size(mem));
return ret;
}

View file

@ -0,0 +1,81 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Olga Kornievskaia <kolga@netapp.com>
Date: Wed, 16 Mar 2022 18:24:26 -0400
Subject: [PATCH] NFSv4.1 provide mount option to toggle trunking discovery
Introduce a new mount option -- trunkdiscovery,notrunkdiscovery -- to
toggle whether or not the client will engage in actively discovery
of trunking locations.
v2 make notrunkdiscovery default
Signed-off-by: Olga Kornievskaia <kolga@netapp.com>
Fixes: 1976b2b31462 ("NFSv4.1 query for fs_location attr on a new file system")
Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
(cherry picked from commit a43bf604446414103b7535f38e739b65601c4fb2)
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
---
fs/nfs/client.c | 3 ++-
fs/nfs/fs_context.c | 8 ++++++++
include/linux/nfs_fs_sb.h | 1 +
3 files changed, 11 insertions(+), 1 deletion(-)
diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index 090b16890e3d..f303e96ce165 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -861,7 +861,8 @@ int nfs_probe_fsinfo(struct nfs_server *server, struct nfs_fh *mntfh, struct nfs
}
if (clp->rpc_ops->discover_trunking != NULL &&
- (server->caps & NFS_CAP_FS_LOCATIONS)) {
+ (server->caps & NFS_CAP_FS_LOCATIONS &&
+ (server->flags & NFS_MOUNT_TRUNK_DISCOVERY))) {
error = clp->rpc_ops->discover_trunking(server, mntfh);
if (error < 0)
return error;
diff --git a/fs/nfs/fs_context.c b/fs/nfs/fs_context.c
index fb3cad38b149..0166370f088e 100644
--- a/fs/nfs/fs_context.c
+++ b/fs/nfs/fs_context.c
@@ -79,6 +79,7 @@ enum nfs_param {
Opt_source,
Opt_tcp,
Opt_timeo,
+ Opt_trunkdiscovery,
Opt_udp,
Opt_v,
Opt_vers,
@@ -179,6 +180,7 @@ static const struct fs_parameter_spec nfs_fs_parameters[] = {
fsparam_string("source", Opt_source),
fsparam_flag ("tcp", Opt_tcp),
fsparam_u32 ("timeo", Opt_timeo),
+ fsparam_flag_no("trunkdiscovery", Opt_trunkdiscovery),
fsparam_flag ("udp", Opt_udp),
fsparam_flag ("v2", Opt_v),
fsparam_flag ("v3", Opt_v),
@@ -528,6 +530,12 @@ static int nfs_fs_context_parse_param(struct fs_context *fc,
else
ctx->flags &= ~NFS_MOUNT_NOCTO;
break;
+ case Opt_trunkdiscovery:
+ if (result.negated)
+ ctx->flags &= ~NFS_MOUNT_TRUNK_DISCOVERY;
+ else
+ ctx->flags |= NFS_MOUNT_TRUNK_DISCOVERY;
+ break;
case Opt_ac:
if (result.negated)
ctx->flags |= NFS_MOUNT_NOAC;
diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h
index da9ef0ab9b4b..5336e494703b 100644
--- a/include/linux/nfs_fs_sb.h
+++ b/include/linux/nfs_fs_sb.h
@@ -156,6 +156,7 @@ struct nfs_server {
#define NFS_MOUNT_SOFTREVAL 0x800000
#define NFS_MOUNT_WRITE_EAGER 0x01000000
#define NFS_MOUNT_WRITE_WAIT 0x02000000
+#define NFS_MOUNT_TRUNK_DISCOVERY 0x04000000
unsigned int fattr_valid; /* Valid attributes */
unsigned int caps; /* server capabilities */

View file

@ -0,0 +1,69 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Marc Bevand <m@zorinaq.com>
Date: Tue, 21 Dec 2021 15:31:12 -0800
Subject: [PATCH] EDAC/amd64: Add PCI device IDs for family 19h model 50h
Add the new family 19h model 50h PCI IDs (device 18h functions 0 and 6)
to support Ryzen 5000 APUs ("Cezanne").
Signed-off-by: Marc Bevand <m@zorinaq.com>
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
---
drivers/edac/amd64_edac.c | 15 +++++++++++++++
drivers/edac/amd64_edac.h | 3 +++
2 files changed, 18 insertions(+)
diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c
index c6c58f01067f..f8ef2edf8abf 100644
--- a/drivers/edac/amd64_edac.c
+++ b/drivers/edac/amd64_edac.c
@@ -2660,6 +2660,16 @@ static struct amd64_family_type family_types[] = {
.dbam_to_cs = f17_addr_mask_to_cs_size,
}
},
+ [F19_M50H_CPUS] = {
+ .ctl_name = "F19h_M50h",
+ .f0_id = PCI_DEVICE_ID_AMD_19H_M50H_DF_F0,
+ .f6_id = PCI_DEVICE_ID_AMD_19H_M50H_DF_F6,
+ .max_mcs = 2,
+ .ops = {
+ .early_channel_count = f17_early_channel_count,
+ .dbam_to_cs = f17_addr_mask_to_cs_size,
+ }
+ },
};
/*
@@ -3706,6 +3716,11 @@ static struct amd64_family_type *per_family_init(struct amd64_pvt *pvt)
pvt->ops = &family_types[F17_M70H_CPUS].ops;
fam_type->ctl_name = "F19h_M20h";
break;
+ } else if (pvt->model >= 0x50 && pvt->model <= 0x5f) {
+ fam_type = &family_types[F19_M50H_CPUS];
+ pvt->ops = &family_types[F19_M50H_CPUS].ops;
+ fam_type->ctl_name = "F19h_M50h";
+ break;
} else if (pvt->model >= 0xa0 && pvt->model <= 0xaf) {
fam_type = &family_types[F19_M10H_CPUS];
pvt->ops = &family_types[F19_M10H_CPUS].ops;
diff --git a/drivers/edac/amd64_edac.h b/drivers/edac/amd64_edac.h
index 650cab401e21..352bda9803f6 100644
--- a/drivers/edac/amd64_edac.h
+++ b/drivers/edac/amd64_edac.h
@@ -128,6 +128,8 @@
#define PCI_DEVICE_ID_AMD_19H_DF_F6 0x1656
#define PCI_DEVICE_ID_AMD_19H_M10H_DF_F0 0x14ad
#define PCI_DEVICE_ID_AMD_19H_M10H_DF_F6 0x14b3
+#define PCI_DEVICE_ID_AMD_19H_M50H_DF_F0 0x166a
+#define PCI_DEVICE_ID_AMD_19H_M50H_DF_F6 0x1670
/*
* Function 1 - Address Map
@@ -301,6 +303,7 @@ enum amd_families {
F17_M70H_CPUS,
F19_CPUS,
F19_M10H_CPUS,
+ F19_M50H_CPUS,
NUM_FAMILIES,
};

View file

@ -0,0 +1,53 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Maxim Levitsky <mlevitsk@redhat.com>
Date: Wed, 3 Aug 2022 18:49:59 +0300
Subject: [PATCH] bug: introduce ASSERT_STRUCT_OFFSET
ASSERT_STRUCT_OFFSET allows to assert during the build of
the kernel that a field in a struct have an expected offset.
KVM used to have such macro, but there is almost nothing KVM specific
in it so move it to build_bug.h, so that it can be used in other
places in KVM.
Signed-off-by: Maxim Levitsky <mlevitsk@redhat.com>
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
---
arch/x86/kvm/vmx/vmcs12.h | 5 ++---
include/linux/build_bug.h | 9 +++++++++
2 files changed, 11 insertions(+), 3 deletions(-)
diff --git a/arch/x86/kvm/vmx/vmcs12.h b/arch/x86/kvm/vmx/vmcs12.h
index 2a45f026ee11..ba8617964982 100644
--- a/arch/x86/kvm/vmx/vmcs12.h
+++ b/arch/x86/kvm/vmx/vmcs12.h
@@ -208,9 +208,8 @@ struct __packed vmcs12 {
/*
* For save/restore compatibility, the vmcs12 field offsets must not change.
*/
-#define CHECK_OFFSET(field, loc) \
- BUILD_BUG_ON_MSG(offsetof(struct vmcs12, field) != (loc), \
- "Offset of " #field " in struct vmcs12 has changed.")
+#define CHECK_OFFSET(field, loc) \
+ ASSERT_STRUCT_OFFSET(struct vmcs12, field, loc)
static inline void vmx_check_vmcs12_offsets(void)
{
diff --git a/include/linux/build_bug.h b/include/linux/build_bug.h
index e3a0be2c90ad..3aa3640f8c18 100644
--- a/include/linux/build_bug.h
+++ b/include/linux/build_bug.h
@@ -77,4 +77,13 @@
#define static_assert(expr, ...) __static_assert(expr, ##__VA_ARGS__, #expr)
#define __static_assert(expr, msg, ...) _Static_assert(expr, msg)
+
+/*
+ * Compile time check that field has an expected offset
+ */
+#define ASSERT_STRUCT_OFFSET(type, field, expected_offset) \
+ BUILD_BUG_ON_MSG(offsetof(type, field) != (expected_offset), \
+ "Offset of " #field " in " #type " has changed.")
+
+
#endif /* _LINUX_BUILD_BUG_H */

View file

@ -0,0 +1,34 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Maxim Levitsky <mlevitsk@redhat.com>
Date: Wed, 3 Aug 2022 18:50:02 +0300
Subject: [PATCH] KVM: x86: emulator: update the emulation mode after rsm
This ensures that RIP will be correctly written back,
because the RSM instruction can switch the CPU mode from
32 bit (or less) to 64 bit.
This fixes a guest crash in case the #SMI is received
while the guest runs a code from an address > 32 bit.
Signed-off-by: Maxim Levitsky <mlevitsk@redhat.com>
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
---
arch/x86/kvm/emulate.c | 5 +++++
1 file changed, 5 insertions(+)
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index cb96e4354f31..23e4fce033a3 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -2638,6 +2638,11 @@ static int em_rsm(struct x86_emulate_ctxt *ctxt)
if (ret != X86EMUL_CONTINUE)
goto emulate_shutdown;
+
+ ret = emulator_recalc_and_set_mode(ctxt);
+ if (ret != X86EMUL_CONTINUE)
+ goto emulate_shutdown;
+
/*
* Note, the ctxt->ops callbacks are responsible for handling side
* effects when writing MSRs and CRs, e.g. MMU context resets, CPUID

View file

@ -0,0 +1,280 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Maxim Levitsky <mlevitsk@redhat.com>
Date: Wed, 3 Aug 2022 18:50:05 +0300
Subject: [PATCH] KVM: x86: emulator/smm: add structs for KVM's smram layout
Those structs will be used to read/write the smram state image.
Also document the differences between KVM's SMRAM layout and SMRAM
layout that is used by real Intel/AMD cpus.
Signed-off-by: Maxim Levitsky <mlevitsk@redhat.com>
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
---
arch/x86/kvm/emulate.c | 6 +
arch/x86/kvm/kvm_emulate.h | 218 +++++++++++++++++++++++++++++++++++++
arch/x86/kvm/x86.c | 1 +
3 files changed, 225 insertions(+)
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index 23e4fce033a3..f169be004aab 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -5829,3 +5829,9 @@ bool emulator_can_use_gpa(struct x86_emulate_ctxt *ctxt)
return true;
}
+
+void __init kvm_emulator_init(void)
+{
+ __check_smram32_offsets();
+ __check_smram64_offsets();
+}
diff --git a/arch/x86/kvm/kvm_emulate.h b/arch/x86/kvm/kvm_emulate.h
index fb09cd22cb7f..0b2bbcce321a 100644
--- a/arch/x86/kvm/kvm_emulate.h
+++ b/arch/x86/kvm/kvm_emulate.h
@@ -13,6 +13,7 @@
#define _ASM_X86_KVM_X86_EMULATE_H
#include <asm/desc_defs.h>
+#include <linux/build_bug.h>
#include "fpu.h"
struct x86_emulate_ctxt;
@@ -482,6 +483,223 @@ enum x86_intercept {
nr_x86_intercepts
};
+
+/* 32 bit KVM's emulated SMM layout. Loosely based on Intel's layout */
+
+struct kvm_smm_seg_state_32 {
+ u32 flags;
+ u32 limit;
+ u32 base;
+} __packed;
+
+struct kvm_smram_state_32 {
+ u32 reserved1[62];
+ u32 smbase;
+ u32 smm_revision;
+ u32 reserved2[5];
+ u32 cr4; /* CR4 is not present in Intel/AMD SMRAM image */
+ u32 reserved3[5];
+
+ /*
+ * Segment state is not present/documented in the Intel/AMD SMRAM image
+ * Instead this area on Intel/AMD contains IO/HLT restart flags.
+ */
+ struct kvm_smm_seg_state_32 ds;
+ struct kvm_smm_seg_state_32 fs;
+ struct kvm_smm_seg_state_32 gs;
+ struct kvm_smm_seg_state_32 idtr; /* IDTR has only base and limit */
+ struct kvm_smm_seg_state_32 tr;
+ u32 reserved;
+ struct kvm_smm_seg_state_32 gdtr; /* GDTR has only base and limit */
+ struct kvm_smm_seg_state_32 ldtr;
+ struct kvm_smm_seg_state_32 es;
+ struct kvm_smm_seg_state_32 cs;
+ struct kvm_smm_seg_state_32 ss;
+
+ u32 es_sel;
+ u32 cs_sel;
+ u32 ss_sel;
+ u32 ds_sel;
+ u32 fs_sel;
+ u32 gs_sel;
+ u32 ldtr_sel;
+ u32 tr_sel;
+
+ u32 dr7;
+ u32 dr6;
+ u32 gprs[8]; /* GPRS in the "natural" X86 order (EAX/ECX/EDX.../EDI) */
+ u32 eip;
+ u32 eflags;
+ u32 cr3;
+ u32 cr0;
+} __packed;
+
+
+static inline void __check_smram32_offsets(void)
+{
+#define __CHECK_SMRAM32_OFFSET(field, offset) \
+ ASSERT_STRUCT_OFFSET(struct kvm_smram_state_32, field, offset - 0xFE00)
+
+ __CHECK_SMRAM32_OFFSET(reserved1, 0xFE00);
+ __CHECK_SMRAM32_OFFSET(smbase, 0xFEF8);
+ __CHECK_SMRAM32_OFFSET(smm_revision, 0xFEFC);
+ __CHECK_SMRAM32_OFFSET(reserved2, 0xFF00);
+ __CHECK_SMRAM32_OFFSET(cr4, 0xFF14);
+ __CHECK_SMRAM32_OFFSET(reserved3, 0xFF18);
+ __CHECK_SMRAM32_OFFSET(ds, 0xFF2C);
+ __CHECK_SMRAM32_OFFSET(fs, 0xFF38);
+ __CHECK_SMRAM32_OFFSET(gs, 0xFF44);
+ __CHECK_SMRAM32_OFFSET(idtr, 0xFF50);
+ __CHECK_SMRAM32_OFFSET(tr, 0xFF5C);
+ __CHECK_SMRAM32_OFFSET(gdtr, 0xFF6C);
+ __CHECK_SMRAM32_OFFSET(ldtr, 0xFF78);
+ __CHECK_SMRAM32_OFFSET(es, 0xFF84);
+ __CHECK_SMRAM32_OFFSET(cs, 0xFF90);
+ __CHECK_SMRAM32_OFFSET(ss, 0xFF9C);
+ __CHECK_SMRAM32_OFFSET(es_sel, 0xFFA8);
+ __CHECK_SMRAM32_OFFSET(cs_sel, 0xFFAC);
+ __CHECK_SMRAM32_OFFSET(ss_sel, 0xFFB0);
+ __CHECK_SMRAM32_OFFSET(ds_sel, 0xFFB4);
+ __CHECK_SMRAM32_OFFSET(fs_sel, 0xFFB8);
+ __CHECK_SMRAM32_OFFSET(gs_sel, 0xFFBC);
+ __CHECK_SMRAM32_OFFSET(ldtr_sel, 0xFFC0);
+ __CHECK_SMRAM32_OFFSET(tr_sel, 0xFFC4);
+ __CHECK_SMRAM32_OFFSET(dr7, 0xFFC8);
+ __CHECK_SMRAM32_OFFSET(dr6, 0xFFCC);
+ __CHECK_SMRAM32_OFFSET(gprs, 0xFFD0);
+ __CHECK_SMRAM32_OFFSET(eip, 0xFFF0);
+ __CHECK_SMRAM32_OFFSET(eflags, 0xFFF4);
+ __CHECK_SMRAM32_OFFSET(cr3, 0xFFF8);
+ __CHECK_SMRAM32_OFFSET(cr0, 0xFFFC);
+#undef __CHECK_SMRAM32_OFFSET
+}
+
+
+/* 64 bit KVM's emulated SMM layout. Based on AMD64 layout */
+
+struct kvm_smm_seg_state_64 {
+ u16 selector;
+ u16 attributes;
+ u32 limit;
+ u64 base;
+};
+
+struct kvm_smram_state_64 {
+
+ struct kvm_smm_seg_state_64 es;
+ struct kvm_smm_seg_state_64 cs;
+ struct kvm_smm_seg_state_64 ss;
+ struct kvm_smm_seg_state_64 ds;
+ struct kvm_smm_seg_state_64 fs;
+ struct kvm_smm_seg_state_64 gs;
+ struct kvm_smm_seg_state_64 gdtr; /* GDTR has only base and limit*/
+ struct kvm_smm_seg_state_64 ldtr;
+ struct kvm_smm_seg_state_64 idtr; /* IDTR has only base and limit*/
+ struct kvm_smm_seg_state_64 tr;
+
+ /* I/O restart and auto halt restart are not implemented by KVM */
+ u64 io_restart_rip;
+ u64 io_restart_rcx;
+ u64 io_restart_rsi;
+ u64 io_restart_rdi;
+ u32 io_restart_dword;
+ u32 reserved1;
+ u8 io_inst_restart;
+ u8 auto_hlt_restart;
+ u8 reserved2[6];
+
+ u64 efer;
+
+ /*
+ * Two fields below are implemented on AMD only, to store
+ * SVM guest vmcb address if the #SMI was received while in the guest mode.
+ */
+ u64 svm_guest_flag;
+ u64 svm_guest_vmcb_gpa;
+ u64 svm_guest_virtual_int; /* unknown purpose, not implemented */
+
+ u32 reserved3[3];
+ u32 smm_revison;
+ u32 smbase;
+ u32 reserved4[5];
+
+ /* ssp and svm_* fields below are not implemented by KVM */
+ u64 ssp;
+ u64 svm_guest_pat;
+ u64 svm_host_efer;
+ u64 svm_host_cr4;
+ u64 svm_host_cr3;
+ u64 svm_host_cr0;
+
+ u64 cr4;
+ u64 cr3;
+ u64 cr0;
+ u64 dr7;
+ u64 dr6;
+ u64 rflags;
+ u64 rip;
+ u64 gprs[16]; /* GPRS in a reversed "natural" X86 order (R15/R14/../RCX/RAX.) */
+};
+
+
+static inline void __check_smram64_offsets(void)
+{
+#define __CHECK_SMRAM64_OFFSET(field, offset) \
+ ASSERT_STRUCT_OFFSET(struct kvm_smram_state_64, field, offset - 0xFE00)
+
+ __CHECK_SMRAM64_OFFSET(es, 0xFE00);
+ __CHECK_SMRAM64_OFFSET(cs, 0xFE10);
+ __CHECK_SMRAM64_OFFSET(ss, 0xFE20);
+ __CHECK_SMRAM64_OFFSET(ds, 0xFE30);
+ __CHECK_SMRAM64_OFFSET(fs, 0xFE40);
+ __CHECK_SMRAM64_OFFSET(gs, 0xFE50);
+ __CHECK_SMRAM64_OFFSET(gdtr, 0xFE60);
+ __CHECK_SMRAM64_OFFSET(ldtr, 0xFE70);
+ __CHECK_SMRAM64_OFFSET(idtr, 0xFE80);
+ __CHECK_SMRAM64_OFFSET(tr, 0xFE90);
+ __CHECK_SMRAM64_OFFSET(io_restart_rip, 0xFEA0);
+ __CHECK_SMRAM64_OFFSET(io_restart_rcx, 0xFEA8);
+ __CHECK_SMRAM64_OFFSET(io_restart_rsi, 0xFEB0);
+ __CHECK_SMRAM64_OFFSET(io_restart_rdi, 0xFEB8);
+ __CHECK_SMRAM64_OFFSET(io_restart_dword, 0xFEC0);
+ __CHECK_SMRAM64_OFFSET(reserved1, 0xFEC4);
+ __CHECK_SMRAM64_OFFSET(io_inst_restart, 0xFEC8);
+ __CHECK_SMRAM64_OFFSET(auto_hlt_restart, 0xFEC9);
+ __CHECK_SMRAM64_OFFSET(reserved2, 0xFECA);
+ __CHECK_SMRAM64_OFFSET(efer, 0xFED0);
+ __CHECK_SMRAM64_OFFSET(svm_guest_flag, 0xFED8);
+ __CHECK_SMRAM64_OFFSET(svm_guest_vmcb_gpa, 0xFEE0);
+ __CHECK_SMRAM64_OFFSET(svm_guest_virtual_int, 0xFEE8);
+ __CHECK_SMRAM64_OFFSET(reserved3, 0xFEF0);
+ __CHECK_SMRAM64_OFFSET(smm_revison, 0xFEFC);
+ __CHECK_SMRAM64_OFFSET(smbase, 0xFF00);
+ __CHECK_SMRAM64_OFFSET(reserved4, 0xFF04);
+ __CHECK_SMRAM64_OFFSET(ssp, 0xFF18);
+ __CHECK_SMRAM64_OFFSET(svm_guest_pat, 0xFF20);
+ __CHECK_SMRAM64_OFFSET(svm_host_efer, 0xFF28);
+ __CHECK_SMRAM64_OFFSET(svm_host_cr4, 0xFF30);
+ __CHECK_SMRAM64_OFFSET(svm_host_cr3, 0xFF38);
+ __CHECK_SMRAM64_OFFSET(svm_host_cr0, 0xFF40);
+ __CHECK_SMRAM64_OFFSET(cr4, 0xFF48);
+ __CHECK_SMRAM64_OFFSET(cr3, 0xFF50);
+ __CHECK_SMRAM64_OFFSET(cr0, 0xFF58);
+ __CHECK_SMRAM64_OFFSET(dr7, 0xFF60);
+ __CHECK_SMRAM64_OFFSET(dr6, 0xFF68);
+ __CHECK_SMRAM64_OFFSET(rflags, 0xFF70);
+ __CHECK_SMRAM64_OFFSET(rip, 0xFF78);
+ __CHECK_SMRAM64_OFFSET(gprs, 0xFF80);
+#undef __CHECK_SMRAM64_OFFSET
+}
+
+union kvm_smram {
+ struct kvm_smram_state_64 smram64;
+ struct kvm_smram_state_32 smram32;
+ u8 bytes[512];
+};
+
+void __init kvm_emulator_init(void);
+
+
/* Host execution mode. */
#if defined(CONFIG_X86_32)
#define X86EMUL_MODE_HOST X86EMUL_MODE_PROT32
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 386b92c2e4aa..1b2f6cd3cc8d 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -12642,6 +12642,7 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_vmgexit_msr_protocol_exit);
static int __init kvm_x86_init(void)
{
kvm_mmu_x86_module_init();
+ kvm_emulator_init();
mitigate_smt_rsb &= boot_cpu_has_bug(X86_BUG_SMT_RSB) && cpu_smt_possible();
return 0;
}

View file

@ -0,0 +1,214 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Maxim Levitsky <mlevitsk@redhat.com>
Date: Wed, 3 Aug 2022 18:50:06 +0300
Subject: [PATCH] KVM: x86: emulator/smm: use smram structs in the common code
Switch from using a raw array to 'union kvm_smram'.
Signed-off-by: Maxim Levitsky <mlevitsk@redhat.com>
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
---
arch/x86/include/asm/kvm_host.h | 5 +++--
arch/x86/kvm/emulate.c | 12 +++++++-----
arch/x86/kvm/kvm_emulate.h | 3 ++-
arch/x86/kvm/svm/svm.c | 8 ++++++--
arch/x86/kvm/vmx/vmx.c | 4 ++--
arch/x86/kvm/x86.c | 16 ++++++++--------
6 files changed, 28 insertions(+), 20 deletions(-)
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 1172a201d851..c4e382af1853 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -200,6 +200,7 @@ typedef enum exit_fastpath_completion fastpath_t;
struct x86_emulate_ctxt;
struct x86_exception;
+union kvm_smram;
enum x86_intercept;
enum x86_intercept_stage;
@@ -1467,8 +1468,8 @@ struct kvm_x86_ops {
void (*setup_mce)(struct kvm_vcpu *vcpu);
int (*smi_allowed)(struct kvm_vcpu *vcpu, bool for_injection);
- int (*enter_smm)(struct kvm_vcpu *vcpu, char *smstate);
- int (*leave_smm)(struct kvm_vcpu *vcpu, const char *smstate);
+ int (*enter_smm)(struct kvm_vcpu *vcpu, union kvm_smram *smram);
+ int (*leave_smm)(struct kvm_vcpu *vcpu, const union kvm_smram *smram);
void (*enable_smi_window)(struct kvm_vcpu *vcpu);
int (*mem_enc_op)(struct kvm *kvm, void __user *argp);
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index f169be004aab..d3cc1b8e2ea6 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -2566,16 +2566,18 @@ static int rsm_load_state_64(struct x86_emulate_ctxt *ctxt,
static int em_rsm(struct x86_emulate_ctxt *ctxt)
{
unsigned long cr0, cr4, efer;
- char buf[512];
+ const union kvm_smram smram;
u64 smbase;
int ret;
+ BUILD_BUG_ON(sizeof(smram) != 512);
+
if ((ctxt->ops->get_hflags(ctxt) & X86EMUL_SMM_MASK) == 0)
return emulate_ud(ctxt);
smbase = ctxt->ops->get_smbase(ctxt);
- ret = ctxt->ops->read_phys(ctxt, smbase + 0xfe00, buf, sizeof(buf));
+ ret = ctxt->ops->read_phys(ctxt, smbase + 0xfe00, (void *)&smram, sizeof(smram));
if (ret != X86EMUL_CONTINUE)
return X86EMUL_UNHANDLEABLE;
@@ -2625,15 +2627,15 @@ static int em_rsm(struct x86_emulate_ctxt *ctxt)
* state (e.g. enter guest mode) before loading state from the SMM
* state-save area.
*/
- if (ctxt->ops->leave_smm(ctxt, buf))
+ if (ctxt->ops->leave_smm(ctxt, &smram))
goto emulate_shutdown;
#ifdef CONFIG_X86_64
if (emulator_has_longmode(ctxt))
- ret = rsm_load_state_64(ctxt, buf);
+ ret = rsm_load_state_64(ctxt, (const char *)&smram);
else
#endif
- ret = rsm_load_state_32(ctxt, buf);
+ ret = rsm_load_state_32(ctxt, (const char *)&smram);
if (ret != X86EMUL_CONTINUE)
goto emulate_shutdown;
diff --git a/arch/x86/kvm/kvm_emulate.h b/arch/x86/kvm/kvm_emulate.h
index 0b2bbcce321a..3b37b3e17379 100644
--- a/arch/x86/kvm/kvm_emulate.h
+++ b/arch/x86/kvm/kvm_emulate.h
@@ -19,6 +19,7 @@
struct x86_emulate_ctxt;
enum x86_intercept;
enum x86_intercept_stage;
+union kvm_smram;
struct x86_exception {
u8 vector;
@@ -233,7 +234,7 @@ struct x86_emulate_ops {
unsigned (*get_hflags)(struct x86_emulate_ctxt *ctxt);
void (*exiting_smm)(struct x86_emulate_ctxt *ctxt);
- int (*leave_smm)(struct x86_emulate_ctxt *ctxt, const char *smstate);
+ int (*leave_smm)(struct x86_emulate_ctxt *ctxt, const union kvm_smram *smram);
void (*triple_fault)(struct x86_emulate_ctxt *ctxt);
int (*set_xcr)(struct x86_emulate_ctxt *ctxt, u32 index, u64 xcr);
};
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index b0b87c36be3d..545e321998d3 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -4317,12 +4317,14 @@ static int svm_smi_allowed(struct kvm_vcpu *vcpu, bool for_injection)
return !svm_smi_blocked(vcpu);
}
-static int svm_enter_smm(struct kvm_vcpu *vcpu, char *smstate)
+static int svm_enter_smm(struct kvm_vcpu *vcpu, union kvm_smram *smram)
{
struct vcpu_svm *svm = to_svm(vcpu);
struct kvm_host_map map_save;
int ret;
+ char *smstate = (char *)smram;
+
if (!is_guest_mode(vcpu))
return 0;
@@ -4364,7 +4366,7 @@ static int svm_enter_smm(struct kvm_vcpu *vcpu, char *smstate)
return 0;
}
-static int svm_leave_smm(struct kvm_vcpu *vcpu, const char *smstate)
+static int svm_leave_smm(struct kvm_vcpu *vcpu, const union kvm_smram *smram)
{
struct vcpu_svm *svm = to_svm(vcpu);
struct kvm_host_map map, map_save;
@@ -4372,6 +4374,8 @@ static int svm_leave_smm(struct kvm_vcpu *vcpu, const char *smstate)
struct vmcb *vmcb12;
int ret;
+ const char *smstate = (const char *)smram;
+
if (!guest_cpuid_has(vcpu, X86_FEATURE_LM))
return 0;
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 7db4c69ac77b..0e52ddd14c57 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -7615,7 +7615,7 @@ static int vmx_smi_allowed(struct kvm_vcpu *vcpu, bool for_injection)
return !is_smm(vcpu);
}
-static int vmx_enter_smm(struct kvm_vcpu *vcpu, char *smstate)
+static int vmx_enter_smm(struct kvm_vcpu *vcpu, union kvm_smram *smram)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
@@ -7629,7 +7629,7 @@ static int vmx_enter_smm(struct kvm_vcpu *vcpu, char *smstate)
return 0;
}
-static int vmx_leave_smm(struct kvm_vcpu *vcpu, const char *smstate)
+static int vmx_leave_smm(struct kvm_vcpu *vcpu, const union kvm_smram *smram)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
int ret;
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 1b2f6cd3cc8d..1b7e08590493 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -7440,9 +7440,9 @@ static void emulator_exiting_smm(struct x86_emulate_ctxt *ctxt)
}
static int emulator_leave_smm(struct x86_emulate_ctxt *ctxt,
- const char *smstate)
+ const union kvm_smram *smram)
{
- return static_call(kvm_x86_leave_smm)(emul_to_vcpu(ctxt), smstate);
+ return static_call(kvm_x86_leave_smm)(emul_to_vcpu(ctxt), smram);
}
static void emulator_triple_fault(struct x86_emulate_ctxt *ctxt)
@@ -9321,25 +9321,25 @@ static void enter_smm(struct kvm_vcpu *vcpu)
struct kvm_segment cs, ds;
struct desc_ptr dt;
unsigned long cr0;
- char buf[512];
+ union kvm_smram smram;
- memset(buf, 0, 512);
+ memset(smram.bytes, 0, sizeof(smram.bytes));
#ifdef CONFIG_X86_64
if (guest_cpuid_has(vcpu, X86_FEATURE_LM))
- enter_smm_save_state_64(vcpu, buf);
+ enter_smm_save_state_64(vcpu, (char *)&smram);
else
#endif
- enter_smm_save_state_32(vcpu, buf);
+ enter_smm_save_state_32(vcpu, (char *)&smram);
/*
* Give enter_smm() a chance to make ISA-specific changes to the vCPU
* state (e.g. leave guest mode) after we've saved the state into the
* SMM state-save area.
*/
- static_call(kvm_x86_enter_smm)(vcpu, buf);
+ static_call(kvm_x86_enter_smm)(vcpu, &smram);
kvm_smm_changed(vcpu, true);
- kvm_vcpu_write_guest(vcpu, vcpu->arch.smbase + 0xfe00, buf, sizeof(buf));
+ kvm_vcpu_write_guest(vcpu, vcpu->arch.smbase + 0xfe00, &smram, sizeof(smram));
if (static_call(kvm_x86_get_nmi_mask)(vcpu))
vcpu->arch.hflags |= HF_SMM_INSIDE_NMI_MASK;

View file

@ -0,0 +1,268 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Maxim Levitsky <mlevitsk@redhat.com>
Date: Wed, 3 Aug 2022 18:50:07 +0300
Subject: [PATCH] KVM: x86: emulator/smm: use smram struct for 32 bit smram
load/restore
Use kvm_smram_state_32 struct to save/restore 32 bit SMM state
(used when X86_FEATURE_LM is not present in the guest CPUID).
Signed-off-by: Maxim Levitsky <mlevitsk@redhat.com>
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
---
arch/x86/kvm/emulate.c | 81 +++++++++++++++---------------------------
arch/x86/kvm/x86.c | 75 +++++++++++++++++---------------------
2 files changed, 60 insertions(+), 96 deletions(-)
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index d3cc1b8e2ea6..0dd18d66f3b7 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -2343,25 +2343,17 @@ static void rsm_set_desc_flags(struct desc_struct *desc, u32 flags)
desc->type = (flags >> 8) & 15;
}
-static int rsm_load_seg_32(struct x86_emulate_ctxt *ctxt, const char *smstate,
+static void rsm_load_seg_32(struct x86_emulate_ctxt *ctxt,
+ const struct kvm_smm_seg_state_32 *state,
+ u16 selector,
int n)
{
struct desc_struct desc;
- int offset;
- u16 selector;
-
- selector = GET_SMSTATE(u32, smstate, 0x7fa8 + n * 4);
-
- if (n < 3)
- offset = 0x7f84 + n * 12;
- else
- offset = 0x7f2c + (n - 3) * 12;
- set_desc_base(&desc, GET_SMSTATE(u32, smstate, offset + 8));
- set_desc_limit(&desc, GET_SMSTATE(u32, smstate, offset + 4));
- rsm_set_desc_flags(&desc, GET_SMSTATE(u32, smstate, offset));
+ set_desc_base(&desc, state->base);
+ set_desc_limit(&desc, state->limit);
+ rsm_set_desc_flags(&desc, state->flags);
ctxt->ops->set_segment(ctxt, selector, &desc, 0, n);
- return X86EMUL_CONTINUE;
}
#ifdef CONFIG_X86_64
@@ -2432,63 +2424,46 @@ static int rsm_enter_protected_mode(struct x86_emulate_ctxt *ctxt,
}
static int rsm_load_state_32(struct x86_emulate_ctxt *ctxt,
- const char *smstate)
+ const struct kvm_smram_state_32 *smstate)
{
- struct desc_struct desc;
struct desc_ptr dt;
- u16 selector;
- u32 val, cr0, cr3, cr4;
int i;
- cr0 = GET_SMSTATE(u32, smstate, 0x7ffc);
- cr3 = GET_SMSTATE(u32, smstate, 0x7ff8);
- ctxt->eflags = GET_SMSTATE(u32, smstate, 0x7ff4) | X86_EFLAGS_FIXED;
- ctxt->_eip = GET_SMSTATE(u32, smstate, 0x7ff0);
+ ctxt->eflags = smstate->eflags | X86_EFLAGS_FIXED;
+ ctxt->_eip = smstate->eip;
for (i = 0; i < 8; i++)
- *reg_write(ctxt, i) = GET_SMSTATE(u32, smstate, 0x7fd0 + i * 4);
-
- val = GET_SMSTATE(u32, smstate, 0x7fcc);
+ *reg_write(ctxt, i) = smstate->gprs[i];
- if (ctxt->ops->set_dr(ctxt, 6, val))
+ if (ctxt->ops->set_dr(ctxt, 6, smstate->dr6))
return X86EMUL_UNHANDLEABLE;
-
- val = GET_SMSTATE(u32, smstate, 0x7fc8);
-
- if (ctxt->ops->set_dr(ctxt, 7, val))
+ if (ctxt->ops->set_dr(ctxt, 7, smstate->dr7))
return X86EMUL_UNHANDLEABLE;
- selector = GET_SMSTATE(u32, smstate, 0x7fc4);
- set_desc_base(&desc, GET_SMSTATE(u32, smstate, 0x7f64));
- set_desc_limit(&desc, GET_SMSTATE(u32, smstate, 0x7f60));
- rsm_set_desc_flags(&desc, GET_SMSTATE(u32, smstate, 0x7f5c));
- ctxt->ops->set_segment(ctxt, selector, &desc, 0, VCPU_SREG_TR);
+ rsm_load_seg_32(ctxt, &smstate->tr, smstate->tr_sel, VCPU_SREG_TR);
+ rsm_load_seg_32(ctxt, &smstate->ldtr, smstate->ldtr_sel, VCPU_SREG_LDTR);
- selector = GET_SMSTATE(u32, smstate, 0x7fc0);
- set_desc_base(&desc, GET_SMSTATE(u32, smstate, 0x7f80));
- set_desc_limit(&desc, GET_SMSTATE(u32, smstate, 0x7f7c));
- rsm_set_desc_flags(&desc, GET_SMSTATE(u32, smstate, 0x7f78));
- ctxt->ops->set_segment(ctxt, selector, &desc, 0, VCPU_SREG_LDTR);
- dt.address = GET_SMSTATE(u32, smstate, 0x7f74);
- dt.size = GET_SMSTATE(u32, smstate, 0x7f70);
+ dt.address = smstate->gdtr.base;
+ dt.size = smstate->gdtr.limit;
ctxt->ops->set_gdt(ctxt, &dt);
- dt.address = GET_SMSTATE(u32, smstate, 0x7f58);
- dt.size = GET_SMSTATE(u32, smstate, 0x7f54);
+ dt.address = smstate->idtr.base;
+ dt.size = smstate->idtr.limit;
ctxt->ops->set_idt(ctxt, &dt);
- for (i = 0; i < 6; i++) {
- int r = rsm_load_seg_32(ctxt, smstate, i);
- if (r != X86EMUL_CONTINUE)
- return r;
- }
+ rsm_load_seg_32(ctxt, &smstate->es, smstate->es_sel, VCPU_SREG_ES);
+ rsm_load_seg_32(ctxt, &smstate->cs, smstate->cs_sel, VCPU_SREG_CS);
+ rsm_load_seg_32(ctxt, &smstate->ss, smstate->ss_sel, VCPU_SREG_SS);
- cr4 = GET_SMSTATE(u32, smstate, 0x7f14);
+ rsm_load_seg_32(ctxt, &smstate->ds, smstate->ds_sel, VCPU_SREG_DS);
+ rsm_load_seg_32(ctxt, &smstate->fs, smstate->fs_sel, VCPU_SREG_FS);
+ rsm_load_seg_32(ctxt, &smstate->gs, smstate->gs_sel, VCPU_SREG_GS);
- ctxt->ops->set_smbase(ctxt, GET_SMSTATE(u32, smstate, 0x7ef8));
+ ctxt->ops->set_smbase(ctxt, smstate->smbase);
- return rsm_enter_protected_mode(ctxt, cr0, cr3, cr4);
+ return rsm_enter_protected_mode(ctxt, smstate->cr0,
+ smstate->cr3, smstate->cr4);
}
#ifdef CONFIG_X86_64
@@ -2635,7 +2610,7 @@ static int em_rsm(struct x86_emulate_ctxt *ctxt)
ret = rsm_load_state_64(ctxt, (const char *)&smram);
else
#endif
- ret = rsm_load_state_32(ctxt, (const char *)&smram);
+ ret = rsm_load_state_32(ctxt, &smram.smram32);
if (ret != X86EMUL_CONTINUE)
goto emulate_shutdown;
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 1b7e08590493..678318e5e0b4 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -9175,22 +9175,18 @@ static u32 enter_smm_get_segment_flags(struct kvm_segment *seg)
return flags;
}
-static void enter_smm_save_seg_32(struct kvm_vcpu *vcpu, char *buf, int n)
+static void enter_smm_save_seg_32(struct kvm_vcpu *vcpu,
+ struct kvm_smm_seg_state_32 *state,
+ u32 *selector,
+ int n)
{
struct kvm_segment seg;
- int offset;
kvm_get_segment(vcpu, &seg, n);
- put_smstate(u32, buf, 0x7fa8 + n * 4, seg.selector);
-
- if (n < 3)
- offset = 0x7f84 + n * 12;
- else
- offset = 0x7f2c + (n - 3) * 12;
-
- put_smstate(u32, buf, offset + 8, seg.base);
- put_smstate(u32, buf, offset + 4, seg.limit);
- put_smstate(u32, buf, offset, enter_smm_get_segment_flags(&seg));
+ *selector = seg.selector;
+ state->base = seg.base;
+ state->limit = seg.limit;
+ state->flags = enter_smm_get_segment_flags(&seg);
}
#ifdef CONFIG_X86_64
@@ -9211,54 +9207,47 @@ static void enter_smm_save_seg_64(struct kvm_vcpu *vcpu, char *buf, int n)
}
#endif
-static void enter_smm_save_state_32(struct kvm_vcpu *vcpu, char *buf)
+static void enter_smm_save_state_32(struct kvm_vcpu *vcpu, struct kvm_smram_state_32 *smram)
{
struct desc_ptr dt;
- struct kvm_segment seg;
unsigned long val;
int i;
- put_smstate(u32, buf, 0x7ffc, kvm_read_cr0(vcpu));
- put_smstate(u32, buf, 0x7ff8, kvm_read_cr3(vcpu));
- put_smstate(u32, buf, 0x7ff4, kvm_get_rflags(vcpu));
- put_smstate(u32, buf, 0x7ff0, kvm_rip_read(vcpu));
+ smram->cr0 = kvm_read_cr0(vcpu);
+ smram->cr3 = kvm_read_cr3(vcpu);
+ smram->eflags = kvm_get_rflags(vcpu);
+ smram->eip = kvm_rip_read(vcpu);
for (i = 0; i < 8; i++)
- put_smstate(u32, buf, 0x7fd0 + i * 4, kvm_register_read_raw(vcpu, i));
+ smram->gprs[i] = kvm_register_read_raw(vcpu, i);
kvm_get_dr(vcpu, 6, &val);
- put_smstate(u32, buf, 0x7fcc, (u32)val);
+ smram->dr6 = (u32)val;
kvm_get_dr(vcpu, 7, &val);
- put_smstate(u32, buf, 0x7fc8, (u32)val);
+ smram->dr7 = (u32)val;
- kvm_get_segment(vcpu, &seg, VCPU_SREG_TR);
- put_smstate(u32, buf, 0x7fc4, seg.selector);
- put_smstate(u32, buf, 0x7f64, seg.base);
- put_smstate(u32, buf, 0x7f60, seg.limit);
- put_smstate(u32, buf, 0x7f5c, enter_smm_get_segment_flags(&seg));
-
- kvm_get_segment(vcpu, &seg, VCPU_SREG_LDTR);
- put_smstate(u32, buf, 0x7fc0, seg.selector);
- put_smstate(u32, buf, 0x7f80, seg.base);
- put_smstate(u32, buf, 0x7f7c, seg.limit);
- put_smstate(u32, buf, 0x7f78, enter_smm_get_segment_flags(&seg));
+ enter_smm_save_seg_32(vcpu, &smram->tr, &smram->tr_sel, VCPU_SREG_TR);
+ enter_smm_save_seg_32(vcpu, &smram->ldtr, &smram->ldtr_sel, VCPU_SREG_LDTR);
static_call(kvm_x86_get_gdt)(vcpu, &dt);
- put_smstate(u32, buf, 0x7f74, dt.address);
- put_smstate(u32, buf, 0x7f70, dt.size);
+ smram->gdtr.base = dt.address;
+ smram->gdtr.limit = dt.size;
static_call(kvm_x86_get_idt)(vcpu, &dt);
- put_smstate(u32, buf, 0x7f58, dt.address);
- put_smstate(u32, buf, 0x7f54, dt.size);
+ smram->idtr.base = dt.address;
+ smram->idtr.limit = dt.size;
- for (i = 0; i < 6; i++)
- enter_smm_save_seg_32(vcpu, buf, i);
+ enter_smm_save_seg_32(vcpu, &smram->es, &smram->es_sel, VCPU_SREG_ES);
+ enter_smm_save_seg_32(vcpu, &smram->cs, &smram->cs_sel, VCPU_SREG_CS);
+ enter_smm_save_seg_32(vcpu, &smram->ss, &smram->ss_sel, VCPU_SREG_SS);
- put_smstate(u32, buf, 0x7f14, kvm_read_cr4(vcpu));
+ enter_smm_save_seg_32(vcpu, &smram->ds, &smram->ds_sel, VCPU_SREG_DS);
+ enter_smm_save_seg_32(vcpu, &smram->fs, &smram->fs_sel, VCPU_SREG_FS);
+ enter_smm_save_seg_32(vcpu, &smram->gs, &smram->gs_sel, VCPU_SREG_GS);
- /* revision id */
- put_smstate(u32, buf, 0x7efc, 0x00020000);
- put_smstate(u32, buf, 0x7ef8, vcpu->arch.smbase);
+ smram->cr4 = kvm_read_cr4(vcpu);
+ smram->smm_revision = 0x00020000;
+ smram->smbase = vcpu->arch.smbase;
}
#ifdef CONFIG_X86_64
@@ -9329,7 +9318,7 @@ static void enter_smm(struct kvm_vcpu *vcpu)
enter_smm_save_state_64(vcpu, (char *)&smram);
else
#endif
- enter_smm_save_state_32(vcpu, (char *)&smram);
+ enter_smm_save_state_32(vcpu, &smram.smram32);
/*
* Give enter_smm() a chance to make ISA-specific changes to the vCPU

View file

@ -0,0 +1,279 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Maxim Levitsky <mlevitsk@redhat.com>
Date: Wed, 3 Aug 2022 18:50:08 +0300
Subject: [PATCH] KVM: x86: emulator/smm: use smram struct for 64 bit smram
load/restore
Use kvm_smram_state_64 struct to save/restore the 64 bit SMM state
(used when X86_FEATURE_LM is present in the guest CPUID,
regardless of 32-bitness of the guest).
Signed-off-by: Maxim Levitsky <mlevitsk@redhat.com>
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
---
arch/x86/kvm/emulate.c | 88 ++++++++++++++----------------------------
arch/x86/kvm/x86.c | 75 ++++++++++++++++-------------------
2 files changed, 62 insertions(+), 101 deletions(-)
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index 0dd18d66f3b7..37c1662b5508 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -2357,24 +2357,16 @@ static void rsm_load_seg_32(struct x86_emulate_ctxt *ctxt,
}
#ifdef CONFIG_X86_64
-static int rsm_load_seg_64(struct x86_emulate_ctxt *ctxt, const char *smstate,
- int n)
+static void rsm_load_seg_64(struct x86_emulate_ctxt *ctxt,
+ const struct kvm_smm_seg_state_64 *state,
+ int n)
{
struct desc_struct desc;
- int offset;
- u16 selector;
- u32 base3;
-
- offset = 0x7e00 + n * 16;
-
- selector = GET_SMSTATE(u16, smstate, offset);
- rsm_set_desc_flags(&desc, GET_SMSTATE(u16, smstate, offset + 2) << 8);
- set_desc_limit(&desc, GET_SMSTATE(u32, smstate, offset + 4));
- set_desc_base(&desc, GET_SMSTATE(u32, smstate, offset + 8));
- base3 = GET_SMSTATE(u32, smstate, offset + 12);
- ctxt->ops->set_segment(ctxt, selector, &desc, base3, n);
- return X86EMUL_CONTINUE;
+ rsm_set_desc_flags(&desc, state->attributes << 8);
+ set_desc_limit(&desc, state->limit);
+ set_desc_base(&desc, (u32)state->base);
+ ctxt->ops->set_segment(ctxt, state->selector, &desc, state->base >> 32, n);
}
#endif
@@ -2468,71 +2460,49 @@ static int rsm_load_state_32(struct x86_emulate_ctxt *ctxt,
#ifdef CONFIG_X86_64
static int rsm_load_state_64(struct x86_emulate_ctxt *ctxt,
- const char *smstate)
+ const struct kvm_smram_state_64 *smstate)
{
- struct desc_struct desc;
struct desc_ptr dt;
- u64 val, cr0, cr3, cr4;
- u32 base3;
- u16 selector;
int i, r;
for (i = 0; i < 16; i++)
- *reg_write(ctxt, i) = GET_SMSTATE(u64, smstate, 0x7ff8 - i * 8);
+ *reg_write(ctxt, i) = smstate->gprs[15 - i];
- ctxt->_eip = GET_SMSTATE(u64, smstate, 0x7f78);
- ctxt->eflags = GET_SMSTATE(u32, smstate, 0x7f70) | X86_EFLAGS_FIXED;
+ ctxt->_eip = smstate->rip;
+ ctxt->eflags = smstate->rflags | X86_EFLAGS_FIXED;
- val = GET_SMSTATE(u64, smstate, 0x7f68);
-
- if (ctxt->ops->set_dr(ctxt, 6, val))
+ if (ctxt->ops->set_dr(ctxt, 6, smstate->dr6))
return X86EMUL_UNHANDLEABLE;
-
- val = GET_SMSTATE(u64, smstate, 0x7f60);
-
- if (ctxt->ops->set_dr(ctxt, 7, val))
+ if (ctxt->ops->set_dr(ctxt, 7, smstate->dr7))
return X86EMUL_UNHANDLEABLE;
- cr0 = GET_SMSTATE(u64, smstate, 0x7f58);
- cr3 = GET_SMSTATE(u64, smstate, 0x7f50);
- cr4 = GET_SMSTATE(u64, smstate, 0x7f48);
- ctxt->ops->set_smbase(ctxt, GET_SMSTATE(u32, smstate, 0x7f00));
- val = GET_SMSTATE(u64, smstate, 0x7ed0);
+ ctxt->ops->set_smbase(ctxt, smstate->smbase);
- if (ctxt->ops->set_msr(ctxt, MSR_EFER, val & ~EFER_LMA))
+ if (ctxt->ops->set_msr(ctxt, MSR_EFER, smstate->efer & ~EFER_LMA))
return X86EMUL_UNHANDLEABLE;
- selector = GET_SMSTATE(u32, smstate, 0x7e90);
- rsm_set_desc_flags(&desc, GET_SMSTATE(u32, smstate, 0x7e92) << 8);
- set_desc_limit(&desc, GET_SMSTATE(u32, smstate, 0x7e94));
- set_desc_base(&desc, GET_SMSTATE(u32, smstate, 0x7e98));
- base3 = GET_SMSTATE(u32, smstate, 0x7e9c);
- ctxt->ops->set_segment(ctxt, selector, &desc, base3, VCPU_SREG_TR);
+ rsm_load_seg_64(ctxt, &smstate->tr, VCPU_SREG_TR);
- dt.size = GET_SMSTATE(u32, smstate, 0x7e84);
- dt.address = GET_SMSTATE(u64, smstate, 0x7e88);
+ dt.size = smstate->idtr.limit;
+ dt.address = smstate->idtr.base;
ctxt->ops->set_idt(ctxt, &dt);
- selector = GET_SMSTATE(u32, smstate, 0x7e70);
- rsm_set_desc_flags(&desc, GET_SMSTATE(u32, smstate, 0x7e72) << 8);
- set_desc_limit(&desc, GET_SMSTATE(u32, smstate, 0x7e74));
- set_desc_base(&desc, GET_SMSTATE(u32, smstate, 0x7e78));
- base3 = GET_SMSTATE(u32, smstate, 0x7e7c);
- ctxt->ops->set_segment(ctxt, selector, &desc, base3, VCPU_SREG_LDTR);
+ rsm_load_seg_64(ctxt, &smstate->ldtr, VCPU_SREG_LDTR);
- dt.size = GET_SMSTATE(u32, smstate, 0x7e64);
- dt.address = GET_SMSTATE(u64, smstate, 0x7e68);
+ dt.size = smstate->gdtr.limit;
+ dt.address = smstate->gdtr.base;
ctxt->ops->set_gdt(ctxt, &dt);
- r = rsm_enter_protected_mode(ctxt, cr0, cr3, cr4);
+ r = rsm_enter_protected_mode(ctxt, smstate->cr0, smstate->cr3, smstate->cr4);
if (r != X86EMUL_CONTINUE)
return r;
- for (i = 0; i < 6; i++) {
- r = rsm_load_seg_64(ctxt, smstate, i);
- if (r != X86EMUL_CONTINUE)
- return r;
- }
+ rsm_load_seg_64(ctxt, &smstate->es, VCPU_SREG_ES);
+ rsm_load_seg_64(ctxt, &smstate->cs, VCPU_SREG_CS);
+ rsm_load_seg_64(ctxt, &smstate->ss, VCPU_SREG_SS);
+ rsm_load_seg_64(ctxt, &smstate->ds, VCPU_SREG_DS);
+ rsm_load_seg_64(ctxt, &smstate->fs, VCPU_SREG_FS);
+ rsm_load_seg_64(ctxt, &smstate->gs, VCPU_SREG_GS);
return X86EMUL_CONTINUE;
}
@@ -2607,7 +2577,7 @@ static int em_rsm(struct x86_emulate_ctxt *ctxt)
#ifdef CONFIG_X86_64
if (emulator_has_longmode(ctxt))
- ret = rsm_load_state_64(ctxt, (const char *)&smram);
+ ret = rsm_load_state_64(ctxt, &smram.smram64);
else
#endif
ret = rsm_load_state_32(ctxt, &smram.smram32);
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 678318e5e0b4..de1b88836442 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -9190,20 +9190,17 @@ static void enter_smm_save_seg_32(struct kvm_vcpu *vcpu,
}
#ifdef CONFIG_X86_64
-static void enter_smm_save_seg_64(struct kvm_vcpu *vcpu, char *buf, int n)
+static void enter_smm_save_seg_64(struct kvm_vcpu *vcpu,
+ struct kvm_smm_seg_state_64 *state,
+ int n)
{
struct kvm_segment seg;
- int offset;
- u16 flags;
kvm_get_segment(vcpu, &seg, n);
- offset = 0x7e00 + n * 16;
-
- flags = enter_smm_get_segment_flags(&seg) >> 8;
- put_smstate(u16, buf, offset, seg.selector);
- put_smstate(u16, buf, offset + 2, flags);
- put_smstate(u32, buf, offset + 4, seg.limit);
- put_smstate(u64, buf, offset + 8, seg.base);
+ state->selector = seg.selector;
+ state->attributes = enter_smm_get_segment_flags(&seg) >> 8;
+ state->limit = seg.limit;
+ state->base = seg.base;
}
#endif
@@ -9251,57 +9248,51 @@ static void enter_smm_save_state_32(struct kvm_vcpu *vcpu, struct kvm_smram_stat
}
#ifdef CONFIG_X86_64
-static void enter_smm_save_state_64(struct kvm_vcpu *vcpu, char *buf)
+static void enter_smm_save_state_64(struct kvm_vcpu *vcpu, struct kvm_smram_state_64 *smram)
{
struct desc_ptr dt;
- struct kvm_segment seg;
unsigned long val;
int i;
for (i = 0; i < 16; i++)
- put_smstate(u64, buf, 0x7ff8 - i * 8, kvm_register_read_raw(vcpu, i));
+ smram->gprs[15 - i] = kvm_register_read_raw(vcpu, i);
+
+ smram->rip = kvm_rip_read(vcpu);
+ smram->rflags = kvm_get_rflags(vcpu);
- put_smstate(u64, buf, 0x7f78, kvm_rip_read(vcpu));
- put_smstate(u32, buf, 0x7f70, kvm_get_rflags(vcpu));
kvm_get_dr(vcpu, 6, &val);
- put_smstate(u64, buf, 0x7f68, val);
+ smram->dr6 = val;
kvm_get_dr(vcpu, 7, &val);
- put_smstate(u64, buf, 0x7f60, val);
-
- put_smstate(u64, buf, 0x7f58, kvm_read_cr0(vcpu));
- put_smstate(u64, buf, 0x7f50, kvm_read_cr3(vcpu));
- put_smstate(u64, buf, 0x7f48, kvm_read_cr4(vcpu));
+ smram->dr7 = val;
- put_smstate(u32, buf, 0x7f00, vcpu->arch.smbase);
+ smram->cr0 = kvm_read_cr0(vcpu);
+ smram->cr3 = kvm_read_cr3(vcpu);
+ smram->cr4 = kvm_read_cr4(vcpu);
- /* revision id */
- put_smstate(u32, buf, 0x7efc, 0x00020064);
+ smram->smbase = vcpu->arch.smbase;
+ smram->smm_revison = 0x00020064;
- put_smstate(u64, buf, 0x7ed0, vcpu->arch.efer);
+ smram->efer = vcpu->arch.efer;
- kvm_get_segment(vcpu, &seg, VCPU_SREG_TR);
- put_smstate(u16, buf, 0x7e90, seg.selector);
- put_smstate(u16, buf, 0x7e92, enter_smm_get_segment_flags(&seg) >> 8);
- put_smstate(u32, buf, 0x7e94, seg.limit);
- put_smstate(u64, buf, 0x7e98, seg.base);
+ enter_smm_save_seg_64(vcpu, &smram->tr, VCPU_SREG_TR);
static_call(kvm_x86_get_idt)(vcpu, &dt);
- put_smstate(u32, buf, 0x7e84, dt.size);
- put_smstate(u64, buf, 0x7e88, dt.address);
+ smram->idtr.limit = dt.size;
+ smram->idtr.base = dt.address;
- kvm_get_segment(vcpu, &seg, VCPU_SREG_LDTR);
- put_smstate(u16, buf, 0x7e70, seg.selector);
- put_smstate(u16, buf, 0x7e72, enter_smm_get_segment_flags(&seg) >> 8);
- put_smstate(u32, buf, 0x7e74, seg.limit);
- put_smstate(u64, buf, 0x7e78, seg.base);
+ enter_smm_save_seg_64(vcpu, &smram->ldtr, VCPU_SREG_LDTR);
static_call(kvm_x86_get_gdt)(vcpu, &dt);
- put_smstate(u32, buf, 0x7e64, dt.size);
- put_smstate(u64, buf, 0x7e68, dt.address);
+ smram->gdtr.limit = dt.size;
+ smram->gdtr.base = dt.address;
- for (i = 0; i < 6; i++)
- enter_smm_save_seg_64(vcpu, buf, i);
+ enter_smm_save_seg_64(vcpu, &smram->es, VCPU_SREG_ES);
+ enter_smm_save_seg_64(vcpu, &smram->cs, VCPU_SREG_CS);
+ enter_smm_save_seg_64(vcpu, &smram->ss, VCPU_SREG_SS);
+ enter_smm_save_seg_64(vcpu, &smram->ds, VCPU_SREG_DS);
+ enter_smm_save_seg_64(vcpu, &smram->fs, VCPU_SREG_FS);
+ enter_smm_save_seg_64(vcpu, &smram->gs, VCPU_SREG_GS);
}
#endif
@@ -9315,7 +9306,7 @@ static void enter_smm(struct kvm_vcpu *vcpu)
memset(smram.bytes, 0, sizeof(smram.bytes));
#ifdef CONFIG_X86_64
if (guest_cpuid_has(vcpu, X86_FEATURE_LM))
- enter_smm_save_state_64(vcpu, (char *)&smram);
+ enter_smm_save_state_64(vcpu, &smram.smram64);
else
#endif
enter_smm_save_state_32(vcpu, &smram.smram32);

View file

@ -0,0 +1,98 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Maxim Levitsky <mlevitsk@redhat.com>
Date: Wed, 3 Aug 2022 18:50:09 +0300
Subject: [PATCH] KVM: x86: SVM: use smram structs
This removes the last user of put_smstate/GET_SMSTATE so
remove these functions as well.
Also add a sanity check that we don't attempt to enter the SMM
on non long mode capable guest CPU with a running nested guest.
Signed-off-by: Maxim Levitsky <mlevitsk@redhat.com>
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
---
arch/x86/include/asm/kvm_host.h | 6 ------
arch/x86/kvm/svm/svm.c | 21 ++++++---------------
2 files changed, 6 insertions(+), 21 deletions(-)
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index c4e382af1853..932c0f659468 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1937,12 +1937,6 @@ static inline int kvm_cpu_get_apicid(int mps_cpu)
#endif
}
-#define put_smstate(type, buf, offset, val) \
- *(type *)((buf) + (offset) - 0x7e00) = val
-
-#define GET_SMSTATE(type, buf, offset) \
- (*(type *)((buf) + (offset) - 0x7e00))
-
int kvm_cpu_dirty_log_size(void);
int alloc_all_memslots_rmaps(struct kvm *kvm);
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index 545e321998d3..fe340de913b2 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -4323,15 +4323,11 @@ static int svm_enter_smm(struct kvm_vcpu *vcpu, union kvm_smram *smram)
struct kvm_host_map map_save;
int ret;
- char *smstate = (char *)smram;
-
if (!is_guest_mode(vcpu))
return 0;
- /* FED8h - SVM Guest */
- put_smstate(u64, smstate, 0x7ed8, 1);
- /* FEE0h - SVM Guest VMCB Physical Address */
- put_smstate(u64, smstate, 0x7ee0, svm->nested.vmcb12_gpa);
+ smram->smram64.svm_guest_flag = 1;
+ smram->smram64.svm_guest_vmcb_gpa = svm->nested.vmcb12_gpa;
svm->vmcb->save.rax = vcpu->arch.regs[VCPU_REGS_RAX];
svm->vmcb->save.rsp = vcpu->arch.regs[VCPU_REGS_RSP];
@@ -4370,28 +4366,23 @@ static int svm_leave_smm(struct kvm_vcpu *vcpu, const union kvm_smram *smram)
{
struct vcpu_svm *svm = to_svm(vcpu);
struct kvm_host_map map, map_save;
- u64 saved_efer, vmcb12_gpa;
struct vmcb *vmcb12;
int ret;
- const char *smstate = (const char *)smram;
-
if (!guest_cpuid_has(vcpu, X86_FEATURE_LM))
return 0;
/* Non-zero if SMI arrived while vCPU was in guest mode. */
- if (!GET_SMSTATE(u64, smstate, 0x7ed8))
+ if (!smram->smram64.svm_guest_flag)
return 0;
if (!guest_cpuid_has(vcpu, X86_FEATURE_SVM))
return 1;
- saved_efer = GET_SMSTATE(u64, smstate, 0x7ed0);
- if (!(saved_efer & EFER_SVME))
+ if (!(smram->smram64.efer & EFER_SVME))
return 1;
- vmcb12_gpa = GET_SMSTATE(u64, smstate, 0x7ee0);
- if (kvm_vcpu_map(vcpu, gpa_to_gfn(vmcb12_gpa), &map) == -EINVAL)
+ if (kvm_vcpu_map(vcpu, gpa_to_gfn(smram->smram64.svm_guest_vmcb_gpa), &map) == -EINVAL)
return 1;
ret = 1;
@@ -4416,7 +4407,7 @@ static int svm_leave_smm(struct kvm_vcpu *vcpu, const union kvm_smram *smram)
vmcb12 = map.hva;
nested_load_control_from_vmcb12(svm, &vmcb12->control);
- ret = enter_svm_guest_mode(vcpu, vmcb12_gpa, vmcb12, false);
+ ret = enter_svm_guest_mode(vcpu, smram->smram64.svm_guest_vmcb_gpa, vmcb12, false);
if (ret)
goto unmap_save;

View file

@ -0,0 +1,40 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Maxim Levitsky <mlevitsk@redhat.com>
Date: Wed, 3 Aug 2022 18:50:10 +0300
Subject: [PATCH] KVM: x86: SVM: don't save SVM state to SMRAM when VM is not
long mode capable
When the guest CPUID doesn't have support for long mode, 32 bit SMRAM
layout is used and it has no support for preserving EFER and/or SVM
state.
Note that this isn't relevant to running 32 bit guests on VM which is
long mode capable - such VM can still run 32 bit guests in compatibility
mode.
Signed-off-by: Maxim Levitsky <mlevitsk@redhat.com>
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
---
arch/x86/kvm/svm/svm.c | 9 +++++++++
1 file changed, 9 insertions(+)
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index fe340de913b2..9d65aaa6dd76 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -4326,6 +4326,15 @@ static int svm_enter_smm(struct kvm_vcpu *vcpu, union kvm_smram *smram)
if (!is_guest_mode(vcpu))
return 0;
+ /*
+ * 32 bit SMRAM format doesn't preserve EFER and SVM state.
+ * SVM should not be enabled by the userspace without marking
+ * the CPU as at least long mode capable.
+ */
+
+ if (!guest_cpuid_has(vcpu, X86_FEATURE_LM))
+ return 1;
+
smram->smram64.svm_guest_flag = 1;
smram->smram64.svm_guest_vmcb_gpa = svm->nested.vmcb12_gpa;

View file

@ -0,0 +1,180 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Maxim Levitsky <mlevitsk@redhat.com>
Date: Wed, 3 Aug 2022 18:50:11 +0300
Subject: [PATCH] KVM: x86: emulator/smm: preserve interrupt shadow in SMRAM
When #SMI is asserted, the CPU can be in interrupt shadow
due to sti or mov ss.
It is not mandatory in Intel/AMD prm to have the #SMI
blocked during the shadow, and on top of
that, since neither SVM nor VMX has true support for SMI
window, waiting for one instruction would mean single stepping
the guest.
Instead, allow #SMI in this case, but both reset the interrupt
window and stash its value in SMRAM to restore it on exit
from SMM.
This fixes rare failures seen mostly on windows guests on VMX,
when #SMI falls on the sti instruction which mainfest in
VM entry failure due to EFLAGS.IF not being set, but STI interrupt
window still being set in the VMCS.
Signed-off-by: Maxim Levitsky <mlevitsk@redhat.com>
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
---
arch/x86/kvm/emulate.c | 17 ++++++++++++++---
arch/x86/kvm/kvm_emulate.h | 10 ++++++----
arch/x86/kvm/x86.c | 12 ++++++++++++
3 files changed, 32 insertions(+), 7 deletions(-)
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index 37c1662b5508..b70adbee03b7 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -2419,7 +2419,7 @@ static int rsm_load_state_32(struct x86_emulate_ctxt *ctxt,
const struct kvm_smram_state_32 *smstate)
{
struct desc_ptr dt;
- int i;
+ int i, r;
ctxt->eflags = smstate->eflags | X86_EFLAGS_FIXED;
ctxt->_eip = smstate->eip;
@@ -2454,8 +2454,16 @@ static int rsm_load_state_32(struct x86_emulate_ctxt *ctxt,
ctxt->ops->set_smbase(ctxt, smstate->smbase);
- return rsm_enter_protected_mode(ctxt, smstate->cr0,
- smstate->cr3, smstate->cr4);
+ r = rsm_enter_protected_mode(ctxt, smstate->cr0,
+ smstate->cr3, smstate->cr4);
+
+ if (r != X86EMUL_CONTINUE)
+ return r;
+
+ ctxt->ops->set_int_shadow(ctxt, 0);
+ ctxt->interruptibility = (u8)smstate->int_shadow;
+
+ return X86EMUL_CONTINUE;
}
#ifdef CONFIG_X86_64
@@ -2504,6 +2512,9 @@ static int rsm_load_state_64(struct x86_emulate_ctxt *ctxt,
rsm_load_seg_64(ctxt, &smstate->fs, VCPU_SREG_FS);
rsm_load_seg_64(ctxt, &smstate->gs, VCPU_SREG_GS);
+ ctxt->ops->set_int_shadow(ctxt, 0);
+ ctxt->interruptibility = (u8)smstate->int_shadow;
+
return X86EMUL_CONTINUE;
}
#endif
diff --git a/arch/x86/kvm/kvm_emulate.h b/arch/x86/kvm/kvm_emulate.h
index 3b37b3e17379..a64c190abf28 100644
--- a/arch/x86/kvm/kvm_emulate.h
+++ b/arch/x86/kvm/kvm_emulate.h
@@ -231,6 +231,7 @@ struct x86_emulate_ops {
bool (*guest_has_rdpid)(struct x86_emulate_ctxt *ctxt);
void (*set_nmi_mask)(struct x86_emulate_ctxt *ctxt, bool masked);
+ void (*set_int_shadow)(struct x86_emulate_ctxt *ctxt, u8 shadow);
unsigned (*get_hflags)(struct x86_emulate_ctxt *ctxt);
void (*exiting_smm)(struct x86_emulate_ctxt *ctxt);
@@ -497,7 +498,8 @@ struct kvm_smram_state_32 {
u32 reserved1[62];
u32 smbase;
u32 smm_revision;
- u32 reserved2[5];
+ u32 reserved2[4];
+ u32 int_shadow; /* KVM extension */
u32 cr4; /* CR4 is not present in Intel/AMD SMRAM image */
u32 reserved3[5];
@@ -545,6 +547,7 @@ static inline void __check_smram32_offsets(void)
__CHECK_SMRAM32_OFFSET(smbase, 0xFEF8);
__CHECK_SMRAM32_OFFSET(smm_revision, 0xFEFC);
__CHECK_SMRAM32_OFFSET(reserved2, 0xFF00);
+ __CHECK_SMRAM32_OFFSET(int_shadow, 0xFF10);
__CHECK_SMRAM32_OFFSET(cr4, 0xFF14);
__CHECK_SMRAM32_OFFSET(reserved3, 0xFF18);
__CHECK_SMRAM32_OFFSET(ds, 0xFF2C);
@@ -604,7 +607,7 @@ struct kvm_smram_state_64 {
u64 io_restart_rsi;
u64 io_restart_rdi;
u32 io_restart_dword;
- u32 reserved1;
+ u32 int_shadow;
u8 io_inst_restart;
u8 auto_hlt_restart;
u8 reserved2[6];
@@ -642,7 +645,6 @@ struct kvm_smram_state_64 {
u64 gprs[16]; /* GPRS in a reversed "natural" X86 order (R15/R14/../RCX/RAX.) */
};
-
static inline void __check_smram64_offsets(void)
{
#define __CHECK_SMRAM64_OFFSET(field, offset) \
@@ -663,7 +665,7 @@ static inline void __check_smram64_offsets(void)
__CHECK_SMRAM64_OFFSET(io_restart_rsi, 0xFEB0);
__CHECK_SMRAM64_OFFSET(io_restart_rdi, 0xFEB8);
__CHECK_SMRAM64_OFFSET(io_restart_dword, 0xFEC0);
- __CHECK_SMRAM64_OFFSET(reserved1, 0xFEC4);
+ __CHECK_SMRAM64_OFFSET(int_shadow, 0xFEC4);
__CHECK_SMRAM64_OFFSET(io_inst_restart, 0xFEC8);
__CHECK_SMRAM64_OFFSET(auto_hlt_restart, 0xFEC9);
__CHECK_SMRAM64_OFFSET(reserved2, 0xFECA);
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index de1b88836442..e95c254b450f 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -7427,6 +7427,11 @@ static void emulator_set_nmi_mask(struct x86_emulate_ctxt *ctxt, bool masked)
static_call(kvm_x86_set_nmi_mask)(emul_to_vcpu(ctxt), masked);
}
+static void emulator_set_int_shadow(struct x86_emulate_ctxt *ctxt, u8 shadow)
+{
+ static_call(kvm_x86_set_interrupt_shadow)(emul_to_vcpu(ctxt), shadow);
+}
+
static unsigned emulator_get_hflags(struct x86_emulate_ctxt *ctxt)
{
return emul_to_vcpu(ctxt)->arch.hflags;
@@ -7496,6 +7501,7 @@ static const struct x86_emulate_ops emulate_ops = {
.guest_has_fxsr = emulator_guest_has_fxsr,
.guest_has_rdpid = emulator_guest_has_rdpid,
.set_nmi_mask = emulator_set_nmi_mask,
+ .set_int_shadow = emulator_set_int_shadow,
.get_hflags = emulator_get_hflags,
.exiting_smm = emulator_exiting_smm,
.leave_smm = emulator_leave_smm,
@@ -9245,6 +9251,8 @@ static void enter_smm_save_state_32(struct kvm_vcpu *vcpu, struct kvm_smram_stat
smram->cr4 = kvm_read_cr4(vcpu);
smram->smm_revision = 0x00020000;
smram->smbase = vcpu->arch.smbase;
+
+ smram->int_shadow = static_call(kvm_x86_get_interrupt_shadow)(vcpu);
}
#ifdef CONFIG_X86_64
@@ -9293,6 +9301,8 @@ static void enter_smm_save_state_64(struct kvm_vcpu *vcpu, struct kvm_smram_stat
enter_smm_save_seg_64(vcpu, &smram->ds, VCPU_SREG_DS);
enter_smm_save_seg_64(vcpu, &smram->fs, VCPU_SREG_FS);
enter_smm_save_seg_64(vcpu, &smram->gs, VCPU_SREG_GS);
+
+ smram->int_shadow = static_call(kvm_x86_get_interrupt_shadow)(vcpu);
}
#endif
@@ -9329,6 +9339,8 @@ static void enter_smm(struct kvm_vcpu *vcpu)
kvm_set_rflags(vcpu, X86_EFLAGS_FIXED);
kvm_rip_write(vcpu, 0x8000);
+ static_call(kvm_x86_set_interrupt_shadow)(vcpu, 0);
+
cr0 = vcpu->arch.cr0 & ~(X86_CR0_PE | X86_CR0_EM | X86_CR0_TS | X86_CR0_PG);
static_call(kvm_x86_set_cr0)(vcpu, cr0);
vcpu->arch.cr0 = cr0;

View file

@ -4,4 +4,21 @@ pve/0003-bridge-keep-MAC-of-first-assigned-port.patch
pve/0004-pci-Enable-overrides-for-missing-ACS-capabilities-4..patch
pve/0005-kvm-disable-default-dynamic-halt-polling-growth.patch
pve/0006-net-core-downgrade-unregister_netdevice-refcount-lea.patch
pve/0007-Revert-fortify-Do-not-cast-to-unsigned-char.patch
pve/0007-Revert-PCI-Coalesce-host-bridge-contiguous-apertures.patch
pve/0008-PCI-Reinstate-PCI-Coalesce-host-bridge-contiguous-ap.patch
pve/0009-do-not-generate-split-BTF-type-info-per-default.patch
pve/0010-blk-cgroup-always-terminate-io.stat-lines.patch
pve/0011-drivers-firmware-Don-t-mark-as-busy-the-simple-frame.patch
pve/0012-drm-simpledrm-Request-memory-region-in-driver.patch
pve/0013-fbdev-simplefb-Request-memory-region-in-driver.patch
pve/0014-NFSv4.1-provide-mount-option-to-toggle-trunking-disc.patch
pve/0015-EDAC-amd64-Add-PCI-device-IDs-for-family-19h-model-5.patch
pve/0016-bug-introduce-ASSERT_STRUCT_OFFSET.patch
pve/0017-KVM-x86-emulator-update-the-emulation-mode-after-rsm.patch
pve/0018-KVM-x86-emulator-smm-add-structs-for-KVM-s-smram-lay.patch
pve/0019-KVM-x86-emulator-smm-use-smram-structs-in-the-common.patch
pve/0020-KVM-x86-emulator-smm-use-smram-struct-for-32-bit-smr.patch
pve/0021-KVM-x86-emulator-smm-use-smram-struct-for-64-bit-smr.patch
pve/0022-KVM-x86-SVM-use-smram-structs.patch
pve/0023-KVM-x86-SVM-don-t-save-SVM-state-to-SMRAM-when-VM-is.patch
pve/0024-KVM-x86-emulator-smm-preserve-interrupt-shadow-in-SM.patch