Add Linux 6.0 (#324)

This change adds a release for Linux 6.0 for the Proxmox Edge kernels.
This commit is contained in:
Fabian Mastenbroek 2022-10-27 11:39:29 +02:00 committed by GitHub
commit 0ae6ef51d4
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
55 changed files with 4523 additions and 2229 deletions

View file

@ -11,7 +11,7 @@ jobs:
- name: Clean Workspace - name: Clean Workspace
run: rm -rf *.deb *.ddeb *.build *.buildinfo *.changes run: rm -rf *.deb *.ddeb *.build *.buildinfo *.changes
- name: Checkout Sources - name: Checkout Sources
uses: actions/checkout@v2 uses: actions/checkout@v3
with: with:
submodules: recursive submodules: recursive
path: pve-edge-kernel path: pve-edge-kernel
@ -24,7 +24,7 @@ jobs:
debian/rules debian/control debian/rules debian/control
debuild -e CCACHE_DIR=/var/cache/ccache --prepend-path=/usr/lib/ccache --jobs=auto -b -uc -us debuild -e CCACHE_DIR=/var/cache/ccache --prepend-path=/usr/lib/ccache --jobs=auto -b -uc -us
- name: Upload Artifacts - name: Upload Artifacts
uses: actions/upload-artifact@v2-preview uses: actions/upload-artifact@v3
with: with:
name: debs name: debs
path: "*.deb" path: "*.deb"

View file

@ -15,7 +15,7 @@ jobs:
if: ${{ github.event_name == 'workflow_dispatch' || github.event.pull_request.merged == true }} if: ${{ github.event_name == 'workflow_dispatch' || github.event.pull_request.merged == true }}
strategy: strategy:
matrix: matrix:
debian: [buster, bullseye] debian: [bullseye]
include: include:
- build_profile: 'generic' - build_profile: 'generic'
build_cc: gcc build_cc: gcc
@ -24,7 +24,7 @@ jobs:
- name: Clean Workspace - name: Clean Workspace
run: rm -rf *.deb *.ddeb *.build *.buildinfo *.changes run: rm -rf *.deb *.ddeb *.build *.buildinfo *.changes
- name: Checkout Sources - name: Checkout Sources
uses: actions/checkout@v2 uses: actions/checkout@v3
with: with:
submodules: recursive submodules: recursive
path: pve-edge-kernel path: pve-edge-kernel
@ -44,7 +44,7 @@ jobs:
PVE_KERNEL_CFLAGS: ${{ matrix.build_cflags }} PVE_KERNEL_CFLAGS: ${{ matrix.build_cflags }}
PVE_ZFS_CC: ${{ matrix.build_cc }} PVE_ZFS_CC: ${{ matrix.build_cc }}
- name: Upload Artifacts - name: Upload Artifacts
uses: actions/upload-artifact@v2 uses: actions/upload-artifact@v3
with: with:
name: ${{ matrix.debian }}-${{ matrix.build_profile }} name: ${{ matrix.debian }}-${{ matrix.build_profile }}
path: "*.deb" path: "*.deb"
@ -55,11 +55,11 @@ jobs:
needs: build needs: build
steps: steps:
- name: Checkout Sources - name: Checkout Sources
uses: actions/checkout@v2 uses: actions/checkout@v3
with: with:
submodules: recursive submodules: recursive
- name: Download Artifacts - name: Download Artifacts
uses: actions/download-artifact@v2 uses: actions/download-artifact@v3
with: with:
path: artifacts path: artifacts
- name: Delete Debug Symbols - name: Delete Debug Symbols
@ -81,14 +81,10 @@ jobs:
tag: v${{ steps.format_release.outputs.release }} tag: v${{ steps.format_release.outputs.release }}
body: ${{ steps.format_release.outputs.changelog }} body: ${{ steps.format_release.outputs.changelog }}
token: ${{ secrets.PAT }} token: ${{ secrets.PAT }}
artifacts: "artifacts/buster-generic/pve-headers-*.deb,artifacts/buster-generic/pve-kernel-*.deb,artifacts/buster-generic/linux-tools-*.deb" artifacts: "artifacts/bullseye-generic/pve-headers-*.deb,artifacts/bullseye-generic/pve-kernel-*.deb,artifacts/bullseye-generic/linux-tools-*.deb"
- name: Release to CloudSmith - name: Release to CloudSmith
run: | run: |
pip install --upgrade cloudsmith-cli pip install --upgrade cloudsmith-cli
find artifacts/buster-generic \
-name '*.deb' \
-not -name "*dbgsym*" \
-exec cloudsmith push deb pve-edge/kernel/debian/buster {} --republish \;
find artifacts/bullseye-generic \ find artifacts/bullseye-generic \
-name '*.deb' \ -name '*.deb' \
-not -name "*dbgsym*" \ -not -name "*dbgsym*" \

View file

@ -19,7 +19,7 @@ jobs:
runs-on: [ubuntu-latest] runs-on: [ubuntu-latest]
steps: steps:
- name: Checkout Sources - name: Checkout Sources
uses: actions/checkout@v2 uses: actions/checkout@v3
with: with:
submodules: recursive submodules: recursive
- name: Setup System Dependencies - name: Setup System Dependencies
@ -42,7 +42,7 @@ jobs:
echo "::set-output name=changelog::$changelog" echo "::set-output name=changelog::$changelog"
echo "::set-output name=branch::${GITHUB_REF##*/}" echo "::set-output name=branch::${GITHUB_REF##*/}"
- name: Create Pull Request - name: Create Pull Request
uses: peter-evans/create-pull-request@v3 uses: peter-evans/create-pull-request@v4
with: with:
token: ${{ secrets.PAT }} # Custom token needed to recursively trigger workflows token: ${{ secrets.PAT }} # Custom token needed to recursively trigger workflows
author: Fabian Mastenbroek <mail.fabianm@gmail.com> author: Fabian Mastenbroek <mail.fabianm@gmail.com>

View file

@ -11,10 +11,10 @@ jobs:
runs-on: [ubuntu-latest] runs-on: [ubuntu-latest]
strategy: strategy:
matrix: matrix:
branch: ['v5.19.x'] branch: ['v6.0.x']
steps: steps:
- name: Checkout Sources - name: Checkout Sources
uses: actions/checkout@v2 uses: actions/checkout@v3
with: with:
ref: ${{ matrix.branch }} ref: ${{ matrix.branch }}
submodules: recursive submodules: recursive

View file

@ -1,9 +1,9 @@
# Proxmox Edge kernels # Proxmox Edge kernels
Custom Linux kernels for Proxmox VE. Custom Linux kernels for Proxmox VE 7.
#### Available Versions #### Available Versions
1. Linux 5.19 1. Linux 6.0
1. Linux 5.18 2. Linux 5.19 **[EOL]**
Older builds are still available at the [Releases](https://github.com/fabianishere/pve-edge-kernel/releases) page. Older builds are still available at the [Releases](https://github.com/fabianishere/pve-edge-kernel/releases) page.
@ -16,18 +16,13 @@ First, set up our Debian repository on your Proxmox installation:
curl -1sLf 'https://dl.cloudsmith.io/public/pve-edge/kernel/gpg.8EC01CCF309B98E7.key' | gpg --dearmor -o /usr/share/keyrings/pve-edge-kernel.gpg curl -1sLf 'https://dl.cloudsmith.io/public/pve-edge/kernel/gpg.8EC01CCF309B98E7.key' | gpg --dearmor -o /usr/share/keyrings/pve-edge-kernel.gpg
``` ```
2. **Set up the `pve-edge-kernel` repository:** 2. **Set up the `pve-edge-kernel` repository:**
If you are still on _Proxmox VE 6_, pick the Buster-based repository:
```bash
echo "deb [signed-by=/usr/share/keyrings/pve-edge-kernel.gpg] https://dl.cloudsmith.io/public/pve-edge/kernel/deb/debian buster main" > /etc/apt/sources.list.d/pve-edge-kernel.list
```
If you are already on _Proxmox VE 7_, pick the Bullseye-based repository:
```bash ```bash
echo "deb [signed-by=/usr/share/keyrings/pve-edge-kernel.gpg] https://dl.cloudsmith.io/public/pve-edge/kernel/deb/debian bullseye main" > /etc/apt/sources.list.d/pve-edge-kernel.list echo "deb [signed-by=/usr/share/keyrings/pve-edge-kernel.gpg] https://dl.cloudsmith.io/public/pve-edge/kernel/deb/debian bullseye main" > /etc/apt/sources.list.d/pve-edge-kernel.list
``` ```
3. **Install a kernel package:** 3. **Install a kernel package:**
```bash ```bash
apt update apt update
apt install pve-kernel-5.18-edge apt install pve-kernel-6.0-edge
``` ```
Package repository hosting is graciously provided by [Cloudsmith](https://cloudsmith.com). Package repository hosting is graciously provided by [Cloudsmith](https://cloudsmith.com).
@ -44,17 +39,6 @@ Then, you can install the package as follows:
apt install ./pve-kernel-VERSION_amd64.deb apt install ./pve-kernel-VERSION_amd64.deb
``` ```
## AppArmor intervention
Previously, these kernels required changing the AppArmor feature file to a non-default version.
This issue has been fixed since version 5.16.
If you have used the workaround, please update back to the default configuration in `/etc/apparmor/parser.conf` as follows:
```diff
## Pin feature set (avoid regressions when policy is lagging behind
## the kernel)
- compile-features=/usr/share/apparmor-features/features.stock
+ compile-features=/usr/share/apparmor-features/features
```
## Building manually ## Building manually
You may also choose to manually build one of these kernels yourself. You may also choose to manually build one of these kernels yourself.
@ -65,24 +49,6 @@ packages installed:
```bash ```bash
apt install devscripts debhelper equivs git apt install devscripts debhelper equivs git
``` ```
In case you are building a kernel version >= 5.8, make sure you have installed
at least [dwarves >= 1.16.0](https://packages.debian.org/bullseye/dwarves).
This version is currently not available in the main repository.
To work around this issue, we describe two options:
1. You may add the Debian Buster Backports repository to your APT sources as described
[here](https://backports.debian.org/Instructions/) and install the
newer `dwarves` package as follows:
```shell
apt install -t buster-backports dwarves
```
2. Alternatively, you may [download](https://packages.debian.org/bullseye/dwarves)
the newer `dwarves` (>= 1.16) package from the Debian website and install the
package manually, for example:
```shell
wget http://ftp.us.debian.org/debian/pool/main/d/dwarves-dfsg/dwarves_1.17-1_amd64.deb
apt install ./dwarves_1.17-1_amd64.deb
```
#### Obtaining the source #### Obtaining the source
Obtain the source code as follows: Obtain the source code as follows:
@ -90,9 +56,9 @@ Obtain the source code as follows:
git clone https://github.com/fabianishere/pve-edge-kernel git clone https://github.com/fabianishere/pve-edge-kernel
cd pve-edge-kernel cd pve-edge-kernel
``` ```
Then, select the branch of your liking (e.g. `v5.10.x`) and update the submodules: Then, select the branch of your liking (e.g. `v6.0.x`) and update the submodules:
```bash ```bash
git checkout v5.10.x git checkout v6.0.x
git submodule update --init --depth=1 --recursive linux git submodule update --init --depth=1 --recursive linux
git submodule update --init --recursive git submodule update --init --recursive
``` ```
@ -129,7 +95,7 @@ to remove all packages from a particular kernel release, use the following
command: command:
```bash ```bash
apt remove pve-kernel-5.19*edge pve-headers-5.19*edge apt remove pve-kernel-6.0*edge pve-headers-6.0*edge
``` ```
## Contributing ## Contributing

Binary file not shown.

7
debian/changelog vendored
View file

@ -1,3 +1,10 @@
pve-kernel (6.0.0-1) edge; urgency=medium
* Update to Linux 6.0
* Update to ZFS 2.1.6
-- Fabian Mastenbroek <mail.fabianm@gmail.com> Mon, 03 Oct 2022 16:00:00 +0000
pve-kernel (5.19.0-1) edge; urgency=medium pve-kernel (5.19.0-1) edge; urgency=medium
* Update to Linux 5.19 * Update to Linux 5.19

View file

@ -21,10 +21,10 @@ Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
1 file changed, 7 insertions(+), 3 deletions(-) 1 file changed, 7 insertions(+), 3 deletions(-)
diff --git a/scripts/mkcompile_h b/scripts/mkcompile_h diff --git a/scripts/mkcompile_h b/scripts/mkcompile_h
index 4ae735039daf..5a1abe7b4169 100755 index ca40a5258c87..6ae930a732f0 100755
--- a/scripts/mkcompile_h --- a/scripts/mkcompile_h
+++ b/scripts/mkcompile_h +++ b/scripts/mkcompile_h
@@ -24,10 +24,14 @@ else @@ -23,10 +23,14 @@ else
VERSION=$KBUILD_BUILD_VERSION VERSION=$KBUILD_BUILD_VERSION
fi fi

View file

@ -19,7 +19,7 @@ Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
1 file changed, 1 insertion(+), 4 deletions(-) 1 file changed, 1 insertion(+), 4 deletions(-)
diff --git a/net/bridge/br_stp_if.c b/net/bridge/br_stp_if.c diff --git a/net/bridge/br_stp_if.c b/net/bridge/br_stp_if.c
index ba55851fe132..82675e1ecfb8 100644 index 75204d36d7f9..1fb5ff73ec1e 100644
--- a/net/bridge/br_stp_if.c --- a/net/bridge/br_stp_if.c
+++ b/net/bridge/br_stp_if.c +++ b/net/bridge/br_stp_if.c
@@ -265,10 +265,7 @@ bool br_stp_recalculate_bridge_id(struct net_bridge *br) @@ -265,10 +265,7 @@ bool br_stp_recalculate_bridge_id(struct net_bridge *br)

View file

@ -13,10 +13,10 @@ Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
1 file changed, 1 insertion(+), 1 deletion(-) 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 2caba2828982..2080de085df5 100644 index c56861ed0e38..3ba11c5e7acd 100644
--- a/virt/kvm/kvm_main.c --- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c
@@ -77,7 +77,7 @@ module_param(halt_poll_ns, uint, 0644); @@ -79,7 +79,7 @@ module_param(halt_poll_ns, uint, 0644);
EXPORT_SYMBOL_GPL(halt_poll_ns); EXPORT_SYMBOL_GPL(halt_poll_ns);
/* Default doubles per-vcpu halt_poll_ns. */ /* Default doubles per-vcpu halt_poll_ns. */

View file

@ -10,12 +10,12 @@ Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
1 file changed, 1 insertion(+), 1 deletion(-) 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/net/core/dev.c b/net/core/dev.c diff --git a/net/core/dev.c b/net/core/dev.c
index 2771fd22dc6a..37e18f93de23 100644 index ecaeb3ef8e5c..81f117f43bbe 100644
--- a/net/core/dev.c --- a/net/core/dev.c
+++ b/net/core/dev.c +++ b/net/core/dev.c
@@ -10177,7 +10177,7 @@ static struct net_device *netdev_wait_allrefs_any(struct list_head *list) @@ -10268,7 +10268,7 @@ static struct net_device *netdev_wait_allrefs_any(struct list_head *list)
if (time_after(jiffies, warning_time + if (time_after(jiffies, warning_time +
netdev_unregister_timeout_secs * HZ)) { READ_ONCE(netdev_unregister_timeout_secs) * HZ)) {
list_for_each_entry(dev, list, todo_list) { list_for_each_entry(dev, list, todo_list) {
- pr_emerg("unregister_netdevice: waiting for %s to become free. Usage count = %d\n", - pr_emerg("unregister_netdevice: waiting for %s to become free. Usage count = %d\n",
+ pr_err("unregister_netdevice: waiting for %s to become free. Usage count = %d\n", + pr_err("unregister_netdevice: waiting for %s to become free. Usage count = %d\n",

View file

@ -1,104 +0,0 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Thomas Lamprecht <t.lamprecht@proxmox.com>
Date: Mon, 27 Sep 2021 11:28:39 +0200
Subject: [PATCH] Revert "PCI: Coalesce host bridge contiguous apertures"
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
This reverts commit ab20e43b20b60f5cc8e2ea3763ffa388158469ac.
That commit was reverted upstream because of reports similar to:
Link: https://bugzilla.proxmox.com/show_bug.cgi?id=3552
Link: https://lore.kernel.org/r/20210709231529.GA3270116@roeck-us.net
Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
---
drivers/pci/probe.c | 50 ++++-----------------------------------------
1 file changed, 4 insertions(+), 46 deletions(-)
diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c
index cb70d2605e97..258350f80f6c 100644
--- a/drivers/pci/probe.c
+++ b/drivers/pci/probe.c
@@ -20,7 +20,6 @@
#include <linux/irqdomain.h>
#include <linux/pm_runtime.h>
#include <linux/bitfield.h>
-#include <linux/list_sort.h>
#include "pci.h"
#define CARDBUS_LATENCY_TIMER 176 /* secondary latency timer */
@@ -881,31 +880,14 @@ static void pci_set_bus_msi_domain(struct pci_bus *bus)
dev_set_msi_domain(&bus->dev, d);
}
-static int res_cmp(void *priv, const struct list_head *a,
- const struct list_head *b)
-{
- struct resource_entry *entry1, *entry2;
-
- entry1 = container_of(a, struct resource_entry, node);
- entry2 = container_of(b, struct resource_entry, node);
-
- if (entry1->res->flags != entry2->res->flags)
- return entry1->res->flags > entry2->res->flags;
-
- if (entry1->offset != entry2->offset)
- return entry1->offset > entry2->offset;
-
- return entry1->res->start > entry2->res->start;
-}
-
static int pci_register_host_bridge(struct pci_host_bridge *bridge)
{
struct device *parent = bridge->dev.parent;
- struct resource_entry *window, *next, *n;
+ struct resource_entry *window, *n;
struct pci_bus *bus, *b;
- resource_size_t offset, next_offset;
+ resource_size_t offset;
LIST_HEAD(resources);
- struct resource *res, *next_res;
+ struct resource *res;
char addr[64], *fmt;
const char *name;
int err;
@@ -988,35 +970,11 @@ static int pci_register_host_bridge(struct pci_host_bridge *bridge)
if (nr_node_ids > 1 && pcibus_to_node(bus) == NUMA_NO_NODE)
dev_warn(&bus->dev, "Unknown NUMA node; performance will be reduced\n");
- /* Sort and coalesce contiguous windows */
- list_sort(NULL, &resources, res_cmp);
- resource_list_for_each_entry_safe(window, n, &resources) {
- if (list_is_last(&window->node, &resources))
- break;
-
- next = list_next_entry(window, node);
- offset = window->offset;
- res = window->res;
- next_offset = next->offset;
- next_res = next->res;
-
- if (res->flags != next_res->flags || offset != next_offset)
- continue;
-
- if (res->end + 1 == next_res->start) {
- next_res->start = res->start;
- res->flags = res->start = res->end = 0;
- }
- }
-
/* Add initial resources to the bus */
resource_list_for_each_entry_safe(window, n, &resources) {
+ list_move_tail(&window->node, &bridge->windows);
offset = window->offset;
res = window->res;
- if (!res->end)
- continue;
-
- list_move_tail(&window->node, &bridge->windows);
if (res->flags & IORESOURCE_BUS)
pci_bus_insert_busn_res(bus, bus->number, res->end);

View file

@ -0,0 +1,28 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Fabian=20Gr=C3=BCnbichler?= <f.gruenbichler@proxmox.com>
Date: Thu, 14 Sep 2017 11:09:58 +0200
Subject: [PATCH] do not generate split BTF type info per default
This reverts commit a8ed1a0607cfa5478ff6009539f44790c4d0956d.
It breaks ZFS sometimes:
https://github.com/openzfs/zfs/issues/12301#issuecomment-873303739
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
---
lib/Kconfig.debug | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index 2e24db4bff19..7766a24d0f19 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -357,7 +357,7 @@ config PAHOLE_HAS_BTF_TAG
these attributes, so make the config depend on CC_IS_CLANG.
config DEBUG_INFO_BTF_MODULES
- def_bool y
+ def_bool n
depends on DEBUG_INFO_BTF && MODULES && PAHOLE_HAS_SPLIT_BTF
help
Generate compact split BTF type information for kernel modules.

View file

@ -1,112 +0,0 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Kai-Heng Feng <kai.heng.feng@canonical.com>
Date: Tue, 13 Jul 2021 20:50:07 +0800
Subject: [PATCH] PCI: Reinstate "PCI: Coalesce host bridge contiguous
apertures"
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Built-in graphics on HP EliteDesk 805 G6 doesn't work because graphics
can't get the BAR it needs:
pci_bus 0000:00: root bus resource [mem 0x10020200000-0x100303fffff window]
pci_bus 0000:00: root bus resource [mem 0x10030400000-0x100401fffff window]
pci 0000:00:08.1: bridge window [mem 0xd2000000-0xd23fffff]
pci 0000:00:08.1: bridge window [mem 0x10030000000-0x100401fffff 64bit pref]
pci 0000:00:08.1: can't claim BAR 15 [mem 0x10030000000-0x100401fffff 64bit pref]: no compatible bridge window
pci 0000:00:08.1: [mem 0x10030000000-0x100401fffff 64bit pref] clipped to [mem 0x10030000000-0x100303fffff 64bit pref]
pci 0000:00:08.1: bridge window [mem 0x10030000000-0x100303fffff 64bit pref]
pci 0000:07:00.0: can't claim BAR 0 [mem 0x10030000000-0x1003fffffff 64bit pref]: no compatible bridge window
pci 0000:07:00.0: can't claim BAR 2 [mem 0x10040000000-0x100401fffff 64bit pref]: no compatible bridge window
However, the root bus has two contiguous apertures that can contain the
child resource requested.
Coalesce contiguous apertures so we can allocate from the entire contiguous
region.
This is the second take of commit 65db04053efe ("PCI: Coalesce host
bridge contiguous apertures"). The original approach sorts the apertures
by address, but that makes NVMe stop working on QEMU ppc:sam460ex:
PCI host bridge to bus 0002:00
pci_bus 0002:00: root bus resource [io 0x0000-0xffff]
pci_bus 0002:00: root bus resource [mem 0xd80000000-0xdffffffff] (bus address [0x80000000-0xffffffff])
pci_bus 0002:00: root bus resource [mem 0xc0ee00000-0xc0eefffff] (bus address [0x00000000-0x000fffff])
After the offending commit:
PCI host bridge to bus 0002:00
pci_bus 0002:00: root bus resource [io 0x0000-0xffff]
pci_bus 0002:00: root bus resource [mem 0xc0ee00000-0xc0eefffff] (bus address [0x00000000-0x000fffff])
pci_bus 0002:00: root bus resource [mem 0xd80000000-0xdffffffff] (bus address [0x80000000-0xffffffff])
Since the apertures on HP EliteDesk 805 G6 are already in ascending
order, doing a precautious sorting is not necessary.
Remove the sorting part to avoid the regression on ppc:sam460ex.
Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=212013
Cc: Guenter Roeck <linux@roeck-us.net>
Suggested-by: Bjorn Helgaas <bhelgaas@google.com>
Signed-off-by: Kai-Heng Feng <kai.heng.feng@canonical.com>
Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
---
drivers/pci/probe.c | 31 +++++++++++++++++++++++++++----
1 file changed, 27 insertions(+), 4 deletions(-)
diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c
index 258350f80f6c..7ff9fcec365b 100644
--- a/drivers/pci/probe.c
+++ b/drivers/pci/probe.c
@@ -883,11 +883,11 @@ static void pci_set_bus_msi_domain(struct pci_bus *bus)
static int pci_register_host_bridge(struct pci_host_bridge *bridge)
{
struct device *parent = bridge->dev.parent;
- struct resource_entry *window, *n;
+ struct resource_entry *window, *next, *n;
struct pci_bus *bus, *b;
- resource_size_t offset;
+ resource_size_t offset, next_offset;
LIST_HEAD(resources);
- struct resource *res;
+ struct resource *res, *next_res;
char addr[64], *fmt;
const char *name;
int err;
@@ -970,11 +970,34 @@ static int pci_register_host_bridge(struct pci_host_bridge *bridge)
if (nr_node_ids > 1 && pcibus_to_node(bus) == NUMA_NO_NODE)
dev_warn(&bus->dev, "Unknown NUMA node; performance will be reduced\n");
+ /* Coalesce contiguous windows */
+ resource_list_for_each_entry_safe(window, n, &resources) {
+ if (list_is_last(&window->node, &resources))
+ break;
+
+ next = list_next_entry(window, node);
+ offset = window->offset;
+ res = window->res;
+ next_offset = next->offset;
+ next_res = next->res;
+
+ if (res->flags != next_res->flags || offset != next_offset)
+ continue;
+
+ if (res->end + 1 == next_res->start) {
+ next_res->start = res->start;
+ res->flags = res->start = res->end = 0;
+ }
+ }
+
/* Add initial resources to the bus */
resource_list_for_each_entry_safe(window, n, &resources) {
- list_move_tail(&window->node, &bridge->windows);
offset = window->offset;
res = window->res;
+ if (!res->end)
+ continue;
+
+ list_move_tail(&window->node, &bridge->windows);
if (res->flags & IORESOURCE_BUS)
pci_bus_insert_busn_res(bus, bus->number, res->end);

View file

@ -0,0 +1,31 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Thomas Lamprecht <t.lamprecht@proxmox.com>
Date: Sat, 12 Mar 2022 16:19:10 +0100
Subject: [PATCH] Ubuntu: remove leftover reference to ubuntu/hio driver
A single reference to the hio driver was forgotten when it was removed
recently. While this reference is not a problem for the build itself, it
breaks the __clean target from 'scripts/Makefile.clean' here, as make
cannot enter the "ubuntu/hio" folder for cleaning due to ENOENT.
Fixes: 4ea6dd9afa0a0d ("UBUNTU: Remove ubuntu/hio driver")
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
---
ubuntu/Makefile | 4 ----
1 file changed, 4 deletions(-)
diff --git a/ubuntu/Makefile b/ubuntu/Makefile
index 27fa95ba242a..3bfc4494c069 100644
--- a/ubuntu/Makefile
+++ b/ubuntu/Makefile
@@ -15,10 +15,6 @@
##
##
##
-obj-$(CONFIG_HIO) += hio/
-##
-##
-##
obj-$(CONFIG_UBUNTU_HOST) += ubuntu-host/
##
##

View file

@ -0,0 +1,529 @@
From c0c8e5258f6aa8e5fa65ce118333b7c1e484c607 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Tue, 25 Oct 2022 15:47:19 +0300
Subject: [PATCH] KVM: x86: start moving SMM-related functions to new files
Create a new header and source with code related to system management
mode emulation. Entry and exit will move there too; for now,
opportunistically rename put_smstate to PUT_SMSTATE while moving
it to smm.h, and adjust the SMM state saving code.
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
arch/x86/include/asm/kvm_host.h | 6 --
arch/x86/kvm/Makefile | 1 +
arch/x86/kvm/emulate.c | 1 +
arch/x86/kvm/kvm_cache_regs.h | 5 --
arch/x86/kvm/lapic.c | 1 +
arch/x86/kvm/mmu/mmu.c | 1 +
arch/x86/kvm/smm.c | 37 ++++++++
arch/x86/kvm/smm.h | 25 ++++++
arch/x86/kvm/svm/nested.c | 1 +
arch/x86/kvm/svm/svm.c | 5 +-
arch/x86/kvm/vmx/nested.c | 1 +
arch/x86/kvm/vmx/vmx.c | 1 +
arch/x86/kvm/x86.c | 148 ++++++++++++--------------------
arch/x86/kvm/x86.h | 1 +
14 files changed, 128 insertions(+), 106 deletions(-)
create mode 100644 arch/x86/kvm/smm.c
create mode 100644 arch/x86/kvm/smm.h
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index aa381ab69a19..eed72a164a5c 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -2077,12 +2077,6 @@ static inline int kvm_cpu_get_apicid(int mps_cpu)
#endif
}
-#define put_smstate(type, buf, offset, val) \
- *(type *)((buf) + (offset) - 0x7e00) = val
-
-#define GET_SMSTATE(type, buf, offset) \
- (*(type *)((buf) + (offset) - 0x7e00))
-
int kvm_cpu_dirty_log_size(void);
int memslot_rmap_alloc(struct kvm_memory_slot *slot, unsigned long npages);
diff --git a/arch/x86/kvm/Makefile b/arch/x86/kvm/Makefile
index 30f244b64523..ec6f7656254b 100644
--- a/arch/x86/kvm/Makefile
+++ b/arch/x86/kvm/Makefile
@@ -20,6 +20,7 @@ endif
kvm-$(CONFIG_X86_64) += mmu/tdp_iter.o mmu/tdp_mmu.o
kvm-$(CONFIG_KVM_XEN) += xen.o
+kvm-y += smm.o
kvm-intel-y += vmx/vmx.o vmx/vmenter.o vmx/pmu_intel.o vmx/vmcs12.o \
vmx/evmcs.o vmx/nested.o vmx/posted_intr.o
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index aacb28c83e43..3c3bf6f66a7e 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -30,6 +30,7 @@
#include "tss.h"
#include "mmu.h"
#include "pmu.h"
+#include "smm.h"
/*
* Operand types
diff --git a/arch/x86/kvm/kvm_cache_regs.h b/arch/x86/kvm/kvm_cache_regs.h
index 3febc342360c..c09174f73a34 100644
--- a/arch/x86/kvm/kvm_cache_regs.h
+++ b/arch/x86/kvm/kvm_cache_regs.h
@@ -200,9 +200,4 @@ static inline bool is_guest_mode(struct kvm_vcpu *vcpu)
return vcpu->arch.hflags & HF_GUEST_MASK;
}
-static inline bool is_smm(struct kvm_vcpu *vcpu)
-{
- return vcpu->arch.hflags & HF_SMM_MASK;
-}
-
#endif
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 9dda989a1cf0..7460d9566119 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -42,6 +42,7 @@
#include "x86.h"
#include "cpuid.h"
#include "hyperv.h"
+#include "smm.h"
#ifndef CONFIG_X86_64
#define mod_64(x, y) ((x) - (y) * div64_u64(x, y))
diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index 3552e6af3684..60a2c5c75095 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -22,6 +22,7 @@
#include "tdp_mmu.h"
#include "x86.h"
#include "kvm_cache_regs.h"
+#include "smm.h"
#include "kvm_emulate.h"
#include "cpuid.h"
#include "spte.h"
diff --git a/arch/x86/kvm/smm.c b/arch/x86/kvm/smm.c
new file mode 100644
index 000000000000..b91c48d91f6e
--- /dev/null
+++ b/arch/x86/kvm/smm.c
@@ -0,0 +1,37 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#include <linux/kvm_host.h>
+#include "x86.h"
+#include "kvm_cache_regs.h"
+#include "kvm_emulate.h"
+#include "smm.h"
+#include "trace.h"
+
+void kvm_smm_changed(struct kvm_vcpu *vcpu, bool entering_smm)
+{
+ trace_kvm_smm_transition(vcpu->vcpu_id, vcpu->arch.smbase, entering_smm);
+
+ if (entering_smm) {
+ vcpu->arch.hflags |= HF_SMM_MASK;
+ } else {
+ vcpu->arch.hflags &= ~(HF_SMM_MASK | HF_SMM_INSIDE_NMI_MASK);
+
+ /* Process a latched INIT or SMI, if any. */
+ kvm_make_request(KVM_REQ_EVENT, vcpu);
+
+ /*
+ * Even if KVM_SET_SREGS2 loaded PDPTRs out of band,
+ * on SMM exit we still need to reload them from
+ * guest memory
+ */
+ vcpu->arch.pdptrs_from_userspace = false;
+ }
+
+ kvm_mmu_reset_context(vcpu);
+}
+
+void process_smi(struct kvm_vcpu *vcpu)
+{
+ vcpu->arch.smi_pending = true;
+ kvm_make_request(KVM_REQ_EVENT, vcpu);
+}
diff --git a/arch/x86/kvm/smm.h b/arch/x86/kvm/smm.h
new file mode 100644
index 000000000000..d85d4ccd32dd
--- /dev/null
+++ b/arch/x86/kvm/smm.h
@@ -0,0 +1,25 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef ASM_KVM_SMM_H
+#define ASM_KVM_SMM_H
+
+#define GET_SMSTATE(type, buf, offset) \
+ (*(type *)((buf) + (offset) - 0x7e00))
+
+#define PUT_SMSTATE(type, buf, offset, val) \
+ *(type *)((buf) + (offset) - 0x7e00) = val
+
+static inline int kvm_inject_smi(struct kvm_vcpu *vcpu)
+{
+ kvm_make_request(KVM_REQ_SMI, vcpu);
+ return 0;
+}
+
+static inline bool is_smm(struct kvm_vcpu *vcpu)
+{
+ return vcpu->arch.hflags & HF_SMM_MASK;
+}
+
+void kvm_smm_changed(struct kvm_vcpu *vcpu, bool in_smm);
+void process_smi(struct kvm_vcpu *vcpu);
+
+#endif
diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c
index 76dcc8a3e849..d6cc9963b04a 100644
--- a/arch/x86/kvm/svm/nested.c
+++ b/arch/x86/kvm/svm/nested.c
@@ -25,6 +25,7 @@
#include "trace.h"
#include "mmu.h"
#include "x86.h"
+#include "smm.h"
#include "cpuid.h"
#include "lapic.h"
#include "svm.h"
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index f3813dbacb9f..f4ed4a02b109 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -6,6 +6,7 @@
#include "mmu.h"
#include "kvm_cache_regs.h"
#include "x86.h"
+#include "smm.h"
#include "cpuid.h"
#include "pmu.h"
@@ -4443,9 +4444,9 @@ static int svm_enter_smm(struct kvm_vcpu *vcpu, char *smstate)
return 0;
/* FED8h - SVM Guest */
- put_smstate(u64, smstate, 0x7ed8, 1);
+ PUT_SMSTATE(u64, smstate, 0x7ed8, 1);
/* FEE0h - SVM Guest VMCB Physical Address */
- put_smstate(u64, smstate, 0x7ee0, svm->nested.vmcb12_gpa);
+ PUT_SMSTATE(u64, smstate, 0x7ee0, svm->nested.vmcb12_gpa);
svm->vmcb->save.rax = vcpu->arch.regs[VCPU_REGS_RAX];
svm->vmcb->save.rsp = vcpu->arch.regs[VCPU_REGS_RSP];
diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c
index ddd4367d4826..e8197915b8b0 100644
--- a/arch/x86/kvm/vmx/nested.c
+++ b/arch/x86/kvm/vmx/nested.c
@@ -16,6 +16,7 @@
#include "trace.h"
#include "vmx.h"
#include "x86.h"
+#include "smm.h"
static bool __read_mostly enable_shadow_vmcs = 1;
module_param_named(enable_shadow_vmcs, enable_shadow_vmcs, bool, S_IRUGO);
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index c9b49a09e6b5..dc75de78ceb6 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -66,6 +66,7 @@
#include "vmcs12.h"
#include "vmx.h"
#include "x86.h"
+#include "smm.h"
MODULE_AUTHOR("Qumranet");
MODULE_LICENSE("GPL");
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index b0c47b41c264..7e60b4c12b91 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -30,6 +30,7 @@
#include "hyperv.h"
#include "lapic.h"
#include "xen.h"
+#include "smm.h"
#include <linux/clocksource.h>
#include <linux/interrupt.h>
@@ -119,7 +120,6 @@ static u64 __read_mostly cr4_reserved_bits = CR4_RESERVED_BITS;
static void update_cr8_intercept(struct kvm_vcpu *vcpu);
static void process_nmi(struct kvm_vcpu *vcpu);
-static void process_smi(struct kvm_vcpu *vcpu);
static void enter_smm(struct kvm_vcpu *vcpu);
static void __kvm_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags);
static void store_regs(struct kvm_vcpu *vcpu);
@@ -4878,13 +4878,6 @@ static int kvm_vcpu_ioctl_nmi(struct kvm_vcpu *vcpu)
return 0;
}
-static int kvm_vcpu_ioctl_smi(struct kvm_vcpu *vcpu)
-{
- kvm_make_request(KVM_REQ_SMI, vcpu);
-
- return 0;
-}
-
static int vcpu_ioctl_tpr_access_reporting(struct kvm_vcpu *vcpu,
struct kvm_tpr_access_ctl *tac)
{
@@ -5095,8 +5088,6 @@ static void kvm_vcpu_ioctl_x86_get_vcpu_events(struct kvm_vcpu *vcpu,
memset(&events->reserved, 0, sizeof(events->reserved));
}
-static void kvm_smm_changed(struct kvm_vcpu *vcpu, bool entering_smm);
-
static int kvm_vcpu_ioctl_x86_set_vcpu_events(struct kvm_vcpu *vcpu,
struct kvm_vcpu_events *events)
{
@@ -5536,7 +5527,7 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
break;
}
case KVM_SMI: {
- r = kvm_vcpu_ioctl_smi(vcpu);
+ r = kvm_inject_smi(vcpu);
break;
}
case KVM_SET_CPUID: {
@@ -8470,29 +8461,6 @@ static bool retry_instruction(struct x86_emulate_ctxt *ctxt,
static int complete_emulated_mmio(struct kvm_vcpu *vcpu);
static int complete_emulated_pio(struct kvm_vcpu *vcpu);
-static void kvm_smm_changed(struct kvm_vcpu *vcpu, bool entering_smm)
-{
- trace_kvm_smm_transition(vcpu->vcpu_id, vcpu->arch.smbase, entering_smm);
-
- if (entering_smm) {
- vcpu->arch.hflags |= HF_SMM_MASK;
- } else {
- vcpu->arch.hflags &= ~(HF_SMM_MASK | HF_SMM_INSIDE_NMI_MASK);
-
- /* Process a latched INIT or SMI, if any. */
- kvm_make_request(KVM_REQ_EVENT, vcpu);
-
- /*
- * Even if KVM_SET_SREGS2 loaded PDPTRs out of band,
- * on SMM exit we still need to reload them from
- * guest memory
- */
- vcpu->arch.pdptrs_from_userspace = false;
- }
-
- kvm_mmu_reset_context(vcpu);
-}
-
static int kvm_vcpu_check_hw_bp(unsigned long addr, u32 type, u32 dr7,
unsigned long *db)
{
@@ -9853,16 +9821,16 @@ static void enter_smm_save_seg_32(struct kvm_vcpu *vcpu, char *buf, int n)
int offset;
kvm_get_segment(vcpu, &seg, n);
- put_smstate(u32, buf, 0x7fa8 + n * 4, seg.selector);
+ PUT_SMSTATE(u32, buf, 0x7fa8 + n * 4, seg.selector);
if (n < 3)
offset = 0x7f84 + n * 12;
else
offset = 0x7f2c + (n - 3) * 12;
- put_smstate(u32, buf, offset + 8, seg.base);
- put_smstate(u32, buf, offset + 4, seg.limit);
- put_smstate(u32, buf, offset, enter_smm_get_segment_flags(&seg));
+ PUT_SMSTATE(u32, buf, offset + 8, seg.base);
+ PUT_SMSTATE(u32, buf, offset + 4, seg.limit);
+ PUT_SMSTATE(u32, buf, offset, enter_smm_get_segment_flags(&seg));
}
#ifdef CONFIG_X86_64
@@ -9876,10 +9844,10 @@ static void enter_smm_save_seg_64(struct kvm_vcpu *vcpu, char *buf, int n)
offset = 0x7e00 + n * 16;
flags = enter_smm_get_segment_flags(&seg) >> 8;
- put_smstate(u16, buf, offset, seg.selector);
- put_smstate(u16, buf, offset + 2, flags);
- put_smstate(u32, buf, offset + 4, seg.limit);
- put_smstate(u64, buf, offset + 8, seg.base);
+ PUT_SMSTATE(u16, buf, offset, seg.selector);
+ PUT_SMSTATE(u16, buf, offset + 2, flags);
+ PUT_SMSTATE(u32, buf, offset + 4, seg.limit);
+ PUT_SMSTATE(u64, buf, offset + 8, seg.base);
}
#endif
@@ -9890,47 +9858,47 @@ static void enter_smm_save_state_32(struct kvm_vcpu *vcpu, char *buf)
unsigned long val;
int i;
- put_smstate(u32, buf, 0x7ffc, kvm_read_cr0(vcpu));
- put_smstate(u32, buf, 0x7ff8, kvm_read_cr3(vcpu));
- put_smstate(u32, buf, 0x7ff4, kvm_get_rflags(vcpu));
- put_smstate(u32, buf, 0x7ff0, kvm_rip_read(vcpu));
+ PUT_SMSTATE(u32, buf, 0x7ffc, kvm_read_cr0(vcpu));
+ PUT_SMSTATE(u32, buf, 0x7ff8, kvm_read_cr3(vcpu));
+ PUT_SMSTATE(u32, buf, 0x7ff4, kvm_get_rflags(vcpu));
+ PUT_SMSTATE(u32, buf, 0x7ff0, kvm_rip_read(vcpu));
for (i = 0; i < 8; i++)
- put_smstate(u32, buf, 0x7fd0 + i * 4, kvm_register_read_raw(vcpu, i));
+ PUT_SMSTATE(u32, buf, 0x7fd0 + i * 4, kvm_register_read_raw(vcpu, i));
kvm_get_dr(vcpu, 6, &val);
- put_smstate(u32, buf, 0x7fcc, (u32)val);
+ PUT_SMSTATE(u32, buf, 0x7fcc, (u32)val);
kvm_get_dr(vcpu, 7, &val);
- put_smstate(u32, buf, 0x7fc8, (u32)val);
+ PUT_SMSTATE(u32, buf, 0x7fc8, (u32)val);
kvm_get_segment(vcpu, &seg, VCPU_SREG_TR);
- put_smstate(u32, buf, 0x7fc4, seg.selector);
- put_smstate(u32, buf, 0x7f64, seg.base);
- put_smstate(u32, buf, 0x7f60, seg.limit);
- put_smstate(u32, buf, 0x7f5c, enter_smm_get_segment_flags(&seg));
+ PUT_SMSTATE(u32, buf, 0x7fc4, seg.selector);
+ PUT_SMSTATE(u32, buf, 0x7f64, seg.base);
+ PUT_SMSTATE(u32, buf, 0x7f60, seg.limit);
+ PUT_SMSTATE(u32, buf, 0x7f5c, enter_smm_get_segment_flags(&seg));
kvm_get_segment(vcpu, &seg, VCPU_SREG_LDTR);
- put_smstate(u32, buf, 0x7fc0, seg.selector);
- put_smstate(u32, buf, 0x7f80, seg.base);
- put_smstate(u32, buf, 0x7f7c, seg.limit);
- put_smstate(u32, buf, 0x7f78, enter_smm_get_segment_flags(&seg));
+ PUT_SMSTATE(u32, buf, 0x7fc0, seg.selector);
+ PUT_SMSTATE(u32, buf, 0x7f80, seg.base);
+ PUT_SMSTATE(u32, buf, 0x7f7c, seg.limit);
+ PUT_SMSTATE(u32, buf, 0x7f78, enter_smm_get_segment_flags(&seg));
static_call(kvm_x86_get_gdt)(vcpu, &dt);
- put_smstate(u32, buf, 0x7f74, dt.address);
- put_smstate(u32, buf, 0x7f70, dt.size);
+ PUT_SMSTATE(u32, buf, 0x7f74, dt.address);
+ PUT_SMSTATE(u32, buf, 0x7f70, dt.size);
static_call(kvm_x86_get_idt)(vcpu, &dt);
- put_smstate(u32, buf, 0x7f58, dt.address);
- put_smstate(u32, buf, 0x7f54, dt.size);
+ PUT_SMSTATE(u32, buf, 0x7f58, dt.address);
+ PUT_SMSTATE(u32, buf, 0x7f54, dt.size);
for (i = 0; i < 6; i++)
enter_smm_save_seg_32(vcpu, buf, i);
- put_smstate(u32, buf, 0x7f14, kvm_read_cr4(vcpu));
+ PUT_SMSTATE(u32, buf, 0x7f14, kvm_read_cr4(vcpu));
/* revision id */
- put_smstate(u32, buf, 0x7efc, 0x00020000);
- put_smstate(u32, buf, 0x7ef8, vcpu->arch.smbase);
+ PUT_SMSTATE(u32, buf, 0x7efc, 0x00020000);
+ PUT_SMSTATE(u32, buf, 0x7ef8, vcpu->arch.smbase);
}
#ifdef CONFIG_X86_64
@@ -9942,46 +9910,46 @@ static void enter_smm_save_state_64(struct kvm_vcpu *vcpu, char *buf)
int i;
for (i = 0; i < 16; i++)
- put_smstate(u64, buf, 0x7ff8 - i * 8, kvm_register_read_raw(vcpu, i));
+ PUT_SMSTATE(u64, buf, 0x7ff8 - i * 8, kvm_register_read_raw(vcpu, i));
- put_smstate(u64, buf, 0x7f78, kvm_rip_read(vcpu));
- put_smstate(u32, buf, 0x7f70, kvm_get_rflags(vcpu));
+ PUT_SMSTATE(u64, buf, 0x7f78, kvm_rip_read(vcpu));
+ PUT_SMSTATE(u32, buf, 0x7f70, kvm_get_rflags(vcpu));
kvm_get_dr(vcpu, 6, &val);
- put_smstate(u64, buf, 0x7f68, val);
+ PUT_SMSTATE(u64, buf, 0x7f68, val);
kvm_get_dr(vcpu, 7, &val);
- put_smstate(u64, buf, 0x7f60, val);
+ PUT_SMSTATE(u64, buf, 0x7f60, val);
- put_smstate(u64, buf, 0x7f58, kvm_read_cr0(vcpu));
- put_smstate(u64, buf, 0x7f50, kvm_read_cr3(vcpu));
- put_smstate(u64, buf, 0x7f48, kvm_read_cr4(vcpu));
+ PUT_SMSTATE(u64, buf, 0x7f58, kvm_read_cr0(vcpu));
+ PUT_SMSTATE(u64, buf, 0x7f50, kvm_read_cr3(vcpu));
+ PUT_SMSTATE(u64, buf, 0x7f48, kvm_read_cr4(vcpu));
- put_smstate(u32, buf, 0x7f00, vcpu->arch.smbase);
+ PUT_SMSTATE(u32, buf, 0x7f00, vcpu->arch.smbase);
/* revision id */
- put_smstate(u32, buf, 0x7efc, 0x00020064);
+ PUT_SMSTATE(u32, buf, 0x7efc, 0x00020064);
- put_smstate(u64, buf, 0x7ed0, vcpu->arch.efer);
+ PUT_SMSTATE(u64, buf, 0x7ed0, vcpu->arch.efer);
kvm_get_segment(vcpu, &seg, VCPU_SREG_TR);
- put_smstate(u16, buf, 0x7e90, seg.selector);
- put_smstate(u16, buf, 0x7e92, enter_smm_get_segment_flags(&seg) >> 8);
- put_smstate(u32, buf, 0x7e94, seg.limit);
- put_smstate(u64, buf, 0x7e98, seg.base);
+ PUT_SMSTATE(u16, buf, 0x7e90, seg.selector);
+ PUT_SMSTATE(u16, buf, 0x7e92, enter_smm_get_segment_flags(&seg) >> 8);
+ PUT_SMSTATE(u32, buf, 0x7e94, seg.limit);
+ PUT_SMSTATE(u64, buf, 0x7e98, seg.base);
static_call(kvm_x86_get_idt)(vcpu, &dt);
- put_smstate(u32, buf, 0x7e84, dt.size);
- put_smstate(u64, buf, 0x7e88, dt.address);
+ PUT_SMSTATE(u32, buf, 0x7e84, dt.size);
+ PUT_SMSTATE(u64, buf, 0x7e88, dt.address);
kvm_get_segment(vcpu, &seg, VCPU_SREG_LDTR);
- put_smstate(u16, buf, 0x7e70, seg.selector);
- put_smstate(u16, buf, 0x7e72, enter_smm_get_segment_flags(&seg) >> 8);
- put_smstate(u32, buf, 0x7e74, seg.limit);
- put_smstate(u64, buf, 0x7e78, seg.base);
+ PUT_SMSTATE(u16, buf, 0x7e70, seg.selector);
+ PUT_SMSTATE(u16, buf, 0x7e72, enter_smm_get_segment_flags(&seg) >> 8);
+ PUT_SMSTATE(u32, buf, 0x7e74, seg.limit);
+ PUT_SMSTATE(u64, buf, 0x7e78, seg.base);
static_call(kvm_x86_get_gdt)(vcpu, &dt);
- put_smstate(u32, buf, 0x7e64, dt.size);
- put_smstate(u64, buf, 0x7e68, dt.address);
+ PUT_SMSTATE(u32, buf, 0x7e64, dt.size);
+ PUT_SMSTATE(u64, buf, 0x7e68, dt.address);
for (i = 0; i < 6; i++)
enter_smm_save_seg_64(vcpu, buf, i);
@@ -10067,12 +10035,6 @@ static void enter_smm(struct kvm_vcpu *vcpu)
kvm_mmu_reset_context(vcpu);
}
-static void process_smi(struct kvm_vcpu *vcpu)
-{
- vcpu->arch.smi_pending = true;
- kvm_make_request(KVM_REQ_EVENT, vcpu);
-}
-
void kvm_make_scan_ioapic_request_mask(struct kvm *kvm,
unsigned long *vcpu_bitmap)
{
diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h
index 1926d2cb8e79..cb64fa05405f 100644
--- a/arch/x86/kvm/x86.h
+++ b/arch/x86/kvm/x86.h
@@ -7,6 +7,7 @@
#include <asm/pvclock.h>
#include "kvm_cache_regs.h"
#include "kvm_emulate.h"
+#include "smm.h"
struct kvm_caps {
/* control of guest tsc rate supported? */
--
2.38.1

View file

@ -1,13 +0,0 @@
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index 25dc20175bba..7ff51a3f65e6 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -314,7 +314,7 @@ config PAHOLE_HAS_SPLIT_BTF
def_bool $(success, test `$(PAHOLE) --version | sed -E 's/v([0-9]+)\.([0-9]+)/\1\2/'` -ge "119")
config DEBUG_INFO_BTF_MODULES
- def_bool y
+ def_bool n
depends on DEBUG_INFO_BTF && MODULES && PAHOLE_HAS_SPLIT_BTF
help
Generate compact split BTF type information for kernel modules.

View file

@ -0,0 +1,559 @@
From 44a9857098c34b64ebe2ce3f60284e5f56412ff8 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Tue, 25 Oct 2022 15:47:20 +0300
Subject: [PATCH] KVM: x86: move SMM entry to a new file
Some users of KVM implement the UEFI variable store through a paravirtual
device that does not require the "SMM lockbox" component of edk2, and
would like to compile out system management mode. In preparation for
that, move the SMM entry code out of x86.c and into a new file.
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
arch/x86/include/asm/kvm_host.h | 1 +
arch/x86/kvm/smm.c | 235 +++++++++++++++++++++++++++++++
arch/x86/kvm/smm.h | 1 +
arch/x86/kvm/x86.c | 239 +-------------------------------
4 files changed, 239 insertions(+), 237 deletions(-)
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index eed72a164a5c..5b466eb0feca 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1832,6 +1832,7 @@ int kvm_emulate_ap_reset_hold(struct kvm_vcpu *vcpu);
int kvm_emulate_wbinvd(struct kvm_vcpu *vcpu);
void kvm_get_segment(struct kvm_vcpu *vcpu, struct kvm_segment *var, int seg);
+void kvm_set_segment(struct kvm_vcpu *vcpu, struct kvm_segment *var, int seg);
int kvm_load_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector, int seg);
void kvm_vcpu_deliver_sipi_vector(struct kvm_vcpu *vcpu, u8 vector);
diff --git a/arch/x86/kvm/smm.c b/arch/x86/kvm/smm.c
index b91c48d91f6e..26a6859e421f 100644
--- a/arch/x86/kvm/smm.c
+++ b/arch/x86/kvm/smm.c
@@ -5,6 +5,7 @@
#include "kvm_cache_regs.h"
#include "kvm_emulate.h"
#include "smm.h"
+#include "cpuid.h"
#include "trace.h"
void kvm_smm_changed(struct kvm_vcpu *vcpu, bool entering_smm)
@@ -35,3 +36,237 @@ void process_smi(struct kvm_vcpu *vcpu)
vcpu->arch.smi_pending = true;
kvm_make_request(KVM_REQ_EVENT, vcpu);
}
+
+static u32 enter_smm_get_segment_flags(struct kvm_segment *seg)
+{
+ u32 flags = 0;
+ flags |= seg->g << 23;
+ flags |= seg->db << 22;
+ flags |= seg->l << 21;
+ flags |= seg->avl << 20;
+ flags |= seg->present << 15;
+ flags |= seg->dpl << 13;
+ flags |= seg->s << 12;
+ flags |= seg->type << 8;
+ return flags;
+}
+
+static void enter_smm_save_seg_32(struct kvm_vcpu *vcpu, char *buf, int n)
+{
+ struct kvm_segment seg;
+ int offset;
+
+ kvm_get_segment(vcpu, &seg, n);
+ PUT_SMSTATE(u32, buf, 0x7fa8 + n * 4, seg.selector);
+
+ if (n < 3)
+ offset = 0x7f84 + n * 12;
+ else
+ offset = 0x7f2c + (n - 3) * 12;
+
+ PUT_SMSTATE(u32, buf, offset + 8, seg.base);
+ PUT_SMSTATE(u32, buf, offset + 4, seg.limit);
+ PUT_SMSTATE(u32, buf, offset, enter_smm_get_segment_flags(&seg));
+}
+
+#ifdef CONFIG_X86_64
+static void enter_smm_save_seg_64(struct kvm_vcpu *vcpu, char *buf, int n)
+{
+ struct kvm_segment seg;
+ int offset;
+ u16 flags;
+
+ kvm_get_segment(vcpu, &seg, n);
+ offset = 0x7e00 + n * 16;
+
+ flags = enter_smm_get_segment_flags(&seg) >> 8;
+ PUT_SMSTATE(u16, buf, offset, seg.selector);
+ PUT_SMSTATE(u16, buf, offset + 2, flags);
+ PUT_SMSTATE(u32, buf, offset + 4, seg.limit);
+ PUT_SMSTATE(u64, buf, offset + 8, seg.base);
+}
+#endif
+
+static void enter_smm_save_state_32(struct kvm_vcpu *vcpu, char *buf)
+{
+ struct desc_ptr dt;
+ struct kvm_segment seg;
+ unsigned long val;
+ int i;
+
+ PUT_SMSTATE(u32, buf, 0x7ffc, kvm_read_cr0(vcpu));
+ PUT_SMSTATE(u32, buf, 0x7ff8, kvm_read_cr3(vcpu));
+ PUT_SMSTATE(u32, buf, 0x7ff4, kvm_get_rflags(vcpu));
+ PUT_SMSTATE(u32, buf, 0x7ff0, kvm_rip_read(vcpu));
+
+ for (i = 0; i < 8; i++)
+ PUT_SMSTATE(u32, buf, 0x7fd0 + i * 4, kvm_register_read_raw(vcpu, i));
+
+ kvm_get_dr(vcpu, 6, &val);
+ PUT_SMSTATE(u32, buf, 0x7fcc, (u32)val);
+ kvm_get_dr(vcpu, 7, &val);
+ PUT_SMSTATE(u32, buf, 0x7fc8, (u32)val);
+
+ kvm_get_segment(vcpu, &seg, VCPU_SREG_TR);
+ PUT_SMSTATE(u32, buf, 0x7fc4, seg.selector);
+ PUT_SMSTATE(u32, buf, 0x7f64, seg.base);
+ PUT_SMSTATE(u32, buf, 0x7f60, seg.limit);
+ PUT_SMSTATE(u32, buf, 0x7f5c, enter_smm_get_segment_flags(&seg));
+
+ kvm_get_segment(vcpu, &seg, VCPU_SREG_LDTR);
+ PUT_SMSTATE(u32, buf, 0x7fc0, seg.selector);
+ PUT_SMSTATE(u32, buf, 0x7f80, seg.base);
+ PUT_SMSTATE(u32, buf, 0x7f7c, seg.limit);
+ PUT_SMSTATE(u32, buf, 0x7f78, enter_smm_get_segment_flags(&seg));
+
+ static_call(kvm_x86_get_gdt)(vcpu, &dt);
+ PUT_SMSTATE(u32, buf, 0x7f74, dt.address);
+ PUT_SMSTATE(u32, buf, 0x7f70, dt.size);
+
+ static_call(kvm_x86_get_idt)(vcpu, &dt);
+ PUT_SMSTATE(u32, buf, 0x7f58, dt.address);
+ PUT_SMSTATE(u32, buf, 0x7f54, dt.size);
+
+ for (i = 0; i < 6; i++)
+ enter_smm_save_seg_32(vcpu, buf, i);
+
+ PUT_SMSTATE(u32, buf, 0x7f14, kvm_read_cr4(vcpu));
+
+ /* revision id */
+ PUT_SMSTATE(u32, buf, 0x7efc, 0x00020000);
+ PUT_SMSTATE(u32, buf, 0x7ef8, vcpu->arch.smbase);
+}
+
+#ifdef CONFIG_X86_64
+static void enter_smm_save_state_64(struct kvm_vcpu *vcpu, char *buf)
+{
+ struct desc_ptr dt;
+ struct kvm_segment seg;
+ unsigned long val;
+ int i;
+
+ for (i = 0; i < 16; i++)
+ PUT_SMSTATE(u64, buf, 0x7ff8 - i * 8, kvm_register_read_raw(vcpu, i));
+
+ PUT_SMSTATE(u64, buf, 0x7f78, kvm_rip_read(vcpu));
+ PUT_SMSTATE(u32, buf, 0x7f70, kvm_get_rflags(vcpu));
+
+ kvm_get_dr(vcpu, 6, &val);
+ PUT_SMSTATE(u64, buf, 0x7f68, val);
+ kvm_get_dr(vcpu, 7, &val);
+ PUT_SMSTATE(u64, buf, 0x7f60, val);
+
+ PUT_SMSTATE(u64, buf, 0x7f58, kvm_read_cr0(vcpu));
+ PUT_SMSTATE(u64, buf, 0x7f50, kvm_read_cr3(vcpu));
+ PUT_SMSTATE(u64, buf, 0x7f48, kvm_read_cr4(vcpu));
+
+ PUT_SMSTATE(u32, buf, 0x7f00, vcpu->arch.smbase);
+
+ /* revision id */
+ PUT_SMSTATE(u32, buf, 0x7efc, 0x00020064);
+
+ PUT_SMSTATE(u64, buf, 0x7ed0, vcpu->arch.efer);
+
+ kvm_get_segment(vcpu, &seg, VCPU_SREG_TR);
+ PUT_SMSTATE(u16, buf, 0x7e90, seg.selector);
+ PUT_SMSTATE(u16, buf, 0x7e92, enter_smm_get_segment_flags(&seg) >> 8);
+ PUT_SMSTATE(u32, buf, 0x7e94, seg.limit);
+ PUT_SMSTATE(u64, buf, 0x7e98, seg.base);
+
+ static_call(kvm_x86_get_idt)(vcpu, &dt);
+ PUT_SMSTATE(u32, buf, 0x7e84, dt.size);
+ PUT_SMSTATE(u64, buf, 0x7e88, dt.address);
+
+ kvm_get_segment(vcpu, &seg, VCPU_SREG_LDTR);
+ PUT_SMSTATE(u16, buf, 0x7e70, seg.selector);
+ PUT_SMSTATE(u16, buf, 0x7e72, enter_smm_get_segment_flags(&seg) >> 8);
+ PUT_SMSTATE(u32, buf, 0x7e74, seg.limit);
+ PUT_SMSTATE(u64, buf, 0x7e78, seg.base);
+
+ static_call(kvm_x86_get_gdt)(vcpu, &dt);
+ PUT_SMSTATE(u32, buf, 0x7e64, dt.size);
+ PUT_SMSTATE(u64, buf, 0x7e68, dt.address);
+
+ for (i = 0; i < 6; i++)
+ enter_smm_save_seg_64(vcpu, buf, i);
+}
+#endif
+
+void enter_smm(struct kvm_vcpu *vcpu)
+{
+ struct kvm_segment cs, ds;
+ struct desc_ptr dt;
+ unsigned long cr0;
+ char buf[512];
+
+ memset(buf, 0, 512);
+#ifdef CONFIG_X86_64
+ if (guest_cpuid_has(vcpu, X86_FEATURE_LM))
+ enter_smm_save_state_64(vcpu, buf);
+ else
+#endif
+ enter_smm_save_state_32(vcpu, buf);
+
+ /*
+ * Give enter_smm() a chance to make ISA-specific changes to the vCPU
+ * state (e.g. leave guest mode) after we've saved the state into the
+ * SMM state-save area.
+ */
+ static_call(kvm_x86_enter_smm)(vcpu, buf);
+
+ kvm_smm_changed(vcpu, true);
+ kvm_vcpu_write_guest(vcpu, vcpu->arch.smbase + 0xfe00, buf, sizeof(buf));
+
+ if (static_call(kvm_x86_get_nmi_mask)(vcpu))
+ vcpu->arch.hflags |= HF_SMM_INSIDE_NMI_MASK;
+ else
+ static_call(kvm_x86_set_nmi_mask)(vcpu, true);
+
+ kvm_set_rflags(vcpu, X86_EFLAGS_FIXED);
+ kvm_rip_write(vcpu, 0x8000);
+
+ cr0 = vcpu->arch.cr0 & ~(X86_CR0_PE | X86_CR0_EM | X86_CR0_TS | X86_CR0_PG);
+ static_call(kvm_x86_set_cr0)(vcpu, cr0);
+ vcpu->arch.cr0 = cr0;
+
+ static_call(kvm_x86_set_cr4)(vcpu, 0);
+
+ /* Undocumented: IDT limit is set to zero on entry to SMM. */
+ dt.address = dt.size = 0;
+ static_call(kvm_x86_set_idt)(vcpu, &dt);
+
+ kvm_set_dr(vcpu, 7, DR7_FIXED_1);
+
+ cs.selector = (vcpu->arch.smbase >> 4) & 0xffff;
+ cs.base = vcpu->arch.smbase;
+
+ ds.selector = 0;
+ ds.base = 0;
+
+ cs.limit = ds.limit = 0xffffffff;
+ cs.type = ds.type = 0x3;
+ cs.dpl = ds.dpl = 0;
+ cs.db = ds.db = 0;
+ cs.s = ds.s = 1;
+ cs.l = ds.l = 0;
+ cs.g = ds.g = 1;
+ cs.avl = ds.avl = 0;
+ cs.present = ds.present = 1;
+ cs.unusable = ds.unusable = 0;
+ cs.padding = ds.padding = 0;
+
+ kvm_set_segment(vcpu, &cs, VCPU_SREG_CS);
+ kvm_set_segment(vcpu, &ds, VCPU_SREG_DS);
+ kvm_set_segment(vcpu, &ds, VCPU_SREG_ES);
+ kvm_set_segment(vcpu, &ds, VCPU_SREG_FS);
+ kvm_set_segment(vcpu, &ds, VCPU_SREG_GS);
+ kvm_set_segment(vcpu, &ds, VCPU_SREG_SS);
+
+#ifdef CONFIG_X86_64
+ if (guest_cpuid_has(vcpu, X86_FEATURE_LM))
+ static_call(kvm_x86_set_efer)(vcpu, 0);
+#endif
+
+ kvm_update_cpuid_runtime(vcpu);
+ kvm_mmu_reset_context(vcpu);
+}
diff --git a/arch/x86/kvm/smm.h b/arch/x86/kvm/smm.h
index d85d4ccd32dd..aacc6dac2c99 100644
--- a/arch/x86/kvm/smm.h
+++ b/arch/x86/kvm/smm.h
@@ -20,6 +20,7 @@ static inline bool is_smm(struct kvm_vcpu *vcpu)
}
void kvm_smm_changed(struct kvm_vcpu *vcpu, bool in_smm);
+void enter_smm(struct kvm_vcpu *vcpu);
void process_smi(struct kvm_vcpu *vcpu);
#endif
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 7e60b4c12b91..f86b6be363b0 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -120,7 +120,6 @@ static u64 __read_mostly cr4_reserved_bits = CR4_RESERVED_BITS;
static void update_cr8_intercept(struct kvm_vcpu *vcpu);
static void process_nmi(struct kvm_vcpu *vcpu);
-static void enter_smm(struct kvm_vcpu *vcpu);
static void __kvm_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags);
static void store_regs(struct kvm_vcpu *vcpu);
static int sync_regs(struct kvm_vcpu *vcpu);
@@ -7013,8 +7012,8 @@ static int vcpu_mmio_read(struct kvm_vcpu *vcpu, gpa_t addr, int len, void *v)
return handled;
}
-static void kvm_set_segment(struct kvm_vcpu *vcpu,
- struct kvm_segment *var, int seg)
+void kvm_set_segment(struct kvm_vcpu *vcpu,
+ struct kvm_segment *var, int seg)
{
static_call(kvm_x86_set_segment)(vcpu, var, seg);
}
@@ -9801,240 +9800,6 @@ static void process_nmi(struct kvm_vcpu *vcpu)
kvm_make_request(KVM_REQ_EVENT, vcpu);
}
-static u32 enter_smm_get_segment_flags(struct kvm_segment *seg)
-{
- u32 flags = 0;
- flags |= seg->g << 23;
- flags |= seg->db << 22;
- flags |= seg->l << 21;
- flags |= seg->avl << 20;
- flags |= seg->present << 15;
- flags |= seg->dpl << 13;
- flags |= seg->s << 12;
- flags |= seg->type << 8;
- return flags;
-}
-
-static void enter_smm_save_seg_32(struct kvm_vcpu *vcpu, char *buf, int n)
-{
- struct kvm_segment seg;
- int offset;
-
- kvm_get_segment(vcpu, &seg, n);
- PUT_SMSTATE(u32, buf, 0x7fa8 + n * 4, seg.selector);
-
- if (n < 3)
- offset = 0x7f84 + n * 12;
- else
- offset = 0x7f2c + (n - 3) * 12;
-
- PUT_SMSTATE(u32, buf, offset + 8, seg.base);
- PUT_SMSTATE(u32, buf, offset + 4, seg.limit);
- PUT_SMSTATE(u32, buf, offset, enter_smm_get_segment_flags(&seg));
-}
-
-#ifdef CONFIG_X86_64
-static void enter_smm_save_seg_64(struct kvm_vcpu *vcpu, char *buf, int n)
-{
- struct kvm_segment seg;
- int offset;
- u16 flags;
-
- kvm_get_segment(vcpu, &seg, n);
- offset = 0x7e00 + n * 16;
-
- flags = enter_smm_get_segment_flags(&seg) >> 8;
- PUT_SMSTATE(u16, buf, offset, seg.selector);
- PUT_SMSTATE(u16, buf, offset + 2, flags);
- PUT_SMSTATE(u32, buf, offset + 4, seg.limit);
- PUT_SMSTATE(u64, buf, offset + 8, seg.base);
-}
-#endif
-
-static void enter_smm_save_state_32(struct kvm_vcpu *vcpu, char *buf)
-{
- struct desc_ptr dt;
- struct kvm_segment seg;
- unsigned long val;
- int i;
-
- PUT_SMSTATE(u32, buf, 0x7ffc, kvm_read_cr0(vcpu));
- PUT_SMSTATE(u32, buf, 0x7ff8, kvm_read_cr3(vcpu));
- PUT_SMSTATE(u32, buf, 0x7ff4, kvm_get_rflags(vcpu));
- PUT_SMSTATE(u32, buf, 0x7ff0, kvm_rip_read(vcpu));
-
- for (i = 0; i < 8; i++)
- PUT_SMSTATE(u32, buf, 0x7fd0 + i * 4, kvm_register_read_raw(vcpu, i));
-
- kvm_get_dr(vcpu, 6, &val);
- PUT_SMSTATE(u32, buf, 0x7fcc, (u32)val);
- kvm_get_dr(vcpu, 7, &val);
- PUT_SMSTATE(u32, buf, 0x7fc8, (u32)val);
-
- kvm_get_segment(vcpu, &seg, VCPU_SREG_TR);
- PUT_SMSTATE(u32, buf, 0x7fc4, seg.selector);
- PUT_SMSTATE(u32, buf, 0x7f64, seg.base);
- PUT_SMSTATE(u32, buf, 0x7f60, seg.limit);
- PUT_SMSTATE(u32, buf, 0x7f5c, enter_smm_get_segment_flags(&seg));
-
- kvm_get_segment(vcpu, &seg, VCPU_SREG_LDTR);
- PUT_SMSTATE(u32, buf, 0x7fc0, seg.selector);
- PUT_SMSTATE(u32, buf, 0x7f80, seg.base);
- PUT_SMSTATE(u32, buf, 0x7f7c, seg.limit);
- PUT_SMSTATE(u32, buf, 0x7f78, enter_smm_get_segment_flags(&seg));
-
- static_call(kvm_x86_get_gdt)(vcpu, &dt);
- PUT_SMSTATE(u32, buf, 0x7f74, dt.address);
- PUT_SMSTATE(u32, buf, 0x7f70, dt.size);
-
- static_call(kvm_x86_get_idt)(vcpu, &dt);
- PUT_SMSTATE(u32, buf, 0x7f58, dt.address);
- PUT_SMSTATE(u32, buf, 0x7f54, dt.size);
-
- for (i = 0; i < 6; i++)
- enter_smm_save_seg_32(vcpu, buf, i);
-
- PUT_SMSTATE(u32, buf, 0x7f14, kvm_read_cr4(vcpu));
-
- /* revision id */
- PUT_SMSTATE(u32, buf, 0x7efc, 0x00020000);
- PUT_SMSTATE(u32, buf, 0x7ef8, vcpu->arch.smbase);
-}
-
-#ifdef CONFIG_X86_64
-static void enter_smm_save_state_64(struct kvm_vcpu *vcpu, char *buf)
-{
- struct desc_ptr dt;
- struct kvm_segment seg;
- unsigned long val;
- int i;
-
- for (i = 0; i < 16; i++)
- PUT_SMSTATE(u64, buf, 0x7ff8 - i * 8, kvm_register_read_raw(vcpu, i));
-
- PUT_SMSTATE(u64, buf, 0x7f78, kvm_rip_read(vcpu));
- PUT_SMSTATE(u32, buf, 0x7f70, kvm_get_rflags(vcpu));
-
- kvm_get_dr(vcpu, 6, &val);
- PUT_SMSTATE(u64, buf, 0x7f68, val);
- kvm_get_dr(vcpu, 7, &val);
- PUT_SMSTATE(u64, buf, 0x7f60, val);
-
- PUT_SMSTATE(u64, buf, 0x7f58, kvm_read_cr0(vcpu));
- PUT_SMSTATE(u64, buf, 0x7f50, kvm_read_cr3(vcpu));
- PUT_SMSTATE(u64, buf, 0x7f48, kvm_read_cr4(vcpu));
-
- PUT_SMSTATE(u32, buf, 0x7f00, vcpu->arch.smbase);
-
- /* revision id */
- PUT_SMSTATE(u32, buf, 0x7efc, 0x00020064);
-
- PUT_SMSTATE(u64, buf, 0x7ed0, vcpu->arch.efer);
-
- kvm_get_segment(vcpu, &seg, VCPU_SREG_TR);
- PUT_SMSTATE(u16, buf, 0x7e90, seg.selector);
- PUT_SMSTATE(u16, buf, 0x7e92, enter_smm_get_segment_flags(&seg) >> 8);
- PUT_SMSTATE(u32, buf, 0x7e94, seg.limit);
- PUT_SMSTATE(u64, buf, 0x7e98, seg.base);
-
- static_call(kvm_x86_get_idt)(vcpu, &dt);
- PUT_SMSTATE(u32, buf, 0x7e84, dt.size);
- PUT_SMSTATE(u64, buf, 0x7e88, dt.address);
-
- kvm_get_segment(vcpu, &seg, VCPU_SREG_LDTR);
- PUT_SMSTATE(u16, buf, 0x7e70, seg.selector);
- PUT_SMSTATE(u16, buf, 0x7e72, enter_smm_get_segment_flags(&seg) >> 8);
- PUT_SMSTATE(u32, buf, 0x7e74, seg.limit);
- PUT_SMSTATE(u64, buf, 0x7e78, seg.base);
-
- static_call(kvm_x86_get_gdt)(vcpu, &dt);
- PUT_SMSTATE(u32, buf, 0x7e64, dt.size);
- PUT_SMSTATE(u64, buf, 0x7e68, dt.address);
-
- for (i = 0; i < 6; i++)
- enter_smm_save_seg_64(vcpu, buf, i);
-}
-#endif
-
-static void enter_smm(struct kvm_vcpu *vcpu)
-{
- struct kvm_segment cs, ds;
- struct desc_ptr dt;
- unsigned long cr0;
- char buf[512];
-
- memset(buf, 0, 512);
-#ifdef CONFIG_X86_64
- if (guest_cpuid_has(vcpu, X86_FEATURE_LM))
- enter_smm_save_state_64(vcpu, buf);
- else
-#endif
- enter_smm_save_state_32(vcpu, buf);
-
- /*
- * Give enter_smm() a chance to make ISA-specific changes to the vCPU
- * state (e.g. leave guest mode) after we've saved the state into the
- * SMM state-save area.
- */
- static_call(kvm_x86_enter_smm)(vcpu, buf);
-
- kvm_smm_changed(vcpu, true);
- kvm_vcpu_write_guest(vcpu, vcpu->arch.smbase + 0xfe00, buf, sizeof(buf));
-
- if (static_call(kvm_x86_get_nmi_mask)(vcpu))
- vcpu->arch.hflags |= HF_SMM_INSIDE_NMI_MASK;
- else
- static_call(kvm_x86_set_nmi_mask)(vcpu, true);
-
- kvm_set_rflags(vcpu, X86_EFLAGS_FIXED);
- kvm_rip_write(vcpu, 0x8000);
-
- cr0 = vcpu->arch.cr0 & ~(X86_CR0_PE | X86_CR0_EM | X86_CR0_TS | X86_CR0_PG);
- static_call(kvm_x86_set_cr0)(vcpu, cr0);
- vcpu->arch.cr0 = cr0;
-
- static_call(kvm_x86_set_cr4)(vcpu, 0);
-
- /* Undocumented: IDT limit is set to zero on entry to SMM. */
- dt.address = dt.size = 0;
- static_call(kvm_x86_set_idt)(vcpu, &dt);
-
- kvm_set_dr(vcpu, 7, DR7_FIXED_1);
-
- cs.selector = (vcpu->arch.smbase >> 4) & 0xffff;
- cs.base = vcpu->arch.smbase;
-
- ds.selector = 0;
- ds.base = 0;
-
- cs.limit = ds.limit = 0xffffffff;
- cs.type = ds.type = 0x3;
- cs.dpl = ds.dpl = 0;
- cs.db = ds.db = 0;
- cs.s = ds.s = 1;
- cs.l = ds.l = 0;
- cs.g = ds.g = 1;
- cs.avl = ds.avl = 0;
- cs.present = ds.present = 1;
- cs.unusable = ds.unusable = 0;
- cs.padding = ds.padding = 0;
-
- kvm_set_segment(vcpu, &cs, VCPU_SREG_CS);
- kvm_set_segment(vcpu, &ds, VCPU_SREG_DS);
- kvm_set_segment(vcpu, &ds, VCPU_SREG_ES);
- kvm_set_segment(vcpu, &ds, VCPU_SREG_FS);
- kvm_set_segment(vcpu, &ds, VCPU_SREG_GS);
- kvm_set_segment(vcpu, &ds, VCPU_SREG_SS);
-
-#ifdef CONFIG_X86_64
- if (guest_cpuid_has(vcpu, X86_FEATURE_LM))
- static_call(kvm_x86_set_efer)(vcpu, 0);
-#endif
-
- kvm_update_cpuid_runtime(vcpu);
- kvm_mmu_reset_context(vcpu);
-}
-
void kvm_make_scan_ioapic_request_mask(struct kvm *kvm,
unsigned long *vcpu_bitmap)
{
--
2.38.1

View file

@ -1,147 +0,0 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Wolfgang Bumiller <w.bumiller@proxmox.com>
Date: Tue, 11 Jan 2022 09:31:59 +0100
Subject: [PATCH] blk-cgroup: always terminate io.stat lines
With the removal of seq_get_buf in blkcg_print_one_stat, we
cannot make adding the newline conditional on there being
relevant stats because the name was already written out
unconditionally.
Otherwise we may end up with multiple device names in one
line which is confusing and doesn't follow the nested-keyed
file format.
Signed-off-by: Wolfgang Bumiller <w.bumiller@proxmox.com>
Fixes: 252c651a4c85 ("blk-cgroup: stop using seq_get_buf")
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
---
block/blk-cgroup.c | 9 ++-------
block/blk-iocost.c | 5 ++---
block/blk-iolatency.c | 8 +++-----
include/linux/blk-cgroup.h | 2 +-
4 files changed, 8 insertions(+), 16 deletions(-)
diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c
index 07a2524e6efd..fd09c20a5543 100644
--- a/block/blk-cgroup.c
+++ b/block/blk-cgroup.c
@@ -887,7 +887,6 @@ static void blkcg_print_one_stat(struct blkcg_gq *blkg, struct seq_file *s)
{
struct blkg_iostat_set *bis = &blkg->iostat;
u64 rbytes, wbytes, rios, wios, dbytes, dios;
- bool has_stats = false;
const char *dname;
unsigned seq;
int i;
@@ -913,14 +912,12 @@ static void blkcg_print_one_stat(struct blkcg_gq *blkg, struct seq_file *s)
} while (u64_stats_fetch_retry(&bis->sync, seq));
if (rbytes || wbytes || rios || wios) {
- has_stats = true;
seq_printf(s, "rbytes=%llu wbytes=%llu rios=%llu wios=%llu dbytes=%llu dios=%llu",
rbytes, wbytes, rios, wios,
dbytes, dios);
}
if (blkcg_debug_stats && atomic_read(&blkg->use_delay)) {
- has_stats = true;
seq_printf(s, " use_delay=%d delay_nsec=%llu",
atomic_read(&blkg->use_delay),
atomic64_read(&blkg->delay_nsec));
@@ -932,12 +929,10 @@ static void blkcg_print_one_stat(struct blkcg_gq *blkg, struct seq_file *s)
if (!blkg->pd[i] || !pol->pd_stat_fn)
continue;
- if (pol->pd_stat_fn(blkg->pd[i], s))
- has_stats = true;
+ pol->pd_stat_fn(blkg->pd[i], s);
}
- if (has_stats)
- seq_printf(s, "\n");
+ seq_puts(s, "\n");
}
static int blkcg_print_stat(struct seq_file *sf, void *v)
diff --git a/block/blk-iocost.c b/block/blk-iocost.c
index 10851493940c..21db328c0bcc 100644
--- a/block/blk-iocost.c
+++ b/block/blk-iocost.c
@@ -3005,13 +3005,13 @@ static void ioc_pd_free(struct blkg_policy_data *pd)
kfree(iocg);
}
-static bool ioc_pd_stat(struct blkg_policy_data *pd, struct seq_file *s)
+static void ioc_pd_stat(struct blkg_policy_data *pd, struct seq_file *s)
{
struct ioc_gq *iocg = pd_to_iocg(pd);
struct ioc *ioc = iocg->ioc;
if (!ioc->enabled)
- return false;
+ return;
if (iocg->level == 0) {
unsigned vp10k = DIV64_U64_ROUND_CLOSEST(
@@ -3027,7 +3027,6 @@ static bool ioc_pd_stat(struct blkg_policy_data *pd, struct seq_file *s)
iocg->last_stat.wait_us,
iocg->last_stat.indebt_us,
iocg->last_stat.indelay_us);
- return true;
}
static u64 ioc_weight_prfill(struct seq_file *sf, struct blkg_policy_data *pd,
diff --git a/block/blk-iolatency.c b/block/blk-iolatency.c
index c0545f9da549..d33460f3d43d 100644
--- a/block/blk-iolatency.c
+++ b/block/blk-iolatency.c
@@ -890,7 +890,7 @@ static int iolatency_print_limit(struct seq_file *sf, void *v)
return 0;
}
-static bool iolatency_ssd_stat(struct iolatency_grp *iolat, struct seq_file *s)
+static void iolatency_ssd_stat(struct iolatency_grp *iolat, struct seq_file *s)
{
struct latency_stat stat;
int cpu;
@@ -913,17 +913,16 @@ static bool iolatency_ssd_stat(struct iolatency_grp *iolat, struct seq_file *s)
(unsigned long long)stat.ps.missed,
(unsigned long long)stat.ps.total,
iolat->rq_depth.max_depth);
- return true;
}
-static bool iolatency_pd_stat(struct blkg_policy_data *pd, struct seq_file *s)
+static void iolatency_pd_stat(struct blkg_policy_data *pd, struct seq_file *s)
{
struct iolatency_grp *iolat = pd_to_lat(pd);
unsigned long long avg_lat;
unsigned long long cur_win;
if (!blkcg_debug_stats)
- return false;
+ return;
if (iolat->ssd)
return iolatency_ssd_stat(iolat, s);
@@ -936,7 +935,6 @@ static bool iolatency_pd_stat(struct blkg_policy_data *pd, struct seq_file *s)
else
seq_printf(s, " depth=%u avg_lat=%llu win=%llu",
iolat->rq_depth.max_depth, avg_lat, cur_win);
- return true;
}
static struct blkg_policy_data *iolatency_pd_alloc(gfp_t gfp,
diff --git a/include/linux/blk-cgroup.h b/include/linux/blk-cgroup.h
index bc5c04d711bb..618359e3beca 100644
--- a/include/linux/blk-cgroup.h
+++ b/include/linux/blk-cgroup.h
@@ -153,7 +153,7 @@ typedef void (blkcg_pol_online_pd_fn)(struct blkg_policy_data *pd);
typedef void (blkcg_pol_offline_pd_fn)(struct blkg_policy_data *pd);
typedef void (blkcg_pol_free_pd_fn)(struct blkg_policy_data *pd);
typedef void (blkcg_pol_reset_pd_stats_fn)(struct blkg_policy_data *pd);
-typedef bool (blkcg_pol_stat_pd_fn)(struct blkg_policy_data *pd,
+typedef void (blkcg_pol_stat_pd_fn)(struct blkg_policy_data *pd,
struct seq_file *s);
struct blkcg_policy {

View file

@ -0,0 +1,835 @@
From 887f248c4502610baed368a9f198cfbb9eabdc10 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Tue, 25 Oct 2022 15:47:21 +0300
Subject: [PATCH] KVM: x86: move SMM exit to a new file
Some users of KVM implement the UEFI variable store through a paravirtual
device that does not require the "SMM lockbox" component of edk2, and
would like to compile out system management mode. In preparation for
that, move the SMM exit code out of emulate.c and into a new file.
The code is still written as a series of invocations of the emulator
callbacks, but the two exiting_smm and leave_smm callbacks are merged
into one, and all the code from em_rsm is now part of the callback.
This removes all knowledge of the format of the SMM save state area
from the emulator. Further patches will clean up the code and
invoke KVM's own functions to access control registers, descriptor
caches, etc.
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
arch/x86/kvm/emulate.c | 356 +------------------------------------
arch/x86/kvm/kvm_emulate.h | 34 +++-
arch/x86/kvm/smm.c | 316 ++++++++++++++++++++++++++++++++
arch/x86/kvm/smm.h | 1 +
arch/x86/kvm/x86.c | 14 --
5 files changed, 351 insertions(+), 370 deletions(-)
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index 3c3bf6f66a7e..853262d4fb6c 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -30,7 +30,6 @@
#include "tss.h"
#include "mmu.h"
#include "pmu.h"
-#include "smm.h"
/*
* Operand types
@@ -243,37 +242,6 @@ enum x86_transfer_type {
X86_TRANSFER_TASK_SWITCH,
};
-static ulong reg_read(struct x86_emulate_ctxt *ctxt, unsigned nr)
-{
- if (KVM_EMULATOR_BUG_ON(nr >= NR_EMULATOR_GPRS, ctxt))
- nr &= NR_EMULATOR_GPRS - 1;
-
- if (!(ctxt->regs_valid & (1 << nr))) {
- ctxt->regs_valid |= 1 << nr;
- ctxt->_regs[nr] = ctxt->ops->read_gpr(ctxt, nr);
- }
- return ctxt->_regs[nr];
-}
-
-static ulong *reg_write(struct x86_emulate_ctxt *ctxt, unsigned nr)
-{
- if (KVM_EMULATOR_BUG_ON(nr >= NR_EMULATOR_GPRS, ctxt))
- nr &= NR_EMULATOR_GPRS - 1;
-
- BUILD_BUG_ON(sizeof(ctxt->regs_dirty) * BITS_PER_BYTE < NR_EMULATOR_GPRS);
- BUILD_BUG_ON(sizeof(ctxt->regs_valid) * BITS_PER_BYTE < NR_EMULATOR_GPRS);
-
- ctxt->regs_valid |= 1 << nr;
- ctxt->regs_dirty |= 1 << nr;
- return &ctxt->_regs[nr];
-}
-
-static ulong *reg_rmw(struct x86_emulate_ctxt *ctxt, unsigned nr)
-{
- reg_read(ctxt, nr);
- return reg_write(ctxt, nr);
-}
-
static void writeback_registers(struct x86_emulate_ctxt *ctxt)
{
unsigned long dirty = ctxt->regs_dirty;
@@ -2308,334 +2276,14 @@ static int em_lseg(struct x86_emulate_ctxt *ctxt)
return rc;
}
-static int emulator_has_longmode(struct x86_emulate_ctxt *ctxt)
-{
-#ifdef CONFIG_X86_64
- return ctxt->ops->guest_has_long_mode(ctxt);
-#else
- return false;
-#endif
-}
-
-static void rsm_set_desc_flags(struct desc_struct *desc, u32 flags)
-{
- desc->g = (flags >> 23) & 1;
- desc->d = (flags >> 22) & 1;
- desc->l = (flags >> 21) & 1;
- desc->avl = (flags >> 20) & 1;
- desc->p = (flags >> 15) & 1;
- desc->dpl = (flags >> 13) & 3;
- desc->s = (flags >> 12) & 1;
- desc->type = (flags >> 8) & 15;
-}
-
-static int rsm_load_seg_32(struct x86_emulate_ctxt *ctxt, const char *smstate,
- int n)
-{
- struct desc_struct desc;
- int offset;
- u16 selector;
-
- selector = GET_SMSTATE(u32, smstate, 0x7fa8 + n * 4);
-
- if (n < 3)
- offset = 0x7f84 + n * 12;
- else
- offset = 0x7f2c + (n - 3) * 12;
-
- set_desc_base(&desc, GET_SMSTATE(u32, smstate, offset + 8));
- set_desc_limit(&desc, GET_SMSTATE(u32, smstate, offset + 4));
- rsm_set_desc_flags(&desc, GET_SMSTATE(u32, smstate, offset));
- ctxt->ops->set_segment(ctxt, selector, &desc, 0, n);
- return X86EMUL_CONTINUE;
-}
-
-#ifdef CONFIG_X86_64
-static int rsm_load_seg_64(struct x86_emulate_ctxt *ctxt, const char *smstate,
- int n)
-{
- struct desc_struct desc;
- int offset;
- u16 selector;
- u32 base3;
-
- offset = 0x7e00 + n * 16;
-
- selector = GET_SMSTATE(u16, smstate, offset);
- rsm_set_desc_flags(&desc, GET_SMSTATE(u16, smstate, offset + 2) << 8);
- set_desc_limit(&desc, GET_SMSTATE(u32, smstate, offset + 4));
- set_desc_base(&desc, GET_SMSTATE(u32, smstate, offset + 8));
- base3 = GET_SMSTATE(u32, smstate, offset + 12);
-
- ctxt->ops->set_segment(ctxt, selector, &desc, base3, n);
- return X86EMUL_CONTINUE;
-}
-#endif
-
-static int rsm_enter_protected_mode(struct x86_emulate_ctxt *ctxt,
- u64 cr0, u64 cr3, u64 cr4)
-{
- int bad;
- u64 pcid;
-
- /* In order to later set CR4.PCIDE, CR3[11:0] must be zero. */
- pcid = 0;
- if (cr4 & X86_CR4_PCIDE) {
- pcid = cr3 & 0xfff;
- cr3 &= ~0xfff;
- }
-
- bad = ctxt->ops->set_cr(ctxt, 3, cr3);
- if (bad)
- return X86EMUL_UNHANDLEABLE;
-
- /*
- * First enable PAE, long mode needs it before CR0.PG = 1 is set.
- * Then enable protected mode. However, PCID cannot be enabled
- * if EFER.LMA=0, so set it separately.
- */
- bad = ctxt->ops->set_cr(ctxt, 4, cr4 & ~X86_CR4_PCIDE);
- if (bad)
- return X86EMUL_UNHANDLEABLE;
-
- bad = ctxt->ops->set_cr(ctxt, 0, cr0);
- if (bad)
- return X86EMUL_UNHANDLEABLE;
-
- if (cr4 & X86_CR4_PCIDE) {
- bad = ctxt->ops->set_cr(ctxt, 4, cr4);
- if (bad)
- return X86EMUL_UNHANDLEABLE;
- if (pcid) {
- bad = ctxt->ops->set_cr(ctxt, 3, cr3 | pcid);
- if (bad)
- return X86EMUL_UNHANDLEABLE;
- }
-
- }
-
- return X86EMUL_CONTINUE;
-}
-
-static int rsm_load_state_32(struct x86_emulate_ctxt *ctxt,
- const char *smstate)
-{
- struct desc_struct desc;
- struct desc_ptr dt;
- u16 selector;
- u32 val, cr0, cr3, cr4;
- int i;
-
- cr0 = GET_SMSTATE(u32, smstate, 0x7ffc);
- cr3 = GET_SMSTATE(u32, smstate, 0x7ff8);
- ctxt->eflags = GET_SMSTATE(u32, smstate, 0x7ff4) | X86_EFLAGS_FIXED;
- ctxt->_eip = GET_SMSTATE(u32, smstate, 0x7ff0);
-
- for (i = 0; i < NR_EMULATOR_GPRS; i++)
- *reg_write(ctxt, i) = GET_SMSTATE(u32, smstate, 0x7fd0 + i * 4);
-
- val = GET_SMSTATE(u32, smstate, 0x7fcc);
-
- if (ctxt->ops->set_dr(ctxt, 6, val))
- return X86EMUL_UNHANDLEABLE;
-
- val = GET_SMSTATE(u32, smstate, 0x7fc8);
-
- if (ctxt->ops->set_dr(ctxt, 7, val))
- return X86EMUL_UNHANDLEABLE;
-
- selector = GET_SMSTATE(u32, smstate, 0x7fc4);
- set_desc_base(&desc, GET_SMSTATE(u32, smstate, 0x7f64));
- set_desc_limit(&desc, GET_SMSTATE(u32, smstate, 0x7f60));
- rsm_set_desc_flags(&desc, GET_SMSTATE(u32, smstate, 0x7f5c));
- ctxt->ops->set_segment(ctxt, selector, &desc, 0, VCPU_SREG_TR);
-
- selector = GET_SMSTATE(u32, smstate, 0x7fc0);
- set_desc_base(&desc, GET_SMSTATE(u32, smstate, 0x7f80));
- set_desc_limit(&desc, GET_SMSTATE(u32, smstate, 0x7f7c));
- rsm_set_desc_flags(&desc, GET_SMSTATE(u32, smstate, 0x7f78));
- ctxt->ops->set_segment(ctxt, selector, &desc, 0, VCPU_SREG_LDTR);
-
- dt.address = GET_SMSTATE(u32, smstate, 0x7f74);
- dt.size = GET_SMSTATE(u32, smstate, 0x7f70);
- ctxt->ops->set_gdt(ctxt, &dt);
-
- dt.address = GET_SMSTATE(u32, smstate, 0x7f58);
- dt.size = GET_SMSTATE(u32, smstate, 0x7f54);
- ctxt->ops->set_idt(ctxt, &dt);
-
- for (i = 0; i < 6; i++) {
- int r = rsm_load_seg_32(ctxt, smstate, i);
- if (r != X86EMUL_CONTINUE)
- return r;
- }
-
- cr4 = GET_SMSTATE(u32, smstate, 0x7f14);
-
- ctxt->ops->set_smbase(ctxt, GET_SMSTATE(u32, smstate, 0x7ef8));
-
- return rsm_enter_protected_mode(ctxt, cr0, cr3, cr4);
-}
-
-#ifdef CONFIG_X86_64
-static int rsm_load_state_64(struct x86_emulate_ctxt *ctxt,
- const char *smstate)
-{
- struct desc_struct desc;
- struct desc_ptr dt;
- u64 val, cr0, cr3, cr4;
- u32 base3;
- u16 selector;
- int i, r;
-
- for (i = 0; i < NR_EMULATOR_GPRS; i++)
- *reg_write(ctxt, i) = GET_SMSTATE(u64, smstate, 0x7ff8 - i * 8);
-
- ctxt->_eip = GET_SMSTATE(u64, smstate, 0x7f78);
- ctxt->eflags = GET_SMSTATE(u32, smstate, 0x7f70) | X86_EFLAGS_FIXED;
-
- val = GET_SMSTATE(u64, smstate, 0x7f68);
-
- if (ctxt->ops->set_dr(ctxt, 6, val))
- return X86EMUL_UNHANDLEABLE;
-
- val = GET_SMSTATE(u64, smstate, 0x7f60);
-
- if (ctxt->ops->set_dr(ctxt, 7, val))
- return X86EMUL_UNHANDLEABLE;
-
- cr0 = GET_SMSTATE(u64, smstate, 0x7f58);
- cr3 = GET_SMSTATE(u64, smstate, 0x7f50);
- cr4 = GET_SMSTATE(u64, smstate, 0x7f48);
- ctxt->ops->set_smbase(ctxt, GET_SMSTATE(u32, smstate, 0x7f00));
- val = GET_SMSTATE(u64, smstate, 0x7ed0);
-
- if (ctxt->ops->set_msr(ctxt, MSR_EFER, val & ~EFER_LMA))
- return X86EMUL_UNHANDLEABLE;
-
- selector = GET_SMSTATE(u32, smstate, 0x7e90);
- rsm_set_desc_flags(&desc, GET_SMSTATE(u32, smstate, 0x7e92) << 8);
- set_desc_limit(&desc, GET_SMSTATE(u32, smstate, 0x7e94));
- set_desc_base(&desc, GET_SMSTATE(u32, smstate, 0x7e98));
- base3 = GET_SMSTATE(u32, smstate, 0x7e9c);
- ctxt->ops->set_segment(ctxt, selector, &desc, base3, VCPU_SREG_TR);
-
- dt.size = GET_SMSTATE(u32, smstate, 0x7e84);
- dt.address = GET_SMSTATE(u64, smstate, 0x7e88);
- ctxt->ops->set_idt(ctxt, &dt);
-
- selector = GET_SMSTATE(u32, smstate, 0x7e70);
- rsm_set_desc_flags(&desc, GET_SMSTATE(u32, smstate, 0x7e72) << 8);
- set_desc_limit(&desc, GET_SMSTATE(u32, smstate, 0x7e74));
- set_desc_base(&desc, GET_SMSTATE(u32, smstate, 0x7e78));
- base3 = GET_SMSTATE(u32, smstate, 0x7e7c);
- ctxt->ops->set_segment(ctxt, selector, &desc, base3, VCPU_SREG_LDTR);
-
- dt.size = GET_SMSTATE(u32, smstate, 0x7e64);
- dt.address = GET_SMSTATE(u64, smstate, 0x7e68);
- ctxt->ops->set_gdt(ctxt, &dt);
-
- r = rsm_enter_protected_mode(ctxt, cr0, cr3, cr4);
- if (r != X86EMUL_CONTINUE)
- return r;
-
- for (i = 0; i < 6; i++) {
- r = rsm_load_seg_64(ctxt, smstate, i);
- if (r != X86EMUL_CONTINUE)
- return r;
- }
-
- return X86EMUL_CONTINUE;
-}
-#endif
-
static int em_rsm(struct x86_emulate_ctxt *ctxt)
{
- unsigned long cr0, cr4, efer;
- char buf[512];
- u64 smbase;
- int ret;
-
if ((ctxt->ops->get_hflags(ctxt) & X86EMUL_SMM_MASK) == 0)
return emulate_ud(ctxt);
- smbase = ctxt->ops->get_smbase(ctxt);
-
- ret = ctxt->ops->read_phys(ctxt, smbase + 0xfe00, buf, sizeof(buf));
- if (ret != X86EMUL_CONTINUE)
- return X86EMUL_UNHANDLEABLE;
-
- if ((ctxt->ops->get_hflags(ctxt) & X86EMUL_SMM_INSIDE_NMI_MASK) == 0)
- ctxt->ops->set_nmi_mask(ctxt, false);
-
- ctxt->ops->exiting_smm(ctxt);
-
- /*
- * Get back to real mode, to prepare a safe state in which to load
- * CR0/CR3/CR4/EFER. It's all a bit more complicated if the vCPU
- * supports long mode.
- */
- if (emulator_has_longmode(ctxt)) {
- struct desc_struct cs_desc;
-
- /* Zero CR4.PCIDE before CR0.PG. */
- cr4 = ctxt->ops->get_cr(ctxt, 4);
- if (cr4 & X86_CR4_PCIDE)
- ctxt->ops->set_cr(ctxt, 4, cr4 & ~X86_CR4_PCIDE);
-
- /* A 32-bit code segment is required to clear EFER.LMA. */
- memset(&cs_desc, 0, sizeof(cs_desc));
- cs_desc.type = 0xb;
- cs_desc.s = cs_desc.g = cs_desc.p = 1;
- ctxt->ops->set_segment(ctxt, 0, &cs_desc, 0, VCPU_SREG_CS);
- }
-
- /* For the 64-bit case, this will clear EFER.LMA. */
- cr0 = ctxt->ops->get_cr(ctxt, 0);
- if (cr0 & X86_CR0_PE)
- ctxt->ops->set_cr(ctxt, 0, cr0 & ~(X86_CR0_PG | X86_CR0_PE));
-
- if (emulator_has_longmode(ctxt)) {
- /* Clear CR4.PAE before clearing EFER.LME. */
- cr4 = ctxt->ops->get_cr(ctxt, 4);
- if (cr4 & X86_CR4_PAE)
- ctxt->ops->set_cr(ctxt, 4, cr4 & ~X86_CR4_PAE);
-
- /* And finally go back to 32-bit mode. */
- efer = 0;
- ctxt->ops->set_msr(ctxt, MSR_EFER, efer);
- }
-
- /*
- * Give leave_smm() a chance to make ISA-specific changes to the vCPU
- * state (e.g. enter guest mode) before loading state from the SMM
- * state-save area.
- */
- if (ctxt->ops->leave_smm(ctxt, buf))
- goto emulate_shutdown;
-
-#ifdef CONFIG_X86_64
- if (emulator_has_longmode(ctxt))
- ret = rsm_load_state_64(ctxt, buf);
- else
-#endif
- ret = rsm_load_state_32(ctxt, buf);
-
- if (ret != X86EMUL_CONTINUE)
- goto emulate_shutdown;
-
- /*
- * Note, the ctxt->ops callbacks are responsible for handling side
- * effects when writing MSRs and CRs, e.g. MMU context resets, CPUID
- * runtime updates, etc... If that changes, e.g. this flow is moved
- * out of the emulator to make it look more like enter_smm(), then
- * those side effects need to be explicitly handled for both success
- * and shutdown.
- */
- return X86EMUL_CONTINUE;
+ if (ctxt->ops->leave_smm(ctxt))
+ ctxt->ops->triple_fault(ctxt);
-emulate_shutdown:
- ctxt->ops->triple_fault(ctxt);
return X86EMUL_CONTINUE;
}
diff --git a/arch/x86/kvm/kvm_emulate.h b/arch/x86/kvm/kvm_emulate.h
index 89246446d6aa..d7afbc448dd2 100644
--- a/arch/x86/kvm/kvm_emulate.h
+++ b/arch/x86/kvm/kvm_emulate.h
@@ -234,8 +234,7 @@ struct x86_emulate_ops {
void (*set_nmi_mask)(struct x86_emulate_ctxt *ctxt, bool masked);
unsigned (*get_hflags)(struct x86_emulate_ctxt *ctxt);
- void (*exiting_smm)(struct x86_emulate_ctxt *ctxt);
- int (*leave_smm)(struct x86_emulate_ctxt *ctxt, const char *smstate);
+ int (*leave_smm)(struct x86_emulate_ctxt *ctxt);
void (*triple_fault)(struct x86_emulate_ctxt *ctxt);
int (*set_xcr)(struct x86_emulate_ctxt *ctxt, u32 index, u64 xcr);
};
@@ -526,4 +525,35 @@ void emulator_invalidate_register_cache(struct x86_emulate_ctxt *ctxt);
void emulator_writeback_register_cache(struct x86_emulate_ctxt *ctxt);
bool emulator_can_use_gpa(struct x86_emulate_ctxt *ctxt);
+static inline ulong reg_read(struct x86_emulate_ctxt *ctxt, unsigned nr)
+{
+ if (KVM_EMULATOR_BUG_ON(nr >= NR_EMULATOR_GPRS, ctxt))
+ nr &= NR_EMULATOR_GPRS - 1;
+
+ if (!(ctxt->regs_valid & (1 << nr))) {
+ ctxt->regs_valid |= 1 << nr;
+ ctxt->_regs[nr] = ctxt->ops->read_gpr(ctxt, nr);
+ }
+ return ctxt->_regs[nr];
+}
+
+static inline ulong *reg_write(struct x86_emulate_ctxt *ctxt, unsigned nr)
+{
+ if (KVM_EMULATOR_BUG_ON(nr >= NR_EMULATOR_GPRS, ctxt))
+ nr &= NR_EMULATOR_GPRS - 1;
+
+ BUILD_BUG_ON(sizeof(ctxt->regs_dirty) * BITS_PER_BYTE < NR_EMULATOR_GPRS);
+ BUILD_BUG_ON(sizeof(ctxt->regs_valid) * BITS_PER_BYTE < NR_EMULATOR_GPRS);
+
+ ctxt->regs_valid |= 1 << nr;
+ ctxt->regs_dirty |= 1 << nr;
+ return &ctxt->_regs[nr];
+}
+
+static inline ulong *reg_rmw(struct x86_emulate_ctxt *ctxt, unsigned nr)
+{
+ reg_read(ctxt, nr);
+ return reg_write(ctxt, nr);
+}
+
#endif /* _ASM_X86_KVM_X86_EMULATE_H */
diff --git a/arch/x86/kvm/smm.c b/arch/x86/kvm/smm.c
index 26a6859e421f..773e07b6397d 100644
--- a/arch/x86/kvm/smm.c
+++ b/arch/x86/kvm/smm.c
@@ -270,3 +270,319 @@ void enter_smm(struct kvm_vcpu *vcpu)
kvm_update_cpuid_runtime(vcpu);
kvm_mmu_reset_context(vcpu);
}
+
+static int emulator_has_longmode(struct x86_emulate_ctxt *ctxt)
+{
+#ifdef CONFIG_X86_64
+ return ctxt->ops->guest_has_long_mode(ctxt);
+#else
+ return false;
+#endif
+}
+
+static void rsm_set_desc_flags(struct desc_struct *desc, u32 flags)
+{
+ desc->g = (flags >> 23) & 1;
+ desc->d = (flags >> 22) & 1;
+ desc->l = (flags >> 21) & 1;
+ desc->avl = (flags >> 20) & 1;
+ desc->p = (flags >> 15) & 1;
+ desc->dpl = (flags >> 13) & 3;
+ desc->s = (flags >> 12) & 1;
+ desc->type = (flags >> 8) & 15;
+}
+
+static int rsm_load_seg_32(struct x86_emulate_ctxt *ctxt, const char *smstate,
+ int n)
+{
+ struct desc_struct desc;
+ int offset;
+ u16 selector;
+
+ selector = GET_SMSTATE(u32, smstate, 0x7fa8 + n * 4);
+
+ if (n < 3)
+ offset = 0x7f84 + n * 12;
+ else
+ offset = 0x7f2c + (n - 3) * 12;
+
+ set_desc_base(&desc, GET_SMSTATE(u32, smstate, offset + 8));
+ set_desc_limit(&desc, GET_SMSTATE(u32, smstate, offset + 4));
+ rsm_set_desc_flags(&desc, GET_SMSTATE(u32, smstate, offset));
+ ctxt->ops->set_segment(ctxt, selector, &desc, 0, n);
+ return X86EMUL_CONTINUE;
+}
+
+#ifdef CONFIG_X86_64
+static int rsm_load_seg_64(struct x86_emulate_ctxt *ctxt, const char *smstate,
+ int n)
+{
+ struct desc_struct desc;
+ int offset;
+ u16 selector;
+ u32 base3;
+
+ offset = 0x7e00 + n * 16;
+
+ selector = GET_SMSTATE(u16, smstate, offset);
+ rsm_set_desc_flags(&desc, GET_SMSTATE(u16, smstate, offset + 2) << 8);
+ set_desc_limit(&desc, GET_SMSTATE(u32, smstate, offset + 4));
+ set_desc_base(&desc, GET_SMSTATE(u32, smstate, offset + 8));
+ base3 = GET_SMSTATE(u32, smstate, offset + 12);
+
+ ctxt->ops->set_segment(ctxt, selector, &desc, base3, n);
+ return X86EMUL_CONTINUE;
+}
+#endif
+
+static int rsm_enter_protected_mode(struct x86_emulate_ctxt *ctxt,
+ u64 cr0, u64 cr3, u64 cr4)
+{
+ int bad;
+ u64 pcid;
+
+ /* In order to later set CR4.PCIDE, CR3[11:0] must be zero. */
+ pcid = 0;
+ if (cr4 & X86_CR4_PCIDE) {
+ pcid = cr3 & 0xfff;
+ cr3 &= ~0xfff;
+ }
+
+ bad = ctxt->ops->set_cr(ctxt, 3, cr3);
+ if (bad)
+ return X86EMUL_UNHANDLEABLE;
+
+ /*
+ * First enable PAE, long mode needs it before CR0.PG = 1 is set.
+ * Then enable protected mode. However, PCID cannot be enabled
+ * if EFER.LMA=0, so set it separately.
+ */
+ bad = ctxt->ops->set_cr(ctxt, 4, cr4 & ~X86_CR4_PCIDE);
+ if (bad)
+ return X86EMUL_UNHANDLEABLE;
+
+ bad = ctxt->ops->set_cr(ctxt, 0, cr0);
+ if (bad)
+ return X86EMUL_UNHANDLEABLE;
+
+ if (cr4 & X86_CR4_PCIDE) {
+ bad = ctxt->ops->set_cr(ctxt, 4, cr4);
+ if (bad)
+ return X86EMUL_UNHANDLEABLE;
+ if (pcid) {
+ bad = ctxt->ops->set_cr(ctxt, 3, cr3 | pcid);
+ if (bad)
+ return X86EMUL_UNHANDLEABLE;
+ }
+
+ }
+
+ return X86EMUL_CONTINUE;
+}
+
+static int rsm_load_state_32(struct x86_emulate_ctxt *ctxt,
+ const char *smstate)
+{
+ struct desc_struct desc;
+ struct desc_ptr dt;
+ u16 selector;
+ u32 val, cr0, cr3, cr4;
+ int i;
+
+ cr0 = GET_SMSTATE(u32, smstate, 0x7ffc);
+ cr3 = GET_SMSTATE(u32, smstate, 0x7ff8);
+ ctxt->eflags = GET_SMSTATE(u32, smstate, 0x7ff4) | X86_EFLAGS_FIXED;
+ ctxt->_eip = GET_SMSTATE(u32, smstate, 0x7ff0);
+
+ for (i = 0; i < NR_EMULATOR_GPRS; i++)
+ *reg_write(ctxt, i) = GET_SMSTATE(u32, smstate, 0x7fd0 + i * 4);
+
+ val = GET_SMSTATE(u32, smstate, 0x7fcc);
+
+ if (ctxt->ops->set_dr(ctxt, 6, val))
+ return X86EMUL_UNHANDLEABLE;
+
+ val = GET_SMSTATE(u32, smstate, 0x7fc8);
+
+ if (ctxt->ops->set_dr(ctxt, 7, val))
+ return X86EMUL_UNHANDLEABLE;
+
+ selector = GET_SMSTATE(u32, smstate, 0x7fc4);
+ set_desc_base(&desc, GET_SMSTATE(u32, smstate, 0x7f64));
+ set_desc_limit(&desc, GET_SMSTATE(u32, smstate, 0x7f60));
+ rsm_set_desc_flags(&desc, GET_SMSTATE(u32, smstate, 0x7f5c));
+ ctxt->ops->set_segment(ctxt, selector, &desc, 0, VCPU_SREG_TR);
+
+ selector = GET_SMSTATE(u32, smstate, 0x7fc0);
+ set_desc_base(&desc, GET_SMSTATE(u32, smstate, 0x7f80));
+ set_desc_limit(&desc, GET_SMSTATE(u32, smstate, 0x7f7c));
+ rsm_set_desc_flags(&desc, GET_SMSTATE(u32, smstate, 0x7f78));
+ ctxt->ops->set_segment(ctxt, selector, &desc, 0, VCPU_SREG_LDTR);
+
+ dt.address = GET_SMSTATE(u32, smstate, 0x7f74);
+ dt.size = GET_SMSTATE(u32, smstate, 0x7f70);
+ ctxt->ops->set_gdt(ctxt, &dt);
+
+ dt.address = GET_SMSTATE(u32, smstate, 0x7f58);
+ dt.size = GET_SMSTATE(u32, smstate, 0x7f54);
+ ctxt->ops->set_idt(ctxt, &dt);
+
+ for (i = 0; i < 6; i++) {
+ int r = rsm_load_seg_32(ctxt, smstate, i);
+ if (r != X86EMUL_CONTINUE)
+ return r;
+ }
+
+ cr4 = GET_SMSTATE(u32, smstate, 0x7f14);
+
+ ctxt->ops->set_smbase(ctxt, GET_SMSTATE(u32, smstate, 0x7ef8));
+
+ return rsm_enter_protected_mode(ctxt, cr0, cr3, cr4);
+}
+
+#ifdef CONFIG_X86_64
+static int rsm_load_state_64(struct x86_emulate_ctxt *ctxt,
+ const char *smstate)
+{
+ struct desc_struct desc;
+ struct desc_ptr dt;
+ u64 val, cr0, cr3, cr4;
+ u32 base3;
+ u16 selector;
+ int i, r;
+
+ for (i = 0; i < NR_EMULATOR_GPRS; i++)
+ *reg_write(ctxt, i) = GET_SMSTATE(u64, smstate, 0x7ff8 - i * 8);
+
+ ctxt->_eip = GET_SMSTATE(u64, smstate, 0x7f78);
+ ctxt->eflags = GET_SMSTATE(u32, smstate, 0x7f70) | X86_EFLAGS_FIXED;
+
+ val = GET_SMSTATE(u64, smstate, 0x7f68);
+
+ if (ctxt->ops->set_dr(ctxt, 6, val))
+ return X86EMUL_UNHANDLEABLE;
+
+ val = GET_SMSTATE(u64, smstate, 0x7f60);
+
+ if (ctxt->ops->set_dr(ctxt, 7, val))
+ return X86EMUL_UNHANDLEABLE;
+
+ cr0 = GET_SMSTATE(u64, smstate, 0x7f58);
+ cr3 = GET_SMSTATE(u64, smstate, 0x7f50);
+ cr4 = GET_SMSTATE(u64, smstate, 0x7f48);
+ ctxt->ops->set_smbase(ctxt, GET_SMSTATE(u32, smstate, 0x7f00));
+ val = GET_SMSTATE(u64, smstate, 0x7ed0);
+
+ if (ctxt->ops->set_msr(ctxt, MSR_EFER, val & ~EFER_LMA))
+ return X86EMUL_UNHANDLEABLE;
+
+ selector = GET_SMSTATE(u32, smstate, 0x7e90);
+ rsm_set_desc_flags(&desc, GET_SMSTATE(u32, smstate, 0x7e92) << 8);
+ set_desc_limit(&desc, GET_SMSTATE(u32, smstate, 0x7e94));
+ set_desc_base(&desc, GET_SMSTATE(u32, smstate, 0x7e98));
+ base3 = GET_SMSTATE(u32, smstate, 0x7e9c);
+ ctxt->ops->set_segment(ctxt, selector, &desc, base3, VCPU_SREG_TR);
+
+ dt.size = GET_SMSTATE(u32, smstate, 0x7e84);
+ dt.address = GET_SMSTATE(u64, smstate, 0x7e88);
+ ctxt->ops->set_idt(ctxt, &dt);
+
+ selector = GET_SMSTATE(u32, smstate, 0x7e70);
+ rsm_set_desc_flags(&desc, GET_SMSTATE(u32, smstate, 0x7e72) << 8);
+ set_desc_limit(&desc, GET_SMSTATE(u32, smstate, 0x7e74));
+ set_desc_base(&desc, GET_SMSTATE(u32, smstate, 0x7e78));
+ base3 = GET_SMSTATE(u32, smstate, 0x7e7c);
+ ctxt->ops->set_segment(ctxt, selector, &desc, base3, VCPU_SREG_LDTR);
+
+ dt.size = GET_SMSTATE(u32, smstate, 0x7e64);
+ dt.address = GET_SMSTATE(u64, smstate, 0x7e68);
+ ctxt->ops->set_gdt(ctxt, &dt);
+
+ r = rsm_enter_protected_mode(ctxt, cr0, cr3, cr4);
+ if (r != X86EMUL_CONTINUE)
+ return r;
+
+ for (i = 0; i < 6; i++) {
+ r = rsm_load_seg_64(ctxt, smstate, i);
+ if (r != X86EMUL_CONTINUE)
+ return r;
+ }
+
+ return X86EMUL_CONTINUE;
+}
+#endif
+
+int emulator_leave_smm(struct x86_emulate_ctxt *ctxt)
+{
+ struct kvm_vcpu *vcpu = ctxt->vcpu;
+ unsigned long cr0, cr4, efer;
+ char buf[512];
+ u64 smbase;
+ int ret;
+
+ smbase = ctxt->ops->get_smbase(ctxt);
+
+ ret = ctxt->ops->read_phys(ctxt, smbase + 0xfe00, buf, sizeof(buf));
+ if (ret != X86EMUL_CONTINUE)
+ return X86EMUL_UNHANDLEABLE;
+
+ if ((ctxt->ops->get_hflags(ctxt) & X86EMUL_SMM_INSIDE_NMI_MASK) == 0)
+ ctxt->ops->set_nmi_mask(ctxt, false);
+
+ kvm_smm_changed(vcpu, false);
+
+ /*
+ * Get back to real mode, to prepare a safe state in which to load
+ * CR0/CR3/CR4/EFER. It's all a bit more complicated if the vCPU
+ * supports long mode.
+ *
+ * The ctxt->ops callbacks will handle all side effects when writing
+ * writing MSRs and CRs, e.g. MMU context resets, CPUID
+ * runtime updates, etc.
+ */
+ if (emulator_has_longmode(ctxt)) {
+ struct desc_struct cs_desc;
+
+ /* Zero CR4.PCIDE before CR0.PG. */
+ cr4 = ctxt->ops->get_cr(ctxt, 4);
+ if (cr4 & X86_CR4_PCIDE)
+ ctxt->ops->set_cr(ctxt, 4, cr4 & ~X86_CR4_PCIDE);
+
+ /* A 32-bit code segment is required to clear EFER.LMA. */
+ memset(&cs_desc, 0, sizeof(cs_desc));
+ cs_desc.type = 0xb;
+ cs_desc.s = cs_desc.g = cs_desc.p = 1;
+ ctxt->ops->set_segment(ctxt, 0, &cs_desc, 0, VCPU_SREG_CS);
+ }
+
+ /* For the 64-bit case, this will clear EFER.LMA. */
+ cr0 = ctxt->ops->get_cr(ctxt, 0);
+ if (cr0 & X86_CR0_PE)
+ ctxt->ops->set_cr(ctxt, 0, cr0 & ~(X86_CR0_PG | X86_CR0_PE));
+
+ if (emulator_has_longmode(ctxt)) {
+ /* Clear CR4.PAE before clearing EFER.LME. */
+ cr4 = ctxt->ops->get_cr(ctxt, 4);
+ if (cr4 & X86_CR4_PAE)
+ ctxt->ops->set_cr(ctxt, 4, cr4 & ~X86_CR4_PAE);
+
+ /* And finally go back to 32-bit mode. */
+ efer = 0;
+ ctxt->ops->set_msr(ctxt, MSR_EFER, efer);
+ }
+
+ /*
+ * Give leave_smm() a chance to make ISA-specific changes to the vCPU
+ * state (e.g. enter guest mode) before loading state from the SMM
+ * state-save area.
+ */
+ if (static_call(kvm_x86_leave_smm)(vcpu, buf))
+ return X86EMUL_UNHANDLEABLE;
+
+#ifdef CONFIG_X86_64
+ if (emulator_has_longmode(ctxt))
+ return rsm_load_state_64(ctxt, buf);
+ else
+#endif
+ return rsm_load_state_32(ctxt, buf);
+}
diff --git a/arch/x86/kvm/smm.h b/arch/x86/kvm/smm.h
index aacc6dac2c99..b0602a92e511 100644
--- a/arch/x86/kvm/smm.h
+++ b/arch/x86/kvm/smm.h
@@ -21,6 +21,7 @@ static inline bool is_smm(struct kvm_vcpu *vcpu)
void kvm_smm_changed(struct kvm_vcpu *vcpu, bool in_smm);
void enter_smm(struct kvm_vcpu *vcpu);
+int emulator_leave_smm(struct x86_emulate_ctxt *ctxt);
void process_smi(struct kvm_vcpu *vcpu);
#endif
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index f86b6be363b0..dae68ef0c3c8 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -8050,19 +8050,6 @@ static unsigned emulator_get_hflags(struct x86_emulate_ctxt *ctxt)
return emul_to_vcpu(ctxt)->arch.hflags;
}
-static void emulator_exiting_smm(struct x86_emulate_ctxt *ctxt)
-{
- struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
-
- kvm_smm_changed(vcpu, false);
-}
-
-static int emulator_leave_smm(struct x86_emulate_ctxt *ctxt,
- const char *smstate)
-{
- return static_call(kvm_x86_leave_smm)(emul_to_vcpu(ctxt), smstate);
-}
-
static void emulator_triple_fault(struct x86_emulate_ctxt *ctxt)
{
kvm_make_request(KVM_REQ_TRIPLE_FAULT, emul_to_vcpu(ctxt));
@@ -8126,7 +8113,6 @@ static const struct x86_emulate_ops emulate_ops = {
.guest_has_rdpid = emulator_guest_has_rdpid,
.set_nmi_mask = emulator_set_nmi_mask,
.get_hflags = emulator_get_hflags,
- .exiting_smm = emulator_exiting_smm,
.leave_smm = emulator_leave_smm,
.triple_fault = emulator_triple_fault,
.set_xcr = emulator_set_xcr,
--
2.38.1

View file

@ -1,34 +0,0 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Javier Martinez Canillas <javierm@redhat.com>
Date: Tue, 25 Jan 2022 10:12:19 +0100
Subject: [PATCH] drivers/firmware: Don't mark as busy the simple-framebuffer
IO resource
The sysfb_create_simplefb() function requests an IO memory resource for the
simple-framebuffer platform device, but it also marks it as busy which can
lead to drivers requesting the same memory resource to fail.
Let's drop the IORESOURCE_BUSY flag and let drivers request it as busy
instead.
Signed-off-by: Javier Martinez Canillas <javierm@redhat.com>
Reviewed-by: Zack Rusin <zackr@vmware.com>
Reviewed-by: Thomas Zimmermann <tzimmermann@suse.de>
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
---
drivers/firmware/sysfb_simplefb.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/firmware/sysfb_simplefb.c b/drivers/firmware/sysfb_simplefb.c
index 757cc8b9f3de..bda8712bfd8c 100644
--- a/drivers/firmware/sysfb_simplefb.c
+++ b/drivers/firmware/sysfb_simplefb.c
@@ -99,7 +99,7 @@ __init int sysfb_create_simplefb(const struct screen_info *si,
/* setup IORESOURCE_MEM as framebuffer memory */
memset(&res, 0, sizeof(res));
- res.flags = IORESOURCE_MEM | IORESOURCE_BUSY;
+ res.flags = IORESOURCE_MEM;
res.name = simplefb_resname;
res.start = base;
res.end = res.start + length - 1;

View file

@ -0,0 +1,500 @@
From fa637dcaf7f19a746fe507349db8b56f49ab03b0 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Wed, 26 Oct 2022 14:47:45 +0200
Subject: [PATCH] KVM: x86: do not go through ctxt->ops when emulating rsm
Now that RSM is implemented in a single emulator callback, there is no
point in going through other callbacks for the sake of modifying
processor state. Just invoke KVM's own internal functions directly,
and remove the callbacks that were only used by em_rsm; the only
substantial difference is in the handling of the segment registers
and descriptor cache, which have to be parsed into a struct kvm_segment
instead of a struct desc_struct.
This also fixes a bug where emulator_set_segment was shifting the
limit left by 12 if the G bit is set, but the limit had not been
shifted right upon entry to SMM.
The emulator context is still used to restore EIP and the general
purpose registers.
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
arch/x86/kvm/kvm_emulate.h | 13 ---
arch/x86/kvm/smm.c | 177 +++++++++++++++++--------------------
arch/x86/kvm/x86.c | 33 -------
3 files changed, 81 insertions(+), 142 deletions(-)
diff --git a/arch/x86/kvm/kvm_emulate.h b/arch/x86/kvm/kvm_emulate.h
index d7afbc448dd2..84b1f2661463 100644
--- a/arch/x86/kvm/kvm_emulate.h
+++ b/arch/x86/kvm/kvm_emulate.h
@@ -116,16 +116,6 @@ struct x86_emulate_ops {
unsigned int bytes,
struct x86_exception *fault, bool system);
- /*
- * read_phys: Read bytes of standard (non-emulated/special) memory.
- * Used for descriptor reading.
- * @addr: [IN ] Physical address from which to read.
- * @val: [OUT] Value read from memory.
- * @bytes: [IN ] Number of bytes to read from memory.
- */
- int (*read_phys)(struct x86_emulate_ctxt *ctxt, unsigned long addr,
- void *val, unsigned int bytes);
-
/*
* write_std: Write bytes of standard (non-emulated/special) memory.
* Used for descriptor writing.
@@ -209,11 +199,8 @@ struct x86_emulate_ops {
int (*cpl)(struct x86_emulate_ctxt *ctxt);
void (*get_dr)(struct x86_emulate_ctxt *ctxt, int dr, ulong *dest);
int (*set_dr)(struct x86_emulate_ctxt *ctxt, int dr, ulong value);
- u64 (*get_smbase)(struct x86_emulate_ctxt *ctxt);
- void (*set_smbase)(struct x86_emulate_ctxt *ctxt, u64 smbase);
int (*set_msr_with_filter)(struct x86_emulate_ctxt *ctxt, u32 msr_index, u64 data);
int (*get_msr_with_filter)(struct x86_emulate_ctxt *ctxt, u32 msr_index, u64 *pdata);
- int (*set_msr)(struct x86_emulate_ctxt *ctxt, u32 msr_index, u64 data);
int (*get_msr)(struct x86_emulate_ctxt *ctxt, u32 msr_index, u64 *pdata);
int (*check_pmc)(struct x86_emulate_ctxt *ctxt, u32 pmc);
int (*read_pmc)(struct x86_emulate_ctxt *ctxt, u32 pmc, u64 *pdata);
diff --git a/arch/x86/kvm/smm.c b/arch/x86/kvm/smm.c
index 773e07b6397d..41ca128478fc 100644
--- a/arch/x86/kvm/smm.c
+++ b/arch/x86/kvm/smm.c
@@ -271,71 +271,59 @@ void enter_smm(struct kvm_vcpu *vcpu)
kvm_mmu_reset_context(vcpu);
}
-static int emulator_has_longmode(struct x86_emulate_ctxt *ctxt)
-{
-#ifdef CONFIG_X86_64
- return ctxt->ops->guest_has_long_mode(ctxt);
-#else
- return false;
-#endif
-}
-
-static void rsm_set_desc_flags(struct desc_struct *desc, u32 flags)
+static void rsm_set_desc_flags(struct kvm_segment *desc, u32 flags)
{
desc->g = (flags >> 23) & 1;
- desc->d = (flags >> 22) & 1;
+ desc->db = (flags >> 22) & 1;
desc->l = (flags >> 21) & 1;
desc->avl = (flags >> 20) & 1;
- desc->p = (flags >> 15) & 1;
+ desc->present = (flags >> 15) & 1;
desc->dpl = (flags >> 13) & 3;
desc->s = (flags >> 12) & 1;
desc->type = (flags >> 8) & 15;
+
+ desc->unusable = !desc->present;
+ desc->padding = 0;
}
-static int rsm_load_seg_32(struct x86_emulate_ctxt *ctxt, const char *smstate,
+static int rsm_load_seg_32(struct kvm_vcpu *vcpu, const char *smstate,
int n)
{
- struct desc_struct desc;
+ struct kvm_segment desc;
int offset;
- u16 selector;
-
- selector = GET_SMSTATE(u32, smstate, 0x7fa8 + n * 4);
if (n < 3)
offset = 0x7f84 + n * 12;
else
offset = 0x7f2c + (n - 3) * 12;
- set_desc_base(&desc, GET_SMSTATE(u32, smstate, offset + 8));
- set_desc_limit(&desc, GET_SMSTATE(u32, smstate, offset + 4));
+ desc.selector = GET_SMSTATE(u32, smstate, 0x7fa8 + n * 4);
+ desc.base = GET_SMSTATE(u32, smstate, offset + 8);
+ desc.limit = GET_SMSTATE(u32, smstate, offset + 4);
rsm_set_desc_flags(&desc, GET_SMSTATE(u32, smstate, offset));
- ctxt->ops->set_segment(ctxt, selector, &desc, 0, n);
+ kvm_set_segment(vcpu, &desc, n);
return X86EMUL_CONTINUE;
}
#ifdef CONFIG_X86_64
-static int rsm_load_seg_64(struct x86_emulate_ctxt *ctxt, const char *smstate,
+static int rsm_load_seg_64(struct kvm_vcpu *vcpu, const char *smstate,
int n)
{
- struct desc_struct desc;
+ struct kvm_segment desc;
int offset;
- u16 selector;
- u32 base3;
offset = 0x7e00 + n * 16;
- selector = GET_SMSTATE(u16, smstate, offset);
+ desc.selector = GET_SMSTATE(u16, smstate, offset);
rsm_set_desc_flags(&desc, GET_SMSTATE(u16, smstate, offset + 2) << 8);
- set_desc_limit(&desc, GET_SMSTATE(u32, smstate, offset + 4));
- set_desc_base(&desc, GET_SMSTATE(u32, smstate, offset + 8));
- base3 = GET_SMSTATE(u32, smstate, offset + 12);
-
- ctxt->ops->set_segment(ctxt, selector, &desc, base3, n);
+ desc.limit = GET_SMSTATE(u32, smstate, offset + 4);
+ desc.base = GET_SMSTATE(u64, smstate, offset + 8);
+ kvm_set_segment(vcpu, &desc, n);
return X86EMUL_CONTINUE;
}
#endif
-static int rsm_enter_protected_mode(struct x86_emulate_ctxt *ctxt,
+static int rsm_enter_protected_mode(struct kvm_vcpu *vcpu,
u64 cr0, u64 cr3, u64 cr4)
{
int bad;
@@ -348,7 +336,7 @@ static int rsm_enter_protected_mode(struct x86_emulate_ctxt *ctxt,
cr3 &= ~0xfff;
}
- bad = ctxt->ops->set_cr(ctxt, 3, cr3);
+ bad = kvm_set_cr3(vcpu, cr3);
if (bad)
return X86EMUL_UNHANDLEABLE;
@@ -357,20 +345,20 @@ static int rsm_enter_protected_mode(struct x86_emulate_ctxt *ctxt,
* Then enable protected mode. However, PCID cannot be enabled
* if EFER.LMA=0, so set it separately.
*/
- bad = ctxt->ops->set_cr(ctxt, 4, cr4 & ~X86_CR4_PCIDE);
+ bad = kvm_set_cr4(vcpu, cr4 & ~X86_CR4_PCIDE);
if (bad)
return X86EMUL_UNHANDLEABLE;
- bad = ctxt->ops->set_cr(ctxt, 0, cr0);
+ bad = kvm_set_cr0(vcpu, cr0);
if (bad)
return X86EMUL_UNHANDLEABLE;
if (cr4 & X86_CR4_PCIDE) {
- bad = ctxt->ops->set_cr(ctxt, 4, cr4);
+ bad = kvm_set_cr4(vcpu, cr4);
if (bad)
return X86EMUL_UNHANDLEABLE;
if (pcid) {
- bad = ctxt->ops->set_cr(ctxt, 3, cr3 | pcid);
+ bad = kvm_set_cr3(vcpu, cr3 | pcid);
if (bad)
return X86EMUL_UNHANDLEABLE;
}
@@ -383,9 +371,9 @@ static int rsm_enter_protected_mode(struct x86_emulate_ctxt *ctxt,
static int rsm_load_state_32(struct x86_emulate_ctxt *ctxt,
const char *smstate)
{
- struct desc_struct desc;
+ struct kvm_vcpu *vcpu = ctxt->vcpu;
+ struct kvm_segment desc;
struct desc_ptr dt;
- u16 selector;
u32 val, cr0, cr3, cr4;
int i;
@@ -399,56 +387,55 @@ static int rsm_load_state_32(struct x86_emulate_ctxt *ctxt,
val = GET_SMSTATE(u32, smstate, 0x7fcc);
- if (ctxt->ops->set_dr(ctxt, 6, val))
+ if (kvm_set_dr(vcpu, 6, val))
return X86EMUL_UNHANDLEABLE;
val = GET_SMSTATE(u32, smstate, 0x7fc8);
- if (ctxt->ops->set_dr(ctxt, 7, val))
+ if (kvm_set_dr(vcpu, 7, val))
return X86EMUL_UNHANDLEABLE;
- selector = GET_SMSTATE(u32, smstate, 0x7fc4);
- set_desc_base(&desc, GET_SMSTATE(u32, smstate, 0x7f64));
- set_desc_limit(&desc, GET_SMSTATE(u32, smstate, 0x7f60));
+ desc.selector = GET_SMSTATE(u32, smstate, 0x7fc4);
+ desc.base = GET_SMSTATE(u32, smstate, 0x7f64);
+ desc.limit = GET_SMSTATE(u32, smstate, 0x7f60);
rsm_set_desc_flags(&desc, GET_SMSTATE(u32, smstate, 0x7f5c));
- ctxt->ops->set_segment(ctxt, selector, &desc, 0, VCPU_SREG_TR);
+ kvm_set_segment(vcpu, &desc, VCPU_SREG_TR);
- selector = GET_SMSTATE(u32, smstate, 0x7fc0);
- set_desc_base(&desc, GET_SMSTATE(u32, smstate, 0x7f80));
- set_desc_limit(&desc, GET_SMSTATE(u32, smstate, 0x7f7c));
+ desc.selector = GET_SMSTATE(u32, smstate, 0x7fc0);
+ desc.base = GET_SMSTATE(u32, smstate, 0x7f80);
+ desc.limit = GET_SMSTATE(u32, smstate, 0x7f7c);
rsm_set_desc_flags(&desc, GET_SMSTATE(u32, smstate, 0x7f78));
- ctxt->ops->set_segment(ctxt, selector, &desc, 0, VCPU_SREG_LDTR);
+ kvm_set_segment(vcpu, &desc, VCPU_SREG_LDTR);
dt.address = GET_SMSTATE(u32, smstate, 0x7f74);
dt.size = GET_SMSTATE(u32, smstate, 0x7f70);
- ctxt->ops->set_gdt(ctxt, &dt);
+ static_call(kvm_x86_set_gdt)(vcpu, &dt);
dt.address = GET_SMSTATE(u32, smstate, 0x7f58);
dt.size = GET_SMSTATE(u32, smstate, 0x7f54);
- ctxt->ops->set_idt(ctxt, &dt);
+ static_call(kvm_x86_set_idt)(vcpu, &dt);
for (i = 0; i < 6; i++) {
- int r = rsm_load_seg_32(ctxt, smstate, i);
+ int r = rsm_load_seg_32(vcpu, smstate, i);
if (r != X86EMUL_CONTINUE)
return r;
}
cr4 = GET_SMSTATE(u32, smstate, 0x7f14);
- ctxt->ops->set_smbase(ctxt, GET_SMSTATE(u32, smstate, 0x7ef8));
+ vcpu->arch.smbase = GET_SMSTATE(u32, smstate, 0x7ef8);
- return rsm_enter_protected_mode(ctxt, cr0, cr3, cr4);
+ return rsm_enter_protected_mode(vcpu, cr0, cr3, cr4);
}
#ifdef CONFIG_X86_64
static int rsm_load_state_64(struct x86_emulate_ctxt *ctxt,
const char *smstate)
{
- struct desc_struct desc;
+ struct kvm_vcpu *vcpu = ctxt->vcpu;
+ struct kvm_segment desc;
struct desc_ptr dt;
u64 val, cr0, cr3, cr4;
- u32 base3;
- u16 selector;
int i, r;
for (i = 0; i < NR_EMULATOR_GPRS; i++)
@@ -459,51 +446,49 @@ static int rsm_load_state_64(struct x86_emulate_ctxt *ctxt,
val = GET_SMSTATE(u64, smstate, 0x7f68);
- if (ctxt->ops->set_dr(ctxt, 6, val))
+ if (kvm_set_dr(vcpu, 6, val))
return X86EMUL_UNHANDLEABLE;
val = GET_SMSTATE(u64, smstate, 0x7f60);
- if (ctxt->ops->set_dr(ctxt, 7, val))
+ if (kvm_set_dr(vcpu, 7, val))
return X86EMUL_UNHANDLEABLE;
cr0 = GET_SMSTATE(u64, smstate, 0x7f58);
cr3 = GET_SMSTATE(u64, smstate, 0x7f50);
cr4 = GET_SMSTATE(u64, smstate, 0x7f48);
- ctxt->ops->set_smbase(ctxt, GET_SMSTATE(u32, smstate, 0x7f00));
+ vcpu->arch.smbase = GET_SMSTATE(u32, smstate, 0x7f00);
val = GET_SMSTATE(u64, smstate, 0x7ed0);
- if (ctxt->ops->set_msr(ctxt, MSR_EFER, val & ~EFER_LMA))
+ if (kvm_set_msr(vcpu, MSR_EFER, val & ~EFER_LMA))
return X86EMUL_UNHANDLEABLE;
- selector = GET_SMSTATE(u32, smstate, 0x7e90);
+ desc.selector = GET_SMSTATE(u32, smstate, 0x7e90);
rsm_set_desc_flags(&desc, GET_SMSTATE(u32, smstate, 0x7e92) << 8);
- set_desc_limit(&desc, GET_SMSTATE(u32, smstate, 0x7e94));
- set_desc_base(&desc, GET_SMSTATE(u32, smstate, 0x7e98));
- base3 = GET_SMSTATE(u32, smstate, 0x7e9c);
- ctxt->ops->set_segment(ctxt, selector, &desc, base3, VCPU_SREG_TR);
+ desc.limit = GET_SMSTATE(u32, smstate, 0x7e94);
+ desc.base = GET_SMSTATE(u64, smstate, 0x7e98);
+ kvm_set_segment(vcpu, &desc, VCPU_SREG_TR);
dt.size = GET_SMSTATE(u32, smstate, 0x7e84);
dt.address = GET_SMSTATE(u64, smstate, 0x7e88);
- ctxt->ops->set_idt(ctxt, &dt);
+ static_call(kvm_x86_set_idt)(vcpu, &dt);
- selector = GET_SMSTATE(u32, smstate, 0x7e70);
+ desc.selector = GET_SMSTATE(u32, smstate, 0x7e70);
rsm_set_desc_flags(&desc, GET_SMSTATE(u32, smstate, 0x7e72) << 8);
- set_desc_limit(&desc, GET_SMSTATE(u32, smstate, 0x7e74));
- set_desc_base(&desc, GET_SMSTATE(u32, smstate, 0x7e78));
- base3 = GET_SMSTATE(u32, smstate, 0x7e7c);
- ctxt->ops->set_segment(ctxt, selector, &desc, base3, VCPU_SREG_LDTR);
+ desc.limit = GET_SMSTATE(u32, smstate, 0x7e74);
+ desc.base = GET_SMSTATE(u64, smstate, 0x7e78);
+ kvm_set_segment(vcpu, &desc, VCPU_SREG_LDTR);
dt.size = GET_SMSTATE(u32, smstate, 0x7e64);
dt.address = GET_SMSTATE(u64, smstate, 0x7e68);
- ctxt->ops->set_gdt(ctxt, &dt);
+ static_call(kvm_x86_set_gdt)(vcpu, &dt);
- r = rsm_enter_protected_mode(ctxt, cr0, cr3, cr4);
+ r = rsm_enter_protected_mode(vcpu, cr0, cr3, cr4);
if (r != X86EMUL_CONTINUE)
return r;
for (i = 0; i < 6; i++) {
- r = rsm_load_seg_64(ctxt, smstate, i);
+ r = rsm_load_seg_64(vcpu, smstate, i);
if (r != X86EMUL_CONTINUE)
return r;
}
@@ -520,14 +505,14 @@ int emulator_leave_smm(struct x86_emulate_ctxt *ctxt)
u64 smbase;
int ret;
- smbase = ctxt->ops->get_smbase(ctxt);
+ smbase = vcpu->arch.smbase;
- ret = ctxt->ops->read_phys(ctxt, smbase + 0xfe00, buf, sizeof(buf));
- if (ret != X86EMUL_CONTINUE)
+ ret = kvm_vcpu_read_guest(vcpu, smbase + 0xfe00, buf, sizeof(buf));
+ if (ret < 0)
return X86EMUL_UNHANDLEABLE;
- if ((ctxt->ops->get_hflags(ctxt) & X86EMUL_SMM_INSIDE_NMI_MASK) == 0)
- ctxt->ops->set_nmi_mask(ctxt, false);
+ if ((vcpu->arch.hflags & HF_SMM_INSIDE_NMI_MASK) == 0)
+ static_call(kvm_x86_set_nmi_mask)(vcpu, false);
kvm_smm_changed(vcpu, false);
@@ -535,41 +520,41 @@ int emulator_leave_smm(struct x86_emulate_ctxt *ctxt)
* Get back to real mode, to prepare a safe state in which to load
* CR0/CR3/CR4/EFER. It's all a bit more complicated if the vCPU
* supports long mode.
- *
- * The ctxt->ops callbacks will handle all side effects when writing
- * writing MSRs and CRs, e.g. MMU context resets, CPUID
- * runtime updates, etc.
*/
- if (emulator_has_longmode(ctxt)) {
- struct desc_struct cs_desc;
+#ifdef CONFIG_X86_64
+ if (guest_cpuid_has(vcpu, X86_FEATURE_LM)) {
+ struct kvm_segment cs_desc;
/* Zero CR4.PCIDE before CR0.PG. */
- cr4 = ctxt->ops->get_cr(ctxt, 4);
+ cr4 = kvm_read_cr4(vcpu);
if (cr4 & X86_CR4_PCIDE)
- ctxt->ops->set_cr(ctxt, 4, cr4 & ~X86_CR4_PCIDE);
+ kvm_set_cr4(vcpu, cr4 & ~X86_CR4_PCIDE);
/* A 32-bit code segment is required to clear EFER.LMA. */
memset(&cs_desc, 0, sizeof(cs_desc));
cs_desc.type = 0xb;
- cs_desc.s = cs_desc.g = cs_desc.p = 1;
- ctxt->ops->set_segment(ctxt, 0, &cs_desc, 0, VCPU_SREG_CS);
+ cs_desc.s = cs_desc.g = cs_desc.present = 1;
+ kvm_set_segment(vcpu, &cs_desc, VCPU_SREG_CS);
}
+#endif
/* For the 64-bit case, this will clear EFER.LMA. */
- cr0 = ctxt->ops->get_cr(ctxt, 0);
+ cr0 = kvm_read_cr0(vcpu);
if (cr0 & X86_CR0_PE)
- ctxt->ops->set_cr(ctxt, 0, cr0 & ~(X86_CR0_PG | X86_CR0_PE));
+ kvm_set_cr0(vcpu, cr0 & ~(X86_CR0_PG | X86_CR0_PE));
- if (emulator_has_longmode(ctxt)) {
+#ifdef CONFIG_X86_64
+ if (guest_cpuid_has(vcpu, X86_FEATURE_LM)) {
/* Clear CR4.PAE before clearing EFER.LME. */
- cr4 = ctxt->ops->get_cr(ctxt, 4);
+ cr4 = kvm_read_cr4(vcpu);
if (cr4 & X86_CR4_PAE)
- ctxt->ops->set_cr(ctxt, 4, cr4 & ~X86_CR4_PAE);
+ kvm_set_cr4(vcpu, cr4 & ~X86_CR4_PAE);
/* And finally go back to 32-bit mode. */
efer = 0;
- ctxt->ops->set_msr(ctxt, MSR_EFER, efer);
+ kvm_set_msr(vcpu, MSR_EFER, efer);
}
+#endif
/*
* Give leave_smm() a chance to make ISA-specific changes to the vCPU
@@ -580,7 +565,7 @@ int emulator_leave_smm(struct x86_emulate_ctxt *ctxt)
return X86EMUL_UNHANDLEABLE;
#ifdef CONFIG_X86_64
- if (emulator_has_longmode(ctxt))
+ if (guest_cpuid_has(vcpu, X86_FEATURE_LM))
return rsm_load_state_64(ctxt, buf);
else
#endif
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index dae68ef0c3c8..77e0ca43ee27 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -7171,15 +7171,6 @@ static int emulator_read_std(struct x86_emulate_ctxt *ctxt,
return kvm_read_guest_virt_helper(addr, val, bytes, vcpu, access, exception);
}
-static int kvm_read_guest_phys_system(struct x86_emulate_ctxt *ctxt,
- unsigned long addr, void *val, unsigned int bytes)
-{
- struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
- int r = kvm_vcpu_read_guest(vcpu, addr, val, bytes);
-
- return r < 0 ? X86EMUL_IO_NEEDED : X86EMUL_CONTINUE;
-}
-
static int kvm_write_guest_virt_helper(gva_t addr, void *val, unsigned int bytes,
struct kvm_vcpu *vcpu, u64 access,
struct x86_exception *exception)
@@ -7956,26 +7947,6 @@ static int emulator_get_msr(struct x86_emulate_ctxt *ctxt,
return kvm_get_msr(emul_to_vcpu(ctxt), msr_index, pdata);
}
-static int emulator_set_msr(struct x86_emulate_ctxt *ctxt,
- u32 msr_index, u64 data)
-{
- return kvm_set_msr(emul_to_vcpu(ctxt), msr_index, data);
-}
-
-static u64 emulator_get_smbase(struct x86_emulate_ctxt *ctxt)
-{
- struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
-
- return vcpu->arch.smbase;
-}
-
-static void emulator_set_smbase(struct x86_emulate_ctxt *ctxt, u64 smbase)
-{
- struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
-
- vcpu->arch.smbase = smbase;
-}
-
static int emulator_check_pmc(struct x86_emulate_ctxt *ctxt,
u32 pmc)
{
@@ -8074,7 +8045,6 @@ static const struct x86_emulate_ops emulate_ops = {
.write_gpr = emulator_write_gpr,
.read_std = emulator_read_std,
.write_std = emulator_write_std,
- .read_phys = kvm_read_guest_phys_system,
.fetch = kvm_fetch_guest_virt,
.read_emulated = emulator_read_emulated,
.write_emulated = emulator_write_emulated,
@@ -8094,11 +8064,8 @@ static const struct x86_emulate_ops emulate_ops = {
.cpl = emulator_get_cpl,
.get_dr = emulator_get_dr,
.set_dr = emulator_set_dr,
- .get_smbase = emulator_get_smbase,
- .set_smbase = emulator_set_smbase,
.set_msr_with_filter = emulator_set_msr_with_filter,
.get_msr_with_filter = emulator_get_msr_with_filter,
- .set_msr = emulator_set_msr,
.get_msr = emulator_get_msr,
.check_pmc = emulator_check_pmc,
.read_pmc = emulator_read_pmc,
--
2.38.1

View file

@ -1,63 +0,0 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Thomas Zimmermann <tzimmermann@suse.de>
Date: Tue, 25 Jan 2022 10:12:20 +0100
Subject: [PATCH] drm/simpledrm: Request memory region in driver
Requesting the framebuffer memory in simpledrm marks the memory
range as busy. This used to be done by the firmware sysfb code,
but the driver is the correct place.
v2:
* use I/O memory if request_mem_region() fails (Jocelyn)
Signed-off-by: Thomas Zimmermann <tzimmermann@suse.de>
Reviewed-by: Javier Martinez Canillas <javierm@redhat.com>
Reviewed-by: Jocelyn Falempe <jfalempe@redhat.com>
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
---
drivers/gpu/drm/tiny/simpledrm.c | 22 +++++++++++++++++-----
1 file changed, 17 insertions(+), 5 deletions(-)
diff --git a/drivers/gpu/drm/tiny/simpledrm.c b/drivers/gpu/drm/tiny/simpledrm.c
index 3e3f9ba1e885..806fdc3237b3 100644
--- a/drivers/gpu/drm/tiny/simpledrm.c
+++ b/drivers/gpu/drm/tiny/simpledrm.c
@@ -525,21 +525,33 @@ static int simpledrm_device_init_mm(struct simpledrm_device *sdev)
{
struct drm_device *dev = &sdev->dev;
struct platform_device *pdev = sdev->pdev;
- struct resource *mem;
+ struct resource *res, *mem;
void __iomem *screen_base;
int ret;
- mem = platform_get_resource(pdev, IORESOURCE_MEM, 0);
- if (!mem)
+ res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+ if (!res)
return -EINVAL;
- ret = devm_aperture_acquire_from_firmware(dev, mem->start, resource_size(mem));
+ ret = devm_aperture_acquire_from_firmware(dev, res->start, resource_size(res));
if (ret) {
drm_err(dev, "could not acquire memory range %pr: error %d\n",
- mem, ret);
+ res, ret);
return ret;
}
+ mem = devm_request_mem_region(&pdev->dev, res->start, resource_size(res),
+ sdev->dev.driver->name);
+ if (!mem) {
+ /*
+ * We cannot make this fatal. Sometimes this comes from magic
+ * spaces our resource handlers simply don't know about. Use
+ * the I/O-memory resource as-is and try to map that instead.
+ */
+ drm_warn(dev, "could not acquire memory region %pr\n", res);
+ mem = res;
+ }
+
screen_base = devm_ioremap_wc(&pdev->dev, mem->start,
resource_size(mem));
if (!screen_base)

View file

@ -0,0 +1,189 @@
From 14fe6763b2618afb73a1109d7fda337cb06af0a2 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Tue, 25 Oct 2022 15:47:23 +0300
Subject: [PATCH] KVM: allow compiling out SMM support
Some users of KVM implement the UEFI variable store through a paravirtual device
that does not require the "SMM lockbox" component of edk2; allow them to
compile out system management mode, which is not a full implementation
especially in how it interacts with nested virtualization.
Suggested-by: Sean Christopherson <seanjc@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
arch/x86/kvm/Kconfig | 11 ++++++++++
arch/x86/kvm/Makefile | 2 +-
arch/x86/kvm/smm.h | 13 ++++++++++++
arch/x86/kvm/svm/svm.c | 2 ++
arch/x86/kvm/vmx/vmx.c | 2 ++
arch/x86/kvm/x86.c | 21 +++++++++++++++++--
tools/testing/selftests/kvm/x86_64/smm_test.c | 2 ++
7 files changed, 50 insertions(+), 3 deletions(-)
diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig
index e3cbd7706136..20d5aea868a4 100644
--- a/arch/x86/kvm/Kconfig
+++ b/arch/x86/kvm/Kconfig
@@ -86,6 +86,17 @@ config KVM_INTEL
To compile this as a module, choose M here: the module
will be called kvm-intel.
+config KVM_SMM
+ bool "System Management Mode emulation"
+ default y
+ depends on KVM
+ help
+ Provides support for KVM to emulate System Management Mode (SMM)
+ in virtual machines. This can be used by the virtual machine
+ firmware to implement UEFI secure boot.
+
+ If unsure, say Y.
+
config X86_SGX_KVM
bool "Software Guard eXtensions (SGX) Virtualization"
depends on X86_SGX && KVM_INTEL
diff --git a/arch/x86/kvm/Makefile b/arch/x86/kvm/Makefile
index ec6f7656254b..6cf40f668277 100644
--- a/arch/x86/kvm/Makefile
+++ b/arch/x86/kvm/Makefile
@@ -20,7 +20,7 @@ endif
kvm-$(CONFIG_X86_64) += mmu/tdp_iter.o mmu/tdp_mmu.o
kvm-$(CONFIG_KVM_XEN) += xen.o
-kvm-y += smm.o
+kvm-$(CONFIG_KVM_SMM) += smm.o
kvm-intel-y += vmx/vmx.o vmx/vmenter.o vmx/pmu_intel.o vmx/vmcs12.o \
vmx/evmcs.o vmx/nested.o vmx/posted_intr.o
diff --git a/arch/x86/kvm/smm.h b/arch/x86/kvm/smm.h
index b0602a92e511..4c699fee4492 100644
--- a/arch/x86/kvm/smm.h
+++ b/arch/x86/kvm/smm.h
@@ -8,6 +8,7 @@
#define PUT_SMSTATE(type, buf, offset, val) \
*(type *)((buf) + (offset) - 0x7e00) = val
+#ifdef CONFIG_KVM_SMM
static inline int kvm_inject_smi(struct kvm_vcpu *vcpu)
{
kvm_make_request(KVM_REQ_SMI, vcpu);
@@ -23,5 +24,17 @@ void kvm_smm_changed(struct kvm_vcpu *vcpu, bool in_smm);
void enter_smm(struct kvm_vcpu *vcpu);
int emulator_leave_smm(struct x86_emulate_ctxt *ctxt);
void process_smi(struct kvm_vcpu *vcpu);
+#else
+static inline int kvm_inject_smi(struct kvm_vcpu *vcpu) { return -ENOTTY; }
+static inline bool is_smm(struct kvm_vcpu *vcpu) { return false; }
+static inline void kvm_smm_changed(struct kvm_vcpu *vcpu, bool in_smm) { WARN_ON_ONCE(1); }
+static inline void enter_smm(struct kvm_vcpu *vcpu) { WARN_ON_ONCE(1); }
+static inline void process_smi(struct kvm_vcpu *vcpu) { WARN_ON_ONCE(1); }
+
+/*
+ * emulator_leave_smm is used as a function pointer, so the
+ * stub is defined in x86.c.
+ */
+#endif
#endif
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index f4ed4a02b109..a6807492bfae 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -4151,6 +4151,8 @@ static bool svm_has_emulated_msr(struct kvm *kvm, u32 index)
case MSR_IA32_VMX_BASIC ... MSR_IA32_VMX_VMFUNC:
return false;
case MSR_IA32_SMBASE:
+ if (!IS_ENABLED(CONFIG_KVM_SMM))
+ return false;
/* SEV-ES guests do not support SMM, so report false */
if (kvm && sev_es_guest(kvm))
return false;
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index dc75de78ceb6..ce22860156c5 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -6849,6 +6849,8 @@ static bool vmx_has_emulated_msr(struct kvm *kvm, u32 index)
{
switch (index) {
case MSR_IA32_SMBASE:
+ if (!IS_ENABLED(CONFIG_KVM_SMM))
+ return false;
/*
* We cannot do SMM unless we can run the guest in big
* real mode.
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 77e0ca43ee27..14ef42c6efbd 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -3631,7 +3631,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
break;
}
case MSR_IA32_SMBASE:
- if (!msr_info->host_initiated)
+ if (!IS_ENABLED(CONFIG_KVM_SMM) || !msr_info->host_initiated)
return 1;
vcpu->arch.smbase = data;
break;
@@ -4047,7 +4047,7 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
msr_info->data = vcpu->arch.ia32_misc_enable_msr;
break;
case MSR_IA32_SMBASE:
- if (!msr_info->host_initiated)
+ if (!IS_ENABLED(CONFIG_KVM_SMM) || !msr_info->host_initiated)
return 1;
msr_info->data = vcpu->arch.smbase;
break;
@@ -4421,6 +4421,9 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
r |= KVM_X86_DISABLE_EXITS_MWAIT;
break;
case KVM_CAP_X86_SMM:
+ if (!IS_ENABLED(CONFIG_KVM_SMM))
+ break;
+
/* SMBASE is usually relocated above 1M on modern chipsets,
* and SMM handlers might indeed rely on 4G segment limits,
* so do not report SMM to be available if real mode is
@@ -5146,6 +5149,12 @@ static int kvm_vcpu_ioctl_x86_set_vcpu_events(struct kvm_vcpu *vcpu,
vcpu->arch.apic->sipi_vector = events->sipi_vector;
if (events->flags & KVM_VCPUEVENT_VALID_SMM) {
+ if (!IS_ENABLED(CONFIG_KVM_SMM) &&
+ (events->smi.smm ||
+ events->smi.pending ||
+ events->smi.smm_inside_nmi))
+ return -EINVAL;
+
if (!!(vcpu->arch.hflags & HF_SMM_MASK) != events->smi.smm) {
kvm_x86_ops.nested_ops->leave_nested(vcpu);
kvm_smm_changed(vcpu, events->smi.smm);
@@ -8021,6 +8030,14 @@ static unsigned emulator_get_hflags(struct x86_emulate_ctxt *ctxt)
return emul_to_vcpu(ctxt)->arch.hflags;
}
+#ifndef CONFIG_KVM_SMM
+static int emulator_leave_smm(struct x86_emulate_ctxt *ctxt)
+{
+ WARN_ON_ONCE(1);
+ return X86EMUL_UNHANDLEABLE;
+}
+#endif
+
static void emulator_triple_fault(struct x86_emulate_ctxt *ctxt)
{
kvm_make_request(KVM_REQ_TRIPLE_FAULT, emul_to_vcpu(ctxt));
diff --git a/tools/testing/selftests/kvm/x86_64/smm_test.c b/tools/testing/selftests/kvm/x86_64/smm_test.c
index 1f136a81858e..cb38a478e1f6 100644
--- a/tools/testing/selftests/kvm/x86_64/smm_test.c
+++ b/tools/testing/selftests/kvm/x86_64/smm_test.c
@@ -137,6 +137,8 @@ int main(int argc, char *argv[])
struct kvm_x86_state *state;
int stage, stage_reported;
+ TEST_REQUIRE(kvm_has_cap(KVM_CAP_X86_SMM));
+
/* Create VM */
vm = vm_create_with_one_vcpu(&vcpu, guest_code);
--
2.38.1

View file

@ -1,144 +0,0 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Thomas Zimmermann <tzimmermann@suse.de>
Date: Tue, 25 Jan 2022 10:12:21 +0100
Subject: [PATCH] fbdev/simplefb: Request memory region in driver
Requesting the framebuffer memory in simplefb marks the memory
range as busy. This used to be done by the firmware sysfb code,
but the driver is the correct place.
v2:
* store memory region in struct for later cleanup (Javier)
Signed-off-by: Thomas Zimmermann <tzimmermann@suse.de>
Reviewed-by: Javier Martinez Canillas <javierm@redhat.com>
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
---
drivers/video/fbdev/simplefb.c | 65 +++++++++++++++++++++++-----------
1 file changed, 45 insertions(+), 20 deletions(-)
diff --git a/drivers/video/fbdev/simplefb.c b/drivers/video/fbdev/simplefb.c
index b63074fd892e..6885ac0203de 100644
--- a/drivers/video/fbdev/simplefb.c
+++ b/drivers/video/fbdev/simplefb.c
@@ -66,16 +66,36 @@ static int simplefb_setcolreg(u_int regno, u_int red, u_int green, u_int blue,
return 0;
}
-struct simplefb_par;
+struct simplefb_par {
+ u32 palette[PSEUDO_PALETTE_SIZE];
+ struct resource *mem;
+#if defined CONFIG_OF && defined CONFIG_COMMON_CLK
+ bool clks_enabled;
+ unsigned int clk_count;
+ struct clk **clks;
+#endif
+#if defined CONFIG_OF && defined CONFIG_REGULATOR
+ bool regulators_enabled;
+ u32 regulator_count;
+ struct regulator **regulators;
+#endif
+};
+
static void simplefb_clocks_destroy(struct simplefb_par *par);
static void simplefb_regulators_destroy(struct simplefb_par *par);
static void simplefb_destroy(struct fb_info *info)
{
+ struct simplefb_par *par = info->par;
+ struct resource *mem = par->mem;
+
simplefb_regulators_destroy(info->par);
simplefb_clocks_destroy(info->par);
if (info->screen_base)
iounmap(info->screen_base);
+
+ if (mem)
+ release_mem_region(mem->start, resource_size(mem));
}
static const struct fb_ops simplefb_ops = {
@@ -169,20 +189,6 @@ static int simplefb_parse_pd(struct platform_device *pdev,
return 0;
}
-struct simplefb_par {
- u32 palette[PSEUDO_PALETTE_SIZE];
-#if defined CONFIG_OF && defined CONFIG_COMMON_CLK
- bool clks_enabled;
- unsigned int clk_count;
- struct clk **clks;
-#endif
-#if defined CONFIG_OF && defined CONFIG_REGULATOR
- bool regulators_enabled;
- u32 regulator_count;
- struct regulator **regulators;
-#endif
-};
-
#if defined CONFIG_OF && defined CONFIG_COMMON_CLK
/*
* Clock handling code.
@@ -405,7 +411,7 @@ static int simplefb_probe(struct platform_device *pdev)
struct simplefb_params params;
struct fb_info *info;
struct simplefb_par *par;
- struct resource *mem;
+ struct resource *res, *mem;
/*
* Generic drivers must not be registered if a framebuffer exists.
@@ -430,15 +436,28 @@ static int simplefb_probe(struct platform_device *pdev)
if (ret)
return ret;
- mem = platform_get_resource(pdev, IORESOURCE_MEM, 0);
- if (!mem) {
+ res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+ if (!res) {
dev_err(&pdev->dev, "No memory resource\n");
return -EINVAL;
}
+ mem = request_mem_region(res->start, resource_size(res), "simplefb");
+ if (!mem) {
+ /*
+ * We cannot make this fatal. Sometimes this comes from magic
+ * spaces our resource handlers simply don't know about. Use
+ * the I/O-memory resource as-is and try to map that instead.
+ */
+ dev_warn(&pdev->dev, "simplefb: cannot reserve video memory at %pR\n", res);
+ mem = res;
+ }
+
info = framebuffer_alloc(sizeof(struct simplefb_par), &pdev->dev);
- if (!info)
- return -ENOMEM;
+ if (!info) {
+ ret = -ENOMEM;
+ goto error_release_mem_region;
+ }
platform_set_drvdata(pdev, info);
par = info->par;
@@ -495,6 +514,9 @@ static int simplefb_probe(struct platform_device *pdev)
info->var.xres, info->var.yres,
info->var.bits_per_pixel, info->fix.line_length);
+ if (mem != res)
+ par->mem = mem; /* release in clean-up handler */
+
ret = register_framebuffer(info);
if (ret < 0) {
dev_err(&pdev->dev, "Unable to register simplefb: %d\n", ret);
@@ -513,6 +535,9 @@ static int simplefb_probe(struct platform_device *pdev)
iounmap(info->screen_base);
error_fb_release:
framebuffer_release(info);
+error_release_mem_region:
+ if (mem != res)
+ release_mem_region(mem->start, resource_size(mem));
return ret;
}

View file

@ -0,0 +1,194 @@
From 3b69dd23b153e6f4a512a9495612a2664d236872 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Tue, 25 Oct 2022 15:47:24 +0300
Subject: [PATCH] KVM: x86: compile out vendor-specific code if SMM is disabled
Vendor-specific code that deals with SMI injection and saving/restoring
SMM state is not needed if CONFIG_KVM_SMM is disabled, so remove the
four callbacks smi_allowed, enter_smm, leave_smm and enable_smi_window.
The users in svm/nested.c and x86.c also have to be compiled out; the
amount of #ifdef'ed code is small and it's not worth moving it to
smm.c.
enter_smm is now used only within #ifdef CONFIG_KVM_SMM, and the stub
can therefore be removed.
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
arch/x86/include/asm/kvm-x86-ops.h | 2 ++
arch/x86/include/asm/kvm_host.h | 2 ++
arch/x86/kvm/smm.h | 1 -
arch/x86/kvm/svm/nested.c | 2 ++
arch/x86/kvm/svm/svm.c | 4 ++++
arch/x86/kvm/vmx/vmx.c | 4 ++++
arch/x86/kvm/x86.c | 4 ++++
7 files changed, 18 insertions(+), 1 deletion(-)
diff --git a/arch/x86/include/asm/kvm-x86-ops.h b/arch/x86/include/asm/kvm-x86-ops.h
index 51f777071584..d92ec198db2a 100644
--- a/arch/x86/include/asm/kvm-x86-ops.h
+++ b/arch/x86/include/asm/kvm-x86-ops.h
@@ -110,10 +110,12 @@ KVM_X86_OP_OPTIONAL_RET0(dy_apicv_has_pending_interrupt)
KVM_X86_OP_OPTIONAL(set_hv_timer)
KVM_X86_OP_OPTIONAL(cancel_hv_timer)
KVM_X86_OP(setup_mce)
+#ifdef CONFIG_KVM_SMM
KVM_X86_OP(smi_allowed)
KVM_X86_OP(enter_smm)
KVM_X86_OP(leave_smm)
KVM_X86_OP(enable_smi_window)
+#endif
KVM_X86_OP_OPTIONAL(mem_enc_ioctl)
KVM_X86_OP_OPTIONAL(mem_enc_register_region)
KVM_X86_OP_OPTIONAL(mem_enc_unregister_region)
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 5b466eb0feca..3f6a31175db1 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1600,10 +1600,12 @@ struct kvm_x86_ops {
void (*setup_mce)(struct kvm_vcpu *vcpu);
+#ifdef CONFIG_KVM_SMM
int (*smi_allowed)(struct kvm_vcpu *vcpu, bool for_injection);
int (*enter_smm)(struct kvm_vcpu *vcpu, char *smstate);
int (*leave_smm)(struct kvm_vcpu *vcpu, const char *smstate);
void (*enable_smi_window)(struct kvm_vcpu *vcpu);
+#endif
int (*mem_enc_ioctl)(struct kvm *kvm, void __user *argp);
int (*mem_enc_register_region)(struct kvm *kvm, struct kvm_enc_region *argp);
diff --git a/arch/x86/kvm/smm.h b/arch/x86/kvm/smm.h
index 4c699fee4492..7ccce6b655ca 100644
--- a/arch/x86/kvm/smm.h
+++ b/arch/x86/kvm/smm.h
@@ -28,7 +28,6 @@ void process_smi(struct kvm_vcpu *vcpu);
static inline int kvm_inject_smi(struct kvm_vcpu *vcpu) { return -ENOTTY; }
static inline bool is_smm(struct kvm_vcpu *vcpu) { return false; }
static inline void kvm_smm_changed(struct kvm_vcpu *vcpu, bool in_smm) { WARN_ON_ONCE(1); }
-static inline void enter_smm(struct kvm_vcpu *vcpu) { WARN_ON_ONCE(1); }
static inline void process_smi(struct kvm_vcpu *vcpu) { WARN_ON_ONCE(1); }
/*
diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c
index d6cc9963b04a..ec4d6be70639 100644
--- a/arch/x86/kvm/svm/nested.c
+++ b/arch/x86/kvm/svm/nested.c
@@ -1384,6 +1384,7 @@ static int svm_check_nested_events(struct kvm_vcpu *vcpu)
return 0;
}
+#ifdef CONFIG_KVM_SMM
if (vcpu->arch.smi_pending && !svm_smi_blocked(vcpu)) {
if (block_nested_events)
return -EBUSY;
@@ -1392,6 +1393,7 @@ static int svm_check_nested_events(struct kvm_vcpu *vcpu)
nested_svm_simple_vmexit(svm, SVM_EXIT_SMI);
return 0;
}
+#endif
if (vcpu->arch.nmi_pending && !svm_nmi_blocked(vcpu)) {
if (block_nested_events)
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index a6807492bfae..e69390909d08 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -4409,6 +4409,7 @@ static void svm_setup_mce(struct kvm_vcpu *vcpu)
vcpu->arch.mcg_cap &= 0x1ff;
}
+#ifdef CONFIG_KVM_SMM
bool svm_smi_blocked(struct kvm_vcpu *vcpu)
{
struct vcpu_svm *svm = to_svm(vcpu);
@@ -4558,6 +4559,7 @@ static void svm_enable_smi_window(struct kvm_vcpu *vcpu)
/* We must be in SMM; RSM will cause a vmexit anyway. */
}
}
+#endif
static bool svm_can_emulate_instruction(struct kvm_vcpu *vcpu, int emul_type,
void *insn, int insn_len)
@@ -4841,10 +4843,12 @@ static struct kvm_x86_ops svm_x86_ops __initdata = {
.pi_update_irte = avic_pi_update_irte,
.setup_mce = svm_setup_mce,
+#ifdef CONFIG_KVM_SMM
.smi_allowed = svm_smi_allowed,
.enter_smm = svm_enter_smm,
.leave_smm = svm_leave_smm,
.enable_smi_window = svm_enable_smi_window,
+#endif
.mem_enc_ioctl = sev_mem_enc_ioctl,
.mem_enc_register_region = sev_mem_enc_register_region,
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index ce22860156c5..8cfb40cfad10 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -7913,6 +7913,7 @@ static void vmx_setup_mce(struct kvm_vcpu *vcpu)
~FEAT_CTL_LMCE_ENABLED;
}
+#ifdef CONFIG_KVM_SMM
static int vmx_smi_allowed(struct kvm_vcpu *vcpu, bool for_injection)
{
/* we need a nested vmexit to enter SMM, postpone if run is pending */
@@ -7967,6 +7968,7 @@ static void vmx_enable_smi_window(struct kvm_vcpu *vcpu)
{
/* RSM will cause a vmexit anyway. */
}
+#endif
static bool vmx_apic_init_signal_blocked(struct kvm_vcpu *vcpu)
{
@@ -8134,10 +8136,12 @@ static struct kvm_x86_ops vmx_x86_ops __initdata = {
.setup_mce = vmx_setup_mce,
+#ifdef CONFIG_KVM_SMM
.smi_allowed = vmx_smi_allowed,
.enter_smm = vmx_enter_smm,
.leave_smm = vmx_leave_smm,
.enable_smi_window = vmx_enable_smi_window,
+#endif
.can_emulate_instruction = vmx_can_emulate_instruction,
.apic_init_signal_blocked = vmx_apic_init_signal_blocked,
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 14ef42c6efbd..33c8fb8f4c61 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -9696,6 +9696,7 @@ static int inject_pending_event(struct kvm_vcpu *vcpu, bool *req_immediate_exit)
* in order to make progress and get back here for another iteration.
* The kvm_x86_ops hooks communicate this by returning -EBUSY.
*/
+#ifdef CONFIG_KVM_SMM
if (vcpu->arch.smi_pending) {
r = can_inject ? static_call(kvm_x86_smi_allowed)(vcpu, true) : -EBUSY;
if (r < 0)
@@ -9708,6 +9709,7 @@ static int inject_pending_event(struct kvm_vcpu *vcpu, bool *req_immediate_exit)
} else
static_call(kvm_x86_enable_smi_window)(vcpu);
}
+#endif
if (vcpu->arch.nmi_pending) {
r = can_inject ? static_call(kvm_x86_nmi_allowed)(vcpu, true) : -EBUSY;
@@ -12300,10 +12302,12 @@ static inline bool kvm_vcpu_has_events(struct kvm_vcpu *vcpu)
static_call(kvm_x86_nmi_allowed)(vcpu, false)))
return true;
+#ifdef CONFIG_KVM_SMM
if (kvm_test_request(KVM_REQ_SMI, vcpu) ||
(vcpu->arch.smi_pending &&
static_call(kvm_x86_smi_allowed)(vcpu, false)))
return true;
+#endif
if (kvm_arch_interrupt_allowed(vcpu) &&
(kvm_cpu_has_interrupt(vcpu) ||
--
2.38.1

View file

@ -1,81 +0,0 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Olga Kornievskaia <kolga@netapp.com>
Date: Wed, 16 Mar 2022 18:24:26 -0400
Subject: [PATCH] NFSv4.1 provide mount option to toggle trunking discovery
Introduce a new mount option -- trunkdiscovery,notrunkdiscovery -- to
toggle whether or not the client will engage in active discovery
of trunking locations.
v2 make notrunkdiscovery default
Signed-off-by: Olga Kornievskaia <kolga@netapp.com>
Fixes: 1976b2b31462 ("NFSv4.1 query for fs_location attr on a new file system")
Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
(cherry picked from commit a43bf604446414103b7535f38e739b65601c4fb2)
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
---
fs/nfs/client.c | 3 ++-
fs/nfs/fs_context.c | 8 ++++++++
include/linux/nfs_fs_sb.h | 1 +
3 files changed, 11 insertions(+), 1 deletion(-)
diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index 090b16890e3d..f303e96ce165 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -861,7 +861,8 @@ int nfs_probe_fsinfo(struct nfs_server *server, struct nfs_fh *mntfh, struct nfs
}
if (clp->rpc_ops->discover_trunking != NULL &&
- (server->caps & NFS_CAP_FS_LOCATIONS)) {
+ (server->caps & NFS_CAP_FS_LOCATIONS &&
+ (server->flags & NFS_MOUNT_TRUNK_DISCOVERY))) {
error = clp->rpc_ops->discover_trunking(server, mntfh);
if (error < 0)
return error;
diff --git a/fs/nfs/fs_context.c b/fs/nfs/fs_context.c
index 0d444a90f513..da5217abc2e5 100644
--- a/fs/nfs/fs_context.c
+++ b/fs/nfs/fs_context.c
@@ -79,6 +79,7 @@ enum nfs_param {
Opt_source,
Opt_tcp,
Opt_timeo,
+ Opt_trunkdiscovery,
Opt_udp,
Opt_v,
Opt_vers,
@@ -179,6 +180,7 @@ static const struct fs_parameter_spec nfs_fs_parameters[] = {
fsparam_string("source", Opt_source),
fsparam_flag ("tcp", Opt_tcp),
fsparam_u32 ("timeo", Opt_timeo),
+ fsparam_flag_no("trunkdiscovery", Opt_trunkdiscovery),
fsparam_flag ("udp", Opt_udp),
fsparam_flag ("v2", Opt_v),
fsparam_flag ("v3", Opt_v),
@@ -528,6 +530,12 @@ static int nfs_fs_context_parse_param(struct fs_context *fc,
else
ctx->flags &= ~NFS_MOUNT_NOCTO;
break;
+ case Opt_trunkdiscovery:
+ if (result.negated)
+ ctx->flags &= ~NFS_MOUNT_TRUNK_DISCOVERY;
+ else
+ ctx->flags |= NFS_MOUNT_TRUNK_DISCOVERY;
+ break;
case Opt_ac:
if (result.negated)
ctx->flags |= NFS_MOUNT_NOAC;
diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h
index 9a6e70ccde56..10c347416dd2 100644
--- a/include/linux/nfs_fs_sb.h
+++ b/include/linux/nfs_fs_sb.h
@@ -156,6 +156,7 @@ struct nfs_server {
#define NFS_MOUNT_SOFTREVAL 0x800000
#define NFS_MOUNT_WRITE_EAGER 0x01000000
#define NFS_MOUNT_WRITE_WAIT 0x02000000
+#define NFS_MOUNT_TRUNK_DISCOVERY 0x04000000
unsigned int fattr_valid; /* Valid attributes */
unsigned int caps; /* server capabilities */

View file

@ -1,69 +0,0 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Marc Bevand <m@zorinaq.com>
Date: Tue, 21 Dec 2021 15:31:12 -0800
Subject: [PATCH] EDAC/amd64: Add PCI device IDs for family 19h model 50h
Add the new family 19h model 50h PCI IDs (device 18h functions 0 and 6)
to support Ryzen 5000 APUs ("Cezanne").
Signed-off-by: Marc Bevand <m@zorinaq.com>
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
---
drivers/edac/amd64_edac.c | 15 +++++++++++++++
drivers/edac/amd64_edac.h | 3 +++
2 files changed, 18 insertions(+)
diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c
index c6c58f01067f..f8ef2edf8abf 100644
--- a/drivers/edac/amd64_edac.c
+++ b/drivers/edac/amd64_edac.c
@@ -2660,6 +2660,16 @@ static struct amd64_family_type family_types[] = {
.dbam_to_cs = f17_addr_mask_to_cs_size,
}
},
+ [F19_M50H_CPUS] = {
+ .ctl_name = "F19h_M50h",
+ .f0_id = PCI_DEVICE_ID_AMD_19H_M50H_DF_F0,
+ .f6_id = PCI_DEVICE_ID_AMD_19H_M50H_DF_F6,
+ .max_mcs = 2,
+ .ops = {
+ .early_channel_count = f17_early_channel_count,
+ .dbam_to_cs = f17_addr_mask_to_cs_size,
+ }
+ },
};
/*
@@ -3706,6 +3716,11 @@ static struct amd64_family_type *per_family_init(struct amd64_pvt *pvt)
pvt->ops = &family_types[F17_M70H_CPUS].ops;
fam_type->ctl_name = "F19h_M20h";
break;
+ } else if (pvt->model >= 0x50 && pvt->model <= 0x5f) {
+ fam_type = &family_types[F19_M50H_CPUS];
+ pvt->ops = &family_types[F19_M50H_CPUS].ops;
+ fam_type->ctl_name = "F19h_M50h";
+ break;
} else if (pvt->model >= 0xa0 && pvt->model <= 0xaf) {
fam_type = &family_types[F19_M10H_CPUS];
pvt->ops = &family_types[F19_M10H_CPUS].ops;
diff --git a/drivers/edac/amd64_edac.h b/drivers/edac/amd64_edac.h
index 650cab401e21..352bda9803f6 100644
--- a/drivers/edac/amd64_edac.h
+++ b/drivers/edac/amd64_edac.h
@@ -128,6 +128,8 @@
#define PCI_DEVICE_ID_AMD_19H_DF_F6 0x1656
#define PCI_DEVICE_ID_AMD_19H_M10H_DF_F0 0x14ad
#define PCI_DEVICE_ID_AMD_19H_M10H_DF_F6 0x14b3
+#define PCI_DEVICE_ID_AMD_19H_M50H_DF_F0 0x166a
+#define PCI_DEVICE_ID_AMD_19H_M50H_DF_F6 0x1670
/*
* Function 1 - Address Map
@@ -301,6 +303,7 @@ enum amd_families {
F17_M70H_CPUS,
F19_CPUS,
F19_M10H_CPUS,
+ F19_M50H_CPUS,
NUM_FAMILIES,
};

View file

@ -0,0 +1,41 @@
From 8f5e12c8768ecff91ccf335e2242ab64482c01fb Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Tue, 25 Oct 2022 15:47:25 +0300
Subject: [PATCH] KVM: x86: remove SMRAM address space if SMM is not supported
If CONFIG_KVM_SMM is not defined HF_SMM_MASK will always be zero, and
we can spare userspace the hassle of setting up the SMRAM address space
simply by reporting that only one address space is supported.
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
arch/x86/include/asm/kvm_host.h | 13 ++++++++-----
1 file changed, 8 insertions(+), 5 deletions(-)
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 3f6a31175db1..dcaa0b43baef 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1988,11 +1988,14 @@ enum {
#define HF_SMM_MASK (1 << 6)
#define HF_SMM_INSIDE_NMI_MASK (1 << 7)
-#define __KVM_VCPU_MULTIPLE_ADDRESS_SPACE
-#define KVM_ADDRESS_SPACE_NUM 2
-
-#define kvm_arch_vcpu_memslots_id(vcpu) ((vcpu)->arch.hflags & HF_SMM_MASK ? 1 : 0)
-#define kvm_memslots_for_spte_role(kvm, role) __kvm_memslots(kvm, (role).smm)
+#ifdef CONFIG_KVM_SMM
+# define __KVM_VCPU_MULTIPLE_ADDRESS_SPACE
+# define KVM_ADDRESS_SPACE_NUM 2
+# define kvm_arch_vcpu_memslots_id(vcpu) ((vcpu)->arch.hflags & HF_SMM_MASK ? 1 : 0)
+# define kvm_memslots_for_spte_role(kvm, role) __kvm_memslots(kvm, (role).smm)
+#else
+# define kvm_memslots_for_spte_role(kvm, role) __kvm_memslots(kvm, 0)
+#endif
#define KVM_ARCH_WANT_MMU_NOTIFIER
--
2.38.1

View file

@ -0,0 +1,80 @@
From 20e250dd42abe942ed3498c5b5be02699e38cc4a Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Tue, 25 Oct 2022 15:47:26 +0300
Subject: [PATCH] KVM: x86: do not define KVM_REQ_SMI if SMM disabled
This ensures that all the relevant code is compiled out, in fact
the process_smi stub can be removed too.
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
arch/x86/include/asm/kvm_host.h | 2 ++
arch/x86/kvm/smm.h | 1 -
arch/x86/kvm/x86.c | 6 ++++++
3 files changed, 8 insertions(+), 1 deletion(-)
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index dcaa0b43baef..87ee187b3f26 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -81,7 +81,9 @@
#define KVM_REQ_NMI KVM_ARCH_REQ(9)
#define KVM_REQ_PMU KVM_ARCH_REQ(10)
#define KVM_REQ_PMI KVM_ARCH_REQ(11)
+#ifdef CONFIG_KVM_SMM
#define KVM_REQ_SMI KVM_ARCH_REQ(12)
+#endif
#define KVM_REQ_MASTERCLOCK_UPDATE KVM_ARCH_REQ(13)
#define KVM_REQ_MCLOCK_INPROGRESS \
KVM_ARCH_REQ_FLAGS(14, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP)
diff --git a/arch/x86/kvm/smm.h b/arch/x86/kvm/smm.h
index 7ccce6b655ca..a6795b93ba30 100644
--- a/arch/x86/kvm/smm.h
+++ b/arch/x86/kvm/smm.h
@@ -28,7 +28,6 @@ void process_smi(struct kvm_vcpu *vcpu);
static inline int kvm_inject_smi(struct kvm_vcpu *vcpu) { return -ENOTTY; }
static inline bool is_smm(struct kvm_vcpu *vcpu) { return false; }
static inline void kvm_smm_changed(struct kvm_vcpu *vcpu, bool in_smm) { WARN_ON_ONCE(1); }
-static inline void process_smi(struct kvm_vcpu *vcpu) { WARN_ON_ONCE(1); }
/*
* emulator_leave_smm is used as a function pointer, so the
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 33c8fb8f4c61..7b109120f47d 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -5013,8 +5013,10 @@ static void kvm_vcpu_ioctl_x86_get_vcpu_events(struct kvm_vcpu *vcpu,
{
process_nmi(vcpu);
+#ifdef CONFIG_KVM_SMM
if (kvm_check_request(KVM_REQ_SMI, vcpu))
process_smi(vcpu);
+#endif
/*
* In guest mode, payload delivery should be deferred,
@@ -10027,8 +10029,10 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
}
if (kvm_check_request(KVM_REQ_STEAL_UPDATE, vcpu))
record_steal_time(vcpu);
+#ifdef CONFIG_KVM_SMM
if (kvm_check_request(KVM_REQ_SMI, vcpu))
process_smi(vcpu);
+#endif
if (kvm_check_request(KVM_REQ_NMI, vcpu))
process_nmi(vcpu);
if (kvm_check_request(KVM_REQ_PMU, vcpu))
@@ -12351,7 +12355,9 @@ bool kvm_arch_dy_runnable(struct kvm_vcpu *vcpu)
return true;
if (kvm_test_request(KVM_REQ_NMI, vcpu) ||
+#ifdef CONFIG_KVM_SMM
kvm_test_request(KVM_REQ_SMI, vcpu) ||
+#endif
kvm_test_request(KVM_REQ_EVENT, vcpu))
return true;
--
2.38.1

View file

@ -1,6 +1,6 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 From cd22e3c62bdf90babba3bdf1bc2b48e4e2e664d5 Mon Sep 17 00:00:00 2001
From: Maxim Levitsky <mlevitsk@redhat.com> From: Maxim Levitsky <mlevitsk@redhat.com>
Date: Wed, 3 Aug 2022 18:49:59 +0300 Date: Tue, 25 Oct 2022 15:47:27 +0300
Subject: [PATCH] bug: introduce ASSERT_STRUCT_OFFSET Subject: [PATCH] bug: introduce ASSERT_STRUCT_OFFSET
ASSERT_STRUCT_OFFSET allows to assert during the build of ASSERT_STRUCT_OFFSET allows to assert during the build of
@ -11,14 +11,13 @@ in it so move it to build_bug.h, so that it can be used in other
places in KVM. places in KVM.
Signed-off-by: Maxim Levitsky <mlevitsk@redhat.com> Signed-off-by: Maxim Levitsky <mlevitsk@redhat.com>
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
--- ---
arch/x86/kvm/vmx/vmcs12.h | 5 ++--- arch/x86/kvm/vmx/vmcs12.h | 5 ++---
include/linux/build_bug.h | 9 +++++++++ include/linux/build_bug.h | 9 +++++++++
2 files changed, 11 insertions(+), 3 deletions(-) 2 files changed, 11 insertions(+), 3 deletions(-)
diff --git a/arch/x86/kvm/vmx/vmcs12.h b/arch/x86/kvm/vmx/vmcs12.h diff --git a/arch/x86/kvm/vmx/vmcs12.h b/arch/x86/kvm/vmx/vmcs12.h
index 2a45f026ee11..ba8617964982 100644 index 746129ddd5ae..01936013428b 100644
--- a/arch/x86/kvm/vmx/vmcs12.h --- a/arch/x86/kvm/vmx/vmcs12.h
+++ b/arch/x86/kvm/vmx/vmcs12.h +++ b/arch/x86/kvm/vmx/vmcs12.h
@@ -208,9 +208,8 @@ struct __packed vmcs12 { @@ -208,9 +208,8 @@ struct __packed vmcs12 {
@ -51,3 +50,6 @@ index e3a0be2c90ad..3aa3640f8c18 100644
+ +
+ +
#endif /* _LINUX_BUILD_BUG_H */ #endif /* _LINUX_BUILD_BUG_H */
--
2.38.1

View file

@ -1,10 +1,10 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 From 8e1a89b66d8a8b80d135a072a0ec4147f79ac2f3 Mon Sep 17 00:00:00 2001
From: Maxim Levitsky <mlevitsk@redhat.com> From: Maxim Levitsky <mlevitsk@redhat.com>
Date: Wed, 3 Aug 2022 18:50:00 +0300 Date: Tue, 25 Oct 2022 15:47:28 +0300
Subject: [PATCH] KVM: x86: emulator: em_sysexit should update ctxt->mode Subject: [PATCH] KVM: x86: emulator: em_sysexit should update ctxt->mode
This is one of the instructions that can change the SYSEXIT is one of the instructions that can change the
processor mode. processor mode, thus ctxt->mode should be updated after it.
Note that this is likely a benign bug, because the only problematic Note that this is likely a benign bug, because the only problematic
mode change is from 32 bit to 64 bit which can lead to truncation of RIP, mode change is from 32 bit to 64 bit which can lead to truncation of RIP,
@ -12,16 +12,15 @@ and it is not possible to do with sysexit,
since sysexit running in 32 bit mode will be limited to 32 bit version. since sysexit running in 32 bit mode will be limited to 32 bit version.
Signed-off-by: Maxim Levitsky <mlevitsk@redhat.com> Signed-off-by: Maxim Levitsky <mlevitsk@redhat.com>
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
--- ---
arch/x86/kvm/emulate.c | 1 + arch/x86/kvm/emulate.c | 1 +
1 file changed, 1 insertion(+) 1 file changed, 1 insertion(+)
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index 318a78379ca6..35b12692739c 100644 index 853262d4fb6c..5ee1998dd38e 100644
--- a/arch/x86/kvm/emulate.c --- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c
@@ -2862,6 +2862,7 @@ static int em_sysexit(struct x86_emulate_ctxt *ctxt) @@ -2523,6 +2523,7 @@ static int em_sysexit(struct x86_emulate_ctxt *ctxt)
ops->set_segment(ctxt, ss_sel, &ss, 0, VCPU_SREG_SS); ops->set_segment(ctxt, ss_sel, &ss, 0, VCPU_SREG_SS);
ctxt->_eip = rdx; ctxt->_eip = rdx;
@ -29,3 +28,6 @@ index 318a78379ca6..35b12692739c 100644
*reg_write(ctxt, VCPU_REGS_RSP) = rcx; *reg_write(ctxt, VCPU_REGS_RSP) = rcx;
return X86EMUL_CONTINUE; return X86EMUL_CONTINUE;
--
2.38.1

View file

@ -1,30 +1,29 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 From d2c4fc069a073d621d2cbc97ffe9547754784639 Mon Sep 17 00:00:00 2001
From: Maxim Levitsky <mlevitsk@redhat.com> From: Maxim Levitsky <mlevitsk@redhat.com>
Date: Wed, 3 Aug 2022 18:50:01 +0300 Date: Tue, 25 Oct 2022 15:47:29 +0300
Subject: [PATCH] KVM: x86: emulator: introduce emulator_recalc_and_set_mode Subject: [PATCH] KVM: x86: emulator: introduce emulator_recalc_and_set_mode
Some instructions update the cpu execution mode, which needs Some instructions update the cpu execution mode, which needs to update the
to update the emulation mode. emulation mode.
Extract this code, and make assign_eip_far use it. Extract this code, and make assign_eip_far use it.
assign_eip_far now reads CS, instead of getting it via a parameter, assign_eip_far now reads CS, instead of getting it via a parameter,
which is ok, because callers always assign CS to the which is ok, because callers always assign CS to the same value
same value before calling it. before calling this function.
No functional change is intended. No functional change is intended.
Signed-off-by: Maxim Levitsky <mlevitsk@redhat.com> Signed-off-by: Maxim Levitsky <mlevitsk@redhat.com>
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
--- ---
arch/x86/kvm/emulate.c | 85 ++++++++++++++++++++++++++++-------------- arch/x86/kvm/emulate.c | 85 ++++++++++++++++++++++++++++--------------
1 file changed, 57 insertions(+), 28 deletions(-) 1 file changed, 57 insertions(+), 28 deletions(-)
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index 35b12692739c..6a597d68d456 100644 index 5ee1998dd38e..d75d33d60cb8 100644
--- a/arch/x86/kvm/emulate.c --- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c
@@ -795,8 +795,7 @@ static int linearize(struct x86_emulate_ctxt *ctxt, @@ -760,8 +760,7 @@ static int linearize(struct x86_emulate_ctxt *ctxt,
ctxt->mode, linear); ctxt->mode, linear);
} }
@ -34,7 +33,7 @@ index 35b12692739c..6a597d68d456 100644
{ {
ulong linear; ulong linear;
int rc; int rc;
@@ -806,41 +805,71 @@ static inline int assign_eip(struct x86_emulate_ctxt *ctxt, ulong dst, @@ -771,41 +770,71 @@ static inline int assign_eip(struct x86_emulate_ctxt *ctxt, ulong dst,
if (ctxt->op_bytes != sizeof(unsigned long)) if (ctxt->op_bytes != sizeof(unsigned long))
addr.ea = dst & ((1UL << (ctxt->op_bytes << 3)) - 1); addr.ea = dst & ((1UL << (ctxt->op_bytes << 3)) - 1);
@ -54,7 +53,7 @@ index 35b12692739c..6a597d68d456 100644
+ +
+ ctxt->ops->get_msr(ctxt, MSR_EFER, &efer); + ctxt->ops->get_msr(ctxt, MSR_EFER, &efer);
+ +
+ if (!ctxt->ops->get_cr(ctxt, 0) & X86_CR0_PE) { + if (!(ctxt->ops->get_cr(ctxt, 0) & X86_CR0_PE)) {
+ /* Real mode. cpu must not have long mode active */ + /* Real mode. cpu must not have long mode active */
+ if (efer & EFER_LMA) + if (efer & EFER_LMA)
+ return X86EMUL_UNHANDLEABLE; + return X86EMUL_UNHANDLEABLE;
@ -129,7 +128,7 @@ index 35b12692739c..6a597d68d456 100644
} }
static inline int jmp_rel(struct x86_emulate_ctxt *ctxt, int rel) static inline int jmp_rel(struct x86_emulate_ctxt *ctxt, int rel)
@@ -2154,7 +2183,7 @@ static int em_jmp_far(struct x86_emulate_ctxt *ctxt) @@ -2139,7 +2168,7 @@ static int em_jmp_far(struct x86_emulate_ctxt *ctxt)
if (rc != X86EMUL_CONTINUE) if (rc != X86EMUL_CONTINUE)
return rc; return rc;
@ -138,7 +137,7 @@ index 35b12692739c..6a597d68d456 100644
/* Error handling is not implemented. */ /* Error handling is not implemented. */
if (rc != X86EMUL_CONTINUE) if (rc != X86EMUL_CONTINUE)
return X86EMUL_UNHANDLEABLE; return X86EMUL_UNHANDLEABLE;
@@ -2235,7 +2264,7 @@ static int em_ret_far(struct x86_emulate_ctxt *ctxt) @@ -2217,7 +2246,7 @@ static int em_ret_far(struct x86_emulate_ctxt *ctxt)
&new_desc); &new_desc);
if (rc != X86EMUL_CONTINUE) if (rc != X86EMUL_CONTINUE)
return rc; return rc;
@ -147,7 +146,7 @@ index 35b12692739c..6a597d68d456 100644
/* Error handling is not implemented. */ /* Error handling is not implemented. */
if (rc != X86EMUL_CONTINUE) if (rc != X86EMUL_CONTINUE)
return X86EMUL_UNHANDLEABLE; return X86EMUL_UNHANDLEABLE;
@@ -3459,7 +3488,7 @@ static int em_call_far(struct x86_emulate_ctxt *ctxt) @@ -3117,7 +3146,7 @@ static int em_call_far(struct x86_emulate_ctxt *ctxt)
if (rc != X86EMUL_CONTINUE) if (rc != X86EMUL_CONTINUE)
return rc; return rc;
@ -156,3 +155,6 @@ index 35b12692739c..6a597d68d456 100644
if (rc != X86EMUL_CONTINUE) if (rc != X86EMUL_CONTINUE)
goto fail; goto fail;
--
2.38.1

View file

@ -1,34 +0,0 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Maxim Levitsky <mlevitsk@redhat.com>
Date: Wed, 3 Aug 2022 18:50:02 +0300
Subject: [PATCH] KVM: x86: emulator: update the emulation mode after rsm
This ensures that RIP will be correctly written back,
because the RSM instruction can switch the CPU mode from
32 bit (or less) to 64 bit.
This fixes a guest crash in case the #SMI is received
while the guest runs a code from an address > 32 bit.
Signed-off-by: Maxim Levitsky <mlevitsk@redhat.com>
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
---
arch/x86/kvm/emulate.c | 5 +++++
1 file changed, 5 insertions(+)
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index 6a597d68d456..49697d589f87 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -2639,6 +2639,11 @@ static int em_rsm(struct x86_emulate_ctxt *ctxt)
if (ret != X86EMUL_CONTINUE)
goto emulate_shutdown;
+
+ ret = emulator_recalc_and_set_mode(ctxt);
+ if (ret != X86EMUL_CONTINUE)
+ goto emulate_shutdown;
+
/*
* Note, the ctxt->ops callbacks are responsible for handling side
* effects when writing MSRs and CRs, e.g. MMU context resets, CPUID

View file

@ -1,49 +0,0 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Maxim Levitsky <mlevitsk@redhat.com>
Date: Wed, 3 Aug 2022 18:50:03 +0300
Subject: [PATCH] KVM: x86: emulator: update the emulation mode after CR0 write
CR0.PE toggles real/protected mode, thus its update
should update the emulation mode.
This is likely a benign bug because there is no writeback
of state, other than the RIP increment, and when toggling
CR0.PE, the CPU has to execute code from a very low memory address.
Also CR0.PG toggle when EFER.LMA is set, toggles the long mode.
Signed-off-by: Maxim Levitsky <mlevitsk@redhat.com>
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
---
arch/x86/kvm/emulate.c | 14 +++++++++++++-
1 file changed, 13 insertions(+), 1 deletion(-)
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index 49697d589f87..89f035fc52e7 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -3635,11 +3635,23 @@ static int em_movbe(struct x86_emulate_ctxt *ctxt)
static int em_cr_write(struct x86_emulate_ctxt *ctxt)
{
- if (ctxt->ops->set_cr(ctxt, ctxt->modrm_reg, ctxt->src.val))
+ int cr_num = ctxt->modrm_reg;
+ int r;
+
+ if (ctxt->ops->set_cr(ctxt, cr_num, ctxt->src.val))
return emulate_gp(ctxt, 0);
/* Disable writeback. */
ctxt->dst.type = OP_NONE;
+
+ if (cr_num == 0) {
+ /* CR0 write might have updated CR0.PE and/or CR0.PG
+ * which can affect the cpu execution mode */
+ r = emulator_recalc_and_set_mode(ctxt);
+ if (r != X86EMUL_CONTINUE)
+ return r;
+ }
+
return X86EMUL_CONTINUE;
}

View file

@ -0,0 +1,33 @@
From 6d83f3690f8f3026df587db2264f0917ba747de9 Mon Sep 17 00:00:00 2001
From: Maxim Levitsky <mlevitsk@redhat.com>
Date: Tue, 25 Oct 2022 15:47:30 +0300
Subject: [PATCH] KVM: x86: emulator: update the emulation mode after rsm
Update the emulation mode after RSM so that RIP will be correctly
written back, because the RSM instruction can switch the CPU mode from
32 bit (or less) to 64 bit.
This fixes a guest crash in case the #SMI is received while the guest
runs a code from an address > 32 bit.
Signed-off-by: Maxim Levitsky <mlevitsk@redhat.com>
---
arch/x86/kvm/emulate.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index d75d33d60cb8..4365137d823b 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -2313,7 +2313,7 @@ static int em_rsm(struct x86_emulate_ctxt *ctxt)
if (ctxt->ops->leave_smm(ctxt))
ctxt->ops->triple_fault(ctxt);
- return X86EMUL_CONTINUE;
+ return emulator_recalc_and_set_mode(ctxt);
}
static void
--
2.38.1

View file

@ -1,280 +0,0 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Maxim Levitsky <mlevitsk@redhat.com>
Date: Wed, 3 Aug 2022 18:50:05 +0300
Subject: [PATCH] KVM: x86: emulator/smm: add structs for KVM's smram layout
Those structs will be used to read/write the smram state image.
Also document the differences between KVM's SMRAM layout and SMRAM
layout that is used by real Intel/AMD cpus.
Signed-off-by: Maxim Levitsky <mlevitsk@redhat.com>
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
---
arch/x86/kvm/emulate.c | 6 +
arch/x86/kvm/kvm_emulate.h | 218 +++++++++++++++++++++++++++++++++++++
arch/x86/kvm/x86.c | 1 +
3 files changed, 225 insertions(+)
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index 89f035fc52e7..bfaf5d24bf1e 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -5825,3 +5825,9 @@ bool emulator_can_use_gpa(struct x86_emulate_ctxt *ctxt)
return true;
}
+
+void __init kvm_emulator_init(void)
+{
+ __check_smram32_offsets();
+ __check_smram64_offsets();
+}
diff --git a/arch/x86/kvm/kvm_emulate.h b/arch/x86/kvm/kvm_emulate.h
index fb09cd22cb7f..0b2bbcce321a 100644
--- a/arch/x86/kvm/kvm_emulate.h
+++ b/arch/x86/kvm/kvm_emulate.h
@@ -13,6 +13,7 @@
#define _ASM_X86_KVM_X86_EMULATE_H
#include <asm/desc_defs.h>
+#include <linux/build_bug.h>
#include "fpu.h"
struct x86_emulate_ctxt;
@@ -482,6 +483,223 @@ enum x86_intercept {
nr_x86_intercepts
};
+
+/* 32 bit KVM's emulated SMM layout. Loosely based on Intel's layout */
+
+struct kvm_smm_seg_state_32 {
+ u32 flags;
+ u32 limit;
+ u32 base;
+} __packed;
+
+struct kvm_smram_state_32 {
+ u32 reserved1[62];
+ u32 smbase;
+ u32 smm_revision;
+ u32 reserved2[5];
+ u32 cr4; /* CR4 is not present in Intel/AMD SMRAM image */
+ u32 reserved3[5];
+
+ /*
+ * Segment state is not present/documented in the Intel/AMD SMRAM image
+ * Instead this area on Intel/AMD contains IO/HLT restart flags.
+ */
+ struct kvm_smm_seg_state_32 ds;
+ struct kvm_smm_seg_state_32 fs;
+ struct kvm_smm_seg_state_32 gs;
+ struct kvm_smm_seg_state_32 idtr; /* IDTR has only base and limit */
+ struct kvm_smm_seg_state_32 tr;
+ u32 reserved;
+ struct kvm_smm_seg_state_32 gdtr; /* GDTR has only base and limit */
+ struct kvm_smm_seg_state_32 ldtr;
+ struct kvm_smm_seg_state_32 es;
+ struct kvm_smm_seg_state_32 cs;
+ struct kvm_smm_seg_state_32 ss;
+
+ u32 es_sel;
+ u32 cs_sel;
+ u32 ss_sel;
+ u32 ds_sel;
+ u32 fs_sel;
+ u32 gs_sel;
+ u32 ldtr_sel;
+ u32 tr_sel;
+
+ u32 dr7;
+ u32 dr6;
+ u32 gprs[8]; /* GPRS in the "natural" X86 order (EAX/ECX/EDX.../EDI) */
+ u32 eip;
+ u32 eflags;
+ u32 cr3;
+ u32 cr0;
+} __packed;
+
+
+static inline void __check_smram32_offsets(void)
+{
+#define __CHECK_SMRAM32_OFFSET(field, offset) \
+ ASSERT_STRUCT_OFFSET(struct kvm_smram_state_32, field, offset - 0xFE00)
+
+ __CHECK_SMRAM32_OFFSET(reserved1, 0xFE00);
+ __CHECK_SMRAM32_OFFSET(smbase, 0xFEF8);
+ __CHECK_SMRAM32_OFFSET(smm_revision, 0xFEFC);
+ __CHECK_SMRAM32_OFFSET(reserved2, 0xFF00);
+ __CHECK_SMRAM32_OFFSET(cr4, 0xFF14);
+ __CHECK_SMRAM32_OFFSET(reserved3, 0xFF18);
+ __CHECK_SMRAM32_OFFSET(ds, 0xFF2C);
+ __CHECK_SMRAM32_OFFSET(fs, 0xFF38);
+ __CHECK_SMRAM32_OFFSET(gs, 0xFF44);
+ __CHECK_SMRAM32_OFFSET(idtr, 0xFF50);
+ __CHECK_SMRAM32_OFFSET(tr, 0xFF5C);
+ __CHECK_SMRAM32_OFFSET(gdtr, 0xFF6C);
+ __CHECK_SMRAM32_OFFSET(ldtr, 0xFF78);
+ __CHECK_SMRAM32_OFFSET(es, 0xFF84);
+ __CHECK_SMRAM32_OFFSET(cs, 0xFF90);
+ __CHECK_SMRAM32_OFFSET(ss, 0xFF9C);
+ __CHECK_SMRAM32_OFFSET(es_sel, 0xFFA8);
+ __CHECK_SMRAM32_OFFSET(cs_sel, 0xFFAC);
+ __CHECK_SMRAM32_OFFSET(ss_sel, 0xFFB0);
+ __CHECK_SMRAM32_OFFSET(ds_sel, 0xFFB4);
+ __CHECK_SMRAM32_OFFSET(fs_sel, 0xFFB8);
+ __CHECK_SMRAM32_OFFSET(gs_sel, 0xFFBC);
+ __CHECK_SMRAM32_OFFSET(ldtr_sel, 0xFFC0);
+ __CHECK_SMRAM32_OFFSET(tr_sel, 0xFFC4);
+ __CHECK_SMRAM32_OFFSET(dr7, 0xFFC8);
+ __CHECK_SMRAM32_OFFSET(dr6, 0xFFCC);
+ __CHECK_SMRAM32_OFFSET(gprs, 0xFFD0);
+ __CHECK_SMRAM32_OFFSET(eip, 0xFFF0);
+ __CHECK_SMRAM32_OFFSET(eflags, 0xFFF4);
+ __CHECK_SMRAM32_OFFSET(cr3, 0xFFF8);
+ __CHECK_SMRAM32_OFFSET(cr0, 0xFFFC);
+#undef __CHECK_SMRAM32_OFFSET
+}
+
+
+/* 64 bit KVM's emulated SMM layout. Based on AMD64 layout */
+
+struct kvm_smm_seg_state_64 {
+ u16 selector;
+ u16 attributes;
+ u32 limit;
+ u64 base;
+};
+
+struct kvm_smram_state_64 {
+
+ struct kvm_smm_seg_state_64 es;
+ struct kvm_smm_seg_state_64 cs;
+ struct kvm_smm_seg_state_64 ss;
+ struct kvm_smm_seg_state_64 ds;
+ struct kvm_smm_seg_state_64 fs;
+ struct kvm_smm_seg_state_64 gs;
+ struct kvm_smm_seg_state_64 gdtr; /* GDTR has only base and limit*/
+ struct kvm_smm_seg_state_64 ldtr;
+ struct kvm_smm_seg_state_64 idtr; /* IDTR has only base and limit*/
+ struct kvm_smm_seg_state_64 tr;
+
+ /* I/O restart and auto halt restart are not implemented by KVM */
+ u64 io_restart_rip;
+ u64 io_restart_rcx;
+ u64 io_restart_rsi;
+ u64 io_restart_rdi;
+ u32 io_restart_dword;
+ u32 reserved1;
+ u8 io_inst_restart;
+ u8 auto_hlt_restart;
+ u8 reserved2[6];
+
+ u64 efer;
+
+ /*
+ * Two fields below are implemented on AMD only, to store
+ * SVM guest vmcb address if the #SMI was received while in the guest mode.
+ */
+ u64 svm_guest_flag;
+ u64 svm_guest_vmcb_gpa;
+ u64 svm_guest_virtual_int; /* unknown purpose, not implemented */
+
+ u32 reserved3[3];
+ u32 smm_revison;
+ u32 smbase;
+ u32 reserved4[5];
+
+ /* ssp and svm_* fields below are not implemented by KVM */
+ u64 ssp;
+ u64 svm_guest_pat;
+ u64 svm_host_efer;
+ u64 svm_host_cr4;
+ u64 svm_host_cr3;
+ u64 svm_host_cr0;
+
+ u64 cr4;
+ u64 cr3;
+ u64 cr0;
+ u64 dr7;
+ u64 dr6;
+ u64 rflags;
+ u64 rip;
+ u64 gprs[16]; /* GPRS in a reversed "natural" X86 order (R15/R14/../RCX/RAX.) */
+};
+
+
+static inline void __check_smram64_offsets(void)
+{
+#define __CHECK_SMRAM64_OFFSET(field, offset) \
+ ASSERT_STRUCT_OFFSET(struct kvm_smram_state_64, field, offset - 0xFE00)
+
+ __CHECK_SMRAM64_OFFSET(es, 0xFE00);
+ __CHECK_SMRAM64_OFFSET(cs, 0xFE10);
+ __CHECK_SMRAM64_OFFSET(ss, 0xFE20);
+ __CHECK_SMRAM64_OFFSET(ds, 0xFE30);
+ __CHECK_SMRAM64_OFFSET(fs, 0xFE40);
+ __CHECK_SMRAM64_OFFSET(gs, 0xFE50);
+ __CHECK_SMRAM64_OFFSET(gdtr, 0xFE60);
+ __CHECK_SMRAM64_OFFSET(ldtr, 0xFE70);
+ __CHECK_SMRAM64_OFFSET(idtr, 0xFE80);
+ __CHECK_SMRAM64_OFFSET(tr, 0xFE90);
+ __CHECK_SMRAM64_OFFSET(io_restart_rip, 0xFEA0);
+ __CHECK_SMRAM64_OFFSET(io_restart_rcx, 0xFEA8);
+ __CHECK_SMRAM64_OFFSET(io_restart_rsi, 0xFEB0);
+ __CHECK_SMRAM64_OFFSET(io_restart_rdi, 0xFEB8);
+ __CHECK_SMRAM64_OFFSET(io_restart_dword, 0xFEC0);
+ __CHECK_SMRAM64_OFFSET(reserved1, 0xFEC4);
+ __CHECK_SMRAM64_OFFSET(io_inst_restart, 0xFEC8);
+ __CHECK_SMRAM64_OFFSET(auto_hlt_restart, 0xFEC9);
+ __CHECK_SMRAM64_OFFSET(reserved2, 0xFECA);
+ __CHECK_SMRAM64_OFFSET(efer, 0xFED0);
+ __CHECK_SMRAM64_OFFSET(svm_guest_flag, 0xFED8);
+ __CHECK_SMRAM64_OFFSET(svm_guest_vmcb_gpa, 0xFEE0);
+ __CHECK_SMRAM64_OFFSET(svm_guest_virtual_int, 0xFEE8);
+ __CHECK_SMRAM64_OFFSET(reserved3, 0xFEF0);
+ __CHECK_SMRAM64_OFFSET(smm_revison, 0xFEFC);
+ __CHECK_SMRAM64_OFFSET(smbase, 0xFF00);
+ __CHECK_SMRAM64_OFFSET(reserved4, 0xFF04);
+ __CHECK_SMRAM64_OFFSET(ssp, 0xFF18);
+ __CHECK_SMRAM64_OFFSET(svm_guest_pat, 0xFF20);
+ __CHECK_SMRAM64_OFFSET(svm_host_efer, 0xFF28);
+ __CHECK_SMRAM64_OFFSET(svm_host_cr4, 0xFF30);
+ __CHECK_SMRAM64_OFFSET(svm_host_cr3, 0xFF38);
+ __CHECK_SMRAM64_OFFSET(svm_host_cr0, 0xFF40);
+ __CHECK_SMRAM64_OFFSET(cr4, 0xFF48);
+ __CHECK_SMRAM64_OFFSET(cr3, 0xFF50);
+ __CHECK_SMRAM64_OFFSET(cr0, 0xFF58);
+ __CHECK_SMRAM64_OFFSET(dr7, 0xFF60);
+ __CHECK_SMRAM64_OFFSET(dr6, 0xFF68);
+ __CHECK_SMRAM64_OFFSET(rflags, 0xFF70);
+ __CHECK_SMRAM64_OFFSET(rip, 0xFF78);
+ __CHECK_SMRAM64_OFFSET(gprs, 0xFF80);
+#undef __CHECK_SMRAM64_OFFSET
+}
+
+union kvm_smram {
+ struct kvm_smram_state_64 smram64;
+ struct kvm_smram_state_32 smram32;
+ u8 bytes[512];
+};
+
+void __init kvm_emulator_init(void);
+
+
/* Host execution mode. */
#if defined(CONFIG_X86_32)
#define X86EMUL_MODE_HOST X86EMUL_MODE_PROT32
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 922fc258c37f..07575e5eb254 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -12442,6 +12442,7 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_vmgexit_msr_protocol_exit);
static int __init kvm_x86_init(void)
{
kvm_mmu_x86_module_init();
+ kvm_emulator_init();
return 0;
}
module_init(kvm_x86_init);

View file

@ -0,0 +1,52 @@
From 3a99d9781d2d3ccf58d70b1dc7edfda886f5d271 Mon Sep 17 00:00:00 2001
From: Maxim Levitsky <mlevitsk@redhat.com>
Date: Tue, 25 Oct 2022 15:47:31 +0300
Subject: [PATCH] KVM: x86: emulator: update the emulation mode after CR0 write
Update the emulation mode when handling writes to CR0, because
toggling CR0.PE switches between Real and Protected Mode, and toggling
CR0.PG when EFER.LME=1 switches between Long and Protected Mode.
This is likely a benign bug because there is no writeback of state,
other than the RIP increment, and when toggling CR0.PE, the CPU has
to execute code from a very low memory address.
Signed-off-by: Maxim Levitsky <mlevitsk@redhat.com>
---
arch/x86/kvm/emulate.c | 16 +++++++++++++++-
1 file changed, 15 insertions(+), 1 deletion(-)
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index 4365137d823b..5d7d4c1be843 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -3288,11 +3288,25 @@ static int em_movbe(struct x86_emulate_ctxt *ctxt)
static int em_cr_write(struct x86_emulate_ctxt *ctxt)
{
- if (ctxt->ops->set_cr(ctxt, ctxt->modrm_reg, ctxt->src.val))
+ int cr_num = ctxt->modrm_reg;
+ int r;
+
+ if (ctxt->ops->set_cr(ctxt, cr_num, ctxt->src.val))
return emulate_gp(ctxt, 0);
/* Disable writeback. */
ctxt->dst.type = OP_NONE;
+
+ if (cr_num == 0) {
+ /*
+ * CR0 write might have updated CR0.PE and/or CR0.PG
+ * which can affect the cpu's execution mode.
+ */
+ r = emulator_recalc_and_set_mode(ctxt);
+ if (r != X86EMUL_CONTINUE)
+ return r;
+ }
+
return X86EMUL_CONTINUE;
}
--
2.38.1

View file

@ -1,214 +0,0 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Maxim Levitsky <mlevitsk@redhat.com>
Date: Wed, 3 Aug 2022 18:50:06 +0300
Subject: [PATCH] KVM: x86: emulator/smm: use smram structs in the common code
Switch from using a raw array to 'union kvm_smram'.
Signed-off-by: Maxim Levitsky <mlevitsk@redhat.com>
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
---
arch/x86/include/asm/kvm_host.h | 5 +++--
arch/x86/kvm/emulate.c | 12 +++++++-----
arch/x86/kvm/kvm_emulate.h | 3 ++-
arch/x86/kvm/svm/svm.c | 8 ++++++--
arch/x86/kvm/vmx/vmx.c | 4 ++--
arch/x86/kvm/x86.c | 16 ++++++++--------
6 files changed, 28 insertions(+), 20 deletions(-)
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 867febee8fc3..fb48dd8773e1 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -200,6 +200,7 @@ typedef enum exit_fastpath_completion fastpath_t;
struct x86_emulate_ctxt;
struct x86_exception;
+union kvm_smram;
enum x86_intercept;
enum x86_intercept_stage;
@@ -1463,8 +1464,8 @@ struct kvm_x86_ops {
void (*setup_mce)(struct kvm_vcpu *vcpu);
int (*smi_allowed)(struct kvm_vcpu *vcpu, bool for_injection);
- int (*enter_smm)(struct kvm_vcpu *vcpu, char *smstate);
- int (*leave_smm)(struct kvm_vcpu *vcpu, const char *smstate);
+ int (*enter_smm)(struct kvm_vcpu *vcpu, union kvm_smram *smram);
+ int (*leave_smm)(struct kvm_vcpu *vcpu, const union kvm_smram *smram);
void (*enable_smi_window)(struct kvm_vcpu *vcpu);
int (*mem_enc_op)(struct kvm *kvm, void __user *argp);
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index bfaf5d24bf1e..730c3e2662d6 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -2567,16 +2567,18 @@ static int rsm_load_state_64(struct x86_emulate_ctxt *ctxt,
static int em_rsm(struct x86_emulate_ctxt *ctxt)
{
unsigned long cr0, cr4, efer;
- char buf[512];
+ const union kvm_smram smram;
u64 smbase;
int ret;
+ BUILD_BUG_ON(sizeof(smram) != 512);
+
if ((ctxt->ops->get_hflags(ctxt) & X86EMUL_SMM_MASK) == 0)
return emulate_ud(ctxt);
smbase = ctxt->ops->get_smbase(ctxt);
- ret = ctxt->ops->read_phys(ctxt, smbase + 0xfe00, buf, sizeof(buf));
+ ret = ctxt->ops->read_phys(ctxt, smbase + 0xfe00, (void *)&smram, sizeof(smram));
if (ret != X86EMUL_CONTINUE)
return X86EMUL_UNHANDLEABLE;
@@ -2626,15 +2628,15 @@ static int em_rsm(struct x86_emulate_ctxt *ctxt)
* state (e.g. enter guest mode) before loading state from the SMM
* state-save area.
*/
- if (ctxt->ops->leave_smm(ctxt, buf))
+ if (ctxt->ops->leave_smm(ctxt, &smram))
goto emulate_shutdown;
#ifdef CONFIG_X86_64
if (emulator_has_longmode(ctxt))
- ret = rsm_load_state_64(ctxt, buf);
+ ret = rsm_load_state_64(ctxt, (const char *)&smram);
else
#endif
- ret = rsm_load_state_32(ctxt, buf);
+ ret = rsm_load_state_32(ctxt, (const char *)&smram);
if (ret != X86EMUL_CONTINUE)
goto emulate_shutdown;
diff --git a/arch/x86/kvm/kvm_emulate.h b/arch/x86/kvm/kvm_emulate.h
index 0b2bbcce321a..3b37b3e17379 100644
--- a/arch/x86/kvm/kvm_emulate.h
+++ b/arch/x86/kvm/kvm_emulate.h
@@ -19,6 +19,7 @@
struct x86_emulate_ctxt;
enum x86_intercept;
enum x86_intercept_stage;
+union kvm_smram;
struct x86_exception {
u8 vector;
@@ -233,7 +234,7 @@ struct x86_emulate_ops {
unsigned (*get_hflags)(struct x86_emulate_ctxt *ctxt);
void (*exiting_smm)(struct x86_emulate_ctxt *ctxt);
- int (*leave_smm)(struct x86_emulate_ctxt *ctxt, const char *smstate);
+ int (*leave_smm)(struct x86_emulate_ctxt *ctxt, const union kvm_smram *smram);
void (*triple_fault)(struct x86_emulate_ctxt *ctxt);
int (*set_xcr)(struct x86_emulate_ctxt *ctxt, u32 index, u64 xcr);
};
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index 21f747eacc9a..d903120811b9 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -4302,12 +4302,14 @@ static int svm_smi_allowed(struct kvm_vcpu *vcpu, bool for_injection)
return !svm_smi_blocked(vcpu);
}
-static int svm_enter_smm(struct kvm_vcpu *vcpu, char *smstate)
+static int svm_enter_smm(struct kvm_vcpu *vcpu, union kvm_smram *smram)
{
struct vcpu_svm *svm = to_svm(vcpu);
struct kvm_host_map map_save;
int ret;
+ char *smstate = (char *)smram;
+
if (!is_guest_mode(vcpu))
return 0;
@@ -4349,7 +4351,7 @@ static int svm_enter_smm(struct kvm_vcpu *vcpu, char *smstate)
return 0;
}
-static int svm_leave_smm(struct kvm_vcpu *vcpu, const char *smstate)
+static int svm_leave_smm(struct kvm_vcpu *vcpu, const union kvm_smram *smram)
{
struct vcpu_svm *svm = to_svm(vcpu);
struct kvm_host_map map, map_save;
@@ -4357,6 +4359,8 @@ static int svm_leave_smm(struct kvm_vcpu *vcpu, const char *smstate)
struct vmcb *vmcb12;
int ret;
+ const char *smstate = (const char *)smram;
+
if (!guest_cpuid_has(vcpu, X86_FEATURE_LM))
return 0;
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 417176817d80..a45a43bcc844 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -7594,7 +7594,7 @@ static int vmx_smi_allowed(struct kvm_vcpu *vcpu, bool for_injection)
return !is_smm(vcpu);
}
-static int vmx_enter_smm(struct kvm_vcpu *vcpu, char *smstate)
+static int vmx_enter_smm(struct kvm_vcpu *vcpu, union kvm_smram *smram)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
@@ -7608,7 +7608,7 @@ static int vmx_enter_smm(struct kvm_vcpu *vcpu, char *smstate)
return 0;
}
-static int vmx_leave_smm(struct kvm_vcpu *vcpu, const char *smstate)
+static int vmx_leave_smm(struct kvm_vcpu *vcpu, const union kvm_smram *smram)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
int ret;
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 07575e5eb254..2ebbb441880c 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -7312,9 +7312,9 @@ static void emulator_exiting_smm(struct x86_emulate_ctxt *ctxt)
}
static int emulator_leave_smm(struct x86_emulate_ctxt *ctxt,
- const char *smstate)
+ const union kvm_smram *smram)
{
- return static_call(kvm_x86_leave_smm)(emul_to_vcpu(ctxt), smstate);
+ return static_call(kvm_x86_leave_smm)(emul_to_vcpu(ctxt), smram);
}
static void emulator_triple_fault(struct x86_emulate_ctxt *ctxt)
@@ -9164,25 +9164,25 @@ static void enter_smm(struct kvm_vcpu *vcpu)
struct kvm_segment cs, ds;
struct desc_ptr dt;
unsigned long cr0;
- char buf[512];
+ union kvm_smram smram;
- memset(buf, 0, 512);
+ memset(smram.bytes, 0, sizeof(smram.bytes));
#ifdef CONFIG_X86_64
if (guest_cpuid_has(vcpu, X86_FEATURE_LM))
- enter_smm_save_state_64(vcpu, buf);
+ enter_smm_save_state_64(vcpu, (char *)&smram);
else
#endif
- enter_smm_save_state_32(vcpu, buf);
+ enter_smm_save_state_32(vcpu, (char *)&smram);
/*
* Give enter_smm() a chance to make ISA-specific changes to the vCPU
* state (e.g. leave guest mode) after we've saved the state into the
* SMM state-save area.
*/
- static_call(kvm_x86_enter_smm)(vcpu, buf);
+ static_call(kvm_x86_enter_smm)(vcpu, &smram);
kvm_smm_changed(vcpu, true);
- kvm_vcpu_write_guest(vcpu, vcpu->arch.smbase + 0xfe00, buf, sizeof(buf));
+ kvm_vcpu_write_guest(vcpu, vcpu->arch.smbase + 0xfe00, &smram, sizeof(smram));
if (static_call(kvm_x86_get_nmi_mask)(vcpu))
vcpu->arch.hflags |= HF_SMM_INSIDE_NMI_MASK;

View file

@ -0,0 +1,59 @@
From 80377e1fac45a547a43511ee5c8d783a7ec37be5 Mon Sep 17 00:00:00 2001
From: Maxim Levitsky <mlevitsk@redhat.com>
Date: Tue, 25 Oct 2022 15:47:32 +0300
Subject: [PATCH] KVM: x86: smm: number of GPRs in the SMRAM image depends on
the image format
On a 64-bit host, if the guest doesn't have X86_FEATURE_LM, KVM will
access 16 GPRs in the 32-bit SMRAM image, causing an out-of-bounds RAM
access.
On a 32-bit host, rsm_load_state_64/enter_smm_save_state_64
are compiled out, thus the access overflow can't happen.
Fixes: b443183a25ab61 ("KVM: x86: Reduce the number of emulator GPRs to '8' for 32-bit KVM")
Signed-off-by: Maxim Levitsky <mlevitsk@redhat.com>
Reviewed-by: Sean Christopherson <seanjc@google.com>
---
arch/x86/kvm/emulate.c | 1 +
arch/x86/kvm/smm.c | 4 ++--
2 files changed, 3 insertions(+), 2 deletions(-)
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index 5d7d4c1be843..02a01c65471b 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -2305,6 +2305,7 @@ static int em_lseg(struct x86_emulate_ctxt *ctxt)
return rc;
}
+
static int em_rsm(struct x86_emulate_ctxt *ctxt)
{
if ((ctxt->ops->get_hflags(ctxt) & X86EMUL_SMM_MASK) == 0)
diff --git a/arch/x86/kvm/smm.c b/arch/x86/kvm/smm.c
index 41ca128478fc..b290ad14070f 100644
--- a/arch/x86/kvm/smm.c
+++ b/arch/x86/kvm/smm.c
@@ -382,7 +382,7 @@ static int rsm_load_state_32(struct x86_emulate_ctxt *ctxt,
ctxt->eflags = GET_SMSTATE(u32, smstate, 0x7ff4) | X86_EFLAGS_FIXED;
ctxt->_eip = GET_SMSTATE(u32, smstate, 0x7ff0);
- for (i = 0; i < NR_EMULATOR_GPRS; i++)
+ for (i = 0; i < 8; i++)
*reg_write(ctxt, i) = GET_SMSTATE(u32, smstate, 0x7fd0 + i * 4);
val = GET_SMSTATE(u32, smstate, 0x7fcc);
@@ -438,7 +438,7 @@ static int rsm_load_state_64(struct x86_emulate_ctxt *ctxt,
u64 val, cr0, cr3, cr4;
int i, r;
- for (i = 0; i < NR_EMULATOR_GPRS; i++)
+ for (i = 0; i < 16; i++)
*reg_write(ctxt, i) = GET_SMSTATE(u64, smstate, 0x7ff8 - i * 8);
ctxt->_eip = GET_SMSTATE(u64, smstate, 0x7f78);
--
2.38.1

View file

@ -1,268 +0,0 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Maxim Levitsky <mlevitsk@redhat.com>
Date: Wed, 3 Aug 2022 18:50:07 +0300
Subject: [PATCH] KVM: x86: emulator/smm: use smram struct for 32 bit smram
load/restore
Use kvm_smram_state_32 struct to save/restore 32 bit SMM state
(used when X86_FEATURE_LM is not present in the guest CPUID).
Signed-off-by: Maxim Levitsky <mlevitsk@redhat.com>
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
---
arch/x86/kvm/emulate.c | 81 +++++++++++++++---------------------------
arch/x86/kvm/x86.c | 75 +++++++++++++++++---------------------
2 files changed, 60 insertions(+), 96 deletions(-)
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index 730c3e2662d6..ad5d2ab9ab84 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -2344,25 +2344,17 @@ static void rsm_set_desc_flags(struct desc_struct *desc, u32 flags)
desc->type = (flags >> 8) & 15;
}
-static int rsm_load_seg_32(struct x86_emulate_ctxt *ctxt, const char *smstate,
+static void rsm_load_seg_32(struct x86_emulate_ctxt *ctxt,
+ const struct kvm_smm_seg_state_32 *state,
+ u16 selector,
int n)
{
struct desc_struct desc;
- int offset;
- u16 selector;
-
- selector = GET_SMSTATE(u32, smstate, 0x7fa8 + n * 4);
-
- if (n < 3)
- offset = 0x7f84 + n * 12;
- else
- offset = 0x7f2c + (n - 3) * 12;
- set_desc_base(&desc, GET_SMSTATE(u32, smstate, offset + 8));
- set_desc_limit(&desc, GET_SMSTATE(u32, smstate, offset + 4));
- rsm_set_desc_flags(&desc, GET_SMSTATE(u32, smstate, offset));
+ set_desc_base(&desc, state->base);
+ set_desc_limit(&desc, state->limit);
+ rsm_set_desc_flags(&desc, state->flags);
ctxt->ops->set_segment(ctxt, selector, &desc, 0, n);
- return X86EMUL_CONTINUE;
}
#ifdef CONFIG_X86_64
@@ -2433,63 +2425,46 @@ static int rsm_enter_protected_mode(struct x86_emulate_ctxt *ctxt,
}
static int rsm_load_state_32(struct x86_emulate_ctxt *ctxt,
- const char *smstate)
+ const struct kvm_smram_state_32 *smstate)
{
- struct desc_struct desc;
struct desc_ptr dt;
- u16 selector;
- u32 val, cr0, cr3, cr4;
int i;
- cr0 = GET_SMSTATE(u32, smstate, 0x7ffc);
- cr3 = GET_SMSTATE(u32, smstate, 0x7ff8);
- ctxt->eflags = GET_SMSTATE(u32, smstate, 0x7ff4) | X86_EFLAGS_FIXED;
- ctxt->_eip = GET_SMSTATE(u32, smstate, 0x7ff0);
+ ctxt->eflags = smstate->eflags | X86_EFLAGS_FIXED;
+ ctxt->_eip = smstate->eip;
for (i = 0; i < 8; i++)
- *reg_write(ctxt, i) = GET_SMSTATE(u32, smstate, 0x7fd0 + i * 4);
-
- val = GET_SMSTATE(u32, smstate, 0x7fcc);
+ *reg_write(ctxt, i) = smstate->gprs[i];
- if (ctxt->ops->set_dr(ctxt, 6, val))
+ if (ctxt->ops->set_dr(ctxt, 6, smstate->dr6))
return X86EMUL_UNHANDLEABLE;
-
- val = GET_SMSTATE(u32, smstate, 0x7fc8);
-
- if (ctxt->ops->set_dr(ctxt, 7, val))
+ if (ctxt->ops->set_dr(ctxt, 7, smstate->dr7))
return X86EMUL_UNHANDLEABLE;
- selector = GET_SMSTATE(u32, smstate, 0x7fc4);
- set_desc_base(&desc, GET_SMSTATE(u32, smstate, 0x7f64));
- set_desc_limit(&desc, GET_SMSTATE(u32, smstate, 0x7f60));
- rsm_set_desc_flags(&desc, GET_SMSTATE(u32, smstate, 0x7f5c));
- ctxt->ops->set_segment(ctxt, selector, &desc, 0, VCPU_SREG_TR);
+ rsm_load_seg_32(ctxt, &smstate->tr, smstate->tr_sel, VCPU_SREG_TR);
+ rsm_load_seg_32(ctxt, &smstate->ldtr, smstate->ldtr_sel, VCPU_SREG_LDTR);
- selector = GET_SMSTATE(u32, smstate, 0x7fc0);
- set_desc_base(&desc, GET_SMSTATE(u32, smstate, 0x7f80));
- set_desc_limit(&desc, GET_SMSTATE(u32, smstate, 0x7f7c));
- rsm_set_desc_flags(&desc, GET_SMSTATE(u32, smstate, 0x7f78));
- ctxt->ops->set_segment(ctxt, selector, &desc, 0, VCPU_SREG_LDTR);
- dt.address = GET_SMSTATE(u32, smstate, 0x7f74);
- dt.size = GET_SMSTATE(u32, smstate, 0x7f70);
+ dt.address = smstate->gdtr.base;
+ dt.size = smstate->gdtr.limit;
ctxt->ops->set_gdt(ctxt, &dt);
- dt.address = GET_SMSTATE(u32, smstate, 0x7f58);
- dt.size = GET_SMSTATE(u32, smstate, 0x7f54);
+ dt.address = smstate->idtr.base;
+ dt.size = smstate->idtr.limit;
ctxt->ops->set_idt(ctxt, &dt);
- for (i = 0; i < 6; i++) {
- int r = rsm_load_seg_32(ctxt, smstate, i);
- if (r != X86EMUL_CONTINUE)
- return r;
- }
+ rsm_load_seg_32(ctxt, &smstate->es, smstate->es_sel, VCPU_SREG_ES);
+ rsm_load_seg_32(ctxt, &smstate->cs, smstate->cs_sel, VCPU_SREG_CS);
+ rsm_load_seg_32(ctxt, &smstate->ss, smstate->ss_sel, VCPU_SREG_SS);
- cr4 = GET_SMSTATE(u32, smstate, 0x7f14);
+ rsm_load_seg_32(ctxt, &smstate->ds, smstate->ds_sel, VCPU_SREG_DS);
+ rsm_load_seg_32(ctxt, &smstate->fs, smstate->fs_sel, VCPU_SREG_FS);
+ rsm_load_seg_32(ctxt, &smstate->gs, smstate->gs_sel, VCPU_SREG_GS);
- ctxt->ops->set_smbase(ctxt, GET_SMSTATE(u32, smstate, 0x7ef8));
+ ctxt->ops->set_smbase(ctxt, smstate->smbase);
- return rsm_enter_protected_mode(ctxt, cr0, cr3, cr4);
+ return rsm_enter_protected_mode(ctxt, smstate->cr0,
+ smstate->cr3, smstate->cr4);
}
#ifdef CONFIG_X86_64
@@ -2636,7 +2611,7 @@ static int em_rsm(struct x86_emulate_ctxt *ctxt)
ret = rsm_load_state_64(ctxt, (const char *)&smram);
else
#endif
- ret = rsm_load_state_32(ctxt, (const char *)&smram);
+ ret = rsm_load_state_32(ctxt, &smram.smram32);
if (ret != X86EMUL_CONTINUE)
goto emulate_shutdown;
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 2ebbb441880c..8a6b9bffc770 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -9018,22 +9018,18 @@ static u32 enter_smm_get_segment_flags(struct kvm_segment *seg)
return flags;
}
-static void enter_smm_save_seg_32(struct kvm_vcpu *vcpu, char *buf, int n)
+static void enter_smm_save_seg_32(struct kvm_vcpu *vcpu,
+ struct kvm_smm_seg_state_32 *state,
+ u32 *selector,
+ int n)
{
struct kvm_segment seg;
- int offset;
kvm_get_segment(vcpu, &seg, n);
- put_smstate(u32, buf, 0x7fa8 + n * 4, seg.selector);
-
- if (n < 3)
- offset = 0x7f84 + n * 12;
- else
- offset = 0x7f2c + (n - 3) * 12;
-
- put_smstate(u32, buf, offset + 8, seg.base);
- put_smstate(u32, buf, offset + 4, seg.limit);
- put_smstate(u32, buf, offset, enter_smm_get_segment_flags(&seg));
+ *selector = seg.selector;
+ state->base = seg.base;
+ state->limit = seg.limit;
+ state->flags = enter_smm_get_segment_flags(&seg);
}
#ifdef CONFIG_X86_64
@@ -9054,54 +9050,47 @@ static void enter_smm_save_seg_64(struct kvm_vcpu *vcpu, char *buf, int n)
}
#endif
-static void enter_smm_save_state_32(struct kvm_vcpu *vcpu, char *buf)
+static void enter_smm_save_state_32(struct kvm_vcpu *vcpu, struct kvm_smram_state_32 *smram)
{
struct desc_ptr dt;
- struct kvm_segment seg;
unsigned long val;
int i;
- put_smstate(u32, buf, 0x7ffc, kvm_read_cr0(vcpu));
- put_smstate(u32, buf, 0x7ff8, kvm_read_cr3(vcpu));
- put_smstate(u32, buf, 0x7ff4, kvm_get_rflags(vcpu));
- put_smstate(u32, buf, 0x7ff0, kvm_rip_read(vcpu));
+ smram->cr0 = kvm_read_cr0(vcpu);
+ smram->cr3 = kvm_read_cr3(vcpu);
+ smram->eflags = kvm_get_rflags(vcpu);
+ smram->eip = kvm_rip_read(vcpu);
for (i = 0; i < 8; i++)
- put_smstate(u32, buf, 0x7fd0 + i * 4, kvm_register_read_raw(vcpu, i));
+ smram->gprs[i] = kvm_register_read_raw(vcpu, i);
kvm_get_dr(vcpu, 6, &val);
- put_smstate(u32, buf, 0x7fcc, (u32)val);
+ smram->dr6 = (u32)val;
kvm_get_dr(vcpu, 7, &val);
- put_smstate(u32, buf, 0x7fc8, (u32)val);
+ smram->dr7 = (u32)val;
- kvm_get_segment(vcpu, &seg, VCPU_SREG_TR);
- put_smstate(u32, buf, 0x7fc4, seg.selector);
- put_smstate(u32, buf, 0x7f64, seg.base);
- put_smstate(u32, buf, 0x7f60, seg.limit);
- put_smstate(u32, buf, 0x7f5c, enter_smm_get_segment_flags(&seg));
-
- kvm_get_segment(vcpu, &seg, VCPU_SREG_LDTR);
- put_smstate(u32, buf, 0x7fc0, seg.selector);
- put_smstate(u32, buf, 0x7f80, seg.base);
- put_smstate(u32, buf, 0x7f7c, seg.limit);
- put_smstate(u32, buf, 0x7f78, enter_smm_get_segment_flags(&seg));
+ enter_smm_save_seg_32(vcpu, &smram->tr, &smram->tr_sel, VCPU_SREG_TR);
+ enter_smm_save_seg_32(vcpu, &smram->ldtr, &smram->ldtr_sel, VCPU_SREG_LDTR);
static_call(kvm_x86_get_gdt)(vcpu, &dt);
- put_smstate(u32, buf, 0x7f74, dt.address);
- put_smstate(u32, buf, 0x7f70, dt.size);
+ smram->gdtr.base = dt.address;
+ smram->gdtr.limit = dt.size;
static_call(kvm_x86_get_idt)(vcpu, &dt);
- put_smstate(u32, buf, 0x7f58, dt.address);
- put_smstate(u32, buf, 0x7f54, dt.size);
+ smram->idtr.base = dt.address;
+ smram->idtr.limit = dt.size;
- for (i = 0; i < 6; i++)
- enter_smm_save_seg_32(vcpu, buf, i);
+ enter_smm_save_seg_32(vcpu, &smram->es, &smram->es_sel, VCPU_SREG_ES);
+ enter_smm_save_seg_32(vcpu, &smram->cs, &smram->cs_sel, VCPU_SREG_CS);
+ enter_smm_save_seg_32(vcpu, &smram->ss, &smram->ss_sel, VCPU_SREG_SS);
- put_smstate(u32, buf, 0x7f14, kvm_read_cr4(vcpu));
+ enter_smm_save_seg_32(vcpu, &smram->ds, &smram->ds_sel, VCPU_SREG_DS);
+ enter_smm_save_seg_32(vcpu, &smram->fs, &smram->fs_sel, VCPU_SREG_FS);
+ enter_smm_save_seg_32(vcpu, &smram->gs, &smram->gs_sel, VCPU_SREG_GS);
- /* revision id */
- put_smstate(u32, buf, 0x7efc, 0x00020000);
- put_smstate(u32, buf, 0x7ef8, vcpu->arch.smbase);
+ smram->cr4 = kvm_read_cr4(vcpu);
+ smram->smm_revision = 0x00020000;
+ smram->smbase = vcpu->arch.smbase;
}
#ifdef CONFIG_X86_64
@@ -9172,7 +9161,7 @@ static void enter_smm(struct kvm_vcpu *vcpu)
enter_smm_save_state_64(vcpu, (char *)&smram);
else
#endif
- enter_smm_save_state_32(vcpu, (char *)&smram);
+ enter_smm_save_state_32(vcpu, &smram.smram32);
/*
* Give enter_smm() a chance to make ISA-specific changes to the vCPU

View file

@ -0,0 +1,68 @@
From a01916088ebb15b357d5c110270b797295d02f78 Mon Sep 17 00:00:00 2001
From: Maxim Levitsky <mlevitsk@redhat.com>
Date: Tue, 25 Oct 2022 15:47:33 +0300
Subject: [PATCH] KVM: x86: smm: check for failures on smm entry
In the rare case of a failure on SMM entry, KVM should at
least terminate the VM instead of continuing in an undefined state.
Suggested-by: Sean Christopherson <seanjc@google.com>
Signed-off-by: Maxim Levitsky <mlevitsk@redhat.com>
---
arch/x86/kvm/smm.c | 19 +++++++++++++++----
1 file changed, 15 insertions(+), 4 deletions(-)
diff --git a/arch/x86/kvm/smm.c b/arch/x86/kvm/smm.c
index b290ad14070f..1191a79cf027 100644
--- a/arch/x86/kvm/smm.c
+++ b/arch/x86/kvm/smm.c
@@ -211,11 +211,17 @@ void enter_smm(struct kvm_vcpu *vcpu)
* Give enter_smm() a chance to make ISA-specific changes to the vCPU
* state (e.g. leave guest mode) after we've saved the state into the
* SMM state-save area.
+ *
+ * Kill the VM in the unlikely case of failure, because the VM
+ * can be in undefined state in this case.
*/
- static_call(kvm_x86_enter_smm)(vcpu, buf);
+ if (static_call(kvm_x86_enter_smm)(vcpu, buf))
+ goto error;
kvm_smm_changed(vcpu, true);
- kvm_vcpu_write_guest(vcpu, vcpu->arch.smbase + 0xfe00, buf, sizeof(buf));
+
+ if (kvm_vcpu_write_guest(vcpu, vcpu->arch.smbase + 0xfe00, buf, sizeof(buf)))
+ goto error;
if (static_call(kvm_x86_get_nmi_mask)(vcpu))
vcpu->arch.hflags |= HF_SMM_INSIDE_NMI_MASK;
@@ -235,7 +241,8 @@ void enter_smm(struct kvm_vcpu *vcpu)
dt.address = dt.size = 0;
static_call(kvm_x86_set_idt)(vcpu, &dt);
- kvm_set_dr(vcpu, 7, DR7_FIXED_1);
+ if (WARN_ON_ONCE(kvm_set_dr(vcpu, 7, DR7_FIXED_1)))
+ goto error;
cs.selector = (vcpu->arch.smbase >> 4) & 0xffff;
cs.base = vcpu->arch.smbase;
@@ -264,11 +271,15 @@ void enter_smm(struct kvm_vcpu *vcpu)
#ifdef CONFIG_X86_64
if (guest_cpuid_has(vcpu, X86_FEATURE_LM))
- static_call(kvm_x86_set_efer)(vcpu, 0);
+ if (static_call(kvm_x86_set_efer)(vcpu, 0))
+ goto error;
#endif
kvm_update_cpuid_runtime(vcpu);
kvm_mmu_reset_context(vcpu);
+ return;
+error:
+ kvm_vm_dead(vcpu->kvm);
}
static void rsm_set_desc_flags(struct kvm_segment *desc, u32 flags)
--
2.38.1

View file

@ -1,279 +0,0 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Maxim Levitsky <mlevitsk@redhat.com>
Date: Wed, 3 Aug 2022 18:50:08 +0300
Subject: [PATCH] KVM: x86: emulator/smm: use smram struct for 64 bit smram
load/restore
Use kvm_smram_state_64 struct to save/restore the 64 bit SMM state
(used when X86_FEATURE_LM is present in the guest CPUID,
regardless of 32-bitness of the guest).
Signed-off-by: Maxim Levitsky <mlevitsk@redhat.com>
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
---
arch/x86/kvm/emulate.c | 88 ++++++++++++++----------------------------
arch/x86/kvm/x86.c | 75 ++++++++++++++++-------------------
2 files changed, 62 insertions(+), 101 deletions(-)
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index ad5d2ab9ab84..4eb35a0a33a5 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -2358,24 +2358,16 @@ static void rsm_load_seg_32(struct x86_emulate_ctxt *ctxt,
}
#ifdef CONFIG_X86_64
-static int rsm_load_seg_64(struct x86_emulate_ctxt *ctxt, const char *smstate,
- int n)
+static void rsm_load_seg_64(struct x86_emulate_ctxt *ctxt,
+ const struct kvm_smm_seg_state_64 *state,
+ int n)
{
struct desc_struct desc;
- int offset;
- u16 selector;
- u32 base3;
-
- offset = 0x7e00 + n * 16;
-
- selector = GET_SMSTATE(u16, smstate, offset);
- rsm_set_desc_flags(&desc, GET_SMSTATE(u16, smstate, offset + 2) << 8);
- set_desc_limit(&desc, GET_SMSTATE(u32, smstate, offset + 4));
- set_desc_base(&desc, GET_SMSTATE(u32, smstate, offset + 8));
- base3 = GET_SMSTATE(u32, smstate, offset + 12);
- ctxt->ops->set_segment(ctxt, selector, &desc, base3, n);
- return X86EMUL_CONTINUE;
+ rsm_set_desc_flags(&desc, state->attributes << 8);
+ set_desc_limit(&desc, state->limit);
+ set_desc_base(&desc, (u32)state->base);
+ ctxt->ops->set_segment(ctxt, state->selector, &desc, state->base >> 32, n);
}
#endif
@@ -2469,71 +2461,49 @@ static int rsm_load_state_32(struct x86_emulate_ctxt *ctxt,
#ifdef CONFIG_X86_64
static int rsm_load_state_64(struct x86_emulate_ctxt *ctxt,
- const char *smstate)
+ const struct kvm_smram_state_64 *smstate)
{
- struct desc_struct desc;
struct desc_ptr dt;
- u64 val, cr0, cr3, cr4;
- u32 base3;
- u16 selector;
int i, r;
for (i = 0; i < 16; i++)
- *reg_write(ctxt, i) = GET_SMSTATE(u64, smstate, 0x7ff8 - i * 8);
+ *reg_write(ctxt, i) = smstate->gprs[15 - i];
- ctxt->_eip = GET_SMSTATE(u64, smstate, 0x7f78);
- ctxt->eflags = GET_SMSTATE(u32, smstate, 0x7f70) | X86_EFLAGS_FIXED;
+ ctxt->_eip = smstate->rip;
+ ctxt->eflags = smstate->rflags | X86_EFLAGS_FIXED;
- val = GET_SMSTATE(u64, smstate, 0x7f68);
-
- if (ctxt->ops->set_dr(ctxt, 6, val))
+ if (ctxt->ops->set_dr(ctxt, 6, smstate->dr6))
return X86EMUL_UNHANDLEABLE;
-
- val = GET_SMSTATE(u64, smstate, 0x7f60);
-
- if (ctxt->ops->set_dr(ctxt, 7, val))
+ if (ctxt->ops->set_dr(ctxt, 7, smstate->dr7))
return X86EMUL_UNHANDLEABLE;
- cr0 = GET_SMSTATE(u64, smstate, 0x7f58);
- cr3 = GET_SMSTATE(u64, smstate, 0x7f50);
- cr4 = GET_SMSTATE(u64, smstate, 0x7f48);
- ctxt->ops->set_smbase(ctxt, GET_SMSTATE(u32, smstate, 0x7f00));
- val = GET_SMSTATE(u64, smstate, 0x7ed0);
+ ctxt->ops->set_smbase(ctxt, smstate->smbase);
- if (ctxt->ops->set_msr(ctxt, MSR_EFER, val & ~EFER_LMA))
+ if (ctxt->ops->set_msr(ctxt, MSR_EFER, smstate->efer & ~EFER_LMA))
return X86EMUL_UNHANDLEABLE;
- selector = GET_SMSTATE(u32, smstate, 0x7e90);
- rsm_set_desc_flags(&desc, GET_SMSTATE(u32, smstate, 0x7e92) << 8);
- set_desc_limit(&desc, GET_SMSTATE(u32, smstate, 0x7e94));
- set_desc_base(&desc, GET_SMSTATE(u32, smstate, 0x7e98));
- base3 = GET_SMSTATE(u32, smstate, 0x7e9c);
- ctxt->ops->set_segment(ctxt, selector, &desc, base3, VCPU_SREG_TR);
+ rsm_load_seg_64(ctxt, &smstate->tr, VCPU_SREG_TR);
- dt.size = GET_SMSTATE(u32, smstate, 0x7e84);
- dt.address = GET_SMSTATE(u64, smstate, 0x7e88);
+ dt.size = smstate->idtr.limit;
+ dt.address = smstate->idtr.base;
ctxt->ops->set_idt(ctxt, &dt);
- selector = GET_SMSTATE(u32, smstate, 0x7e70);
- rsm_set_desc_flags(&desc, GET_SMSTATE(u32, smstate, 0x7e72) << 8);
- set_desc_limit(&desc, GET_SMSTATE(u32, smstate, 0x7e74));
- set_desc_base(&desc, GET_SMSTATE(u32, smstate, 0x7e78));
- base3 = GET_SMSTATE(u32, smstate, 0x7e7c);
- ctxt->ops->set_segment(ctxt, selector, &desc, base3, VCPU_SREG_LDTR);
+ rsm_load_seg_64(ctxt, &smstate->ldtr, VCPU_SREG_LDTR);
- dt.size = GET_SMSTATE(u32, smstate, 0x7e64);
- dt.address = GET_SMSTATE(u64, smstate, 0x7e68);
+ dt.size = smstate->gdtr.limit;
+ dt.address = smstate->gdtr.base;
ctxt->ops->set_gdt(ctxt, &dt);
- r = rsm_enter_protected_mode(ctxt, cr0, cr3, cr4);
+ r = rsm_enter_protected_mode(ctxt, smstate->cr0, smstate->cr3, smstate->cr4);
if (r != X86EMUL_CONTINUE)
return r;
- for (i = 0; i < 6; i++) {
- r = rsm_load_seg_64(ctxt, smstate, i);
- if (r != X86EMUL_CONTINUE)
- return r;
- }
+ rsm_load_seg_64(ctxt, &smstate->es, VCPU_SREG_ES);
+ rsm_load_seg_64(ctxt, &smstate->cs, VCPU_SREG_CS);
+ rsm_load_seg_64(ctxt, &smstate->ss, VCPU_SREG_SS);
+ rsm_load_seg_64(ctxt, &smstate->ds, VCPU_SREG_DS);
+ rsm_load_seg_64(ctxt, &smstate->fs, VCPU_SREG_FS);
+ rsm_load_seg_64(ctxt, &smstate->gs, VCPU_SREG_GS);
return X86EMUL_CONTINUE;
}
@@ -2608,7 +2578,7 @@ static int em_rsm(struct x86_emulate_ctxt *ctxt)
#ifdef CONFIG_X86_64
if (emulator_has_longmode(ctxt))
- ret = rsm_load_state_64(ctxt, (const char *)&smram);
+ ret = rsm_load_state_64(ctxt, &smram.smram64);
else
#endif
ret = rsm_load_state_32(ctxt, &smram.smram32);
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 8a6b9bffc770..d00b82ee6ca4 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -9033,20 +9033,17 @@ static void enter_smm_save_seg_32(struct kvm_vcpu *vcpu,
}
#ifdef CONFIG_X86_64
-static void enter_smm_save_seg_64(struct kvm_vcpu *vcpu, char *buf, int n)
+static void enter_smm_save_seg_64(struct kvm_vcpu *vcpu,
+ struct kvm_smm_seg_state_64 *state,
+ int n)
{
struct kvm_segment seg;
- int offset;
- u16 flags;
kvm_get_segment(vcpu, &seg, n);
- offset = 0x7e00 + n * 16;
-
- flags = enter_smm_get_segment_flags(&seg) >> 8;
- put_smstate(u16, buf, offset, seg.selector);
- put_smstate(u16, buf, offset + 2, flags);
- put_smstate(u32, buf, offset + 4, seg.limit);
- put_smstate(u64, buf, offset + 8, seg.base);
+ state->selector = seg.selector;
+ state->attributes = enter_smm_get_segment_flags(&seg) >> 8;
+ state->limit = seg.limit;
+ state->base = seg.base;
}
#endif
@@ -9094,57 +9091,51 @@ static void enter_smm_save_state_32(struct kvm_vcpu *vcpu, struct kvm_smram_stat
}
#ifdef CONFIG_X86_64
-static void enter_smm_save_state_64(struct kvm_vcpu *vcpu, char *buf)
+static void enter_smm_save_state_64(struct kvm_vcpu *vcpu, struct kvm_smram_state_64 *smram)
{
struct desc_ptr dt;
- struct kvm_segment seg;
unsigned long val;
int i;
for (i = 0; i < 16; i++)
- put_smstate(u64, buf, 0x7ff8 - i * 8, kvm_register_read_raw(vcpu, i));
+ smram->gprs[15 - i] = kvm_register_read_raw(vcpu, i);
+
+ smram->rip = kvm_rip_read(vcpu);
+ smram->rflags = kvm_get_rflags(vcpu);
- put_smstate(u64, buf, 0x7f78, kvm_rip_read(vcpu));
- put_smstate(u32, buf, 0x7f70, kvm_get_rflags(vcpu));
kvm_get_dr(vcpu, 6, &val);
- put_smstate(u64, buf, 0x7f68, val);
+ smram->dr6 = val;
kvm_get_dr(vcpu, 7, &val);
- put_smstate(u64, buf, 0x7f60, val);
-
- put_smstate(u64, buf, 0x7f58, kvm_read_cr0(vcpu));
- put_smstate(u64, buf, 0x7f50, kvm_read_cr3(vcpu));
- put_smstate(u64, buf, 0x7f48, kvm_read_cr4(vcpu));
+ smram->dr7 = val;
- put_smstate(u32, buf, 0x7f00, vcpu->arch.smbase);
+ smram->cr0 = kvm_read_cr0(vcpu);
+ smram->cr3 = kvm_read_cr3(vcpu);
+ smram->cr4 = kvm_read_cr4(vcpu);
- /* revision id */
- put_smstate(u32, buf, 0x7efc, 0x00020064);
+ smram->smbase = vcpu->arch.smbase;
+ smram->smm_revison = 0x00020064;
- put_smstate(u64, buf, 0x7ed0, vcpu->arch.efer);
+ smram->efer = vcpu->arch.efer;
- kvm_get_segment(vcpu, &seg, VCPU_SREG_TR);
- put_smstate(u16, buf, 0x7e90, seg.selector);
- put_smstate(u16, buf, 0x7e92, enter_smm_get_segment_flags(&seg) >> 8);
- put_smstate(u32, buf, 0x7e94, seg.limit);
- put_smstate(u64, buf, 0x7e98, seg.base);
+ enter_smm_save_seg_64(vcpu, &smram->tr, VCPU_SREG_TR);
static_call(kvm_x86_get_idt)(vcpu, &dt);
- put_smstate(u32, buf, 0x7e84, dt.size);
- put_smstate(u64, buf, 0x7e88, dt.address);
+ smram->idtr.limit = dt.size;
+ smram->idtr.base = dt.address;
- kvm_get_segment(vcpu, &seg, VCPU_SREG_LDTR);
- put_smstate(u16, buf, 0x7e70, seg.selector);
- put_smstate(u16, buf, 0x7e72, enter_smm_get_segment_flags(&seg) >> 8);
- put_smstate(u32, buf, 0x7e74, seg.limit);
- put_smstate(u64, buf, 0x7e78, seg.base);
+ enter_smm_save_seg_64(vcpu, &smram->ldtr, VCPU_SREG_LDTR);
static_call(kvm_x86_get_gdt)(vcpu, &dt);
- put_smstate(u32, buf, 0x7e64, dt.size);
- put_smstate(u64, buf, 0x7e68, dt.address);
+ smram->gdtr.limit = dt.size;
+ smram->gdtr.base = dt.address;
- for (i = 0; i < 6; i++)
- enter_smm_save_seg_64(vcpu, buf, i);
+ enter_smm_save_seg_64(vcpu, &smram->es, VCPU_SREG_ES);
+ enter_smm_save_seg_64(vcpu, &smram->cs, VCPU_SREG_CS);
+ enter_smm_save_seg_64(vcpu, &smram->ss, VCPU_SREG_SS);
+ enter_smm_save_seg_64(vcpu, &smram->ds, VCPU_SREG_DS);
+ enter_smm_save_seg_64(vcpu, &smram->fs, VCPU_SREG_FS);
+ enter_smm_save_seg_64(vcpu, &smram->gs, VCPU_SREG_GS);
}
#endif
@@ -9158,7 +9149,7 @@ static void enter_smm(struct kvm_vcpu *vcpu)
memset(smram.bytes, 0, sizeof(smram.bytes));
#ifdef CONFIG_X86_64
if (guest_cpuid_has(vcpu, X86_FEATURE_LM))
- enter_smm_save_state_64(vcpu, (char *)&smram);
+ enter_smm_save_state_64(vcpu, &smram.smram64);
else
#endif
enter_smm_save_state_32(vcpu, &smram.smram32);

View file

@ -0,0 +1,284 @@
From dae1d13d62cdc44a137c51fbc92c5037a8f104c5 Mon Sep 17 00:00:00 2001
From: Maxim Levitsky <mlevitsk@redhat.com>
Date: Tue, 25 Oct 2022 15:47:34 +0300
Subject: [PATCH] KVM: x86: smm: add structs for KVM's smram layout
Add structs that will be used to define and read/write KVM's
SMRAM layout, instead of reading/writing to raw offsets.
Also document the differences between KVM's SMRAM layout and the SMRAM
layout that is used by real Intel/AMD CPUs.
Signed-off-by: Maxim Levitsky <mlevitsk@redhat.com>
---
arch/x86/kvm/smm.c | 94 +++++++++++++++++++++++++++++++++
arch/x86/kvm/smm.h | 127 +++++++++++++++++++++++++++++++++++++++++++++
2 files changed, 221 insertions(+)
diff --git a/arch/x86/kvm/smm.c b/arch/x86/kvm/smm.c
index 1191a79cf027..01dab9fc3ab4 100644
--- a/arch/x86/kvm/smm.c
+++ b/arch/x86/kvm/smm.c
@@ -8,6 +8,97 @@
#include "cpuid.h"
#include "trace.h"
+#define CHECK_SMRAM32_OFFSET(field, offset) \
+ ASSERT_STRUCT_OFFSET(struct kvm_smram_state_32, field, offset - 0xFE00)
+
+#define CHECK_SMRAM64_OFFSET(field, offset) \
+ ASSERT_STRUCT_OFFSET(struct kvm_smram_state_64, field, offset - 0xFE00)
+
+static void check_smram_offsets(void)
+{
+ /* 32 bit SMRAM image */
+ CHECK_SMRAM32_OFFSET(reserved1, 0xFE00);
+ CHECK_SMRAM32_OFFSET(smbase, 0xFEF8);
+ CHECK_SMRAM32_OFFSET(smm_revision, 0xFEFC);
+ CHECK_SMRAM32_OFFSET(reserved2, 0xFF00);
+ CHECK_SMRAM32_OFFSET(cr4, 0xFF14);
+ CHECK_SMRAM32_OFFSET(reserved3, 0xFF18);
+ CHECK_SMRAM32_OFFSET(ds, 0xFF2C);
+ CHECK_SMRAM32_OFFSET(fs, 0xFF38);
+ CHECK_SMRAM32_OFFSET(gs, 0xFF44);
+ CHECK_SMRAM32_OFFSET(idtr, 0xFF50);
+ CHECK_SMRAM32_OFFSET(tr, 0xFF5C);
+ CHECK_SMRAM32_OFFSET(gdtr, 0xFF6C);
+ CHECK_SMRAM32_OFFSET(ldtr, 0xFF78);
+ CHECK_SMRAM32_OFFSET(es, 0xFF84);
+ CHECK_SMRAM32_OFFSET(cs, 0xFF90);
+ CHECK_SMRAM32_OFFSET(ss, 0xFF9C);
+ CHECK_SMRAM32_OFFSET(es_sel, 0xFFA8);
+ CHECK_SMRAM32_OFFSET(cs_sel, 0xFFAC);
+ CHECK_SMRAM32_OFFSET(ss_sel, 0xFFB0);
+ CHECK_SMRAM32_OFFSET(ds_sel, 0xFFB4);
+ CHECK_SMRAM32_OFFSET(fs_sel, 0xFFB8);
+ CHECK_SMRAM32_OFFSET(gs_sel, 0xFFBC);
+ CHECK_SMRAM32_OFFSET(ldtr_sel, 0xFFC0);
+ CHECK_SMRAM32_OFFSET(tr_sel, 0xFFC4);
+ CHECK_SMRAM32_OFFSET(dr7, 0xFFC8);
+ CHECK_SMRAM32_OFFSET(dr6, 0xFFCC);
+ CHECK_SMRAM32_OFFSET(gprs, 0xFFD0);
+ CHECK_SMRAM32_OFFSET(eip, 0xFFF0);
+ CHECK_SMRAM32_OFFSET(eflags, 0xFFF4);
+ CHECK_SMRAM32_OFFSET(cr3, 0xFFF8);
+ CHECK_SMRAM32_OFFSET(cr0, 0xFFFC);
+
+ /* 64 bit SMRAM image */
+ CHECK_SMRAM64_OFFSET(es, 0xFE00);
+ CHECK_SMRAM64_OFFSET(cs, 0xFE10);
+ CHECK_SMRAM64_OFFSET(ss, 0xFE20);
+ CHECK_SMRAM64_OFFSET(ds, 0xFE30);
+ CHECK_SMRAM64_OFFSET(fs, 0xFE40);
+ CHECK_SMRAM64_OFFSET(gs, 0xFE50);
+ CHECK_SMRAM64_OFFSET(gdtr, 0xFE60);
+ CHECK_SMRAM64_OFFSET(ldtr, 0xFE70);
+ CHECK_SMRAM64_OFFSET(idtr, 0xFE80);
+ CHECK_SMRAM64_OFFSET(tr, 0xFE90);
+ CHECK_SMRAM64_OFFSET(io_restart_rip, 0xFEA0);
+ CHECK_SMRAM64_OFFSET(io_restart_rcx, 0xFEA8);
+ CHECK_SMRAM64_OFFSET(io_restart_rsi, 0xFEB0);
+ CHECK_SMRAM64_OFFSET(io_restart_rdi, 0xFEB8);
+ CHECK_SMRAM64_OFFSET(io_restart_dword, 0xFEC0);
+ CHECK_SMRAM64_OFFSET(reserved1, 0xFEC4);
+ CHECK_SMRAM64_OFFSET(io_inst_restart, 0xFEC8);
+ CHECK_SMRAM64_OFFSET(auto_hlt_restart, 0xFEC9);
+ CHECK_SMRAM64_OFFSET(reserved2, 0xFECA);
+ CHECK_SMRAM64_OFFSET(efer, 0xFED0);
+ CHECK_SMRAM64_OFFSET(svm_guest_flag, 0xFED8);
+ CHECK_SMRAM64_OFFSET(svm_guest_vmcb_gpa, 0xFEE0);
+ CHECK_SMRAM64_OFFSET(svm_guest_virtual_int, 0xFEE8);
+ CHECK_SMRAM64_OFFSET(reserved3, 0xFEF0);
+ CHECK_SMRAM64_OFFSET(smm_revison, 0xFEFC);
+ CHECK_SMRAM64_OFFSET(smbase, 0xFF00);
+ CHECK_SMRAM64_OFFSET(reserved4, 0xFF04);
+ CHECK_SMRAM64_OFFSET(ssp, 0xFF18);
+ CHECK_SMRAM64_OFFSET(svm_guest_pat, 0xFF20);
+ CHECK_SMRAM64_OFFSET(svm_host_efer, 0xFF28);
+ CHECK_SMRAM64_OFFSET(svm_host_cr4, 0xFF30);
+ CHECK_SMRAM64_OFFSET(svm_host_cr3, 0xFF38);
+ CHECK_SMRAM64_OFFSET(svm_host_cr0, 0xFF40);
+ CHECK_SMRAM64_OFFSET(cr4, 0xFF48);
+ CHECK_SMRAM64_OFFSET(cr3, 0xFF50);
+ CHECK_SMRAM64_OFFSET(cr0, 0xFF58);
+ CHECK_SMRAM64_OFFSET(dr7, 0xFF60);
+ CHECK_SMRAM64_OFFSET(dr6, 0xFF68);
+ CHECK_SMRAM64_OFFSET(rflags, 0xFF70);
+ CHECK_SMRAM64_OFFSET(rip, 0xFF78);
+ CHECK_SMRAM64_OFFSET(gprs, 0xFF80);
+
+ BUILD_BUG_ON(sizeof(union kvm_smram) != 512);
+}
+
+#undef CHECK_SMRAM64_OFFSET
+#undef CHECK_SMRAM32_OFFSET
+
+
void kvm_smm_changed(struct kvm_vcpu *vcpu, bool entering_smm)
{
trace_kvm_smm_transition(vcpu->vcpu_id, vcpu->arch.smbase, entering_smm);
@@ -199,6 +290,8 @@ void enter_smm(struct kvm_vcpu *vcpu)
unsigned long cr0;
char buf[512];
+ check_smram_offsets();
+
memset(buf, 0, 512);
#ifdef CONFIG_X86_64
if (guest_cpuid_has(vcpu, X86_FEATURE_LM))
@@ -449,6 +542,7 @@ static int rsm_load_state_64(struct x86_emulate_ctxt *ctxt,
u64 val, cr0, cr3, cr4;
int i, r;
+
for (i = 0; i < 16; i++)
*reg_write(ctxt, i) = GET_SMSTATE(u64, smstate, 0x7ff8 - i * 8);
diff --git a/arch/x86/kvm/smm.h b/arch/x86/kvm/smm.h
index a6795b93ba30..bf5c7ffeb11e 100644
--- a/arch/x86/kvm/smm.h
+++ b/arch/x86/kvm/smm.h
@@ -2,6 +2,8 @@
#ifndef ASM_KVM_SMM_H
#define ASM_KVM_SMM_H
+#include <linux/build_bug.h>
+
#define GET_SMSTATE(type, buf, offset) \
(*(type *)((buf) + (offset) - 0x7e00))
@@ -9,6 +11,131 @@
*(type *)((buf) + (offset) - 0x7e00) = val
#ifdef CONFIG_KVM_SMM
+
+
+/* 32 bit KVM's emulated SMM layout. Loosely based on Intel's layout */
+
+struct kvm_smm_seg_state_32 {
+ u32 flags;
+ u32 limit;
+ u32 base;
+} __packed;
+
+struct kvm_smram_state_32 {
+ u32 reserved1[62];
+ u32 smbase;
+ u32 smm_revision;
+ u32 reserved2[5];
+ u32 cr4; /* CR4 is not present in Intel/AMD SMRAM image */
+ u32 reserved3[5];
+
+ /*
+ * Segment state is not present/documented in the Intel/AMD SMRAM image
+ * Instead this area on Intel/AMD contains IO/HLT restart flags.
+ */
+ struct kvm_smm_seg_state_32 ds;
+ struct kvm_smm_seg_state_32 fs;
+ struct kvm_smm_seg_state_32 gs;
+ struct kvm_smm_seg_state_32 idtr; /* IDTR has only base and limit */
+ struct kvm_smm_seg_state_32 tr;
+ u32 reserved;
+ struct kvm_smm_seg_state_32 gdtr; /* GDTR has only base and limit */
+ struct kvm_smm_seg_state_32 ldtr;
+ struct kvm_smm_seg_state_32 es;
+ struct kvm_smm_seg_state_32 cs;
+ struct kvm_smm_seg_state_32 ss;
+
+ u32 es_sel;
+ u32 cs_sel;
+ u32 ss_sel;
+ u32 ds_sel;
+ u32 fs_sel;
+ u32 gs_sel;
+ u32 ldtr_sel;
+ u32 tr_sel;
+
+ u32 dr7;
+ u32 dr6;
+ u32 gprs[8]; /* GPRS in the "natural" X86 order (EAX/ECX/EDX.../EDI) */
+ u32 eip;
+ u32 eflags;
+ u32 cr3;
+ u32 cr0;
+} __packed;
+
+
+/* 64 bit KVM's emulated SMM layout. Based on AMD64 layout */
+
+struct kvm_smm_seg_state_64 {
+ u16 selector;
+ u16 attributes;
+ u32 limit;
+ u64 base;
+};
+
+struct kvm_smram_state_64 {
+
+ struct kvm_smm_seg_state_64 es;
+ struct kvm_smm_seg_state_64 cs;
+ struct kvm_smm_seg_state_64 ss;
+ struct kvm_smm_seg_state_64 ds;
+ struct kvm_smm_seg_state_64 fs;
+ struct kvm_smm_seg_state_64 gs;
+ struct kvm_smm_seg_state_64 gdtr; /* GDTR has only base and limit*/
+ struct kvm_smm_seg_state_64 ldtr;
+ struct kvm_smm_seg_state_64 idtr; /* IDTR has only base and limit*/
+ struct kvm_smm_seg_state_64 tr;
+
+ /* I/O restart and auto halt restart are not implemented by KVM */
+ u64 io_restart_rip;
+ u64 io_restart_rcx;
+ u64 io_restart_rsi;
+ u64 io_restart_rdi;
+ u32 io_restart_dword;
+ u32 reserved1;
+ u8 io_inst_restart;
+ u8 auto_hlt_restart;
+ u8 reserved2[6];
+
+ u64 efer;
+
+ /*
+ * Two fields below are implemented on AMD only, to store
+ * SVM guest vmcb address if the #SMI was received while in the guest mode.
+ */
+ u64 svm_guest_flag;
+ u64 svm_guest_vmcb_gpa;
+ u64 svm_guest_virtual_int; /* unknown purpose, not implemented */
+
+ u32 reserved3[3];
+ u32 smm_revison;
+ u32 smbase;
+ u32 reserved4[5];
+
+ /* ssp and svm_* fields below are not implemented by KVM */
+ u64 ssp;
+ u64 svm_guest_pat;
+ u64 svm_host_efer;
+ u64 svm_host_cr4;
+ u64 svm_host_cr3;
+ u64 svm_host_cr0;
+
+ u64 cr4;
+ u64 cr3;
+ u64 cr0;
+ u64 dr7;
+ u64 dr6;
+ u64 rflags;
+ u64 rip;
+ u64 gprs[16]; /* GPRS in a reversed "natural" X86 order (R15/R14/../RCX/RAX.) */
+};
+
+union kvm_smram {
+ struct kvm_smram_state_64 smram64;
+ struct kvm_smram_state_32 smram32;
+ u8 bytes[512];
+};
+
static inline int kvm_inject_smi(struct kvm_vcpu *vcpu)
{
kvm_make_request(KVM_REQ_SMI, vcpu);
--
2.38.1

View file

@ -0,0 +1,194 @@
From e13349f01bc9b4b94dd995d60fad196d3074a868 Mon Sep 17 00:00:00 2001
From: Maxim Levitsky <mlevitsk@redhat.com>
Date: Tue, 25 Oct 2022 15:47:35 +0300
Subject: [PATCH] KVM: x86: smm: use smram structs in the common code
Use kvm_smram union instead of raw arrays in the common smm code.
Signed-off-by: Maxim Levitsky <mlevitsk@redhat.com>
---
arch/x86/include/asm/kvm_host.h | 5 +++--
arch/x86/kvm/smm.c | 27 ++++++++++++++-------------
arch/x86/kvm/svm/svm.c | 8 ++++++--
arch/x86/kvm/vmx/vmx.c | 4 ++--
4 files changed, 25 insertions(+), 19 deletions(-)
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 87ee187b3f26..84c0b441a336 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -206,6 +206,7 @@ typedef enum exit_fastpath_completion fastpath_t;
struct x86_emulate_ctxt;
struct x86_exception;
+union kvm_smram;
enum x86_intercept;
enum x86_intercept_stage;
@@ -1604,8 +1605,8 @@ struct kvm_x86_ops {
#ifdef CONFIG_KVM_SMM
int (*smi_allowed)(struct kvm_vcpu *vcpu, bool for_injection);
- int (*enter_smm)(struct kvm_vcpu *vcpu, char *smstate);
- int (*leave_smm)(struct kvm_vcpu *vcpu, const char *smstate);
+ int (*enter_smm)(struct kvm_vcpu *vcpu, union kvm_smram *smram);
+ int (*leave_smm)(struct kvm_vcpu *vcpu, const union kvm_smram *smram);
void (*enable_smi_window)(struct kvm_vcpu *vcpu);
#endif
diff --git a/arch/x86/kvm/smm.c b/arch/x86/kvm/smm.c
index 01dab9fc3ab4..e714d43b746c 100644
--- a/arch/x86/kvm/smm.c
+++ b/arch/x86/kvm/smm.c
@@ -288,17 +288,18 @@ void enter_smm(struct kvm_vcpu *vcpu)
struct kvm_segment cs, ds;
struct desc_ptr dt;
unsigned long cr0;
- char buf[512];
+ union kvm_smram smram;
check_smram_offsets();
- memset(buf, 0, 512);
+ memset(smram.bytes, 0, sizeof(smram.bytes));
+
#ifdef CONFIG_X86_64
if (guest_cpuid_has(vcpu, X86_FEATURE_LM))
- enter_smm_save_state_64(vcpu, buf);
+ enter_smm_save_state_64(vcpu, smram.bytes);
else
#endif
- enter_smm_save_state_32(vcpu, buf);
+ enter_smm_save_state_32(vcpu, smram.bytes);
/*
* Give enter_smm() a chance to make ISA-specific changes to the vCPU
@@ -308,12 +309,12 @@ void enter_smm(struct kvm_vcpu *vcpu)
* Kill the VM in the unlikely case of failure, because the VM
* can be in undefined state in this case.
*/
- if (static_call(kvm_x86_enter_smm)(vcpu, buf))
+ if (static_call(kvm_x86_enter_smm)(vcpu, &smram))
goto error;
kvm_smm_changed(vcpu, true);
- if (kvm_vcpu_write_guest(vcpu, vcpu->arch.smbase + 0xfe00, buf, sizeof(buf)))
+ if (kvm_vcpu_write_guest(vcpu, vcpu->arch.smbase + 0xfe00, &smram, sizeof(smram)))
goto error;
if (static_call(kvm_x86_get_nmi_mask)(vcpu))
@@ -473,7 +474,7 @@ static int rsm_enter_protected_mode(struct kvm_vcpu *vcpu,
}
static int rsm_load_state_32(struct x86_emulate_ctxt *ctxt,
- const char *smstate)
+ u8 *smstate)
{
struct kvm_vcpu *vcpu = ctxt->vcpu;
struct kvm_segment desc;
@@ -534,7 +535,7 @@ static int rsm_load_state_32(struct x86_emulate_ctxt *ctxt,
#ifdef CONFIG_X86_64
static int rsm_load_state_64(struct x86_emulate_ctxt *ctxt,
- const char *smstate)
+ u8 *smstate)
{
struct kvm_vcpu *vcpu = ctxt->vcpu;
struct kvm_segment desc;
@@ -606,13 +607,13 @@ int emulator_leave_smm(struct x86_emulate_ctxt *ctxt)
{
struct kvm_vcpu *vcpu = ctxt->vcpu;
unsigned long cr0, cr4, efer;
- char buf[512];
+ union kvm_smram smram;
u64 smbase;
int ret;
smbase = vcpu->arch.smbase;
- ret = kvm_vcpu_read_guest(vcpu, smbase + 0xfe00, buf, sizeof(buf));
+ ret = kvm_vcpu_read_guest(vcpu, smbase + 0xfe00, smram.bytes, sizeof(smram));
if (ret < 0)
return X86EMUL_UNHANDLEABLE;
@@ -666,13 +667,13 @@ int emulator_leave_smm(struct x86_emulate_ctxt *ctxt)
* state (e.g. enter guest mode) before loading state from the SMM
* state-save area.
*/
- if (static_call(kvm_x86_leave_smm)(vcpu, buf))
+ if (static_call(kvm_x86_leave_smm)(vcpu, &smram))
return X86EMUL_UNHANDLEABLE;
#ifdef CONFIG_X86_64
if (guest_cpuid_has(vcpu, X86_FEATURE_LM))
- return rsm_load_state_64(ctxt, buf);
+ return rsm_load_state_64(ctxt, smram.bytes);
else
#endif
- return rsm_load_state_32(ctxt, buf);
+ return rsm_load_state_32(ctxt, smram.bytes);
}
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index e69390909d08..2a61b8c50ab4 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -4437,12 +4437,14 @@ static int svm_smi_allowed(struct kvm_vcpu *vcpu, bool for_injection)
return 1;
}
-static int svm_enter_smm(struct kvm_vcpu *vcpu, char *smstate)
+static int svm_enter_smm(struct kvm_vcpu *vcpu, union kvm_smram *smram)
{
struct vcpu_svm *svm = to_svm(vcpu);
struct kvm_host_map map_save;
int ret;
+ char *smstate = (char *)smram;
+
if (!is_guest_mode(vcpu))
return 0;
@@ -4484,7 +4486,7 @@ static int svm_enter_smm(struct kvm_vcpu *vcpu, char *smstate)
return 0;
}
-static int svm_leave_smm(struct kvm_vcpu *vcpu, const char *smstate)
+static int svm_leave_smm(struct kvm_vcpu *vcpu, const union kvm_smram *smram)
{
struct vcpu_svm *svm = to_svm(vcpu);
struct kvm_host_map map, map_save;
@@ -4492,6 +4494,8 @@ static int svm_leave_smm(struct kvm_vcpu *vcpu, const char *smstate)
struct vmcb *vmcb12;
int ret;
+ const char *smstate = (const char *)smram;
+
if (!guest_cpuid_has(vcpu, X86_FEATURE_LM))
return 0;
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 8cfb40cfad10..480ff79071c6 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -7922,7 +7922,7 @@ static int vmx_smi_allowed(struct kvm_vcpu *vcpu, bool for_injection)
return !is_smm(vcpu);
}
-static int vmx_enter_smm(struct kvm_vcpu *vcpu, char *smstate)
+static int vmx_enter_smm(struct kvm_vcpu *vcpu, union kvm_smram *smram)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
@@ -7943,7 +7943,7 @@ static int vmx_enter_smm(struct kvm_vcpu *vcpu, char *smstate)
return 0;
}
-static int vmx_leave_smm(struct kvm_vcpu *vcpu, const char *smstate)
+static int vmx_leave_smm(struct kvm_vcpu *vcpu, const union kvm_smram *smram)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
int ret;
--
2.38.1

View file

@ -0,0 +1,259 @@
From 9d654dc3270bcf5d876115c009097974d994a324 Mon Sep 17 00:00:00 2001
From: Maxim Levitsky <mlevitsk@redhat.com>
Date: Tue, 25 Oct 2022 15:47:36 +0300
Subject: [PATCH] KVM: x86: smm: use smram struct for 32 bit smram load/restore
Use kvm_smram_state_32 struct to save/restore 32 bit SMM state
(used when X86_FEATURE_LM is not present in the guest CPUID).
Signed-off-by: Maxim Levitsky <mlevitsk@redhat.com>
---
arch/x86/kvm/smm.c | 155 ++++++++++++++++++---------------------------
1 file changed, 61 insertions(+), 94 deletions(-)
diff --git a/arch/x86/kvm/smm.c b/arch/x86/kvm/smm.c
index e714d43b746c..2635f6b1d81a 100644
--- a/arch/x86/kvm/smm.c
+++ b/arch/x86/kvm/smm.c
@@ -142,22 +142,17 @@ static u32 enter_smm_get_segment_flags(struct kvm_segment *seg)
return flags;
}
-static void enter_smm_save_seg_32(struct kvm_vcpu *vcpu, char *buf, int n)
+static void enter_smm_save_seg_32(struct kvm_vcpu *vcpu,
+ struct kvm_smm_seg_state_32 *state,
+ u32 *selector, int n)
{
struct kvm_segment seg;
- int offset;
kvm_get_segment(vcpu, &seg, n);
- PUT_SMSTATE(u32, buf, 0x7fa8 + n * 4, seg.selector);
-
- if (n < 3)
- offset = 0x7f84 + n * 12;
- else
- offset = 0x7f2c + (n - 3) * 12;
-
- PUT_SMSTATE(u32, buf, offset + 8, seg.base);
- PUT_SMSTATE(u32, buf, offset + 4, seg.limit);
- PUT_SMSTATE(u32, buf, offset, enter_smm_get_segment_flags(&seg));
+ *selector = seg.selector;
+ state->base = seg.base;
+ state->limit = seg.limit;
+ state->flags = enter_smm_get_segment_flags(&seg);
}
#ifdef CONFIG_X86_64
@@ -178,54 +173,48 @@ static void enter_smm_save_seg_64(struct kvm_vcpu *vcpu, char *buf, int n)
}
#endif
-static void enter_smm_save_state_32(struct kvm_vcpu *vcpu, char *buf)
+static void enter_smm_save_state_32(struct kvm_vcpu *vcpu,
+ struct kvm_smram_state_32 *smram)
{
struct desc_ptr dt;
- struct kvm_segment seg;
unsigned long val;
int i;
- PUT_SMSTATE(u32, buf, 0x7ffc, kvm_read_cr0(vcpu));
- PUT_SMSTATE(u32, buf, 0x7ff8, kvm_read_cr3(vcpu));
- PUT_SMSTATE(u32, buf, 0x7ff4, kvm_get_rflags(vcpu));
- PUT_SMSTATE(u32, buf, 0x7ff0, kvm_rip_read(vcpu));
+ smram->cr0 = kvm_read_cr0(vcpu);
+ smram->cr3 = kvm_read_cr3(vcpu);
+ smram->eflags = kvm_get_rflags(vcpu);
+ smram->eip = kvm_rip_read(vcpu);
for (i = 0; i < 8; i++)
- PUT_SMSTATE(u32, buf, 0x7fd0 + i * 4, kvm_register_read_raw(vcpu, i));
+ smram->gprs[i] = kvm_register_read_raw(vcpu, i);
kvm_get_dr(vcpu, 6, &val);
- PUT_SMSTATE(u32, buf, 0x7fcc, (u32)val);
+ smram->dr6 = (u32)val;
kvm_get_dr(vcpu, 7, &val);
- PUT_SMSTATE(u32, buf, 0x7fc8, (u32)val);
+ smram->dr7 = (u32)val;
- kvm_get_segment(vcpu, &seg, VCPU_SREG_TR);
- PUT_SMSTATE(u32, buf, 0x7fc4, seg.selector);
- PUT_SMSTATE(u32, buf, 0x7f64, seg.base);
- PUT_SMSTATE(u32, buf, 0x7f60, seg.limit);
- PUT_SMSTATE(u32, buf, 0x7f5c, enter_smm_get_segment_flags(&seg));
-
- kvm_get_segment(vcpu, &seg, VCPU_SREG_LDTR);
- PUT_SMSTATE(u32, buf, 0x7fc0, seg.selector);
- PUT_SMSTATE(u32, buf, 0x7f80, seg.base);
- PUT_SMSTATE(u32, buf, 0x7f7c, seg.limit);
- PUT_SMSTATE(u32, buf, 0x7f78, enter_smm_get_segment_flags(&seg));
+ enter_smm_save_seg_32(vcpu, &smram->tr, &smram->tr_sel, VCPU_SREG_TR);
+ enter_smm_save_seg_32(vcpu, &smram->ldtr, &smram->ldtr_sel, VCPU_SREG_LDTR);
static_call(kvm_x86_get_gdt)(vcpu, &dt);
- PUT_SMSTATE(u32, buf, 0x7f74, dt.address);
- PUT_SMSTATE(u32, buf, 0x7f70, dt.size);
+ smram->gdtr.base = dt.address;
+ smram->gdtr.limit = dt.size;
static_call(kvm_x86_get_idt)(vcpu, &dt);
- PUT_SMSTATE(u32, buf, 0x7f58, dt.address);
- PUT_SMSTATE(u32, buf, 0x7f54, dt.size);
+ smram->idtr.base = dt.address;
+ smram->idtr.limit = dt.size;
- for (i = 0; i < 6; i++)
- enter_smm_save_seg_32(vcpu, buf, i);
+ enter_smm_save_seg_32(vcpu, &smram->es, &smram->es_sel, VCPU_SREG_ES);
+ enter_smm_save_seg_32(vcpu, &smram->cs, &smram->cs_sel, VCPU_SREG_CS);
+ enter_smm_save_seg_32(vcpu, &smram->ss, &smram->ss_sel, VCPU_SREG_SS);
- PUT_SMSTATE(u32, buf, 0x7f14, kvm_read_cr4(vcpu));
+ enter_smm_save_seg_32(vcpu, &smram->ds, &smram->ds_sel, VCPU_SREG_DS);
+ enter_smm_save_seg_32(vcpu, &smram->fs, &smram->fs_sel, VCPU_SREG_FS);
+ enter_smm_save_seg_32(vcpu, &smram->gs, &smram->gs_sel, VCPU_SREG_GS);
- /* revision id */
- PUT_SMSTATE(u32, buf, 0x7efc, 0x00020000);
- PUT_SMSTATE(u32, buf, 0x7ef8, vcpu->arch.smbase);
+ smram->cr4 = kvm_read_cr4(vcpu);
+ smram->smm_revision = 0x00020000;
+ smram->smbase = vcpu->arch.smbase;
}
#ifdef CONFIG_X86_64
@@ -299,7 +288,7 @@ void enter_smm(struct kvm_vcpu *vcpu)
enter_smm_save_state_64(vcpu, smram.bytes);
else
#endif
- enter_smm_save_state_32(vcpu, smram.bytes);
+ enter_smm_save_state_32(vcpu, &smram.smram32);
/*
* Give enter_smm() a chance to make ISA-specific changes to the vCPU
@@ -391,21 +380,16 @@ static void rsm_set_desc_flags(struct kvm_segment *desc, u32 flags)
desc->padding = 0;
}
-static int rsm_load_seg_32(struct kvm_vcpu *vcpu, const char *smstate,
- int n)
+static int rsm_load_seg_32(struct kvm_vcpu *vcpu,
+ const struct kvm_smm_seg_state_32 *state,
+ u16 selector, int n)
{
struct kvm_segment desc;
- int offset;
-
- if (n < 3)
- offset = 0x7f84 + n * 12;
- else
- offset = 0x7f2c + (n - 3) * 12;
- desc.selector = GET_SMSTATE(u32, smstate, 0x7fa8 + n * 4);
- desc.base = GET_SMSTATE(u32, smstate, offset + 8);
- desc.limit = GET_SMSTATE(u32, smstate, offset + 4);
- rsm_set_desc_flags(&desc, GET_SMSTATE(u32, smstate, offset));
+ desc.selector = selector;
+ desc.base = state->base;
+ desc.limit = state->limit;
+ rsm_set_desc_flags(&desc, state->flags);
kvm_set_segment(vcpu, &desc, n);
return X86EMUL_CONTINUE;
}
@@ -474,63 +458,46 @@ static int rsm_enter_protected_mode(struct kvm_vcpu *vcpu,
}
static int rsm_load_state_32(struct x86_emulate_ctxt *ctxt,
- u8 *smstate)
+ const struct kvm_smram_state_32 *smstate)
{
struct kvm_vcpu *vcpu = ctxt->vcpu;
- struct kvm_segment desc;
struct desc_ptr dt;
- u32 val, cr0, cr3, cr4;
int i;
- cr0 = GET_SMSTATE(u32, smstate, 0x7ffc);
- cr3 = GET_SMSTATE(u32, smstate, 0x7ff8);
- ctxt->eflags = GET_SMSTATE(u32, smstate, 0x7ff4) | X86_EFLAGS_FIXED;
- ctxt->_eip = GET_SMSTATE(u32, smstate, 0x7ff0);
+ ctxt->eflags = smstate->eflags | X86_EFLAGS_FIXED;
+ ctxt->_eip = smstate->eip;
for (i = 0; i < 8; i++)
- *reg_write(ctxt, i) = GET_SMSTATE(u32, smstate, 0x7fd0 + i * 4);
-
- val = GET_SMSTATE(u32, smstate, 0x7fcc);
+ *reg_write(ctxt, i) = smstate->gprs[i];
- if (kvm_set_dr(vcpu, 6, val))
+ if (kvm_set_dr(vcpu, 6, smstate->dr6))
return X86EMUL_UNHANDLEABLE;
-
- val = GET_SMSTATE(u32, smstate, 0x7fc8);
-
- if (kvm_set_dr(vcpu, 7, val))
+ if (kvm_set_dr(vcpu, 7, smstate->dr7))
return X86EMUL_UNHANDLEABLE;
- desc.selector = GET_SMSTATE(u32, smstate, 0x7fc4);
- desc.base = GET_SMSTATE(u32, smstate, 0x7f64);
- desc.limit = GET_SMSTATE(u32, smstate, 0x7f60);
- rsm_set_desc_flags(&desc, GET_SMSTATE(u32, smstate, 0x7f5c));
- kvm_set_segment(vcpu, &desc, VCPU_SREG_TR);
-
- desc.selector = GET_SMSTATE(u32, smstate, 0x7fc0);
- desc.base = GET_SMSTATE(u32, smstate, 0x7f80);
- desc.limit = GET_SMSTATE(u32, smstate, 0x7f7c);
- rsm_set_desc_flags(&desc, GET_SMSTATE(u32, smstate, 0x7f78));
- kvm_set_segment(vcpu, &desc, VCPU_SREG_LDTR);
+ rsm_load_seg_32(vcpu, &smstate->tr, smstate->tr_sel, VCPU_SREG_TR);
+ rsm_load_seg_32(vcpu, &smstate->ldtr, smstate->ldtr_sel, VCPU_SREG_LDTR);
- dt.address = GET_SMSTATE(u32, smstate, 0x7f74);
- dt.size = GET_SMSTATE(u32, smstate, 0x7f70);
+ dt.address = smstate->gdtr.base;
+ dt.size = smstate->gdtr.limit;
static_call(kvm_x86_set_gdt)(vcpu, &dt);
- dt.address = GET_SMSTATE(u32, smstate, 0x7f58);
- dt.size = GET_SMSTATE(u32, smstate, 0x7f54);
+ dt.address = smstate->idtr.base;
+ dt.size = smstate->idtr.limit;
static_call(kvm_x86_set_idt)(vcpu, &dt);
- for (i = 0; i < 6; i++) {
- int r = rsm_load_seg_32(vcpu, smstate, i);
- if (r != X86EMUL_CONTINUE)
- return r;
- }
+ rsm_load_seg_32(vcpu, &smstate->es, smstate->es_sel, VCPU_SREG_ES);
+ rsm_load_seg_32(vcpu, &smstate->cs, smstate->cs_sel, VCPU_SREG_CS);
+ rsm_load_seg_32(vcpu, &smstate->ss, smstate->ss_sel, VCPU_SREG_SS);
- cr4 = GET_SMSTATE(u32, smstate, 0x7f14);
+ rsm_load_seg_32(vcpu, &smstate->ds, smstate->ds_sel, VCPU_SREG_DS);
+ rsm_load_seg_32(vcpu, &smstate->fs, smstate->fs_sel, VCPU_SREG_FS);
+ rsm_load_seg_32(vcpu, &smstate->gs, smstate->gs_sel, VCPU_SREG_GS);
- vcpu->arch.smbase = GET_SMSTATE(u32, smstate, 0x7ef8);
+ vcpu->arch.smbase = smstate->smbase;
- return rsm_enter_protected_mode(vcpu, cr0, cr3, cr4);
+ return rsm_enter_protected_mode(vcpu, smstate->cr0,
+ smstate->cr3, smstate->cr4);
}
#ifdef CONFIG_X86_64
@@ -675,5 +642,5 @@ int emulator_leave_smm(struct x86_emulate_ctxt *ctxt)
return rsm_load_state_64(ctxt, smram.bytes);
else
#endif
- return rsm_load_state_32(ctxt, smram.bytes);
+ return rsm_load_state_32(ctxt, &smram.smram32);
}
--
2.38.1

View file

@ -1,180 +0,0 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Maxim Levitsky <mlevitsk@redhat.com>
Date: Wed, 3 Aug 2022 18:50:11 +0300
Subject: [PATCH] KVM: x86: emulator/smm: preserve interrupt shadow in SMRAM
When #SMI is asserted, the CPU can be in interrupt shadow
due to sti or mov ss.
It is not mandatory in Intel/AMD prm to have the #SMI
blocked during the shadow, and on top of
that, since neither SVM nor VMX has true support for SMI
window, waiting for one instruction would mean single stepping
the guest.
Instead, allow #SMI in this case, but both reset the interrupt
window and stash its value in SMRAM to restore it on exit
from SMM.
This fixes rare failures seen mostly on windows guests on VMX,
when #SMI falls on the sti instruction which manifest in
VM entry failure due to EFLAGS.IF not being set, but STI interrupt
window still being set in the VMCS.
Signed-off-by: Maxim Levitsky <mlevitsk@redhat.com>
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
---
arch/x86/kvm/emulate.c | 17 ++++++++++++++---
arch/x86/kvm/kvm_emulate.h | 10 ++++++----
arch/x86/kvm/x86.c | 12 ++++++++++++
3 files changed, 32 insertions(+), 7 deletions(-)
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index 4eb35a0a33a5..3e6ea2951e2b 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -2420,7 +2420,7 @@ static int rsm_load_state_32(struct x86_emulate_ctxt *ctxt,
const struct kvm_smram_state_32 *smstate)
{
struct desc_ptr dt;
- int i;
+ int i, r;
ctxt->eflags = smstate->eflags | X86_EFLAGS_FIXED;
ctxt->_eip = smstate->eip;
@@ -2455,8 +2455,16 @@ static int rsm_load_state_32(struct x86_emulate_ctxt *ctxt,
ctxt->ops->set_smbase(ctxt, smstate->smbase);
- return rsm_enter_protected_mode(ctxt, smstate->cr0,
- smstate->cr3, smstate->cr4);
+ r = rsm_enter_protected_mode(ctxt, smstate->cr0,
+ smstate->cr3, smstate->cr4);
+
+ if (r != X86EMUL_CONTINUE)
+ return r;
+
+ ctxt->ops->set_int_shadow(ctxt, 0);
+ ctxt->interruptibility = (u8)smstate->int_shadow;
+
+ return X86EMUL_CONTINUE;
}
#ifdef CONFIG_X86_64
@@ -2505,6 +2513,9 @@ static int rsm_load_state_64(struct x86_emulate_ctxt *ctxt,
rsm_load_seg_64(ctxt, &smstate->fs, VCPU_SREG_FS);
rsm_load_seg_64(ctxt, &smstate->gs, VCPU_SREG_GS);
+ ctxt->ops->set_int_shadow(ctxt, 0);
+ ctxt->interruptibility = (u8)smstate->int_shadow;
+
return X86EMUL_CONTINUE;
}
#endif
diff --git a/arch/x86/kvm/kvm_emulate.h b/arch/x86/kvm/kvm_emulate.h
index 3b37b3e17379..a64c190abf28 100644
--- a/arch/x86/kvm/kvm_emulate.h
+++ b/arch/x86/kvm/kvm_emulate.h
@@ -231,6 +231,7 @@ struct x86_emulate_ops {
bool (*guest_has_rdpid)(struct x86_emulate_ctxt *ctxt);
void (*set_nmi_mask)(struct x86_emulate_ctxt *ctxt, bool masked);
+ void (*set_int_shadow)(struct x86_emulate_ctxt *ctxt, u8 shadow);
unsigned (*get_hflags)(struct x86_emulate_ctxt *ctxt);
void (*exiting_smm)(struct x86_emulate_ctxt *ctxt);
@@ -497,7 +498,8 @@ struct kvm_smram_state_32 {
u32 reserved1[62];
u32 smbase;
u32 smm_revision;
- u32 reserved2[5];
+ u32 reserved2[4];
+ u32 int_shadow; /* KVM extension */
u32 cr4; /* CR4 is not present in Intel/AMD SMRAM image */
u32 reserved3[5];
@@ -545,6 +547,7 @@ static inline void __check_smram32_offsets(void)
__CHECK_SMRAM32_OFFSET(smbase, 0xFEF8);
__CHECK_SMRAM32_OFFSET(smm_revision, 0xFEFC);
__CHECK_SMRAM32_OFFSET(reserved2, 0xFF00);
+ __CHECK_SMRAM32_OFFSET(int_shadow, 0xFF10);
__CHECK_SMRAM32_OFFSET(cr4, 0xFF14);
__CHECK_SMRAM32_OFFSET(reserved3, 0xFF18);
__CHECK_SMRAM32_OFFSET(ds, 0xFF2C);
@@ -604,7 +607,7 @@ struct kvm_smram_state_64 {
u64 io_restart_rsi;
u64 io_restart_rdi;
u32 io_restart_dword;
- u32 reserved1;
+ u32 int_shadow;
u8 io_inst_restart;
u8 auto_hlt_restart;
u8 reserved2[6];
@@ -642,7 +645,6 @@ struct kvm_smram_state_64 {
u64 gprs[16]; /* GPRS in a reversed "natural" X86 order (R15/R14/../RCX/RAX.) */
};
-
static inline void __check_smram64_offsets(void)
{
#define __CHECK_SMRAM64_OFFSET(field, offset) \
@@ -663,7 +665,7 @@ static inline void __check_smram64_offsets(void)
__CHECK_SMRAM64_OFFSET(io_restart_rsi, 0xFEB0);
__CHECK_SMRAM64_OFFSET(io_restart_rdi, 0xFEB8);
__CHECK_SMRAM64_OFFSET(io_restart_dword, 0xFEC0);
- __CHECK_SMRAM64_OFFSET(reserved1, 0xFEC4);
+ __CHECK_SMRAM64_OFFSET(int_shadow, 0xFEC4);
__CHECK_SMRAM64_OFFSET(io_inst_restart, 0xFEC8);
__CHECK_SMRAM64_OFFSET(auto_hlt_restart, 0xFEC9);
__CHECK_SMRAM64_OFFSET(reserved2, 0xFECA);
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index d00b82ee6ca4..4cefdd83a448 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -7299,6 +7299,11 @@ static void emulator_set_nmi_mask(struct x86_emulate_ctxt *ctxt, bool masked)
static_call(kvm_x86_set_nmi_mask)(emul_to_vcpu(ctxt), masked);
}
+static void emulator_set_int_shadow(struct x86_emulate_ctxt *ctxt, u8 shadow)
+{
+ static_call(kvm_x86_set_interrupt_shadow)(emul_to_vcpu(ctxt), shadow);
+}
+
static unsigned emulator_get_hflags(struct x86_emulate_ctxt *ctxt)
{
return emul_to_vcpu(ctxt)->arch.hflags;
@@ -7368,6 +7373,7 @@ static const struct x86_emulate_ops emulate_ops = {
.guest_has_fxsr = emulator_guest_has_fxsr,
.guest_has_rdpid = emulator_guest_has_rdpid,
.set_nmi_mask = emulator_set_nmi_mask,
+ .set_int_shadow = emulator_set_int_shadow,
.get_hflags = emulator_get_hflags,
.exiting_smm = emulator_exiting_smm,
.leave_smm = emulator_leave_smm,
@@ -9088,6 +9094,8 @@ static void enter_smm_save_state_32(struct kvm_vcpu *vcpu, struct kvm_smram_stat
smram->cr4 = kvm_read_cr4(vcpu);
smram->smm_revision = 0x00020000;
smram->smbase = vcpu->arch.smbase;
+
+ smram->int_shadow = static_call(kvm_x86_get_interrupt_shadow)(vcpu);
}
#ifdef CONFIG_X86_64
@@ -9136,6 +9144,8 @@ static void enter_smm_save_state_64(struct kvm_vcpu *vcpu, struct kvm_smram_stat
enter_smm_save_seg_64(vcpu, &smram->ds, VCPU_SREG_DS);
enter_smm_save_seg_64(vcpu, &smram->fs, VCPU_SREG_FS);
enter_smm_save_seg_64(vcpu, &smram->gs, VCPU_SREG_GS);
+
+ smram->int_shadow = static_call(kvm_x86_get_interrupt_shadow)(vcpu);
}
#endif
@@ -9172,6 +9182,8 @@ static void enter_smm(struct kvm_vcpu *vcpu)
kvm_set_rflags(vcpu, X86_EFLAGS_FIXED);
kvm_rip_write(vcpu, 0x8000);
+ static_call(kvm_x86_set_interrupt_shadow)(vcpu, 0);
+
cr0 = vcpu->arch.cr0 & ~(X86_CR0_PE | X86_CR0_EM | X86_CR0_TS | X86_CR0_PG);
static_call(kvm_x86_set_cr0)(vcpu, cr0);
vcpu->arch.cr0 = cr0;

View file

@ -0,0 +1,268 @@
From b7913065928c913fb8569a8a71f6eec4a32779c7 Mon Sep 17 00:00:00 2001
From: Maxim Levitsky <mlevitsk@redhat.com>
Date: Tue, 25 Oct 2022 15:47:37 +0300
Subject: [PATCH] KVM: x86: smm: use smram struct for 64 bit smram load/restore
Use kvm_smram_state_64 struct to save/restore the 64 bit SMM state
(used when X86_FEATURE_LM is present in the guest CPUID,
regardless of 32-bitness of the guest).
Signed-off-by: Maxim Levitsky <mlevitsk@redhat.com>
---
arch/x86/kvm/smm.c | 153 +++++++++++++++++++--------------------------
1 file changed, 63 insertions(+), 90 deletions(-)
diff --git a/arch/x86/kvm/smm.c b/arch/x86/kvm/smm.c
index 2635f6b1d81a..82761384a866 100644
--- a/arch/x86/kvm/smm.c
+++ b/arch/x86/kvm/smm.c
@@ -156,20 +156,17 @@ static void enter_smm_save_seg_32(struct kvm_vcpu *vcpu,
}
#ifdef CONFIG_X86_64
-static void enter_smm_save_seg_64(struct kvm_vcpu *vcpu, char *buf, int n)
+static void enter_smm_save_seg_64(struct kvm_vcpu *vcpu,
+ struct kvm_smm_seg_state_64 *state,
+ int n)
{
struct kvm_segment seg;
- int offset;
- u16 flags;
kvm_get_segment(vcpu, &seg, n);
- offset = 0x7e00 + n * 16;
-
- flags = enter_smm_get_segment_flags(&seg) >> 8;
- PUT_SMSTATE(u16, buf, offset, seg.selector);
- PUT_SMSTATE(u16, buf, offset + 2, flags);
- PUT_SMSTATE(u32, buf, offset + 4, seg.limit);
- PUT_SMSTATE(u64, buf, offset + 8, seg.base);
+ state->selector = seg.selector;
+ state->attributes = enter_smm_get_segment_flags(&seg) >> 8;
+ state->limit = seg.limit;
+ state->base = seg.base;
}
#endif
@@ -218,57 +215,52 @@ static void enter_smm_save_state_32(struct kvm_vcpu *vcpu,
}
#ifdef CONFIG_X86_64
-static void enter_smm_save_state_64(struct kvm_vcpu *vcpu, char *buf)
+static void enter_smm_save_state_64(struct kvm_vcpu *vcpu,
+ struct kvm_smram_state_64 *smram)
{
struct desc_ptr dt;
- struct kvm_segment seg;
unsigned long val;
int i;
for (i = 0; i < 16; i++)
- PUT_SMSTATE(u64, buf, 0x7ff8 - i * 8, kvm_register_read_raw(vcpu, i));
+ smram->gprs[15 - i] = kvm_register_read_raw(vcpu, i);
+
+ smram->rip = kvm_rip_read(vcpu);
+ smram->rflags = kvm_get_rflags(vcpu);
- PUT_SMSTATE(u64, buf, 0x7f78, kvm_rip_read(vcpu));
- PUT_SMSTATE(u32, buf, 0x7f70, kvm_get_rflags(vcpu));
kvm_get_dr(vcpu, 6, &val);
- PUT_SMSTATE(u64, buf, 0x7f68, val);
+ smram->dr6 = val;
kvm_get_dr(vcpu, 7, &val);
- PUT_SMSTATE(u64, buf, 0x7f60, val);
-
- PUT_SMSTATE(u64, buf, 0x7f58, kvm_read_cr0(vcpu));
- PUT_SMSTATE(u64, buf, 0x7f50, kvm_read_cr3(vcpu));
- PUT_SMSTATE(u64, buf, 0x7f48, kvm_read_cr4(vcpu));
+ smram->dr7 = val;
- PUT_SMSTATE(u32, buf, 0x7f00, vcpu->arch.smbase);
+ smram->cr0 = kvm_read_cr0(vcpu);
+ smram->cr3 = kvm_read_cr3(vcpu);
+ smram->cr4 = kvm_read_cr4(vcpu);
- /* revision id */
- PUT_SMSTATE(u32, buf, 0x7efc, 0x00020064);
+ smram->smbase = vcpu->arch.smbase;
+ smram->smm_revison = 0x00020064;
- PUT_SMSTATE(u64, buf, 0x7ed0, vcpu->arch.efer);
+ smram->efer = vcpu->arch.efer;
- kvm_get_segment(vcpu, &seg, VCPU_SREG_TR);
- PUT_SMSTATE(u16, buf, 0x7e90, seg.selector);
- PUT_SMSTATE(u16, buf, 0x7e92, enter_smm_get_segment_flags(&seg) >> 8);
- PUT_SMSTATE(u32, buf, 0x7e94, seg.limit);
- PUT_SMSTATE(u64, buf, 0x7e98, seg.base);
+ enter_smm_save_seg_64(vcpu, &smram->tr, VCPU_SREG_TR);
static_call(kvm_x86_get_idt)(vcpu, &dt);
- PUT_SMSTATE(u32, buf, 0x7e84, dt.size);
- PUT_SMSTATE(u64, buf, 0x7e88, dt.address);
+ smram->idtr.limit = dt.size;
+ smram->idtr.base = dt.address;
- kvm_get_segment(vcpu, &seg, VCPU_SREG_LDTR);
- PUT_SMSTATE(u16, buf, 0x7e70, seg.selector);
- PUT_SMSTATE(u16, buf, 0x7e72, enter_smm_get_segment_flags(&seg) >> 8);
- PUT_SMSTATE(u32, buf, 0x7e74, seg.limit);
- PUT_SMSTATE(u64, buf, 0x7e78, seg.base);
+ enter_smm_save_seg_64(vcpu, &smram->ldtr, VCPU_SREG_LDTR);
static_call(kvm_x86_get_gdt)(vcpu, &dt);
- PUT_SMSTATE(u32, buf, 0x7e64, dt.size);
- PUT_SMSTATE(u64, buf, 0x7e68, dt.address);
+ smram->gdtr.limit = dt.size;
+ smram->gdtr.base = dt.address;
- for (i = 0; i < 6; i++)
- enter_smm_save_seg_64(vcpu, buf, i);
+ enter_smm_save_seg_64(vcpu, &smram->es, VCPU_SREG_ES);
+ enter_smm_save_seg_64(vcpu, &smram->cs, VCPU_SREG_CS);
+ enter_smm_save_seg_64(vcpu, &smram->ss, VCPU_SREG_SS);
+ enter_smm_save_seg_64(vcpu, &smram->ds, VCPU_SREG_DS);
+ enter_smm_save_seg_64(vcpu, &smram->fs, VCPU_SREG_FS);
+ enter_smm_save_seg_64(vcpu, &smram->gs, VCPU_SREG_GS);
}
#endif
@@ -285,7 +277,7 @@ void enter_smm(struct kvm_vcpu *vcpu)
#ifdef CONFIG_X86_64
if (guest_cpuid_has(vcpu, X86_FEATURE_LM))
- enter_smm_save_state_64(vcpu, smram.bytes);
+ enter_smm_save_state_64(vcpu, &smram.smram64);
else
#endif
enter_smm_save_state_32(vcpu, &smram.smram32);
@@ -395,18 +387,17 @@ static int rsm_load_seg_32(struct kvm_vcpu *vcpu,
}
#ifdef CONFIG_X86_64
-static int rsm_load_seg_64(struct kvm_vcpu *vcpu, const char *smstate,
+
+static int rsm_load_seg_64(struct kvm_vcpu *vcpu,
+ const struct kvm_smm_seg_state_64 *state,
int n)
{
struct kvm_segment desc;
- int offset;
-
- offset = 0x7e00 + n * 16;
- desc.selector = GET_SMSTATE(u16, smstate, offset);
- rsm_set_desc_flags(&desc, GET_SMSTATE(u16, smstate, offset + 2) << 8);
- desc.limit = GET_SMSTATE(u32, smstate, offset + 4);
- desc.base = GET_SMSTATE(u64, smstate, offset + 8);
+ desc.selector = state->selector;
+ rsm_set_desc_flags(&desc, state->attributes << 8);
+ desc.limit = state->limit;
+ desc.base = state->base;
kvm_set_segment(vcpu, &desc, n);
return X86EMUL_CONTINUE;
}
@@ -502,69 +493,51 @@ static int rsm_load_state_32(struct x86_emulate_ctxt *ctxt,
#ifdef CONFIG_X86_64
static int rsm_load_state_64(struct x86_emulate_ctxt *ctxt,
- u8 *smstate)
+ const struct kvm_smram_state_64 *smstate)
{
struct kvm_vcpu *vcpu = ctxt->vcpu;
- struct kvm_segment desc;
struct desc_ptr dt;
- u64 val, cr0, cr3, cr4;
int i, r;
for (i = 0; i < 16; i++)
- *reg_write(ctxt, i) = GET_SMSTATE(u64, smstate, 0x7ff8 - i * 8);
-
- ctxt->_eip = GET_SMSTATE(u64, smstate, 0x7f78);
- ctxt->eflags = GET_SMSTATE(u32, smstate, 0x7f70) | X86_EFLAGS_FIXED;
+ *reg_write(ctxt, i) = smstate->gprs[15 - i];
- val = GET_SMSTATE(u64, smstate, 0x7f68);
+ ctxt->_eip = smstate->rip;
+ ctxt->eflags = smstate->rflags | X86_EFLAGS_FIXED;
- if (kvm_set_dr(vcpu, 6, val))
+ if (kvm_set_dr(vcpu, 6, smstate->dr6))
return X86EMUL_UNHANDLEABLE;
-
- val = GET_SMSTATE(u64, smstate, 0x7f60);
-
- if (kvm_set_dr(vcpu, 7, val))
+ if (kvm_set_dr(vcpu, 7, smstate->dr7))
return X86EMUL_UNHANDLEABLE;
- cr0 = GET_SMSTATE(u64, smstate, 0x7f58);
- cr3 = GET_SMSTATE(u64, smstate, 0x7f50);
- cr4 = GET_SMSTATE(u64, smstate, 0x7f48);
- vcpu->arch.smbase = GET_SMSTATE(u32, smstate, 0x7f00);
- val = GET_SMSTATE(u64, smstate, 0x7ed0);
+ vcpu->arch.smbase = smstate->smbase;
- if (kvm_set_msr(vcpu, MSR_EFER, val & ~EFER_LMA))
+ if (kvm_set_msr(vcpu, MSR_EFER, smstate->efer & ~EFER_LMA))
return X86EMUL_UNHANDLEABLE;
- desc.selector = GET_SMSTATE(u32, smstate, 0x7e90);
- rsm_set_desc_flags(&desc, GET_SMSTATE(u32, smstate, 0x7e92) << 8);
- desc.limit = GET_SMSTATE(u32, smstate, 0x7e94);
- desc.base = GET_SMSTATE(u64, smstate, 0x7e98);
- kvm_set_segment(vcpu, &desc, VCPU_SREG_TR);
+ rsm_load_seg_64(vcpu, &smstate->tr, VCPU_SREG_TR);
- dt.size = GET_SMSTATE(u32, smstate, 0x7e84);
- dt.address = GET_SMSTATE(u64, smstate, 0x7e88);
+ dt.size = smstate->idtr.limit;
+ dt.address = smstate->idtr.base;
static_call(kvm_x86_set_idt)(vcpu, &dt);
- desc.selector = GET_SMSTATE(u32, smstate, 0x7e70);
- rsm_set_desc_flags(&desc, GET_SMSTATE(u32, smstate, 0x7e72) << 8);
- desc.limit = GET_SMSTATE(u32, smstate, 0x7e74);
- desc.base = GET_SMSTATE(u64, smstate, 0x7e78);
- kvm_set_segment(vcpu, &desc, VCPU_SREG_LDTR);
+ rsm_load_seg_64(vcpu, &smstate->ldtr, VCPU_SREG_LDTR);
- dt.size = GET_SMSTATE(u32, smstate, 0x7e64);
- dt.address = GET_SMSTATE(u64, smstate, 0x7e68);
+ dt.size = smstate->gdtr.limit;
+ dt.address = smstate->gdtr.base;
static_call(kvm_x86_set_gdt)(vcpu, &dt);
- r = rsm_enter_protected_mode(vcpu, cr0, cr3, cr4);
+ r = rsm_enter_protected_mode(vcpu, smstate->cr0, smstate->cr3, smstate->cr4);
if (r != X86EMUL_CONTINUE)
return r;
- for (i = 0; i < 6; i++) {
- r = rsm_load_seg_64(vcpu, smstate, i);
- if (r != X86EMUL_CONTINUE)
- return r;
- }
+ rsm_load_seg_64(vcpu, &smstate->es, VCPU_SREG_ES);
+ rsm_load_seg_64(vcpu, &smstate->cs, VCPU_SREG_CS);
+ rsm_load_seg_64(vcpu, &smstate->ss, VCPU_SREG_SS);
+ rsm_load_seg_64(vcpu, &smstate->ds, VCPU_SREG_DS);
+ rsm_load_seg_64(vcpu, &smstate->fs, VCPU_SREG_FS);
+ rsm_load_seg_64(vcpu, &smstate->gs, VCPU_SREG_GS);
return X86EMUL_CONTINUE;
}
@@ -639,7 +612,7 @@ int emulator_leave_smm(struct x86_emulate_ctxt *ctxt)
#ifdef CONFIG_X86_64
if (guest_cpuid_has(vcpu, X86_FEATURE_LM))
- return rsm_load_state_64(ctxt, smram.bytes);
+ return rsm_load_state_64(ctxt, &smram.smram64);
else
#endif
return rsm_load_state_32(ctxt, &smram.smram32);
--
2.38.1

View file

@ -0,0 +1,45 @@
From 736f391168886fa50eb3888007a34db101f8de9e Mon Sep 17 00:00:00 2001
From: Maxim Levitsky <mlevitsk@redhat.com>
Date: Tue, 25 Oct 2022 15:47:38 +0300
Subject: [PATCH] KVM: svm: drop explicit return value of kvm_vcpu_map
If kvm_vcpu_map() returns a non-zero value, the error path should be
triggered regardless of the exact error value returned.
Suggested-by: Sean Christopherson <seanjc@google.com>
Signed-off-by: Maxim Levitsky <mlevitsk@redhat.com>
---
arch/x86/kvm/svm/svm.c | 7 +++----
1 file changed, 3 insertions(+), 4 deletions(-)
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index 2a61b8c50ab4..74f390077a1e 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -4473,8 +4473,7 @@ static int svm_enter_smm(struct kvm_vcpu *vcpu, union kvm_smram *smram)
* that, see svm_prepare_switch_to_guest()) which must be
* preserved.
*/
- if (kvm_vcpu_map(vcpu, gpa_to_gfn(svm->nested.hsave_msr),
- &map_save) == -EINVAL)
+ if (kvm_vcpu_map(vcpu, gpa_to_gfn(svm->nested.hsave_msr), &map_save))
return 1;
BUILD_BUG_ON(offsetof(struct vmcb, save) != 0x400);
@@ -4511,11 +4510,11 @@ static int svm_leave_smm(struct kvm_vcpu *vcpu, const union kvm_smram *smram)
return 1;
vmcb12_gpa = GET_SMSTATE(u64, smstate, 0x7ee0);
- if (kvm_vcpu_map(vcpu, gpa_to_gfn(vmcb12_gpa), &map) == -EINVAL)
+ if (kvm_vcpu_map(vcpu, gpa_to_gfn(vmcb12_gpa), &map))
return 1;
ret = 1;
- if (kvm_vcpu_map(vcpu, gpa_to_gfn(svm->nested.hsave_msr), &map_save) == -EINVAL)
+ if (kvm_vcpu_map(vcpu, gpa_to_gfn(svm->nested.hsave_msr), &map_save))
goto unmap_map;
if (svm_allocate_nested(svm))
--
2.38.1

View file

@ -1,43 +1,39 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 From 0e0e4df8502c00f60955187c3e98e50653b15008 Mon Sep 17 00:00:00 2001
From: Maxim Levitsky <mlevitsk@redhat.com> From: Maxim Levitsky <mlevitsk@redhat.com>
Date: Wed, 3 Aug 2022 18:50:09 +0300 Date: Tue, 25 Oct 2022 15:47:39 +0300
Subject: [PATCH] KVM: x86: SVM: use smram structs Subject: [PATCH] KVM: x86: SVM: use smram structs
This removes the last user of put_smstate/GET_SMSTATE so Use SMM structs in the SVM code as well, which removes the last user of
remove these functions as well. put_smstate/GET_SMSTATE so remove these macros as well.
Also add a sanity check that we don't attempt to enter the SMM
on non long mode capable guest CPU with a running nested guest.
Signed-off-by: Maxim Levitsky <mlevitsk@redhat.com> Signed-off-by: Maxim Levitsky <mlevitsk@redhat.com>
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
--- ---
arch/x86/include/asm/kvm_host.h | 6 ------ arch/x86/kvm/smm.h | 6 ------
arch/x86/kvm/svm/svm.c | 21 ++++++--------------- arch/x86/kvm/svm/svm.c | 21 +++++++--------------
2 files changed, 6 insertions(+), 21 deletions(-) 2 files changed, 7 insertions(+), 20 deletions(-)
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h diff --git a/arch/x86/kvm/smm.h b/arch/x86/kvm/smm.h
index d752fabde94ad2..d570ec522ebb55 100644 index bf5c7ffeb11e..8d96bff3f4d5 100644
--- a/arch/x86/include/asm/kvm_host.h --- a/arch/x86/kvm/smm.h
+++ b/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/kvm/smm.h
@@ -2077,12 +2077,6 @@ static inline int kvm_cpu_get_apicid(int mps_cpu) @@ -4,12 +4,6 @@
#endif
} #include <linux/build_bug.h>
-#define put_smstate(type, buf, offset, val) \
- *(type *)((buf) + (offset) - 0x7e00) = val
-
-#define GET_SMSTATE(type, buf, offset) \ -#define GET_SMSTATE(type, buf, offset) \
- (*(type *)((buf) + (offset) - 0x7e00)) - (*(type *)((buf) + (offset) - 0x7e00))
- -
int kvm_cpu_dirty_log_size(void); -#define PUT_SMSTATE(type, buf, offset, val) \
- *(type *)((buf) + (offset) - 0x7e00) = val
-
#ifdef CONFIG_KVM_SMM
int memslot_rmap_alloc(struct kvm_memory_slot *slot, unsigned long npages);
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index 688315d1dfabd1..7ca5e06878e19a 100644 index 74f390077a1e..44d6a2240e9e 100644
--- a/arch/x86/kvm/svm/svm.c --- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c
@@ -4439,15 +4439,11 @@ static int svm_enter_smm(struct kvm_vcpu *vcpu, union kvm_smram *smram) @@ -4443,15 +4443,11 @@ static int svm_enter_smm(struct kvm_vcpu *vcpu, union kvm_smram *smram)
struct kvm_host_map map_save; struct kvm_host_map map_save;
int ret; int ret;
@ -47,15 +43,15 @@ index 688315d1dfabd1..7ca5e06878e19a 100644
return 0; return 0;
- /* FED8h - SVM Guest */ - /* FED8h - SVM Guest */
- put_smstate(u64, smstate, 0x7ed8, 1); - PUT_SMSTATE(u64, smstate, 0x7ed8, 1);
- /* FEE0h - SVM Guest VMCB Physical Address */ - /* FEE0h - SVM Guest VMCB Physical Address */
- put_smstate(u64, smstate, 0x7ee0, svm->nested.vmcb12_gpa); - PUT_SMSTATE(u64, smstate, 0x7ee0, svm->nested.vmcb12_gpa);
+ smram->smram64.svm_guest_flag = 1; + smram->smram64.svm_guest_flag = 1;
+ smram->smram64.svm_guest_vmcb_gpa = svm->nested.vmcb12_gpa; + smram->smram64.svm_guest_vmcb_gpa = svm->nested.vmcb12_gpa;
svm->vmcb->save.rax = vcpu->arch.regs[VCPU_REGS_RAX]; svm->vmcb->save.rax = vcpu->arch.regs[VCPU_REGS_RAX];
svm->vmcb->save.rsp = vcpu->arch.regs[VCPU_REGS_RSP]; svm->vmcb->save.rsp = vcpu->arch.regs[VCPU_REGS_RSP];
@@ -4486,28 +4482,23 @@ static int svm_leave_smm(struct kvm_vcpu *vcpu, const union kvm_smram *smram) @@ -4489,28 +4485,25 @@ static int svm_leave_smm(struct kvm_vcpu *vcpu, const union kvm_smram *smram)
{ {
struct vcpu_svm *svm = to_svm(vcpu); struct vcpu_svm *svm = to_svm(vcpu);
struct kvm_host_map map, map_save; struct kvm_host_map map, map_save;
@ -64,13 +60,14 @@ index 688315d1dfabd1..7ca5e06878e19a 100644
int ret; int ret;
- const char *smstate = (const char *)smram; - const char *smstate = (const char *)smram;
- + const struct kvm_smram_state_64 *smram64 = &smram->smram64;
if (!guest_cpuid_has(vcpu, X86_FEATURE_LM)) if (!guest_cpuid_has(vcpu, X86_FEATURE_LM))
return 0; return 0;
/* Non-zero if SMI arrived while vCPU was in guest mode. */ /* Non-zero if SMI arrived while vCPU was in guest mode. */
- if (!GET_SMSTATE(u64, smstate, 0x7ed8)) - if (!GET_SMSTATE(u64, smstate, 0x7ed8))
+ if (!smram->smram64.svm_guest_flag) + if (!smram64->svm_guest_flag)
return 0; return 0;
if (!guest_cpuid_has(vcpu, X86_FEATURE_SVM)) if (!guest_cpuid_has(vcpu, X86_FEATURE_SVM))
@ -78,21 +75,24 @@ index 688315d1dfabd1..7ca5e06878e19a 100644
- saved_efer = GET_SMSTATE(u64, smstate, 0x7ed0); - saved_efer = GET_SMSTATE(u64, smstate, 0x7ed0);
- if (!(saved_efer & EFER_SVME)) - if (!(saved_efer & EFER_SVME))
+ if (!(smram->smram64.efer & EFER_SVME)) + if (!(smram64->efer & EFER_SVME))
return 1; return 1;
- vmcb12_gpa = GET_SMSTATE(u64, smstate, 0x7ee0); - vmcb12_gpa = GET_SMSTATE(u64, smstate, 0x7ee0);
- if (kvm_vcpu_map(vcpu, gpa_to_gfn(vmcb12_gpa), &map) == -EINVAL) - if (kvm_vcpu_map(vcpu, gpa_to_gfn(vmcb12_gpa), &map))
+ if (kvm_vcpu_map(vcpu, gpa_to_gfn(smram->smram64.svm_guest_vmcb_gpa), &map) == -EINVAL) + if (kvm_vcpu_map(vcpu, gpa_to_gfn(smram64->svm_guest_vmcb_gpa), &map))
return 1; return 1;
ret = 1; ret = 1;
@@ -4533,7 +4524,7 @@ static int svm_leave_smm(struct kvm_vcpu *vcpu, const union kvm_smram *smram) @@ -4536,7 +4529,7 @@ static int svm_leave_smm(struct kvm_vcpu *vcpu, const union kvm_smram *smram)
vmcb12 = map.hva; vmcb12 = map.hva;
nested_copy_vmcb_control_to_cache(svm, &vmcb12->control); nested_copy_vmcb_control_to_cache(svm, &vmcb12->control);
nested_copy_vmcb_save_to_cache(svm, &vmcb12->save); nested_copy_vmcb_save_to_cache(svm, &vmcb12->save);
- ret = enter_svm_guest_mode(vcpu, vmcb12_gpa, vmcb12, false); - ret = enter_svm_guest_mode(vcpu, vmcb12_gpa, vmcb12, false);
+ ret = enter_svm_guest_mode(vcpu, smram->smram64.svm_guest_vmcb_gpa, vmcb12, false); + ret = enter_svm_guest_mode(vcpu, smram64->svm_guest_vmcb_gpa, vmcb12, false);
if (ret) if (ret)
goto unmap_save; goto unmap_save;
--
2.38.1

View file

@ -1,6 +1,6 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 From 61fe0ac44f8d9714accad28bab0179d85f969b23 Mon Sep 17 00:00:00 2001
From: Maxim Levitsky <mlevitsk@redhat.com> From: Maxim Levitsky <mlevitsk@redhat.com>
Date: Wed, 3 Aug 2022 18:50:10 +0300 Date: Tue, 25 Oct 2022 15:47:40 +0300
Subject: [PATCH] KVM: x86: SVM: don't save SVM state to SMRAM when VM is not Subject: [PATCH] KVM: x86: SVM: don't save SVM state to SMRAM when VM is not
long mode capable long mode capable
@ -13,23 +13,21 @@ long mode capable - such VM can still run 32 bit guests in compatibility
mode. mode.
Signed-off-by: Maxim Levitsky <mlevitsk@redhat.com> Signed-off-by: Maxim Levitsky <mlevitsk@redhat.com>
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
--- ---
arch/x86/kvm/svm/svm.c | 9 +++++++++ arch/x86/kvm/svm/svm.c | 8 ++++++++
1 file changed, 9 insertions(+) 1 file changed, 8 insertions(+)
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index 742497b1d4c3..938b9b24f0ee 100644 index 44d6a2240e9e..9f2e3fe3dcd7 100644
--- a/arch/x86/kvm/svm/svm.c --- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c
@@ -4311,6 +4311,15 @@ static int svm_enter_smm(struct kvm_vcpu *vcpu, union kvm_smram *smram) @@ -4446,6 +4446,14 @@ static int svm_enter_smm(struct kvm_vcpu *vcpu, union kvm_smram *smram)
if (!is_guest_mode(vcpu)) if (!is_guest_mode(vcpu))
return 0; return 0;
+ /* + /*
+ * 32 bit SMRAM format doesn't preserve EFER and SVM state. + * 32-bit SMRAM format doesn't preserve EFER and SVM state. Userspace is
+ * SVM should not be enabled by the userspace without marking + * responsible for ensuring nested SVM and SMIs are mutually exclusive.
+ * the CPU as at least long mode capable.
+ */ + */
+ +
+ if (!guest_cpuid_has(vcpu, X86_FEATURE_LM)) + if (!guest_cpuid_has(vcpu, X86_FEATURE_LM))
@ -38,3 +36,6 @@ index 742497b1d4c3..938b9b24f0ee 100644
smram->smram64.svm_guest_flag = 1; smram->smram64.svm_guest_flag = 1;
smram->smram64.svm_guest_vmcb_gpa = svm->nested.vmcb12_gpa; smram->smram64.svm_guest_vmcb_gpa = svm->nested.vmcb12_gpa;
--
2.38.1

View file

@ -0,0 +1,138 @@
From 810253988e9e317d6e576ebe608a5454f274b8fc Mon Sep 17 00:00:00 2001
From: Maxim Levitsky <mlevitsk@redhat.com>
Date: Tue, 25 Oct 2022 15:47:41 +0300
Subject: [PATCH] KVM: x86: smm: preserve interrupt shadow in SMRAM
When #SMI is asserted, the CPU can be in interrupt shadow due to sti or
mov ss.
It is not mandatory in the Intel/AMD PRM to have the #SMI blocked during
the shadow, and on top of that, since neither SVM nor VMX has true support
for an SMI window, waiting for one instruction would mean single-stepping
the guest.
Instead, allow #SMI in this case, but both reset the interrupt window and
stash its value in SMRAM to restore it on exit from SMM.
This fixes rare failures seen mostly on Windows guests on VMX, when #SMI
falls on the sti instruction, which manifest as a VM entry failure due
to EFLAGS.IF not being set, but STI interrupt window still being set
in the VMCS.
Signed-off-by: Maxim Levitsky <mlevitsk@redhat.com>
---
arch/x86/kvm/smm.c | 24 +++++++++++++++++++++---
arch/x86/kvm/smm.h | 5 +++--
2 files changed, 24 insertions(+), 5 deletions(-)
diff --git a/arch/x86/kvm/smm.c b/arch/x86/kvm/smm.c
index 82761384a866..46d2656937a7 100644
--- a/arch/x86/kvm/smm.c
+++ b/arch/x86/kvm/smm.c
@@ -21,6 +21,7 @@ static void check_smram_offsets(void)
CHECK_SMRAM32_OFFSET(smbase, 0xFEF8);
CHECK_SMRAM32_OFFSET(smm_revision, 0xFEFC);
CHECK_SMRAM32_OFFSET(reserved2, 0xFF00);
+ CHECK_SMRAM32_OFFSET(int_shadow, 0xFF10);
CHECK_SMRAM32_OFFSET(cr4, 0xFF14);
CHECK_SMRAM32_OFFSET(reserved3, 0xFF18);
CHECK_SMRAM32_OFFSET(ds, 0xFF2C);
@@ -65,7 +66,7 @@ static void check_smram_offsets(void)
CHECK_SMRAM64_OFFSET(io_restart_rsi, 0xFEB0);
CHECK_SMRAM64_OFFSET(io_restart_rdi, 0xFEB8);
CHECK_SMRAM64_OFFSET(io_restart_dword, 0xFEC0);
- CHECK_SMRAM64_OFFSET(reserved1, 0xFEC4);
+ CHECK_SMRAM64_OFFSET(int_shadow, 0xFEC4);
CHECK_SMRAM64_OFFSET(io_inst_restart, 0xFEC8);
CHECK_SMRAM64_OFFSET(auto_hlt_restart, 0xFEC9);
CHECK_SMRAM64_OFFSET(reserved2, 0xFECA);
@@ -212,6 +213,8 @@ static void enter_smm_save_state_32(struct kvm_vcpu *vcpu,
smram->cr4 = kvm_read_cr4(vcpu);
smram->smm_revision = 0x00020000;
smram->smbase = vcpu->arch.smbase;
+
+ smram->int_shadow = static_call(kvm_x86_get_interrupt_shadow)(vcpu);
}
#ifdef CONFIG_X86_64
@@ -261,6 +264,8 @@ static void enter_smm_save_state_64(struct kvm_vcpu *vcpu,
enter_smm_save_seg_64(vcpu, &smram->ds, VCPU_SREG_DS);
enter_smm_save_seg_64(vcpu, &smram->fs, VCPU_SREG_FS);
enter_smm_save_seg_64(vcpu, &smram->gs, VCPU_SREG_GS);
+
+ smram->int_shadow = static_call(kvm_x86_get_interrupt_shadow)(vcpu);
}
#endif
@@ -306,6 +311,8 @@ void enter_smm(struct kvm_vcpu *vcpu)
kvm_set_rflags(vcpu, X86_EFLAGS_FIXED);
kvm_rip_write(vcpu, 0x8000);
+ static_call(kvm_x86_set_interrupt_shadow)(vcpu, 0);
+
cr0 = vcpu->arch.cr0 & ~(X86_CR0_PE | X86_CR0_EM | X86_CR0_TS | X86_CR0_PG);
static_call(kvm_x86_set_cr0)(vcpu, cr0);
vcpu->arch.cr0 = cr0;
@@ -453,7 +460,7 @@ static int rsm_load_state_32(struct x86_emulate_ctxt *ctxt,
{
struct kvm_vcpu *vcpu = ctxt->vcpu;
struct desc_ptr dt;
- int i;
+ int i, r;
ctxt->eflags = smstate->eflags | X86_EFLAGS_FIXED;
ctxt->_eip = smstate->eip;
@@ -487,8 +494,16 @@ static int rsm_load_state_32(struct x86_emulate_ctxt *ctxt,
vcpu->arch.smbase = smstate->smbase;
- return rsm_enter_protected_mode(vcpu, smstate->cr0,
+ r = rsm_enter_protected_mode(vcpu, smstate->cr0,
smstate->cr3, smstate->cr4);
+
+ if (r != X86EMUL_CONTINUE)
+ return r;
+
+ static_call(kvm_x86_set_interrupt_shadow)(vcpu, 0);
+ ctxt->interruptibility = (u8)smstate->int_shadow;
+
+ return r;
}
#ifdef CONFIG_X86_64
@@ -539,6 +554,9 @@ static int rsm_load_state_64(struct x86_emulate_ctxt *ctxt,
rsm_load_seg_64(vcpu, &smstate->fs, VCPU_SREG_FS);
rsm_load_seg_64(vcpu, &smstate->gs, VCPU_SREG_GS);
+ static_call(kvm_x86_set_interrupt_shadow)(vcpu, 0);
+ ctxt->interruptibility = (u8)smstate->int_shadow;
+
return X86EMUL_CONTINUE;
}
#endif
diff --git a/arch/x86/kvm/smm.h b/arch/x86/kvm/smm.h
index 8d96bff3f4d5..2eaec53bcc95 100644
--- a/arch/x86/kvm/smm.h
+++ b/arch/x86/kvm/smm.h
@@ -19,7 +19,8 @@ struct kvm_smram_state_32 {
u32 reserved1[62];
u32 smbase;
u32 smm_revision;
- u32 reserved2[5];
+ u32 reserved2[4];
+ u32 int_shadow; /* KVM extension */
u32 cr4; /* CR4 is not present in Intel/AMD SMRAM image */
u32 reserved3[5];
@@ -86,7 +87,7 @@ struct kvm_smram_state_64 {
u64 io_restart_rsi;
u64 io_restart_rdi;
u32 io_restart_dword;
- u32 reserved1;
+ u32 int_shadow;
u8 io_inst_restart;
u8 auto_hlt_restart;
u8 reserved2[6];
--
2.38.1

View file

@ -8,24 +8,27 @@ pve/0002-bridge-keep-MAC-of-first-assigned-port.patch
pve/0003-pci-Enable-overrides-for-missing-ACS-capabilities-4..patch pve/0003-pci-Enable-overrides-for-missing-ACS-capabilities-4..patch
pve/0004-kvm-disable-default-dynamic-halt-polling-growth.patch pve/0004-kvm-disable-default-dynamic-halt-polling-growth.patch
pve/0005-net-core-downgrade-unregister_netdevice-refcount-lea.patch pve/0005-net-core-downgrade-unregister_netdevice-refcount-lea.patch
# pve/0006-Revert-PCI-Coalesce-host-bridge-contiguous-apertures.patch # Already in mainline (v5.16) # pve/0007-Ubuntu-remove-leftover-reference-to-ubuntu-hio-drive.patch # Not in Ubuntu Mainline
# pve/0007-PCI-Reinstate-PCI-Coalesce-host-bridge-contiguous-ap.patch # Already in mainline (v5.16) pve/0008-KVM-x86-start-moving-SMM-related-functions-to-new-fi.patch
pve/0008-do-not-generate-split-BTF-type-info-per-default.patch pve/0009-KVM-x86-move-SMM-entry-to-a-new-file.patch
# pve/0009-blk-cgroup-always-terminate-io.stat-lines.patch # Already in mainline (v5.17) pve/0010-KVM-x86-move-SMM-exit-to-a-new-file.patch
# pve/0010-drivers-firmware-Don-t-mark-as-busy-the-simple-frame.patch # Already in mainline (v5.18) pve/0011-KVM-x86-do-not-go-through-ctxt-ops-when-emulating-rs.patch
# pve/0011-drm-simpledrm-Request-memory-region-in-driver.patch # Already in mainline (v5.18) pve/0012-KVM-allow-compiling-out-SMM-support.patch
# pve/0012-fbdev-simplefb-Request-memory-region-in-driver.patch # Already in mainline (v5.18) pve/0013-KVM-x86-compile-out-vendor-specific-code-if-SMM-is-d.patch
# pve/0013-NFSv4.1-provide-mount-option-to-toggle-trunking-disc.patch # Already in mainline (v5.18) pve/0014-KVM-x86-remove-SMRAM-address-space-if-SMM-is-not-sup.patch
# pve/0014-EDAC-amd64-Add-PCI-device-IDs-for-family-19h-model-5.patch # Already in mainline (v5.17) pve/0015-KVM-x86-do-not-define-KVM_REQ_SMI-if-SMM-disabled.patch
pve/0015-bug-introduce-ASSERT_STRUCT_OFFSET.patch pve/0016-bug-introduce-ASSERT_STRUCT_OFFSET.patch
pve/0016-KVM-x86-emulator-em_sysexit-should-update-ctxt-mode.patch pve/0017-KVM-x86-emulator-em_sysexit-should-update-ctxt-mode.patch
pve/0017-KVM-x86-emulator-introduce-emulator_recalc_and_set_m.patch pve/0018-KVM-x86-emulator-introduce-emulator_recalc_and_set_m.patch
pve/0018-KVM-x86-emulator-update-the-emulation-mode-after-rsm.patch pve/0019-KVM-x86-emulator-update-the-emulation-mode-after-rsm.patch
pve/0019-KVM-x86-emulator-update-the-emulation-mode-after-CR0.patch pve/0020-KVM-x86-emulator-update-the-emulation-mode-after-CR0.patch
pve/0020-KVM-x86-emulator-smm-add-structs-for-KVM-s-smram-lay.patch pve/0021-KVM-x86-smm-number-of-GPRs-in-the-SMRAM-image-depend.patch
pve/0021-KVM-x86-emulator-smm-use-smram-structs-in-the-common.patch pve/0022-KVM-x86-smm-check-for-failures-on-smm-entry.patch
pve/0022-KVM-x86-emulator-smm-use-smram-struct-for-32-bit-smr.patch pve/0023-KVM-x86-smm-add-structs-for-KVM-s-smram-layout.patch
pve/0023-KVM-x86-emulator-smm-use-smram-struct-for-64-bit-smr.patch pve/0024-KVM-x86-smm-use-smram-structs-in-the-common-code.patch
pve/0024-KVM-x86-SVM-use-smram-structs.patch pve/0025-KVM-x86-smm-use-smram-struct-for-32-bit-smram-load-r.patch
pve/0025-KVM-x86-SVM-don-t-save-SVM-state-to-SMRAM-when-VM-is.patch pve/0026-KVM-x86-smm-use-smram-struct-for-64-bit-smram-load-r.patch
pve/0026-KVM-x86-emulator-smm-preserve-interrupt-shadow-in-SM.patch pve/0027-KVM-svm-drop-explicit-return-value-of-kvm_vcpu_map.patch
pve/0028-KVM-x86-SVM-use-smram-structs.patch
pve/0029-KVM-x86-SVM-don-t-save-SVM-state-to-SMRAM-when-VM-is.patch
pve/0030-KVM-x86-smm-preserve-interrupt-shadow-in-SMRAM.patch

2
linux

@ -1 +1 @@
Subproject commit 3d7cb6b04c3f3115719235cc6866b10326de34cd Subproject commit 4fe89d07dcc2804c8b562f6c7896a45643d34b2f

2
zfs

@ -1 +1 @@
Subproject commit 6c3c5fcfbe27d9193cd131753cc7e47ee2784621 Subproject commit 6a6bd493988c75331deab06e5352a9bed035a87d