Merge 1f70935f63 ("Merge tag 'armsoc-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/soc/soc") into android-mainline

Steps on the way to 5.10-rc1

Resolves conflicts in:
	Documentation/admin-guide/sysctl/vm.rst

Signed-off-by: Greg Kroah-Hartman <gregkh@google.com>
Change-Id: Ic58f28718f28dae42948c935dfb0c62122fe86fc
This commit is contained in:
Greg Kroah-Hartman 2020-10-26 11:39:46 +01:00
commit 79c83f152b
419 changed files with 14310 additions and 7724 deletions

View file

@ -61,43 +61,46 @@ will lead to quite erratic information inside ``/proc/stat``::
static volatile sig_atomic_t stop;
static void sighandler (int signr)
static void sighandler(int signr)
{
(void) signr;
stop = 1;
(void) signr;
stop = 1;
}
static unsigned long hog (unsigned long niters)
{
stop = 0;
while (!stop && --niters);
return niters;
stop = 0;
while (!stop && --niters);
return niters;
}
int main (void)
{
int i;
struct itimerval it = { .it_interval = { .tv_sec = 0, .tv_usec = 1 },
.it_value = { .tv_sec = 0, .tv_usec = 1 } };
sigset_t set;
unsigned long v[HIST];
double tmp = 0.0;
unsigned long n;
signal (SIGALRM, &sighandler);
setitimer (ITIMER_REAL, &it, NULL);
int i;
struct itimerval it = {
.it_interval = { .tv_sec = 0, .tv_usec = 1 },
.it_value = { .tv_sec = 0, .tv_usec = 1 } };
sigset_t set;
unsigned long v[HIST];
double tmp = 0.0;
unsigned long n;
signal(SIGALRM, &sighandler);
setitimer(ITIMER_REAL, &it, NULL);
hog (ULONG_MAX);
for (i = 0; i < HIST; ++i) v[i] = ULONG_MAX - hog (ULONG_MAX);
for (i = 0; i < HIST; ++i) tmp += v[i];
tmp /= HIST;
n = tmp - (tmp / 3.0);
hog (ULONG_MAX);
for (i = 0; i < HIST; ++i) v[i] = ULONG_MAX - hog(ULONG_MAX);
for (i = 0; i < HIST; ++i) tmp += v[i];
tmp /= HIST;
n = tmp - (tmp / 3.0);
sigemptyset (&set);
sigaddset (&set, SIGALRM);
sigemptyset(&set);
sigaddset(&set, SIGALRM);
for (;;) {
hog (n);
sigwait (&set, &i);
}
return 0;
for (;;) {
hog(n);
sigwait(&set, &i);
}
return 0;
}

View file

@ -27,6 +27,7 @@ Currently, these files are in /proc/sys/vm:
- admin_reserve_kbytes
- block_dump
- compact_memory
- compaction_proactiveness
- compact_unevictable_allowed
- dirty_background_bytes
- dirty_background_ratio
@ -38,6 +39,7 @@ Currently, these files are in /proc/sys/vm:
- drop_caches
- extfrag_threshold
- extra_free_kbytes
- highmem_is_dirtyable
- hugetlb_shm_group
- laptop_mode
- legacy_va_layout

View file

@ -1,3 +1,5 @@
.. _hugetlbpage_index:
====================
HugeTLBpage on ARM64
====================

View file

@ -0,0 +1,121 @@
# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause)
# Copyright 2019-2020 Artur Rojek
%YAML 1.2
---
$id: "http://devicetree.org/schemas/input/adc-joystick.yaml#"
$schema: "http://devicetree.org/meta-schemas/core.yaml#"
title: ADC attached joystick
maintainers:
- Artur Rojek <contact@artur-rojek.eu>
description: >
Bindings for joystick devices connected to ADC controllers supporting
the Industrial I/O subsystem.
properties:
compatible:
const: adc-joystick
io-channels:
minItems: 1
maxItems: 1024
description: >
List of phandle and IIO specifier pairs.
Each pair defines one ADC channel to which a joystick axis is connected.
See Documentation/devicetree/bindings/iio/iio-bindings.txt for details.
'#address-cells':
const: 1
'#size-cells':
const: 0
required:
- compatible
- io-channels
- '#address-cells'
- '#size-cells'
additionalProperties: false
patternProperties:
"^axis@[0-9a-f]+$":
type: object
description: >
Represents a joystick axis bound to the given ADC channel.
For each entry in the io-channels list, one axis subnode with a matching
reg property must be specified.
properties:
reg:
minimum: 0
maximum: 1023
description: Index of an io-channels list entry bound to this axis.
linux,code:
$ref: /schemas/types.yaml#/definitions/uint32
description: EV_ABS specific event code generated by the axis.
abs-range:
allOf:
- $ref: /schemas/types.yaml#/definitions/uint32-array
- items:
- description: minimum value
- description: maximum value
description: >
Minimum and maximum values produced by the axis.
For an ABS_X axis this will be the left-most and right-most
inclination of the joystick. If min > max, it is left to userspace to
treat the axis as inverted.
This property is interpreted as two signed 32 bit values.
abs-fuzz:
$ref: /schemas/types.yaml#/definitions/uint32
description: >
Amount of noise in the input value.
Omitting this property indicates the axis is precise.
abs-flat:
$ref: /schemas/types.yaml#/definitions/uint32
description: >
Axial "deadzone", or area around the center position, where the axis
is considered to be at rest.
Omitting this property indicates the axis always returns to exactly
the center position.
required:
- reg
- linux,code
- abs-range
additionalProperties: false
examples:
- |
#include <dt-bindings/iio/adc/ingenic,adc.h>
#include <dt-bindings/input/input.h>
joystick: adc-joystick {
compatible = "adc-joystick";
io-channels = <&adc INGENIC_ADC_TOUCH_XP>,
<&adc INGENIC_ADC_TOUCH_YP>;
#address-cells = <1>;
#size-cells = <0>;
axis@0 {
reg = <0>;
linux,code = <ABS_X>;
abs-range = <3300 0>;
abs-fuzz = <4>;
abs-flat = <200>;
};
axis@1 {
reg = <1>;
linux,code = <ABS_Y>;
abs-range = <0 3300>;
abs-fuzz = <4>;
abs-flat = <200>;
};
};

View file

@ -0,0 +1,40 @@
Device tree bindings for Zinitix BT541 touchscreen controller
Required properties:
- compatible : Should be "zinitix,bt541"
- reg : I2C address of the chip. Should be 0x20
- interrupts : Interrupt to which the chip is connected
Optional properties:
- vdd-supply : Analog power supply regulator on VCCA pin
- vddo-supply : Digital power supply regulator on VDD pin
- zinitix,mode : Mode of reporting touch points. Some modes may not work
with a particular ts firmware for unknown reasons. Available
modes are 1 and 2. Mode 2 is the default and preferred.
The touchscreen-* properties are documented in touchscreen.txt in this
directory.
Example:
i2c@00000000 {
/* ... */
bt541@20 {
compatible = "zinitix,bt541";
reg = <0x20>;
interrupt-parent = <&msmgpio>;
interrupts = <13 IRQ_TYPE_EDGE_FALLING>;
pinctrl-names = "default";
pinctrl-0 = <&tsp_default>;
vdd-supply = <&reg_vdd_tsp>;
vddo-supply = <&pm8916_l6>;
touchscreen-size-x = <540>;
touchscreen-size-y = <960>;
zinitix,mode = <2>;
};
/* ... */
};

View file

@ -1222,6 +1222,8 @@ patternProperties:
description: Shenzhen Zidoo Technology Co., Ltd.
"^zii,.*":
description: Zodiac Inflight Innovations
"^zinitix,.*":
description: Zinitix Co., Ltd
"^zte,.*":
description: ZTE Corp.
"^zyxel,.*":

View file

@ -77,7 +77,6 @@ available subsections can be seen below.
console
dcdbas
eisa
ipmb
isa
isapnp
io-mapping

View file

@ -1,16 +1,19 @@
===============
Provoke crashes
===============
.. SPDX-License-Identifier: GPL-2.0
The lkdtm module provides an interface to crash or injure the kernel at
predefined crashpoints to evaluate the reliability of crash dumps obtained
using different dumping solutions. The module uses KPROBEs to instrument
crashing points, but can also crash the kernel directly without KRPOBE
support.
============================================================
Provoking crashes with Linux Kernel Dump Test Module (LKDTM)
============================================================
The lkdtm module provides an interface to disrupt (and usually crash)
the kernel at predefined code locations to evaluate the reliability of
the kernel's exception handling and to test crash dumps obtained using
different dumping solutions. The module uses KPROBEs to instrument the
trigger location, but can also trigger the kernel directly without KPROBE
support via debugfs.
You can provide the way either through module arguments when inserting
the module, or through a debugfs interface.
You can select the location of the trigger ("crash point name") and the
type of action ("crash point type") either through module arguments when
inserting the module, or through the debugfs interface.
Usage::
@ -18,31 +21,38 @@ Usage::
[cpoint_count={>0}]
recur_count
Recursion level for the stack overflow test. Default is 10.
Recursion level for the stack overflow test. By default this is
dynamically calculated based on kernel configuration, with the
goal of being just large enough to exhaust the kernel stack. The
value can be seen at `/sys/module/lkdtm/parameters/recur_count`.
cpoint_name
Crash point where the kernel is to be crashed. It can be
Where in the kernel to trigger the action. It can be
one of INT_HARDWARE_ENTRY, INT_HW_IRQ_EN, INT_TASKLET_ENTRY,
FS_DEVRW, MEM_SWAPOUT, TIMERADD, SCSI_DISPATCH_CMD,
IDE_CORE_CP, DIRECT
IDE_CORE_CP, or DIRECT
cpoint_type
Indicates the action to be taken on hitting the crash point.
It can be one of PANIC, BUG, EXCEPTION, LOOP, OVERFLOW,
CORRUPT_STACK, UNALIGNED_LOAD_STORE_WRITE, OVERWRITE_ALLOCATION,
WRITE_AFTER_FREE,
These are numerous, and best queried directly from debugfs. Some
of the common ones are PANIC, BUG, EXCEPTION, LOOP, and OVERFLOW.
See the contents of `/sys/kernel/debug/provoke-crash/DIRECT` for
a complete list.
cpoint_count
Indicates the number of times the crash point is to be hit
to trigger an action. The default is 10.
before triggering the action. The default is 10 (except for
DIRECT, which always fires immediately).
You can also induce failures by mounting debugfs and writing the type to
<mountpoint>/provoke-crash/<crashpoint>. E.g.::
<debugfs>/provoke-crash/<crashpoint>. E.g.::
mount -t debugfs debugfs /mnt
echo EXCEPTION > /mnt/provoke-crash/INT_HARDWARE_ENTRY
mount -t debugfs debugfs /sys/kernel/debug
echo EXCEPTION > /sys/kernel/debug/provoke-crash/INT_HARDWARE_ENTRY
The special file `DIRECT` will induce the action directly without KPROBE
instrumentation. This mode is the only one available when the module is
built for a kernel without KPROBEs support::
A special file is `DIRECT` which will induce the crash directly without
KPROBE instrumentation. This mode is the only one available when the module
is built on a kernel without KPROBEs support.
# Instead of having a BUG kill your shell, have it kill "cat":
cat <(echo WRITE_RO) >/sys/kernel/debug/provoke-crash/DIRECT

View file

@ -81,7 +81,7 @@ C. Boot options
1. fbcon=font:<name>
Select the initial font to use. The value 'name' can be any of the
compiled-in fonts: 10x18, 6x10, 7x14, Acorn8x8, MINI4x6,
compiled-in fonts: 10x18, 6x10, 6x8, 7x14, Acorn8x8, MINI4x6,
PEARL8x8, ProFont6x11, SUN12x22, SUN8x16, TER16x32, VGA8x16, VGA8x8.
Note, not all drivers can handle font with widths not divisible by 8,

View file

@ -109,7 +109,7 @@ More reading
This documentation is based on the LWN.net articles [1]_\ [2]_. Rafal Milecki
also made a very complete and comprehensive summary called "A state of network
acceleration" that describes how things were before this infrastructure was
mailined [3]_ and it also makes a rough summary of this work [4]_.
mainlined [3]_ and it also makes a rough summary of this work [4]_.
.. [1] https://lwn.net/Articles/738214/
.. [2] https://lwn.net/Articles/742164/

View file

@ -106,23 +106,29 @@ NUL or newline terminated.
strcpy()
--------
strcpy() performs no bounds checking on the destination
buffer. This could result in linear overflows beyond the
end of the buffer, leading to all kinds of misbehaviors. While
`CONFIG_FORTIFY_SOURCE=y` and various compiler flags help reduce the
risk of using this function, there is no good reason to add new uses of
this function. The safe replacement is strscpy().
strcpy() performs no bounds checking on the destination buffer. This
could result in linear overflows beyond the end of the buffer, leading to
all kinds of misbehaviors. While `CONFIG_FORTIFY_SOURCE=y` and various
compiler flags help reduce the risk of using this function, there is
no good reason to add new uses of this function. The safe replacement
is strscpy(), though care must be given to any cases where the return
value of strcpy() was used, since strscpy() does not return a pointer to
the destination, but rather a count of non-NUL bytes copied (or negative
errno when it truncates).
strncpy() on NUL-terminated strings
-----------------------------------
Use of strncpy() does not guarantee that the destination buffer
will be NUL terminated. This can lead to various linear read overflows
and other misbehavior due to the missing termination. It also NUL-pads the
destination buffer if the source contents are shorter than the destination
buffer size, which may be a needless performance penalty for callers using
only NUL-terminated strings. The safe replacement is strscpy().
(Users of strscpy() still needing NUL-padding should instead
use strscpy_pad().)
Use of strncpy() does not guarantee that the destination buffer will
be NUL terminated. This can lead to various linear read overflows and
other misbehavior due to the missing termination. It also NUL-pads
the destination buffer if the source contents are shorter than the
destination buffer size, which may be a needless performance penalty
for callers using only NUL-terminated strings. The safe replacement is
strscpy(), though care must be given to any cases where the return value
of strncpy() was used, since strscpy() does not return a pointer to the
destination, but rather a count of non-NUL bytes copied (or negative
errno when it truncates). Any cases still needing NUL-padding should
instead use strscpy_pad().
If a caller is using non-NUL-terminated strings, strncpy() can
still be used, but destinations should be marked with the `__nonstring
@ -131,10 +137,12 @@ attribute to avoid future compiler warnings.
strlcpy()
---------
strlcpy() reads the entire source buffer first, possibly exceeding
the given limit of bytes to copy. This is inefficient and can lead to
linear read overflows if a source string is not NUL-terminated. The
safe replacement is strscpy().
strlcpy() reads the entire source buffer first (since the return value
is meant to match that of strlen()). This read may exceed the destination
size limit. This is both inefficient and can lead to linear read overflows
if a source string is not NUL-terminated. The safe replacement is strscpy(),
though care must be given to any cases where the return value of strlcpy()
is used, since strscpy() will return negative errno values when it truncates.
%p format specifier
-------------------

View file

@ -527,6 +527,13 @@ done on the patch. Reviewed-by: tags, when supplied by reviewers known to
understand the subject area and to perform thorough reviews, will normally
increase the likelihood of your patch getting into the kernel.
Both Tested-by and Reviewed-by tags, once received on the mailing list from a
tester or reviewer, should be added by the author to the applicable patches when
sending the next version. However, if the patch has changed substantially in the
following version, these tags might not be applicable anymore and thus should be
removed. Usually the removal of someone's Tested-by or Reviewed-by tags should be
mentioned in the patch changelog (after the '---' separator).
A Suggested-by: tag indicates that the patch idea is suggested by the person
named and ensures credit to the person for the idea. Please note that this
tag should not be added without the reporter's permission, especially if the

View file

@ -0,0 +1,45 @@
.. include:: ../disclaimer-zh_CN.rst
:Original: :ref:`Documentation/arm64/hugetlbpage.rst <hugetlbpage_index>`
Translator: Bailu Lin <bailu.lin@vivo.com>
=====================
ARM64中的 HugeTLBpage
=====================
大页依靠有效利用 TLBs 来提高地址翻译的性能。这取决于以下
两点 -
- 大页的大小
- TLBs 支持的条目大小
ARM64 接口支持2种大页方式。
1) pud/pmd 级别的块映射
-----------------------
这是常规大页,他们的 pmd 或 pud 页面表条目指向一个内存块。
不管 TLB 中支持的条目大小如何,块映射可以减少翻译大页地址
所需遍历的页表深度。
2) 使用连续位
-------------
架构中转换页表条目(D4.5.3, ARM DDI 0487C.a)中提供一个连续
位告诉 MMU 这个条目是一个连续条目集的一员,它可以被缓存在单
个 TLB 条目中。
在 Linux 中连续位用来增加 pmd 和 pte(最后一级)级别映射的大
小。受支持的连续页表条目数量因页面大小和页表级别而异。
支持以下大页尺寸配置 -
====== ======== ==== ======== ===
- CONT PTE PMD CONT PMD PUD
====== ======== ==== ======== ===
4K: 64K 2M 32M 1G
16K: 2M 32M 1G
64K: 2M 512M 16G
====== ======== ==== ======== ===

View file

@ -14,3 +14,4 @@ ARM64 架构
:maxdepth: 2
amu
hugetlbpage

View file

@ -4498,11 +4498,14 @@ Currently, the following list of CPUID leaves are returned:
- HYPERV_CPUID_ENLIGHTMENT_INFO
- HYPERV_CPUID_IMPLEMENT_LIMITS
- HYPERV_CPUID_NESTED_FEATURES
- HYPERV_CPUID_SYNDBG_VENDOR_AND_MAX_FUNCTIONS
- HYPERV_CPUID_SYNDBG_INTERFACE
- HYPERV_CPUID_SYNDBG_PLATFORM_CAPABILITIES
HYPERV_CPUID_NESTED_FEATURES leaf is only exposed when Enlightened VMCS was
enabled on the corresponding vCPU (KVM_CAP_HYPERV_ENLIGHTENED_VMCS).
Userspace invokes KVM_GET_SUPPORTED_CPUID by passing a kvm_cpuid2 structure
Userspace invokes KVM_GET_SUPPORTED_HV_CPUID by passing a kvm_cpuid2 structure
with the 'nent' field indicating the number of entries in the variable-size
array 'entries'. If the number of entries is too low to describe all Hyper-V
feature leaves, an error (E2BIG) is returned. If the number is more or equal
@ -4704,6 +4707,106 @@ KVM_PV_VM_VERIFY
Verify the integrity of the unpacked image. Only if this succeeds,
KVM is allowed to start protected VCPUs.
4.126 KVM_X86_SET_MSR_FILTER
----------------------------
:Capability: KVM_X86_SET_MSR_FILTER
:Architectures: x86
:Type: vm ioctl
:Parameters: struct kvm_msr_filter
:Returns: 0 on success, < 0 on error
::
struct kvm_msr_filter_range {
#define KVM_MSR_FILTER_READ (1 << 0)
#define KVM_MSR_FILTER_WRITE (1 << 1)
__u32 flags;
__u32 nmsrs; /* number of msrs in bitmap */
__u32 base; /* MSR index the bitmap starts at */
__u8 *bitmap; /* a 1 bit allows the operations in flags, 0 denies */
};
#define KVM_MSR_FILTER_MAX_RANGES 16
struct kvm_msr_filter {
#define KVM_MSR_FILTER_DEFAULT_ALLOW (0 << 0)
#define KVM_MSR_FILTER_DEFAULT_DENY (1 << 0)
__u32 flags;
struct kvm_msr_filter_range ranges[KVM_MSR_FILTER_MAX_RANGES];
};
flags values for ``struct kvm_msr_filter_range``:
``KVM_MSR_FILTER_READ``
Filter read accesses to MSRs using the given bitmap. A 0 in the bitmap
indicates that a read should immediately fail, while a 1 indicates that
a read for a particular MSR should be handled regardless of the default
filter action.
``KVM_MSR_FILTER_WRITE``
Filter write accesses to MSRs using the given bitmap. A 0 in the bitmap
indicates that a write should immediately fail, while a 1 indicates that
a write for a particular MSR should be handled regardless of the default
filter action.
``KVM_MSR_FILTER_READ | KVM_MSR_FILTER_WRITE``
Filter both read and write accesses to MSRs using the given bitmap. A 0
in the bitmap indicates that both reads and writes should immediately fail,
while a 1 indicates that reads and writes for a particular MSR are not
filtered by this range.
flags values for ``struct kvm_msr_filter``:
``KVM_MSR_FILTER_DEFAULT_ALLOW``
If no filter range matches an MSR index that is getting accessed, KVM will
fall back to allowing access to the MSR.
``KVM_MSR_FILTER_DEFAULT_DENY``
If no filter range matches an MSR index that is getting accessed, KVM will
fall back to rejecting access to the MSR. In this mode, all MSRs that should
be processed by KVM need to explicitly be marked as allowed in the bitmaps.
This ioctl allows user space to define up to 16 bitmaps of MSR ranges to
specify whether a certain MSR access should be explicitly filtered for or not.
If this ioctl has never been invoked, MSR accesses are not guarded and the
default KVM in-kernel emulation behavior is fully preserved.
Calling this ioctl with an empty set of ranges (all nmsrs == 0) disables MSR
filtering. In that mode, ``KVM_MSR_FILTER_DEFAULT_DENY`` is invalid and causes
an error.
As soon as the filtering is in place, every MSR access is processed through
the filtering except for accesses to the x2APIC MSRs (from 0x800 to 0x8ff);
x2APIC MSRs are always allowed, independent of the ``default_allow`` setting,
and their behavior depends on the ``X2APIC_ENABLE`` bit of the APIC base
register.
If a bit is within one of the defined ranges, read and write accesses are
guarded by the bitmap's value for the MSR index if the kind of access
is included in the ``struct kvm_msr_filter_range`` flags. If no range
covers this particular access, the behavior is determined by the flags
field in the kvm_msr_filter struct: ``KVM_MSR_FILTER_DEFAULT_ALLOW``
and ``KVM_MSR_FILTER_DEFAULT_DENY``.
Each bitmap range specifies a range of MSRs to potentially allow access on.
The range covers MSR indices [base .. base+nmsrs). The flags field
indicates whether reads, writes or both reads and writes are filtered
by setting a 1 bit in the bitmap for the corresponding MSR index.
If an MSR access is not permitted through the filtering, it generates a
#GP inside the guest. When combined with KVM_CAP_X86_USER_SPACE_MSR, that
allows user space to deflect and potentially handle various MSR accesses
into user space.
If a vCPU is in running state while this ioctl is invoked, the vCPU may
experience inconsistent filtering behavior on MSR accesses.
5. The kvm_run structure
========================
@ -4869,14 +4972,13 @@ to the byte array.
.. note::
For KVM_EXIT_IO, KVM_EXIT_MMIO, KVM_EXIT_OSI, KVM_EXIT_PAPR and
KVM_EXIT_EPR the corresponding
operations are complete (and guest state is consistent) only after userspace
has re-entered the kernel with KVM_RUN. The kernel side will first finish
incomplete operations and then check for pending signals. Userspace
can re-enter the guest with an unmasked signal pending to complete
pending operations.
For KVM_EXIT_IO, KVM_EXIT_MMIO, KVM_EXIT_OSI, KVM_EXIT_PAPR,
KVM_EXIT_EPR, KVM_EXIT_X86_RDMSR and KVM_EXIT_X86_WRMSR the corresponding
operations are complete (and guest state is consistent) only after userspace
has re-entered the kernel with KVM_RUN. The kernel side will first finish
incomplete operations and then check for pending signals. Userspace
can re-enter the guest with an unmasked signal pending to complete
pending operations.
::
@ -5163,6 +5265,44 @@ Note that KVM does not skip the faulting instruction as it does for
KVM_EXIT_MMIO, but userspace has to emulate any change to the processing state
if it decides to decode and emulate the instruction.
::
/* KVM_EXIT_X86_RDMSR / KVM_EXIT_X86_WRMSR */
struct {
__u8 error; /* user -> kernel */
__u8 pad[7];
__u32 reason; /* kernel -> user */
__u32 index; /* kernel -> user */
__u64 data; /* kernel <-> user */
} msr;
Used on x86 systems. When the VM capability KVM_CAP_X86_USER_SPACE_MSR is
enabled, MSR accesses to registers that would invoke a #GP by KVM kernel code
will instead trigger a KVM_EXIT_X86_RDMSR exit for reads and KVM_EXIT_X86_WRMSR
exit for writes.
The "reason" field specifies why the MSR trap occurred. User space will only
receive MSR exit traps when a particular reason was requested through
ENABLE_CAP. Currently valid exit reasons are:
KVM_MSR_EXIT_REASON_UNKNOWN - access to MSR that is unknown to KVM
KVM_MSR_EXIT_REASON_INVAL - access to invalid MSRs or reserved bits
KVM_MSR_EXIT_REASON_FILTER - access blocked by KVM_X86_SET_MSR_FILTER
For KVM_EXIT_X86_RDMSR, the "index" field tells user space which MSR the guest
wants to read. To respond to this request with a successful read, user space
writes the respective data into the "data" field and must continue guest
execution to ensure the read data is transferred into guest register state.
If the RDMSR request was unsuccessful, user space indicates that with a "1" in
the "error" field. This will inject a #GP into the guest when the VCPU is
executed again.
For KVM_EXIT_X86_WRMSR, the "index" field tells user space which MSR the guest
wants to write. Once finished processing the event, user space must continue
vCPU execution. If the MSR write was unsuccessful, user space also sets the
"error" field to "1".
::
/* Fix the size of the union. */
@ -5852,6 +5992,28 @@ controlled by the kvm module parameter halt_poll_ns. This capability allows
the maximum halt time to be specified on a per-VM basis, effectively overriding
the module parameter for the target VM.
7.21 KVM_CAP_X86_USER_SPACE_MSR
-------------------------------
:Architectures: x86
:Target: VM
:Parameters: args[0] contains the mask of KVM_MSR_EXIT_REASON_* events to report
:Returns: 0 on success; -1 on error
This capability enables trapping of #GP invoking RDMSR and WRMSR instructions
into user space.
When a guest requests to read or write an MSR, KVM may not implement all MSRs
that are relevant to a respective system. It also does not differentiate by
CPU type.
To allow more fine grained control over MSR handling, user space may enable
this capability. With it enabled, MSR accesses that match the mask specified in
args[0] and trigger a #GP event inside the guest by KVM will instead trigger
KVM_EXIT_X86_RDMSR and KVM_EXIT_X86_WRMSR exit notifications which user space
can then handle to implement model specific MSR handling and/or user notifications
to inform a user that an MSR was not handled.
8. Other capabilities.
======================
@ -6193,3 +6355,39 @@ distribution...)
If this capability is available, then the CPNC and CPVC can be synchronized
between KVM and userspace via the sync regs mechanism (KVM_SYNC_DIAG318).
8.26 KVM_CAP_X86_USER_SPACE_MSR
-------------------------------
:Architectures: x86
This capability indicates that KVM supports deflection of MSR reads and
writes to user space. It can be enabled on a VM level. If enabled, MSR
accesses that would usually trigger a #GP by KVM into the guest will
instead get bounced to user space through the KVM_EXIT_X86_RDMSR and
KVM_EXIT_X86_WRMSR exit notifications.
8.27 KVM_X86_SET_MSR_FILTER
---------------------------
:Architectures: x86
This capability indicates that KVM supports that accesses to user defined MSRs
may be rejected. With this capability exposed, KVM exports new VM ioctl
KVM_X86_SET_MSR_FILTER which user space can call to specify bitmaps of MSR
ranges that KVM should reject access to.
In combination with KVM_CAP_X86_USER_SPACE_MSR, this allows user space to
trap and emulate MSRs that are outside of the scope of KVM as well as
limit the attack surface on KVM's MSR emulation code.
8.28 KVM_CAP_ENFORCE_PV_CPUID
-----------------------------
:Architectures: x86
When enabled, KVM will disable paravirtual features provided to the
guest according to the bits in the KVM_CPUID_FEATURES CPUID leaf
(0x40000001). Otherwise, a guest may use the paravirtual features
regardless of what has actually been exposed through the CPUID leaf.

View file

@ -38,64 +38,64 @@ returns::
where ``flag`` is defined as below:
================================= =========== ================================
flag value meaning
================================= =========== ================================
KVM_FEATURE_CLOCKSOURCE 0 kvmclock available at msrs
0x11 and 0x12
================================== =========== ================================
flag value meaning
================================== =========== ================================
KVM_FEATURE_CLOCKSOURCE 0 kvmclock available at msrs
0x11 and 0x12
KVM_FEATURE_NOP_IO_DELAY 1 not necessary to perform delays
on PIO operations
KVM_FEATURE_NOP_IO_DELAY 1 not necessary to perform delays
on PIO operations
KVM_FEATURE_MMU_OP 2 deprecated
KVM_FEATURE_MMU_OP 2 deprecated
KVM_FEATURE_CLOCKSOURCE2 3 kvmclock available at msrs
0x4b564d00 and 0x4b564d01
KVM_FEATURE_CLOCKSOURCE2 3 kvmclock available at msrs
0x4b564d00 and 0x4b564d01
KVM_FEATURE_ASYNC_PF 4 async pf can be enabled by
writing to msr 0x4b564d02
KVM_FEATURE_ASYNC_PF 4 async pf can be enabled by
writing to msr 0x4b564d02
KVM_FEATURE_STEAL_TIME 5 steal time can be enabled by
writing to msr 0x4b564d03
KVM_FEATURE_STEAL_TIME 5 steal time can be enabled by
writing to msr 0x4b564d03
KVM_FEATURE_PV_EOI 6 paravirtualized end of interrupt
handler can be enabled by
writing to msr 0x4b564d04
KVM_FEATURE_PV_EOI 6 paravirtualized end of interrupt
handler can be enabled by
writing to msr 0x4b564d04
KVM_FEATURE_PV_UNHAULT 7 guest checks this feature bit
before enabling paravirtualized
spinlock support
KVM_FEATURE_PV_UNHALT 7 guest checks this feature bit
before enabling paravirtualized
spinlock support
KVM_FEATURE_PV_TLB_FLUSH 9 guest checks this feature bit
before enabling paravirtualized
tlb flush
KVM_FEATURE_PV_TLB_FLUSH 9 guest checks this feature bit
before enabling paravirtualized
tlb flush
KVM_FEATURE_ASYNC_PF_VMEXIT 10 paravirtualized async PF VM EXIT
can be enabled by setting bit 2
when writing to msr 0x4b564d02
KVM_FEATURE_ASYNC_PF_VMEXIT 10 paravirtualized async PF VM EXIT
can be enabled by setting bit 2
when writing to msr 0x4b564d02
KVM_FEATURE_PV_SEND_IPI 11 guest checks this feature bit
before enabling paravirtualized
sebd IPIs
KVM_FEATURE_PV_SEND_IPI 11 guest checks this feature bit
before enabling paravirtualized
send IPIs
KVM_FEATURE_POLL_CONTROL 12 host-side polling on HLT can
be disabled by writing
to msr 0x4b564d05.
KVM_FEATURE_POLL_CONTROL 12 host-side polling on HLT can
be disabled by writing
to msr 0x4b564d05.
KVM_FEATURE_PV_SCHED_YIELD 13 guest checks this feature bit
before using paravirtualized
sched yield.
KVM_FEATURE_PV_SCHED_YIELD 13 guest checks this feature bit
before using paravirtualized
sched yield.
KVM_FEATURE_ASYNC_PF_INT 14 guest checks this feature bit
before using the second async
pf control msr 0x4b564d06 and
async pf acknowledgment msr
0x4b564d07.
KVM_FEATURE_ASYNC_PF_INT 14 guest checks this feature bit
before using the second async
pf control msr 0x4b564d06 and
async pf acknowledgment msr
0x4b564d07.
KVM_FEATURE_CLOCSOURCE_STABLE_BIT 24 host will warn if no guest-side
per-cpu warps are expeced in
kvmclock
================================= =========== ================================
KVM_FEATURE_CLOCKSOURCE_STABLE_BIT 24 host will warn if no guest-side
per-cpu warps are expected in
kvmclock
================================== =========== ================================
::

View file

@ -25,8 +25,10 @@ Returns:
======= ========================================================
-EBUSY The PMU overflow interrupt is already set
-ENXIO The overflow interrupt not set when attempting to get it
-ENODEV PMUv3 not supported
-EFAULT Error reading interrupt number
-ENXIO PMUv3 not supported or the overflow interrupt not set
when attempting to get it
-ENODEV KVM_ARM_VCPU_PMU_V3 feature missing from VCPU
-EINVAL Invalid PMU overflow interrupt number supplied or
trying to set the IRQ number without using an in-kernel
irqchip.
@ -45,9 +47,10 @@ all vcpus, while as an SPI it must be a separate number per vcpu.
Returns:
======= ======================================================
-EEXIST Interrupt number already used
-ENODEV PMUv3 not supported or GIC not initialized
-ENXIO PMUv3 not properly configured or in-kernel irqchip not
configured as required prior to calling this attribute
-ENXIO PMUv3 not supported, missing VCPU feature or interrupt
number not set
-EBUSY PMUv3 already initialized
======= ======================================================
@ -55,6 +58,52 @@ Request the initialization of the PMUv3. If using the PMUv3 with an in-kernel
virtual GIC implementation, this must be done after initializing the in-kernel
irqchip.
1.3 ATTRIBUTE: KVM_ARM_VCPU_PMU_V3_FILTER
-----------------------------------------
:Parameters: in kvm_device_attr.addr the address for a PMU event filter is a
pointer to a struct kvm_pmu_event_filter
:Returns:
======= ======================================================
-ENODEV PMUv3 not supported or GIC not initialized
-ENXIO PMUv3 not properly configured or in-kernel irqchip not
configured as required prior to calling this attribute
-EBUSY PMUv3 already initialized
-EINVAL Invalid filter range
======= ======================================================
Request the installation of a PMU event filter described as follows::
struct kvm_pmu_event_filter {
__u16 base_event;
__u16 nevents;
#define KVM_PMU_EVENT_ALLOW 0
#define KVM_PMU_EVENT_DENY 1
__u8 action;
__u8 pad[3];
};
A filter range is defined as the range [@base_event, @base_event + @nevents),
together with an @action (KVM_PMU_EVENT_ALLOW or KVM_PMU_EVENT_DENY). The
first registered range defines the global policy (global ALLOW if the first
@action is DENY, global DENY if the first @action is ALLOW). Multiple ranges
can be programmed, and must fit within the event space defined by the PMU
architecture (10 bits on ARMv8.0, 16 bits from ARMv8.1 onwards).
Note: "Cancelling" a filter by registering the opposite action for the same
range doesn't change the default action. For example, installing an ALLOW
filter for event range [0:10) as the first filter and then applying a DENY
action for the same range will leave the whole range as disabled.
Restrictions: Event 0 (SW_INCR) is never filtered, as it doesn't count a
hardware event. Filtering event 0x1E (CHAIN) has no effect either, as it
isn't strictly speaking an event. Filtering the cycle counter is possible
using event 0x11 (CPU_CYCLES).
2. GROUP: KVM_ARM_VCPU_TIMER_CTRL
=================================

View file

@ -89,7 +89,7 @@ they are write protected for COW (other case of B apply too).
So here because at time N+2 the clear page table entry was not paired with a
notification to invalidate the secondary TLB, the device sees the new value for
addrB before seing the new value for addrA. This break total memory ordering
addrB before seeing the new value for addrA. This break total memory ordering
for the device.
When changing a pte to write protect or to point to a new write protected page

View file

@ -99,7 +99,7 @@ Steps:
2. Ensure that writeback is complete.
3. Lock the new page that we want to move to. It is locked so that accesses to
this (not yet uptodate) page immediately block while the move is in progress.
this (not yet up-to-date) page immediately block while the move is in progress.
4. All the page table references to the page are converted to migration
entries. This decreases the mapcount of a page. If the resulting

View file

@ -18,7 +18,7 @@ Although we already have tracepoint for tracing page allocation/free,
using it for analyzing who allocates each page is rather complex. We need
to enlarge the trace buffer to prevent overlapping until the userspace
program is launched. And the launched program continually dumps out the trace
buffer for later analysis and it would change system behviour with more
buffer for later analysis and it would change system behaviour with more
possibility rather than just keeping it in memory, so bad for debugging.
page owner can also be used for various purposes. For example, accurate

View file

@ -378,7 +378,7 @@ c) Execute ``slabinfo-gnuplot.sh`` in '-t' mode, passing all of the
can go unnoticed. To deal with that, ``slabinfo-gnuplot.sh`` has two
options to 'zoom-in'/'zoom-out':
a) ``-s %d,%d`` -- overwrites the default image width and heigh
a) ``-s %d,%d`` -- overwrites the default image width and height
b) ``-r %d,%d`` -- specifies a range of samples to use (for example,
in ``slabinfo -X >> FOO_STATS; sleep 1;`` case, using a ``-r
40,60`` range will plot only samples collected between 40th and

View file

@ -19,7 +19,7 @@ Complete virtual memory map with 4-level page tables
Note that as we get closer to the top of the address space, the notation changes
from TB to GB and then MB/KB.
- "16M TB" might look weird at first sight, but it's an easier to visualize size
- "16M TB" might look weird at first sight, but it's an easier way to visualize size
notation than "16 EB", which few will recognize at first sight as 16 exabytes.
It also nicely shows how incredibly large the 64-bit address space is.

View file

@ -4585,6 +4585,14 @@ L: linux-arm-kernel@lists.infradead.org
S: Supported
F: drivers/cpuidle/cpuidle-psci.c
CPUIDLE DRIVER - ARM PSCI PM DOMAIN
M: Ulf Hansson <ulf.hansson@linaro.org>
L: linux-pm@vger.kernel.org
L: linux-arm-kernel@lists.infradead.org
S: Supported
F: drivers/cpuidle/cpuidle-psci.h
F: drivers/cpuidle/cpuidle-psci-domain.c
CRAMFS FILESYSTEM
M: Nicolas Pitre <nico@fluxnic.net>
S: Maintained
@ -5398,11 +5406,11 @@ F: include/linux/debugfs.h
F: include/linux/kobj*
F: lib/kobj*
DRIVERS FOR ADAPTIVE VOLTAGE SCALING (AVS)
DRIVERS FOR OMAP ADAPTIVE VOLTAGE SCALING (AVS)
M: Nishanth Menon <nm@ti.com>
L: linux-pm@vger.kernel.org
S: Maintained
F: drivers/power/avs/
F: drivers/soc/ti/smartreflex.c
F: include/linux/power/smartreflex.h
DRM DRIVER FOR ALLWINNER DE2 AND DE3 ENGINE
@ -14472,7 +14480,7 @@ L: linux-pm@vger.kernel.org
L: linux-arm-msm@vger.kernel.org
S: Maintained
F: Documentation/devicetree/bindings/power/avs/qcom,cpr.txt
F: drivers/power/avs/qcom-cpr.c
F: drivers/soc/qcom/cpr.c
QUALCOMM CPUFREQ DRIVER MSM8996/APQ8096
M: Ilia Lin <ilia.lin@kernel.org>
@ -16057,6 +16065,13 @@ F: Documentation/fb/sisfb.rst
F: drivers/video/fbdev/sis/
F: include/video/sisfb.h
SIS I2C TOUCHSCREEN DRIVER
M: Mika Penttilä <mika.penttila@nextfour.com>
L: linux-input@vger.kernel.org
S: Maintained
F: Documentation/devicetree/bindings/input/touchscreen/sis_i2c.txt
F: drivers/input/touchscreen/sis_i2c.c
SIS USB2VGA DRIVER
M: Thomas Winischhofer <thomas@winischhofer.net>
S: Maintained
@ -18643,6 +18658,7 @@ VIRTIO MEM DRIVER
M: David Hildenbrand <david@redhat.com>
L: virtualization@lists.linux-foundation.org
S: Maintained
W: https://virtio-mem.gitlab.io/
F: drivers/virtio/virtio_mem.c
F: include/uapi/linux/virtio_mem.h

View file

@ -531,7 +531,6 @@ do_work_pending(struct pt_regs *regs, unsigned long thread_flags,
do_signal(regs, r0, r19);
r0 = 0;
} else {
clear_thread_flag(TIF_NOTIFY_RESUME);
tracehook_notify_resume(regs);
}
}

View file

@ -562,7 +562,7 @@ static int arc_pmu_device_probe(struct platform_device *pdev)
{
struct arc_reg_pct_build pct_bcr;
struct arc_reg_cc_build cc_bcr;
int i, has_interrupts, irq;
int i, has_interrupts, irq = -1;
int counter_size; /* in bits */
union cc_name {
@ -637,19 +637,28 @@ static int arc_pmu_device_probe(struct platform_device *pdev)
.attr_groups = arc_pmu->attr_groups,
};
if (has_interrupts && (irq = platform_get_irq(pdev, 0) >= 0)) {
if (has_interrupts) {
irq = platform_get_irq(pdev, 0);
if (irq >= 0) {
int ret;
arc_pmu->irq = irq;
arc_pmu->irq = irq;
/* intc map function ensures irq_set_percpu_devid() called */
request_percpu_irq(irq, arc_pmu_intr, "ARC perf counters",
this_cpu_ptr(&arc_pmu_cpu));
/* intc map function ensures irq_set_percpu_devid() called */
ret = request_percpu_irq(irq, arc_pmu_intr, "ARC perf counters",
this_cpu_ptr(&arc_pmu_cpu));
if (!ret)
on_each_cpu(arc_cpu_pmu_irq_init, &irq, 1);
else
irq = -1;
}
on_each_cpu(arc_cpu_pmu_irq_init, &irq, 1);
} else {
arc_pmu->pmu.capabilities |= PERF_PMU_CAP_NO_INTERRUPT;
}
if (irq == -1)
arc_pmu->pmu.capabilities |= PERF_PMU_CAP_NO_INTERRUPT;
/*
* perf parser doesn't really like '-' symbol in events name, so let's
* use '_' in arc pct name as it goes to kernel PMU event prefix.

View file

@ -394,6 +394,6 @@ void do_notify_resume(struct pt_regs *regs)
* ASM glue guarantees that this is only called when returning to
* user mode
*/
if (test_and_clear_thread_flag(TIF_NOTIFY_RESUME))
if (test_thread_flag(TIF_NOTIFY_RESUME))
tracehook_notify_resume(regs);
}

View file

@ -46,6 +46,7 @@
MATRIX_KEY(0x02, 0x09, KEY_F8)
MATRIX_KEY(0x02, 0x0a, KEY_YEN)
MATRIX_KEY(0x03, 0x00, KEY_LEFTMETA)
MATRIX_KEY(0x03, 0x01, KEY_GRAVE)
MATRIX_KEY(0x03, 0x02, KEY_F2)
MATRIX_KEY(0x03, 0x03, KEY_5)

View file

@ -57,7 +57,7 @@
lvds-receiver {
compatible = "ti,ds90cf384a", "lvds-decoder";
powerdown-gpios = <&gpio7 25 GPIO_ACTIVE_LOW>;
power-supply = <&vcc_3v3_tft1>;
ports {
#address-cells = <1>;
@ -81,6 +81,7 @@
panel {
compatible = "edt,etm0700g0dh6";
backlight = <&lcd_backlight>;
power-supply = <&vcc_3v3_tft1>;
port {
panel_in: endpoint {
@ -113,6 +114,17 @@
};
};
vcc_3v3_tft1: regulator-panel {
compatible = "regulator-fixed";
regulator-name = "vcc-3v3-tft1";
regulator-min-microvolt = <3300000>;
regulator-max-microvolt = <3300000>;
enable-active-high;
startup-delay-us = <500>;
gpio = <&gpio7 25 GPIO_ACTIVE_HIGH>;
};
vcc_sdhi1: regulator-vcc-sdhi1 {
compatible = "regulator-fixed";
@ -207,6 +219,7 @@
reg = <0x38>;
interrupt-parent = <&gpio2>;
interrupts = <12 IRQ_TYPE_EDGE_FALLING>;
vcc-supply = <&vcc_3v3_tft1>;
};
};

View file

@ -223,16 +223,16 @@
};
&reg_dc1sw {
regulator-min-microvolt = <3000000>;
regulator-max-microvolt = <3000000>;
regulator-min-microvolt = <3300000>;
regulator-max-microvolt = <3300000>;
regulator-name = "vcc-gmac-phy";
};
&reg_dcdc1 {
regulator-always-on;
regulator-min-microvolt = <3000000>;
regulator-max-microvolt = <3000000>;
regulator-name = "vcc-3v0";
regulator-min-microvolt = <3300000>;
regulator-max-microvolt = <3300000>;
regulator-name = "vcc-3v3";
};
&reg_dcdc2 {

View file

@ -669,7 +669,6 @@ do_work_pending(struct pt_regs *regs, unsigned int thread_flags, int syscall)
} else if (thread_flags & _TIF_UPROBE) {
uprobe_notify_resume(regs);
} else {
clear_thread_flag(TIF_NOTIFY_RESUME);
tracehook_notify_resume(regs);
rseq_handle_notify_resume(NULL, regs);
}

View file

@ -174,8 +174,10 @@ static int omap_enter_idle_coupled(struct cpuidle_device *dev,
*/
if (mpuss_can_lose_context) {
error = cpu_cluster_pm_enter();
if (error)
if (error) {
omap_set_pwrdm_state(mpu_pd, PWRDM_POWER_ON);
goto cpu_cluster_pm_out;
}
}
}

View file

@ -23,7 +23,7 @@ config OMAP_DEBUG_LEDS
config POWER_AVS_OMAP
bool "AVS(Adaptive Voltage Scaling) support for OMAP IP versions 1&2"
depends on POWER_AVS && (ARCH_OMAP3 || ARCH_OMAP4) && PM
depends on (ARCH_OMAP3 || ARCH_OMAP4) && PM
select POWER_SUPPLY
help
Say Y to enable AVS(Adaptive Voltage Scaling)

View file

@ -139,8 +139,7 @@
<GIC_SPI 104 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 105 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 106 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 107 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 98 IRQ_TYPE_LEVEL_HIGH>;
<GIC_SPI 107 IRQ_TYPE_LEVEL_HIGH>;
interrupt-names = "gp",
"gpmmu",
"pp",
@ -151,8 +150,7 @@
"pp2",
"ppmmu2",
"pp3",
"ppmmu3",
"pmu";
"ppmmu3";
clocks = <&ccu CLK_BUS_GPU>, <&ccu CLK_GPU>;
clock-names = "bus", "core";
resets = <&ccu RST_BUS_GPU>;

View file

@ -221,7 +221,12 @@
};
&sdhc1 {
status = "okay";
/* There is an issue with the eMMC causing permanent
* damage to the card if a quirk isn't addressed.
* Until it's fixed, disable the MMC so as not to brick
* devices.
*/
status = "disabled";
/* Downstream pushes 2.95V to the sdhci device,
* but upstream driver REALLY wants to make vmmc 1.8v

View file

@ -44,7 +44,7 @@
gpio-ranges = <&pm660_gpios 0 0 13>;
#gpio-cells = <2>;
interrupt-controller;
interrupt-cells =<2>;
#interrupt-cells = <2>;
};
};
};

View file

@ -518,6 +518,8 @@
<GIC_SPI 472 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 473 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 474 IRQ_TYPE_LEVEL_HIGH>;
status = "disabled";
};
tcsr_mutex_regs: syscon@1f40000 {
@ -749,6 +751,8 @@
<GIC_SPI 117 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 349 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 350 IRQ_TYPE_LEVEL_HIGH>;
status = "disabled";
};
lpass_smmu: iommu@5100000 {
@ -778,6 +782,8 @@
<GIC_SPI 225 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 310 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 404 IRQ_TYPE_LEVEL_HIGH>;
status = "disabled";
};
spmi_bus: spmi@800f000 {
@ -1074,6 +1080,8 @@
<GIC_SPI 274 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 275 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 276 IRQ_TYPE_LEVEL_HIGH>;
status = "disabled";
};
apcs_glb: mailbox@17911000 {

View file

@ -1093,8 +1093,8 @@
qup_opp_table: qup-opp-table {
compatible = "operating-points-v2";
opp-19200000 {
opp-hz = /bits/ 64 <19200000>;
opp-50000000 {
opp-hz = /bits/ 64 <50000000>;
required-opps = <&rpmhpd_opp_min_svs>;
};
@ -1107,6 +1107,11 @@
opp-hz = /bits/ 64 <100000000>;
required-opps = <&rpmhpd_opp_svs>;
};
opp-128000000 {
opp-hz = /bits/ 64 <128000000>;
required-opps = <&rpmhpd_opp_nom>;
};
};
qupv3_id_0: geniqup@8c0000 {

View file

@ -404,11 +404,12 @@
};
&serdes_ln_ctrl {
idle-states = <SERDES0_LANE0_PCIE0_LANE0>, <SERDES0_LANE1_PCIE0_LANE1>,
<SERDES1_LANE0_PCIE1_LANE0>, <SERDES1_LANE1_PCIE1_LANE1>,
<SERDES2_LANE0_PCIE2_LANE0>, <SERDES2_LANE1_PCIE2_LANE1>,
<SERDES3_LANE0_USB3_0_SWAP>, <SERDES3_LANE1_USB3_0>,
<SERDES4_LANE0_EDP_LANE0>, <SERDES4_LANE1_EDP_LANE1>, <SERDES4_LANE2_EDP_LANE2>, <SERDES4_LANE3_EDP_LANE3>;
idle-states = <J721E_SERDES0_LANE0_PCIE0_LANE0>, <J721E_SERDES0_LANE1_PCIE0_LANE1>,
<J721E_SERDES1_LANE0_PCIE1_LANE0>, <J721E_SERDES1_LANE1_PCIE1_LANE1>,
<J721E_SERDES2_LANE0_PCIE2_LANE0>, <J721E_SERDES2_LANE1_PCIE2_LANE1>,
<J721E_SERDES3_LANE0_USB3_0_SWAP>, <J721E_SERDES3_LANE1_USB3_0>,
<J721E_SERDES4_LANE0_EDP_LANE0>, <J721E_SERDES4_LANE1_EDP_LANE1>,
<J721E_SERDES4_LANE2_EDP_LANE2>, <J721E_SERDES4_LANE3_EDP_LANE3>;
};
&serdes_wiz3 {

View file

@ -6,7 +6,7 @@
*/
#include <dt-bindings/phy/phy.h>
#include <dt-bindings/mux/mux.h>
#include <dt-bindings/mux/mux-j721e-wiz.h>
#include <dt-bindings/mux/ti-serdes.h>
&cbass_main {
msmc_ram: sram@70000000 {
@ -38,11 +38,12 @@
<0x40b0 0x3>, <0x40b4 0x3>, /* SERDES3 lane0/1 select */
<0x40c0 0x3>, <0x40c4 0x3>, <0x40c8 0x3>, <0x40cc 0x3>;
/* SERDES4 lane0/1/2/3 select */
idle-states = <SERDES0_LANE0_PCIE0_LANE0>, <SERDES0_LANE1_PCIE0_LANE1>,
<SERDES1_LANE0_PCIE1_LANE0>, <SERDES1_LANE1_PCIE1_LANE1>,
<SERDES2_LANE0_PCIE2_LANE0>, <SERDES2_LANE1_PCIE2_LANE1>,
<MUX_IDLE_AS_IS>, <SERDES3_LANE1_USB3_0>,
<SERDES4_LANE0_EDP_LANE0>, <SERDES4_LANE1_EDP_LANE1>, <SERDES4_LANE2_EDP_LANE2>, <SERDES4_LANE3_EDP_LANE3>;
idle-states = <J721E_SERDES0_LANE0_PCIE0_LANE0>, <J721E_SERDES0_LANE1_PCIE0_LANE1>,
<J721E_SERDES1_LANE0_PCIE1_LANE0>, <J721E_SERDES1_LANE1_PCIE1_LANE1>,
<J721E_SERDES2_LANE0_PCIE2_LANE0>, <J721E_SERDES2_LANE1_PCIE2_LANE1>,
<MUX_IDLE_AS_IS>, <J721E_SERDES3_LANE1_USB3_0>,
<J721E_SERDES4_LANE0_EDP_LANE0>, <J721E_SERDES4_LANE1_EDP_LANE1>,
<J721E_SERDES4_LANE2_EDP_LANE2>, <J721E_SERDES4_LANE3_EDP_LANE3>;
};
usb_serdes_mux: mux-controller@4000 {

View file

@ -218,6 +218,23 @@ lr .req x30 // link register
str \src, [\tmp, :lo12:\sym]
.endm
/*
* Read the register holding the current CPU's per-CPU offset into @dst.
* Callers add the result to a per-CPU symbol address to reach this CPU's
* copy of the variable.
*
* @dst: destination register
*
* When built for the KVM hypervisor (__KVM_NVHE_HYPERVISOR__ or
* __KVM_VHE_HYPERVISOR__ defined) the offset is always read from tpidr_el2.
* Otherwise the read is wrapped in an alternative keyed on
* ARM64_HAS_VIRT_HOST_EXTN: tpidr_el1 without the capability, tpidr_el2
* with it.
*/
#if defined(__KVM_NVHE_HYPERVISOR__) || defined(__KVM_VHE_HYPERVISOR__)
.macro this_cpu_offset, dst
mrs \dst, tpidr_el2
.endm
#else
.macro this_cpu_offset, dst
alternative_if_not ARM64_HAS_VIRT_HOST_EXTN
mrs \dst, tpidr_el1
alternative_else
mrs \dst, tpidr_el2
alternative_endif
.endm
#endif
/*
* @dst: Result of per_cpu(sym, smp_processor_id()) (can be SP)
* @sym: The name of the per-cpu variable
@ -226,11 +243,7 @@ lr .req x30 // link register
.macro adr_this_cpu, dst, sym, tmp
adrp \tmp, \sym
add \dst, \tmp, #:lo12:\sym
alternative_if_not ARM64_HAS_VIRT_HOST_EXTN
mrs \tmp, tpidr_el1
alternative_else
mrs \tmp, tpidr_el2
alternative_endif
this_cpu_offset \tmp
add \dst, \dst, \tmp
.endm
@ -241,11 +254,7 @@ alternative_endif
*/
.macro ldr_this_cpu dst, sym, tmp
adr_l \dst, \sym
alternative_if_not ARM64_HAS_VIRT_HOST_EXTN
mrs \tmp, tpidr_el1
alternative_else
mrs \tmp, tpidr_el2
alternative_endif
this_cpu_offset \tmp
ldr \dst, [\dst, \tmp]
.endm

View file

@ -0,0 +1,36 @@
/* SPDX-License-Identifier: GPL-2.0 */
/*
* Copyright (C) 2020 Google LLC.
* Written by David Brazdil <dbrazdil@google.com>
*/
#ifndef __ARM64_HYP_IMAGE_H__
#define __ARM64_HYP_IMAGE_H__
/*
* KVM nVHE code has its own symbol namespace prefixed with __kvm_nvhe_,
* to separate it from the kernel proper.
*
* kvm_nvhe_sym(foo) token-pastes that prefix onto its argument and so
* expands to __kvm_nvhe_foo. It is defined outside the LINKER_SCRIPT guard
* below, so it is available to every includer of this header.
*/
#define kvm_nvhe_sym(sym) __kvm_nvhe_##sym
/* The remaining helpers are only defined when LINKER_SCRIPT is set. */
#ifdef LINKER_SCRIPT
/*
* KVM nVHE ELF section names are prefixed with .hyp, to separate them
* from the kernel proper.
*
* The argument is pasted directly after ".hyp", so @NAME is expected to
* carry its own leading dot (e.g. .text is intended to give .hyp.text).
*/
#define HYP_SECTION_NAME(NAME) .hyp##NAME
/*
* Defines an ELF hyp section from input section @NAME and its subsections.
*
* The output section is HYP_SECTION_NAME(NAME); its input pattern matches
* both @NAME itself and anything of the form @NAME.<suffix>.
*/
#define HYP_SECTION(NAME) \
HYP_SECTION_NAME(NAME) : { *(NAME NAME##.*) }
/*
* Defines a linker script alias of a kernel-proper symbol referenced by
* KVM nVHE hyp code.
*
* Expands to the assignment "__kvm_nvhe_<sym> = <sym>;" (the trailing
* semicolon is part of the macro, so uses must not add another).
*/
#define KVM_NVHE_ALIAS(sym) kvm_nvhe_sym(sym) = sym;
#endif /* LINKER_SCRIPT */
#endif /* __ARM64_HYP_IMAGE_H__ */

View file

@ -7,6 +7,7 @@
#ifndef __ARM_KVM_ASM_H__
#define __ARM_KVM_ASM_H__
#include <asm/hyp_image.h>
#include <asm/virt.h>
#define ARM_EXIT_WITH_SERROR_BIT 31
@ -35,17 +36,34 @@
#define __SMCCC_WORKAROUND_1_SMC_SZ 36
#define KVM_HOST_SMCCC_ID(id) \
ARM_SMCCC_CALL_VAL(ARM_SMCCC_FAST_CALL, \
ARM_SMCCC_SMC_64, \
ARM_SMCCC_OWNER_VENDOR_HYP, \
(id))
#define KVM_HOST_SMCCC_FUNC(name) KVM_HOST_SMCCC_ID(__KVM_HOST_SMCCC_FUNC_##name)
#define __KVM_HOST_SMCCC_FUNC___kvm_hyp_init 0
#define __KVM_HOST_SMCCC_FUNC___kvm_vcpu_run 1
#define __KVM_HOST_SMCCC_FUNC___kvm_flush_vm_context 2
#define __KVM_HOST_SMCCC_FUNC___kvm_tlb_flush_vmid_ipa 3
#define __KVM_HOST_SMCCC_FUNC___kvm_tlb_flush_vmid 4
#define __KVM_HOST_SMCCC_FUNC___kvm_tlb_flush_local_vmid 5
#define __KVM_HOST_SMCCC_FUNC___kvm_timer_set_cntvoff 6
#define __KVM_HOST_SMCCC_FUNC___kvm_enable_ssbs 7
#define __KVM_HOST_SMCCC_FUNC___vgic_v3_get_ich_vtr_el2 8
#define __KVM_HOST_SMCCC_FUNC___vgic_v3_read_vmcr 9
#define __KVM_HOST_SMCCC_FUNC___vgic_v3_write_vmcr 10
#define __KVM_HOST_SMCCC_FUNC___vgic_v3_init_lrs 11
#define __KVM_HOST_SMCCC_FUNC___kvm_get_mdcr_el2 12
#define __KVM_HOST_SMCCC_FUNC___vgic_v3_save_aprs 13
#define __KVM_HOST_SMCCC_FUNC___vgic_v3_restore_aprs 14
#ifndef __ASSEMBLY__
#include <linux/mm.h>
/*
* Translate name of a symbol defined in nVHE hyp to the name seen
* by kernel proper. All nVHE symbols are prefixed by the build system
* to avoid clashes with the VHE variants.
*/
#define kvm_nvhe_sym(sym) __kvm_nvhe_##sym
#define DECLARE_KVM_VHE_SYM(sym) extern char sym[]
#define DECLARE_KVM_NVHE_SYM(sym) extern char kvm_nvhe_sym(sym)[]
@ -57,10 +75,53 @@
DECLARE_KVM_VHE_SYM(sym); \
DECLARE_KVM_NVHE_SYM(sym)
#define CHOOSE_VHE_SYM(sym) sym
#define CHOOSE_NVHE_SYM(sym) kvm_nvhe_sym(sym)
#define DECLARE_KVM_VHE_PER_CPU(type, sym) \
DECLARE_PER_CPU(type, sym)
#define DECLARE_KVM_NVHE_PER_CPU(type, sym) \
DECLARE_PER_CPU(type, kvm_nvhe_sym(sym))
#define DECLARE_KVM_HYP_PER_CPU(type, sym) \
DECLARE_KVM_VHE_PER_CPU(type, sym); \
DECLARE_KVM_NVHE_PER_CPU(type, sym)
/*
* Compute pointer to a symbol defined in nVHE percpu region.
* Returns NULL if percpu memory has not been allocated yet.
*
* this_cpu_ptr_nvhe_sym(sym) is per_cpu_ptr_nvhe_sym() evaluated for
* smp_processor_id().
*
* per_cpu_ptr_nvhe_sym(sym, cpu) takes kvm_arm_hyp_percpu_base[cpu] as the
* base of that CPU's region and adds the distance between the nVHE copy of
* @sym and the nVHE __per_cpu_start symbol. A zero base entry yields NULL;
* otherwise the result is cast to a pointer to the type of the nVHE symbol.
*
* NOTE: the statement expression declares locals named "base" and "off",
* so a @cpu argument written in terms of identifiers with those names
* would be shadowed inside the expansion.
*/
#define this_cpu_ptr_nvhe_sym(sym) per_cpu_ptr_nvhe_sym(sym, smp_processor_id())
#define per_cpu_ptr_nvhe_sym(sym, cpu) \
({ \
unsigned long base, off; \
base = kvm_arm_hyp_percpu_base[cpu]; \
off = (unsigned long)&CHOOSE_NVHE_SYM(sym) - \
(unsigned long)&CHOOSE_NVHE_SYM(__per_cpu_start); \
base ? (typeof(CHOOSE_NVHE_SYM(sym))*)(base + off) : NULL; \
})
#if defined(__KVM_NVHE_HYPERVISOR__)
#define CHOOSE_NVHE_SYM(sym) sym
#define CHOOSE_HYP_SYM(sym) CHOOSE_NVHE_SYM(sym)
/* The nVHE hypervisor shouldn't even try to access VHE symbols */
extern void *__nvhe_undefined_symbol;
#define CHOOSE_VHE_SYM(sym) __nvhe_undefined_symbol
#define this_cpu_ptr_hyp_sym(sym) (&__nvhe_undefined_symbol)
#define per_cpu_ptr_hyp_sym(sym, cpu) (&__nvhe_undefined_symbol)
#elif defined(__KVM_VHE_HYPERVISOR__)
#define CHOOSE_VHE_SYM(sym) sym
#define CHOOSE_HYP_SYM(sym) CHOOSE_VHE_SYM(sym)
/* The VHE hypervisor shouldn't even try to access nVHE symbols */
extern void *__vhe_undefined_symbol;
#define CHOOSE_NVHE_SYM(sym) __vhe_undefined_symbol
#define this_cpu_ptr_hyp_sym(sym) (&__vhe_undefined_symbol)
#define per_cpu_ptr_hyp_sym(sym, cpu) (&__vhe_undefined_symbol)
#else
#ifndef __KVM_NVHE_HYPERVISOR__
/*
* BIG FAT WARNINGS:
*
@ -72,12 +133,21 @@
* - Don't let the nVHE hypervisor have access to this, as it will
* pick the *wrong* symbol (yes, it runs at EL2...).
*/
#define CHOOSE_HYP_SYM(sym) (is_kernel_in_hyp_mode() ? CHOOSE_VHE_SYM(sym) \
#define CHOOSE_HYP_SYM(sym) (is_kernel_in_hyp_mode() \
? CHOOSE_VHE_SYM(sym) \
: CHOOSE_NVHE_SYM(sym))
#else
/* The nVHE hypervisor shouldn't even try to access anything */
extern void *__nvhe_undefined_symbol;
#define CHOOSE_HYP_SYM(sym) __nvhe_undefined_symbol
#define this_cpu_ptr_hyp_sym(sym) (is_kernel_in_hyp_mode() \
? this_cpu_ptr(&sym) \
: this_cpu_ptr_nvhe_sym(sym))
#define per_cpu_ptr_hyp_sym(sym, cpu) (is_kernel_in_hyp_mode() \
? per_cpu_ptr(&sym, cpu) \
: per_cpu_ptr_nvhe_sym(sym, cpu))
#define CHOOSE_VHE_SYM(sym) sym
#define CHOOSE_NVHE_SYM(sym) kvm_nvhe_sym(sym)
#endif
/* Translate a kernel address @ptr into its equivalent linear mapping */
@ -95,10 +165,16 @@ struct kvm_vcpu;
struct kvm_s2_mmu;
DECLARE_KVM_NVHE_SYM(__kvm_hyp_init);
DECLARE_KVM_NVHE_SYM(__kvm_hyp_host_vector);
DECLARE_KVM_HYP_SYM(__kvm_hyp_vector);
#define __kvm_hyp_init CHOOSE_NVHE_SYM(__kvm_hyp_init)
#define __kvm_hyp_host_vector CHOOSE_NVHE_SYM(__kvm_hyp_host_vector)
#define __kvm_hyp_vector CHOOSE_HYP_SYM(__kvm_hyp_vector)
extern unsigned long kvm_arm_hyp_percpu_base[NR_CPUS];
DECLARE_KVM_NVHE_SYM(__per_cpu_start);
DECLARE_KVM_NVHE_SYM(__per_cpu_end);
extern atomic_t arm64_el2_vector_last_slot;
DECLARE_KVM_HYP_SYM(__bp_harden_hyp_vecs);
#define __bp_harden_hyp_vecs CHOOSE_HYP_SYM(__bp_harden_hyp_vecs)
@ -144,26 +220,6 @@ extern char __smccc_workaround_1_smc[__SMCCC_WORKAROUND_1_SMC_SZ];
addr; \
})
/*
* Home-grown __this_cpu_{ptr,read} variants that always work at HYP,
* provided that sym is really a *symbol* and not a pointer obtained from
* a data structure. As for SHIFT_PERCPU_PTR(), the creative casting keeps
* sparse quiet.
*/
#define __hyp_this_cpu_ptr(sym) \
({ \
void *__ptr; \
__verify_pcpu_ptr(&sym); \
__ptr = hyp_symbol_addr(sym); \
__ptr += read_sysreg(tpidr_el2); \
(typeof(sym) __kernel __force *)__ptr; \
})
#define __hyp_this_cpu_read(sym) \
({ \
*__hyp_this_cpu_ptr(sym); \
})
#define __KVM_EXTABLE(from, to) \
" .pushsection __kvm_ex_table, \"a\"\n" \
" .align 3\n" \
@ -194,20 +250,8 @@ extern char __smccc_workaround_1_smc[__SMCCC_WORKAROUND_1_SMC_SZ];
#else /* __ASSEMBLY__ */
.macro hyp_adr_this_cpu reg, sym, tmp
adr_l \reg, \sym
mrs \tmp, tpidr_el2
add \reg, \reg, \tmp
.endm
.macro hyp_ldr_this_cpu reg, sym, tmp
adr_l \reg, \sym
mrs \tmp, tpidr_el2
ldr \reg, [\reg, \tmp]
.endm
.macro get_host_ctxt reg, tmp
hyp_adr_this_cpu \reg, kvm_host_data, \tmp
adr_this_cpu \reg, kvm_host_data, \tmp
add \reg, \reg, #HOST_DATA_CONTEXT
.endm
@ -216,6 +260,16 @@ extern char __smccc_workaround_1_smc[__SMCCC_WORKAROUND_1_SMC_SZ];
ldr \vcpu, [\ctxt, #HOST_CONTEXT_VCPU]
.endm
/*
* Load the value stored at offset HOST_CONTEXT_VCPU of this CPU's
* kvm_hyp_ctxt (the currently loaded vcpu, going by the macro name).
*
* @vcpu: receives the loaded value; it is first used as the scratch
*        register for the per-CPU address computation
* @ctxt: left holding the address of this CPU's kvm_hyp_ctxt
*/
.macro get_loaded_vcpu vcpu, ctxt
adr_this_cpu \ctxt, kvm_hyp_ctxt, \vcpu
ldr \vcpu, [\ctxt, #HOST_CONTEXT_VCPU]
.endm
/*
* Store @vcpu at offset HOST_CONTEXT_VCPU of this CPU's kvm_hyp_ctxt.
*
* @vcpu: register holding the value to store (not modified here)
* @ctxt: left holding the address of this CPU's kvm_hyp_ctxt
* @tmp:  scratch register used for the per-CPU address computation
*/
.macro set_loaded_vcpu vcpu, ctxt, tmp
adr_this_cpu \ctxt, kvm_hyp_ctxt, \tmp
str \vcpu, [\ctxt, #HOST_CONTEXT_VCPU]
.endm
/*
* KVM extable for unexpected exceptions.
* In the same format _asm_extable, but output to a different section so that
@ -231,6 +285,45 @@ extern char __smccc_workaround_1_smc[__SMCCC_WORKAROUND_1_SMC_SZ];
.popsection
.endm
/*
* Byte offsets of saved registers, relative to CPU_USER_PT_REGS.
*
* CPU_XREG_OFFSET(x) is the offset of the 8-byte slot for register number
* @x. The argument is parenthesized so that an expression argument such as
* CPU_XREG_OFFSET(n + 1) is scaled as a whole rather than as 8*n + 1.
* CPU_LR_OFFSET is slot 30, and CPU_SP_EL0_OFFSET is the 8 bytes that
* immediately follow it.
*/
#define CPU_XREG_OFFSET(x) (CPU_USER_PT_REGS + 8*(x))
#define CPU_LR_OFFSET CPU_XREG_OFFSET(30)
#define CPU_SP_EL0_OFFSET (CPU_LR_OFFSET + 8)
/*
* Store x18-x29 and lr into the register slots of the context at @ctxt.
*
* We treat x18 as callee-saved as the host may use it as a platform
* register (e.g. for shadow call stack).
*
* @ctxt: register holding the base address of the context; each register
*        is written to its CPU_XREG_OFFSET() slot (lr goes to the slot
*        following x29). @ctxt itself is not modified.
*/
.macro save_callee_saved_regs ctxt
str x18, [\ctxt, #CPU_XREG_OFFSET(18)]
stp x19, x20, [\ctxt, #CPU_XREG_OFFSET(19)]
stp x21, x22, [\ctxt, #CPU_XREG_OFFSET(21)]
stp x23, x24, [\ctxt, #CPU_XREG_OFFSET(23)]
stp x25, x26, [\ctxt, #CPU_XREG_OFFSET(25)]
stp x27, x28, [\ctxt, #CPU_XREG_OFFSET(27)]
stp x29, lr, [\ctxt, #CPU_XREG_OFFSET(29)]
.endm
/*
* Reload x18-x29 and lr from the register slots of the context at @ctxt,
* mirroring the layout written by save_callee_saved_regs.
*
* @ctxt: register holding the base address of the context. It is used as
*        the base of every load, so it must not be one of the registers
*        that the earlier loads overwrite (see the note in the body).
*/
.macro restore_callee_saved_regs ctxt
// We require \ctxt is not x18-x28
ldr x18, [\ctxt, #CPU_XREG_OFFSET(18)]
ldp x19, x20, [\ctxt, #CPU_XREG_OFFSET(19)]
ldp x21, x22, [\ctxt, #CPU_XREG_OFFSET(21)]
ldp x23, x24, [\ctxt, #CPU_XREG_OFFSET(23)]
ldp x25, x26, [\ctxt, #CPU_XREG_OFFSET(25)]
ldp x27, x28, [\ctxt, #CPU_XREG_OFFSET(27)]
ldp x29, lr, [\ctxt, #CPU_XREG_OFFSET(29)]
.endm
/*
* Save the current sp_el0 into the context at @ctxt.
*
* @ctxt: register holding the base address of the context
* @tmp:  scratch register; holds the sp_el0 value on exit
*/
.macro save_sp_el0 ctxt, tmp
mrs \tmp, sp_el0
str \tmp, [\ctxt, #CPU_SP_EL0_OFFSET]
.endm
/*
* Write sp_el0 from the value saved in the context at @ctxt.
*
* @ctxt: register holding the base address of the context
* @tmp:  scratch register; holds the restored sp_el0 value on exit
*/
.macro restore_sp_el0 ctxt, tmp
ldr \tmp, [\ctxt, #CPU_SP_EL0_OFFSET]
msr sp_el0, \tmp
.endm
#endif
#endif /* __ARM_KVM_ASM_H__ */

View file

@ -11,6 +11,7 @@
#ifndef __ARM64_KVM_HOST_H__
#define __ARM64_KVM_HOST_H__
#include <linux/arm-smccc.h>
#include <linux/bitmap.h>
#include <linux/types.h>
#include <linux/jump_label.h>
@ -79,8 +80,8 @@ struct kvm_s2_mmu {
* for vEL1/EL0 with vHCR_EL2.VM == 0. In that case, we use the
* canonical stage-2 page tables.
*/
pgd_t *pgd;
phys_addr_t pgd_phys;
struct kvm_pgtable *pgt;
/* The last vcpu id that ran on each physical CPU */
int __percpu *last_vcpu_ran;
@ -110,6 +111,13 @@ struct kvm_arch {
* supported.
*/
bool return_nisv_io_abort_to_user;
/*
* VM-wide PMU filter, implemented as a bitmap and big enough for
* up to 2^10 events (ARMv8.0) or 2^16 events (ARMv8.1+).
*/
unsigned long *pmu_filter;
unsigned int pmuver;
};
struct kvm_vcpu_fault_info {
@ -262,8 +270,6 @@ struct kvm_host_data {
struct kvm_pmu_events pmu_events;
};
typedef struct kvm_host_data kvm_host_data_t;
struct vcpu_reset_state {
unsigned long pc;
unsigned long r0;
@ -480,18 +486,15 @@ int kvm_test_age_hva(struct kvm *kvm, unsigned long hva);
void kvm_arm_halt_guest(struct kvm *kvm);
void kvm_arm_resume_guest(struct kvm *kvm);
u64 __kvm_call_hyp(void *hypfn, ...);
#define kvm_call_hyp_nvhe(f, ...) \
do { \
DECLARE_KVM_NVHE_SYM(f); \
__kvm_call_hyp(kvm_ksym_ref_nvhe(f), ##__VA_ARGS__); \
} while(0)
#define kvm_call_hyp_nvhe_ret(f, ...) \
#define kvm_call_hyp_nvhe(f, ...) \
({ \
DECLARE_KVM_NVHE_SYM(f); \
__kvm_call_hyp(kvm_ksym_ref_nvhe(f), ##__VA_ARGS__); \
struct arm_smccc_res res; \
\
arm_smccc_1_1_hvc(KVM_HOST_SMCCC_FUNC(f), \
##__VA_ARGS__, &res); \
WARN_ON(res.a0 != SMCCC_RET_SUCCESS); \
\
res.a1; \
})
/*
@ -517,7 +520,7 @@ u64 __kvm_call_hyp(void *hypfn, ...);
ret = f(__VA_ARGS__); \
isb(); \
} else { \
ret = kvm_call_hyp_nvhe_ret(f, ##__VA_ARGS__); \
ret = kvm_call_hyp_nvhe(f, ##__VA_ARGS__); \
} \
\
ret; \
@ -565,7 +568,7 @@ void kvm_set_sei_esr(struct kvm_vcpu *vcpu, u64 syndrome);
struct kvm_vcpu *kvm_mpidr_to_vcpu(struct kvm *kvm, unsigned long mpidr);
DECLARE_PER_CPU(kvm_host_data_t, kvm_host_data);
DECLARE_KVM_HYP_PER_CPU(struct kvm_host_data, kvm_host_data);
static inline void kvm_init_host_cpu_context(struct kvm_cpu_context *cpu_ctxt)
{

View file

@ -12,6 +12,9 @@
#include <asm/alternative.h>
#include <asm/sysreg.h>
DECLARE_PER_CPU(struct kvm_cpu_context, kvm_hyp_ctxt);
DECLARE_PER_CPU(unsigned long, kvm_hyp_vector);
#define read_sysreg_elx(r,nvh,vh) \
({ \
u64 reg; \
@ -87,11 +90,11 @@ void activate_traps_vhe_load(struct kvm_vcpu *vcpu);
void deactivate_traps_vhe_put(void);
#endif
u64 __guest_enter(struct kvm_vcpu *vcpu, struct kvm_cpu_context *host_ctxt);
u64 __guest_enter(struct kvm_vcpu *vcpu);
void __noreturn hyp_panic(struct kvm_cpu_context *host_ctxt);
void __noreturn hyp_panic(void);
#ifdef __KVM_NVHE_HYPERVISOR__
void __noreturn __hyp_do_panic(unsigned long, ...);
void __noreturn __hyp_do_panic(bool restore_host, u64 spsr, u64 elr, u64 par);
#endif
#endif /* __ARM64_KVM_HYP_H__ */

View file

@ -44,16 +44,6 @@
* HYP_VA_MIN = 1 << (VA_BITS - 1)
* HYP_VA_MAX = HYP_VA_MIN + (1 << (VA_BITS - 1)) - 1
*
* This of course assumes that the trampoline page exists within the
* VA_BITS range. If it doesn't, then it means we're in the odd case
* where the kernel idmap (as well as HYP) uses more levels than the
* kernel runtime page tables (as seen when the kernel is configured
* for 4k pages, 39bits VA, and yet memory lives just above that
* limit, forcing the idmap to use 4 levels of page tables while the
* kernel itself only uses 3). In this particular case, it doesn't
* matter which side of VA_BITS we use, as we're guaranteed not to
* conflict with anything.
*
* When using VHE, there are no separate hyp mappings and all KVM
* functionality is already mapped as part of the main kernel
* mappings, and none of this applies in that case.
@ -118,15 +108,10 @@ static __always_inline unsigned long __kern_hyp_va(unsigned long v)
#define kvm_phys_size(kvm) (_AC(1, ULL) << kvm_phys_shift(kvm))
#define kvm_phys_mask(kvm) (kvm_phys_size(kvm) - _AC(1, ULL))
static inline bool kvm_page_empty(void *ptr)
{
struct page *ptr_page = virt_to_page(ptr);
return page_count(ptr_page) == 1;
}
#include <asm/kvm_pgtable.h>
#include <asm/stage2_pgtable.h>
int create_hyp_mappings(void *from, void *to, pgprot_t prot);
int create_hyp_mappings(void *from, void *to, enum kvm_pgtable_prot prot);
int create_hyp_io_mappings(phys_addr_t phys_addr, size_t size,
void __iomem **kaddr,
void __iomem **haddr);
@ -142,149 +127,9 @@ int kvm_phys_addr_ioremap(struct kvm *kvm, phys_addr_t guest_ipa,
int kvm_handle_guest_abort(struct kvm_vcpu *vcpu);
void kvm_mmu_free_memory_caches(struct kvm_vcpu *vcpu);
phys_addr_t kvm_mmu_get_httbr(void);
phys_addr_t kvm_get_idmap_vector(void);
int kvm_mmu_init(void);
void kvm_clear_hyp_idmap(void);
#define kvm_mk_pmd(ptep) \
__pmd(__phys_to_pmd_val(__pa(ptep)) | PMD_TYPE_TABLE)
#define kvm_mk_pud(pmdp) \
__pud(__phys_to_pud_val(__pa(pmdp)) | PMD_TYPE_TABLE)
#define kvm_mk_p4d(pmdp) \
__p4d(__phys_to_p4d_val(__pa(pmdp)) | PUD_TYPE_TABLE)
#define kvm_set_pud(pudp, pud) set_pud(pudp, pud)
#define kvm_pfn_pte(pfn, prot) pfn_pte(pfn, prot)
#define kvm_pfn_pmd(pfn, prot) pfn_pmd(pfn, prot)
#define kvm_pfn_pud(pfn, prot) pfn_pud(pfn, prot)
#define kvm_pud_pfn(pud) pud_pfn(pud)
#define kvm_pmd_mkhuge(pmd) pmd_mkhuge(pmd)
#define kvm_pud_mkhuge(pud) pud_mkhuge(pud)
static inline pte_t kvm_s2pte_mkwrite(pte_t pte)
{
pte_val(pte) |= PTE_S2_RDWR;
return pte;
}
static inline pmd_t kvm_s2pmd_mkwrite(pmd_t pmd)
{
pmd_val(pmd) |= PMD_S2_RDWR;
return pmd;
}
static inline pud_t kvm_s2pud_mkwrite(pud_t pud)
{
pud_val(pud) |= PUD_S2_RDWR;
return pud;
}
static inline pte_t kvm_s2pte_mkexec(pte_t pte)
{
pte_val(pte) &= ~PTE_S2_XN;
return pte;
}
static inline pmd_t kvm_s2pmd_mkexec(pmd_t pmd)
{
pmd_val(pmd) &= ~PMD_S2_XN;
return pmd;
}
static inline pud_t kvm_s2pud_mkexec(pud_t pud)
{
pud_val(pud) &= ~PUD_S2_XN;
return pud;
}
static inline void kvm_set_s2pte_readonly(pte_t *ptep)
{
pteval_t old_pteval, pteval;
pteval = READ_ONCE(pte_val(*ptep));
do {
old_pteval = pteval;
pteval &= ~PTE_S2_RDWR;
pteval |= PTE_S2_RDONLY;
pteval = cmpxchg_relaxed(&pte_val(*ptep), old_pteval, pteval);
} while (pteval != old_pteval);
}
static inline bool kvm_s2pte_readonly(pte_t *ptep)
{
return (READ_ONCE(pte_val(*ptep)) & PTE_S2_RDWR) == PTE_S2_RDONLY;
}
static inline bool kvm_s2pte_exec(pte_t *ptep)
{
return !(READ_ONCE(pte_val(*ptep)) & PTE_S2_XN);
}
static inline void kvm_set_s2pmd_readonly(pmd_t *pmdp)
{
kvm_set_s2pte_readonly((pte_t *)pmdp);
}
static inline bool kvm_s2pmd_readonly(pmd_t *pmdp)
{
return kvm_s2pte_readonly((pte_t *)pmdp);
}
static inline bool kvm_s2pmd_exec(pmd_t *pmdp)
{
return !(READ_ONCE(pmd_val(*pmdp)) & PMD_S2_XN);
}
static inline void kvm_set_s2pud_readonly(pud_t *pudp)
{
kvm_set_s2pte_readonly((pte_t *)pudp);
}
static inline bool kvm_s2pud_readonly(pud_t *pudp)
{
return kvm_s2pte_readonly((pte_t *)pudp);
}
static inline bool kvm_s2pud_exec(pud_t *pudp)
{
return !(READ_ONCE(pud_val(*pudp)) & PUD_S2_XN);
}
static inline pud_t kvm_s2pud_mkyoung(pud_t pud)
{
return pud_mkyoung(pud);
}
static inline bool kvm_s2pud_young(pud_t pud)
{
return pud_young(pud);
}
#define hyp_pte_table_empty(ptep) kvm_page_empty(ptep)
#ifdef __PAGETABLE_PMD_FOLDED
#define hyp_pmd_table_empty(pmdp) (0)
#else
#define hyp_pmd_table_empty(pmdp) kvm_page_empty(pmdp)
#endif
#ifdef __PAGETABLE_PUD_FOLDED
#define hyp_pud_table_empty(pudp) (0)
#else
#define hyp_pud_table_empty(pudp) kvm_page_empty(pudp)
#endif
#ifdef __PAGETABLE_P4D_FOLDED
#define hyp_p4d_table_empty(p4dp) (0)
#else
#define hyp_p4d_table_empty(p4dp) kvm_page_empty(p4dp)
#endif
struct kvm;
@ -326,77 +171,9 @@ static inline void __invalidate_icache_guest_page(kvm_pfn_t pfn,
}
}
static inline void __kvm_flush_dcache_pte(pte_t pte)
{
if (!cpus_have_const_cap(ARM64_HAS_STAGE2_FWB)) {
struct page *page = pte_page(pte);
kvm_flush_dcache_to_poc(page_address(page), PAGE_SIZE);
}
}
static inline void __kvm_flush_dcache_pmd(pmd_t pmd)
{
if (!cpus_have_const_cap(ARM64_HAS_STAGE2_FWB)) {
struct page *page = pmd_page(pmd);
kvm_flush_dcache_to_poc(page_address(page), PMD_SIZE);
}
}
static inline void __kvm_flush_dcache_pud(pud_t pud)
{
if (!cpus_have_const_cap(ARM64_HAS_STAGE2_FWB)) {
struct page *page = pud_page(pud);
kvm_flush_dcache_to_poc(page_address(page), PUD_SIZE);
}
}
void kvm_set_way_flush(struct kvm_vcpu *vcpu);
void kvm_toggle_cache(struct kvm_vcpu *vcpu, bool was_enabled);
static inline bool __kvm_cpu_uses_extended_idmap(void)
{
return __cpu_uses_extended_idmap_level();
}
static inline unsigned long __kvm_idmap_ptrs_per_pgd(void)
{
return idmap_ptrs_per_pgd;
}
/*
 * Populate the extra top-level table used with the extended idmap.
 *
 * pgd_populate cannot be used: the extended idmap adds a level above
 * CONFIG_PGTABLE_LEVELS (2 or 3 in that configuration), while pgd_populate
 * only exists when CONFIG_PGTABLE_LEVELS = 4. The two table entries are
 * therefore built by hand.
 */
static inline void __kvm_extend_hypmap(pgd_t *boot_hyp_pgd,
				       pgd_t *hyp_pgd,
				       pgd_t *merged_hyp_pgd,
				       unsigned long hyp_idmap_start)
{
	u64 runtime_entry;
	u64 boot_entry;
	int boot_slot;

	/*
	 * Slot 0 carries the runtime HYP mappings. It must still be free,
	 * since an extended idmap would not be in use otherwise.
	 */
	VM_BUG_ON(pgd_val(merged_hyp_pgd[0]));
	runtime_entry = __phys_to_pgd_val(__pa(hyp_pgd)) | PMD_TYPE_TABLE;
	merged_hyp_pgd[0] = __pgd(runtime_entry);

	/*
	 * A second slot points at the boot HYP map, which holds the ID
	 * mapping of the HYP init code. This effectively merges the boot and
	 * runtime HYP maps; they do not overlap, so that is fine.
	 */
	boot_slot = hyp_idmap_start >> VA_BITS;
	VM_BUG_ON(pgd_val(merged_hyp_pgd[boot_slot]));
	boot_entry = __phys_to_pgd_val(__pa(boot_hyp_pgd)) | PMD_TYPE_TABLE;
	merged_hyp_pgd[boot_slot] = __pgd(boot_entry);
}
static inline unsigned int kvm_get_vmid_bits(void)
{
int reg = read_sanitised_ftr_reg(SYS_ID_AA64MMFR1_EL1);
@ -479,30 +256,6 @@ static inline void *kvm_get_hyp_vector(void)
#define kvm_phys_to_vttbr(addr) phys_to_ttbr(addr)
/*
 * Compute the magic number 'x' for VTTBR:BADDR of this KVM instance.
 * With the v8.2 LVA extensions and 52bit IPS, 'x' is clamped to a
 * minimum of 6.
 */
static inline int arm64_vttbr_x(u32 ipa_shift, u32 levels)
{
	int x = ARM64_VTTBR_X(ipa_shift, levels);

	if (IS_ENABLED(CONFIG_ARM64_PA_BITS_52) && x < 6)
		return 6;

	return x;
}
/*
 * Build the mask covering bits [PHYS_MASK_SHIFT - 1 : x], where x is the
 * value returned by arm64_vttbr_x() for @ipa_shift and @levels.
 */
static inline u64 vttbr_baddr_mask(u32 ipa_shift, u32 levels)
{
	unsigned int low_bit = arm64_vttbr_x(ipa_shift, levels);

	return GENMASK_ULL(PHYS_MASK_SHIFT - 1, low_bit);
}
/* vttbr_baddr_mask() evaluated with @kvm's phys shift and stage-2 level count. */
static inline u64 kvm_vttbr_baddr_mask(struct kvm *kvm)
{
	u64 mask = vttbr_baddr_mask(kvm_phys_shift(kvm), kvm_stage2_levels(kvm));

	return mask;
}
static __always_inline u64 kvm_get_vttbr(struct kvm_s2_mmu *mmu)
{
struct kvm_vmid *vmid = &mmu->vmid;

View file

@ -0,0 +1,309 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (C) 2020 Google LLC
* Author: Will Deacon <will@kernel.org>
*/
#ifndef __ARM64_KVM_PGTABLE_H__
#define __ARM64_KVM_PGTABLE_H__
#include <linux/bits.h>
#include <linux/kvm_host.h>
#include <linux/types.h>
typedef u64 kvm_pte_t;
/**
* struct kvm_pgtable - KVM page-table.
* @ia_bits: Maximum input address size, in bits.
* @start_level: Level at which the page-table walk starts.
* @pgd: Pointer to the first top-level entry of the page-table.
* @mmu: Stage-2 KVM MMU struct. Unused for stage-1 page-tables.
*/
struct kvm_pgtable {
u32 ia_bits;
u32 start_level;
kvm_pte_t *pgd;
/* Stage-2 only */
struct kvm_s2_mmu *mmu;
};
/**
* enum kvm_pgtable_prot - Page-table permissions and attributes.
* @KVM_PGTABLE_PROT_X: Execute permission.
* @KVM_PGTABLE_PROT_W: Write permission.
* @KVM_PGTABLE_PROT_R: Read permission.
* @KVM_PGTABLE_PROT_DEVICE: Device attributes.
*/
enum kvm_pgtable_prot {
KVM_PGTABLE_PROT_X = BIT(0),
KVM_PGTABLE_PROT_W = BIT(1),
KVM_PGTABLE_PROT_R = BIT(2),
KVM_PGTABLE_PROT_DEVICE = BIT(3),
};
/*
 * Hypervisor mapping presets expressed as kvm_pgtable_prot flag sets:
 * PAGE_HYP is read/write, PAGE_HYP_EXEC is read/execute, PAGE_HYP_RO is
 * read-only and PAGE_HYP_DEVICE is PAGE_HYP plus device attributes.
 */
#define PAGE_HYP (KVM_PGTABLE_PROT_R | KVM_PGTABLE_PROT_W)
#define PAGE_HYP_EXEC (KVM_PGTABLE_PROT_R | KVM_PGTABLE_PROT_X)
#define PAGE_HYP_RO (KVM_PGTABLE_PROT_R)
#define PAGE_HYP_DEVICE (PAGE_HYP | KVM_PGTABLE_PROT_DEVICE)
/**
* enum kvm_pgtable_walk_flags - Flags to control a depth-first page-table walk.
* @KVM_PGTABLE_WALK_LEAF: Visit leaf entries, including invalid
* entries.
* @KVM_PGTABLE_WALK_TABLE_PRE: Visit table entries before their
* children.
* @KVM_PGTABLE_WALK_TABLE_POST: Visit table entries after their
* children.
*/
enum kvm_pgtable_walk_flags {
KVM_PGTABLE_WALK_LEAF = BIT(0),
KVM_PGTABLE_WALK_TABLE_PRE = BIT(1),
KVM_PGTABLE_WALK_TABLE_POST = BIT(2),
};
/*
 * kvm_pgtable_visitor_fn_t - Callback type invoked on page-table entries
 * during a walk (see struct kvm_pgtable_walker and kvm_pgtable_walk()).
 *
 * addr, end: input-address range information for the visit; presumably the
 *            address of the current entry and the end of the walked range.
 *            NOTE(review): confirm against the walker implementation.
 * level:     page-table level of the visited entry.
 * ptep:      pointer to the visited entry.
 * flag:      the kvm_pgtable_walk_flags value describing this visit.
 * arg:       the walker's @arg, passed through unchanged.
 *
 * A negative return value terminates the walk with that error code.
 */
typedef int (*kvm_pgtable_visitor_fn_t)(u64 addr, u64 end, u32 level,
kvm_pte_t *ptep,
enum kvm_pgtable_walk_flags flag,
void * const arg);
/**
* struct kvm_pgtable_walker - Hook into a page-table walk.
* @cb: Callback function to invoke during the walk.
* @arg: Argument passed to the callback function.
* @flags: Bitwise-OR of flags to identify the entry types on which to
* invoke the callback function.
*/
struct kvm_pgtable_walker {
const kvm_pgtable_visitor_fn_t cb;
void * const arg;
const enum kvm_pgtable_walk_flags flags;
};
/**
* kvm_pgtable_hyp_init() - Initialise a hypervisor stage-1 page-table.
* @pgt: Uninitialised page-table structure to initialise.
* @va_bits: Maximum virtual address bits.
*
* Return: 0 on success, negative error code on failure.
*/
int kvm_pgtable_hyp_init(struct kvm_pgtable *pgt, u32 va_bits);
/**
* kvm_pgtable_hyp_destroy() - Destroy an unused hypervisor stage-1 page-table.
* @pgt: Page-table structure initialised by kvm_pgtable_hyp_init().
*
* The page-table is assumed to be unreachable by any hardware walkers prior
* to freeing and therefore no TLB invalidation is performed.
*/
void kvm_pgtable_hyp_destroy(struct kvm_pgtable *pgt);
/**
* kvm_pgtable_hyp_map() - Install a mapping in a hypervisor stage-1 page-table.
* @pgt: Page-table structure initialised by kvm_pgtable_hyp_init().
* @addr: Virtual address at which to place the mapping.
* @size: Size of the mapping.
* @phys: Physical address of the memory to map.
* @prot: Permissions and attributes for the mapping.
*
* The offset of @addr within a page is ignored, @size is rounded-up to
* the next page boundary and @phys is rounded-down to the previous page
* boundary.
*
* If device attributes are not explicitly requested in @prot, then the
* mapping will be normal, cacheable. Attempts to install a new mapping
* for a virtual address that is already mapped will be rejected with an
* error and a WARN().
*
* Return: 0 on success, negative error code on failure.
*/
int kvm_pgtable_hyp_map(struct kvm_pgtable *pgt, u64 addr, u64 size, u64 phys,
enum kvm_pgtable_prot prot);
/**
* kvm_pgtable_stage2_init() - Initialise a guest stage-2 page-table.
* @pgt: Uninitialised page-table structure to initialise.
* @kvm: KVM structure representing the guest virtual machine.
*
* Return: 0 on success, negative error code on failure.
*/
int kvm_pgtable_stage2_init(struct kvm_pgtable *pgt, struct kvm *kvm);
/**
* kvm_pgtable_stage2_destroy() - Destroy an unused guest stage-2 page-table.
* @pgt: Page-table structure initialised by kvm_pgtable_stage2_init().
*
* The page-table is assumed to be unreachable by any hardware walkers prior
* to freeing and therefore no TLB invalidation is performed.
*/
void kvm_pgtable_stage2_destroy(struct kvm_pgtable *pgt);
/**
* kvm_pgtable_stage2_map() - Install a mapping in a guest stage-2 page-table.
* @pgt: Page-table structure initialised by kvm_pgtable_stage2_init().
* @addr: Intermediate physical address at which to place the mapping.
* @size: Size of the mapping.
* @phys: Physical address of the memory to map.
* @prot: Permissions and attributes for the mapping.
* @mc: Cache of pre-allocated GFP_PGTABLE_USER memory from which to
* allocate page-table pages.
*
* The offset of @addr within a page is ignored, @size is rounded-up to
* the next page boundary and @phys is rounded-down to the previous page
* boundary.
*
* If device attributes are not explicitly requested in @prot, then the
* mapping will be normal, cacheable.
*
* Note that this function will both coalesce existing table entries and split
* existing block mappings, relying on page-faults to fault back areas outside
* of the new mapping lazily.
*
* Return: 0 on success, negative error code on failure.
*/
int kvm_pgtable_stage2_map(struct kvm_pgtable *pgt, u64 addr, u64 size,
u64 phys, enum kvm_pgtable_prot prot,
struct kvm_mmu_memory_cache *mc);
/**
* kvm_pgtable_stage2_unmap() - Remove a mapping from a guest stage-2 page-table.
* @pgt: Page-table structure initialised by kvm_pgtable_stage2_init().
* @addr: Intermediate physical address from which to remove the mapping.
* @size: Size of the mapping.
*
* The offset of @addr within a page is ignored and @size is rounded-up to
* the next page boundary.
*
* TLB invalidation is performed for each page-table entry cleared during the
* unmapping operation and the reference count for the page-table page
* containing the cleared entry is decremented, with unreferenced pages being
* freed. Unmapping a cacheable page will ensure that it is clean to the PoC if
* FWB is not supported by the CPU.
*
* Return: 0 on success, negative error code on failure.
*/
int kvm_pgtable_stage2_unmap(struct kvm_pgtable *pgt, u64 addr, u64 size);
/**
* kvm_pgtable_stage2_wrprotect() - Write-protect guest stage-2 address range
* without TLB invalidation.
* @pgt: Page-table structure initialised by kvm_pgtable_stage2_init().
* @addr: Intermediate physical address from which to write-protect.
* @size: Size of the range.
*
* The offset of @addr within a page is ignored and @size is rounded-up to
* the next page boundary.
*
* Note that it is the caller's responsibility to invalidate the TLB after
* calling this function to ensure that the updated permissions are visible
* to the CPUs.
*
* Return: 0 on success, negative error code on failure.
*/
int kvm_pgtable_stage2_wrprotect(struct kvm_pgtable *pgt, u64 addr, u64 size);
/**
* kvm_pgtable_stage2_mkyoung() - Set the access flag in a page-table entry.
* @pgt: Page-table structure initialised by kvm_pgtable_stage2_init().
* @addr: Intermediate physical address to identify the page-table entry.
*
* The offset of @addr within a page is ignored.
*
* If there is a valid, leaf page-table entry used to translate @addr, then
* set the access flag in that entry.
*
* Return: The old page-table entry prior to setting the flag, 0 on failure.
*/
kvm_pte_t kvm_pgtable_stage2_mkyoung(struct kvm_pgtable *pgt, u64 addr);
/**
* kvm_pgtable_stage2_mkold() - Clear the access flag in a page-table entry.
* @pgt: Page-table structure initialised by kvm_pgtable_stage2_init().
* @addr: Intermediate physical address to identify the page-table entry.
*
* The offset of @addr within a page is ignored.
*
* If there is a valid, leaf page-table entry used to translate @addr, then
* clear the access flag in that entry.
*
* Note that it is the caller's responsibility to invalidate the TLB after
* calling this function to ensure that the updated permissions are visible
* to the CPUs.
*
* Return: The old page-table entry prior to clearing the flag, 0 on failure.
*/
kvm_pte_t kvm_pgtable_stage2_mkold(struct kvm_pgtable *pgt, u64 addr);
/**
* kvm_pgtable_stage2_relax_perms() - Relax the permissions enforced by a
* page-table entry.
* @pgt: Page-table structure initialised by kvm_pgtable_stage2_init().
* @addr: Intermediate physical address to identify the page-table entry.
* @prot: Additional permissions to grant for the mapping.
*
* The offset of @addr within a page is ignored.
*
* If there is a valid, leaf page-table entry used to translate @addr, then
* relax the permissions in that entry according to the read, write and
* execute permissions specified by @prot. No permissions are removed, and
* TLB invalidation is performed after updating the entry.
*
* Return: 0 on success, negative error code on failure.
*/
int kvm_pgtable_stage2_relax_perms(struct kvm_pgtable *pgt, u64 addr,
enum kvm_pgtable_prot prot);
/**
* kvm_pgtable_stage2_is_young() - Test whether a page-table entry has the
* access flag set.
* @pgt: Page-table structure initialised by kvm_pgtable_stage2_init().
* @addr: Intermediate physical address to identify the page-table entry.
*
* The offset of @addr within a page is ignored.
*
* Return: True if the page-table entry has the access flag set, false otherwise.
*/
bool kvm_pgtable_stage2_is_young(struct kvm_pgtable *pgt, u64 addr);
/**
* kvm_pgtable_stage2_flush() - Clean and invalidate data cache to Point
* of Coherency for guest stage-2 address
* range.
* @pgt: Page-table structure initialised by kvm_pgtable_stage2_init().
* @addr: Intermediate physical address from which to flush.
* @size: Size of the range.
*
* The offset of @addr within a page is ignored and @size is rounded-up to
* the next page boundary.
*
* Return: 0 on success, negative error code on failure.
*/
int kvm_pgtable_stage2_flush(struct kvm_pgtable *pgt, u64 addr, u64 size);
/**
* kvm_pgtable_walk() - Walk a page-table.
* @pgt: Page-table structure initialised by kvm_pgtable_*_init().
* @addr: Input address for the start of the walk.
* @size: Size of the range to walk.
* @walker: Walker callback description.
*
* The offset of @addr within a page is ignored and @size is rounded-up to
* the next page boundary.
*
* The walker will walk the page-table entries corresponding to the input
* address range specified, visiting entries according to the walker flags.
* Invalid entries are treated as leaf entries. Leaf entries are reloaded
* after invoking the walker callback, allowing the walker to descend into
* a newly installed table.
*
* Returning a negative error code from the walker callback function will
* terminate the walk immediately with the same error code.
*
* Return: 0 on success, negative error code on failure.
*/
int kvm_pgtable_walk(struct kvm_pgtable *pgt, u64 addr, u64 size,
struct kvm_pgtable_walker *walker);
#endif /* __ARM64_KVM_PGTABLE_H__ */

View file

@ -60,7 +60,7 @@
.endm
/*
* Both ptrauth_switch_to_guest and ptrauth_switch_to_host macros will
* Both ptrauth_switch_to_guest and ptrauth_switch_to_hyp macros will
* check for the presence of ARM64_HAS_ADDRESS_AUTH, which is defined as
* (ARM64_HAS_ADDRESS_AUTH_ARCH || ARM64_HAS_ADDRESS_AUTH_IMP_DEF) and
* then proceed ahead with the save/restore of Pointer Authentication
@ -78,7 +78,7 @@ alternative_else_nop_endif
.L__skip_switch\@:
.endm
.macro ptrauth_switch_to_host g_ctxt, h_ctxt, reg1, reg2, reg3
.macro ptrauth_switch_to_hyp g_ctxt, h_ctxt, reg1, reg2, reg3
alternative_if_not ARM64_HAS_ADDRESS_AUTH
b .L__skip_switch\@
alternative_else_nop_endif
@ -96,7 +96,7 @@ alternative_else_nop_endif
#else /* !CONFIG_ARM64_PTR_AUTH */
.macro ptrauth_switch_to_guest g_ctxt, reg1, reg2, reg3
.endm
.macro ptrauth_switch_to_host g_ctxt, h_ctxt, reg1, reg2, reg3
.macro ptrauth_switch_to_hyp g_ctxt, h_ctxt, reg1, reg2, reg3
.endm
#endif /* CONFIG_ARM64_PTR_AUTH */
#endif /* __ASSEMBLY__ */

View file

@ -19,7 +19,16 @@ static inline void set_my_cpu_offset(unsigned long off)
:: "r" (off) : "memory");
}
static inline unsigned long __my_cpu_offset(void)
/* Per-CPU offset for nVHE hyp code: the value held in tpidr_el2. */
static inline unsigned long __hyp_my_cpu_offset(void)
{
	unsigned long off;

	/*
	 * Non-VHE hyp code always runs with preemption disabled, so the
	 * register read does not need to be hazarded against barrier() the
	 * way __kern_my_cpu_offset does.
	 */
	off = read_sysreg(tpidr_el2);

	return off;
}
static inline unsigned long __kern_my_cpu_offset(void)
{
unsigned long off;
@ -35,7 +44,12 @@ static inline unsigned long __my_cpu_offset(void)
return off;
}
#define __my_cpu_offset __my_cpu_offset()
/*
 * Select the per-CPU offset accessor: code built with
 * __KVM_NVHE_HYPERVISOR__ defined uses __hyp_my_cpu_offset(), everything
 * else uses __kern_my_cpu_offset().
 */
#ifdef __KVM_NVHE_HYPERVISOR__
#define __my_cpu_offset __hyp_my_cpu_offset()
#else
#define __my_cpu_offset __kern_my_cpu_offset()
#endif
#define PERCPU_RW_OPS(sz) \
static inline unsigned long __percpu_read_##sz(void *ptr) \
@ -227,4 +241,14 @@ PERCPU_RET_OP(add, add, ldadd)
#include <asm-generic/percpu.h>
/* Redefine macros for nVHE hyp under DEBUG_PREEMPT to avoid its dependencies. */
#if defined(__KVM_NVHE_HYPERVISOR__) && defined(CONFIG_DEBUG_PREEMPT)
#undef this_cpu_ptr
#define this_cpu_ptr raw_cpu_ptr
#undef __this_cpu_read
#define __this_cpu_read raw_cpu_read
#undef __this_cpu_write
#define __this_cpu_write raw_cpu_write
#endif
#endif /* __ASM_PERCPU_H */

View file

@ -146,7 +146,6 @@
#define PTE_CONT (_AT(pteval_t, 1) << 52) /* Contiguous range */
#define PTE_PXN (_AT(pteval_t, 1) << 53) /* Privileged XN */
#define PTE_UXN (_AT(pteval_t, 1) << 54) /* User XN */
#define PTE_HYP_XN (_AT(pteval_t, 1) << 54) /* HYP XN */
#define PTE_ADDR_LOW (((_AT(pteval_t, 1) << (48 - PAGE_SHIFT)) - 1) << PAGE_SHIFT)
#ifdef CONFIG_ARM64_PA_BITS_52
@ -162,34 +161,11 @@
#define PTE_ATTRINDX(t) (_AT(pteval_t, (t)) << 2)
#define PTE_ATTRINDX_MASK (_AT(pteval_t, 7) << 2)
/*
* 2nd stage PTE definitions
*/
#define PTE_S2_RDONLY (_AT(pteval_t, 1) << 6) /* HAP[2:1] */
#define PTE_S2_RDWR (_AT(pteval_t, 3) << 6) /* HAP[2:1] */
#define PTE_S2_XN (_AT(pteval_t, 2) << 53) /* XN[1:0] */
#define PTE_S2_SW_RESVD (_AT(pteval_t, 15) << 55) /* Reserved for SW */
#define PMD_S2_RDONLY (_AT(pmdval_t, 1) << 6) /* HAP[2:1] */
#define PMD_S2_RDWR (_AT(pmdval_t, 3) << 6) /* HAP[2:1] */
#define PMD_S2_XN (_AT(pmdval_t, 2) << 53) /* XN[1:0] */
#define PMD_S2_SW_RESVD (_AT(pmdval_t, 15) << 55) /* Reserved for SW */
#define PUD_S2_RDONLY (_AT(pudval_t, 1) << 6) /* HAP[2:1] */
#define PUD_S2_RDWR (_AT(pudval_t, 3) << 6) /* HAP[2:1] */
#define PUD_S2_XN (_AT(pudval_t, 2) << 53) /* XN[1:0] */
/*
* Memory Attribute override for Stage-2 (MemAttr[3:0])
*/
#define PTE_S2_MEMATTR(t) (_AT(pteval_t, (t)) << 2)
/*
* EL2/HYP PTE/PMD definitions
*/
#define PMD_HYP PMD_SECT_USER
#define PTE_HYP PTE_USER
/*
* Highest possible physical address supported.
*/

View file

@ -64,7 +64,6 @@ extern bool arm64_use_ng_mappings;
#define PROT_SECT_NORMAL_EXEC (PROT_SECT_DEFAULT | PMD_SECT_UXN | PMD_ATTRINDX(MT_NORMAL))
#define _PAGE_DEFAULT (_PROT_DEFAULT | PTE_ATTRINDX(MT_NORMAL))
#define _HYP_PAGE_DEFAULT _PAGE_DEFAULT
#define PAGE_KERNEL __pgprot(PROT_NORMAL)
#define PAGE_KERNEL_TAGGED __pgprot(PROT_NORMAL_TAGGED)
@ -73,11 +72,6 @@ extern bool arm64_use_ng_mappings;
#define PAGE_KERNEL_EXEC __pgprot(PROT_NORMAL & ~PTE_PXN)
#define PAGE_KERNEL_EXEC_CONT __pgprot((PROT_NORMAL & ~PTE_PXN) | PTE_CONT)
#define PAGE_HYP __pgprot(_HYP_PAGE_DEFAULT | PTE_HYP | PTE_HYP_XN)
#define PAGE_HYP_EXEC __pgprot(_HYP_PAGE_DEFAULT | PTE_HYP | PTE_RDONLY)
#define PAGE_HYP_RO __pgprot(_HYP_PAGE_DEFAULT | PTE_HYP | PTE_RDONLY | PTE_HYP_XN)
#define PAGE_HYP_DEVICE __pgprot(_PROT_DEFAULT | PTE_ATTRINDX(MT_DEVICE_nGnRE) | PTE_HYP | PTE_HYP_XN)
#define PAGE_S2_MEMATTR(attr) \
({ \
u64 __val; \
@ -88,19 +82,6 @@ extern bool arm64_use_ng_mappings;
__val; \
})
#define PAGE_S2_XN \
({ \
u64 __val; \
if (cpus_have_const_cap(ARM64_HAS_CACHE_DIC)) \
__val = 0; \
else \
__val = PTE_S2_XN; \
__val; \
})
#define PAGE_S2 __pgprot(_PROT_DEFAULT | PAGE_S2_MEMATTR(NORMAL) | PTE_S2_RDONLY | PAGE_S2_XN)
#define PAGE_S2_DEVICE __pgprot(_PROT_DEFAULT | PAGE_S2_MEMATTR(DEVICE_nGnRE) | PTE_S2_RDONLY | PTE_S2_XN)
#define PAGE_NONE __pgprot(((_PAGE_DEFAULT) & ~PTE_VALID) | PTE_PROT_NONE | PTE_RDONLY | PTE_NG | PTE_PXN | PTE_UXN)
/* shared+writable pages are clean by default, hence PTE_RDONLY|PTE_WRITE */
#define PAGE_SHARED __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_RDONLY | PTE_NG | PTE_PXN | PTE_UXN | PTE_WRITE)

View file

@ -8,7 +8,6 @@
#ifndef __ARM64_S2_PGTABLE_H_
#define __ARM64_S2_PGTABLE_H_
#include <linux/hugetlb.h>
#include <linux/pgtable.h>
/*
@ -36,21 +35,6 @@
#define stage2_pgdir_size(kvm) (1ULL << stage2_pgdir_shift(kvm))
#define stage2_pgdir_mask(kvm) ~(stage2_pgdir_size(kvm) - 1)
/*
* The number of PTRS across all concatenated stage2 tables given by the
* number of bits resolved at the initial level.
* If we force more levels than necessary, we may have (stage2_pgdir_shift > IPA),
* in which case, stage2_pgd_ptrs will have one entry.
*/
#define pgd_ptrs_shift(ipa, pgdir_shift) \
((ipa) > (pgdir_shift) ? ((ipa) - (pgdir_shift)) : 0)
#define __s2_pgd_ptrs(ipa, lvls) \
(1 << (pgd_ptrs_shift((ipa), pt_levels_pgdir_shift(lvls))))
#define __s2_pgd_size(ipa, lvls) (__s2_pgd_ptrs((ipa), (lvls)) * sizeof(pgd_t))
#define stage2_pgd_ptrs(kvm) __s2_pgd_ptrs(kvm_phys_shift(kvm), kvm_stage2_levels(kvm))
#define stage2_pgd_size(kvm) __s2_pgd_size(kvm_phys_shift(kvm), kvm_stage2_levels(kvm))
/*
* kvm_mmu_cache_min_pages() is the number of pages required to install
* a stage-2 translation. We pre-allocate the entry level page table at
@ -58,196 +42,6 @@
*/
#define kvm_mmu_cache_min_pages(kvm) (kvm_stage2_levels(kvm) - 1)
/*
 * Stage2 PUD definitions when the level is present.
 *
 * The PUD level exists only if both the host configuration and this VM's
 * stage-2 tables use more than three levels.
 */
static inline bool kvm_stage2_has_pud(struct kvm *kvm)
{
	if (CONFIG_PGTABLE_LEVELS <= 3)
		return 0;

	return kvm_stage2_levels(kvm) > 3;
}
/* Shift, size and mask for a stage-2 PUD entry (hardware page-table level 1). */
#define S2_PUD_SHIFT ARM64_HW_PGTABLE_LEVEL_SHIFT(1)
#define S2_PUD_SIZE (1UL << S2_PUD_SHIFT)
#define S2_PUD_MASK (~(S2_PUD_SIZE - 1))
/*
 * Stage-2 PGD accessors: direct aliases of the generic pgd_* helpers.
 * The kvm argument is accepted but not used by the expansions.
 */
#define stage2_pgd_none(kvm, pgd) pgd_none(pgd)
#define stage2_pgd_clear(kvm, pgd) pgd_clear(pgd)
#define stage2_pgd_present(kvm, pgd) pgd_present(pgd)
#define stage2_pgd_populate(kvm, pgd, p4d) pgd_populate(NULL, pgd, p4d)
/* Stage-2 alias for p4d_offset(); @kvm is not consulted. */
static inline p4d_t *stage2_p4d_offset(struct kvm *kvm,
				       pgd_t *pgd, unsigned long address)
{
	p4d_t *p4d = p4d_offset(pgd, address);

	return p4d;
}
/* No-op: nothing is freed for a stage-2 P4D; both arguments are unused. */
static inline void stage2_p4d_free(struct kvm *kvm, p4d_t *p4d)
{
}
/* Always reports false: a stage-2 P4D table is never treated as empty here. */
static inline bool stage2_p4d_table_empty(struct kvm *kvm, p4d_t *p4dp)
{
return false;
}
/* Always returns @end; @addr is not consulted at the P4D level. */
static inline phys_addr_t stage2_p4d_addr_end(struct kvm *kvm,
phys_addr_t addr, phys_addr_t end)
{
return end;
}
/* p4d_none() when the stage-2 PUD level exists, otherwise always 0. */
static inline bool stage2_p4d_none(struct kvm *kvm, p4d_t p4d)
{
	if (!kvm_stage2_has_pud(kvm))
		return 0;

	return p4d_none(p4d);
}
/* Clear the P4D entry, but only when the stage-2 PUD level exists. */
static inline void stage2_p4d_clear(struct kvm *kvm, p4d_t *p4dp)
{
	if (!kvm_stage2_has_pud(kvm))
		return;

	p4d_clear(p4dp);
}
/* p4d_present() when the stage-2 PUD level exists, otherwise always 1. */
static inline bool stage2_p4d_present(struct kvm *kvm, p4d_t p4d)
{
	if (!kvm_stage2_has_pud(kvm))
		return 1;

	return p4d_present(p4d);
}
/* Install @pud into @p4d, but only when the stage-2 PUD level exists. */
static inline void stage2_p4d_populate(struct kvm *kvm, p4d_t *p4d, pud_t *pud)
{
	if (!kvm_stage2_has_pud(kvm))
		return;

	p4d_populate(NULL, p4d, pud);
}
/*
 * Locate the PUD entry for @address. Without a stage-2 PUD level the P4D
 * entry itself is returned, cast to a PUD pointer.
 */
static inline pud_t *stage2_pud_offset(struct kvm *kvm,
				       p4d_t *p4d, unsigned long address)
{
	if (!kvm_stage2_has_pud(kvm))
		return (pud_t *)p4d;

	return pud_offset(p4d, address);
}
/* Free the page backing @pud, but only when the stage-2 PUD level exists. */
static inline void stage2_pud_free(struct kvm *kvm, pud_t *pud)
{
	if (!kvm_stage2_has_pud(kvm))
		return;

	free_page((unsigned long)pud);
}
/* kvm_page_empty() when the stage-2 PUD level exists, otherwise false. */
static inline bool stage2_pud_table_empty(struct kvm *kvm, pud_t *pudp)
{
	if (!kvm_stage2_has_pud(kvm))
		return false;

	return kvm_page_empty(pudp);
}
/*
 * Return the next S2_PUD_SIZE boundary after @addr, capped at @end.
 * Without a stage-2 PUD level, @end is returned unchanged.
 */
static inline phys_addr_t
stage2_pud_addr_end(struct kvm *kvm, phys_addr_t addr, phys_addr_t end)
{
	phys_addr_t boundary;

	if (!kvm_stage2_has_pud(kvm))
		return end;

	boundary = (addr + S2_PUD_SIZE) & S2_PUD_MASK;
	/* The "- 1" on both sides is kept exactly as in the original comparison. */
	if (boundary - 1 < end - 1)
		return boundary;

	return end;
}
/*
 * Stage2 PMD definitions when the level is present.
 *
 * The PMD level exists only if both the host configuration and this VM's
 * stage-2 tables use more than two levels.
 */
static inline bool kvm_stage2_has_pmd(struct kvm *kvm)
{
	if (CONFIG_PGTABLE_LEVELS <= 2)
		return 0;

	return kvm_stage2_levels(kvm) > 2;
}
/* Shift, size and mask for a stage-2 PMD entry (hardware page-table level 2). */
#define S2_PMD_SHIFT ARM64_HW_PGTABLE_LEVEL_SHIFT(2)
#define S2_PMD_SIZE (1UL << S2_PMD_SHIFT)
#define S2_PMD_MASK (~(S2_PMD_SIZE - 1))
/* pud_none() when the stage-2 PMD level exists, otherwise always 0. */
static inline bool stage2_pud_none(struct kvm *kvm, pud_t pud)
{
	if (!kvm_stage2_has_pmd(kvm))
		return 0;

	return pud_none(pud);
}
/* Clear the PUD entry, but only when the stage-2 PMD level exists. */
static inline void stage2_pud_clear(struct kvm *kvm, pud_t *pud)
{
	if (!kvm_stage2_has_pmd(kvm))
		return;

	pud_clear(pud);
}
/* pud_present() when the stage-2 PMD level exists, otherwise always 1. */
static inline bool stage2_pud_present(struct kvm *kvm, pud_t pud)
{
	if (!kvm_stage2_has_pmd(kvm))
		return 1;

	return pud_present(pud);
}
/* Install @pmd into @pud, but only when the stage-2 PMD level exists. */
static inline void stage2_pud_populate(struct kvm *kvm, pud_t *pud, pmd_t *pmd)
{
	if (!kvm_stage2_has_pmd(kvm))
		return;

	pud_populate(NULL, pud, pmd);
}
/*
 * Locate the PMD entry for @address. Without a stage-2 PMD level the PUD
 * entry itself is returned, cast to a PMD pointer.
 */
static inline pmd_t *stage2_pmd_offset(struct kvm *kvm,
				       pud_t *pud, unsigned long address)
{
	if (!kvm_stage2_has_pmd(kvm))
		return (pmd_t *)pud;

	return pmd_offset(pud, address);
}
/* Free the page backing @pmd, but only when the stage-2 PMD level exists. */
static inline void stage2_pmd_free(struct kvm *kvm, pmd_t *pmd)
{
	if (!kvm_stage2_has_pmd(kvm))
		return;

	free_page((unsigned long)pmd);
}
/* pud_huge() when the stage-2 PMD level exists, otherwise always 0. */
static inline bool stage2_pud_huge(struct kvm *kvm, pud_t pud)
{
	if (!kvm_stage2_has_pmd(kvm))
		return 0;

	return pud_huge(pud);
}
/* kvm_page_empty() when the stage-2 PMD level exists, otherwise always 0. */
static inline bool stage2_pmd_table_empty(struct kvm *kvm, pmd_t *pmdp)
{
	if (!kvm_stage2_has_pmd(kvm))
		return 0;

	return kvm_page_empty(pmdp);
}
/*
 * Return the next S2_PMD_SIZE boundary after @addr, capped at @end.
 * Without a stage-2 PMD level, @end is returned unchanged.
 */
static inline phys_addr_t
stage2_pmd_addr_end(struct kvm *kvm, phys_addr_t addr, phys_addr_t end)
{
	phys_addr_t boundary;

	if (!kvm_stage2_has_pmd(kvm))
		return end;

	boundary = (addr + S2_PMD_SIZE) & S2_PMD_MASK;
	/* The "- 1" on both sides is kept exactly as in the original comparison. */
	if (boundary - 1 < end - 1)
		return boundary;

	return end;
}
/* Stage-2 alias for kvm_page_empty() on a PTE table; @kvm is not consulted. */
static inline bool stage2_pte_table_empty(struct kvm *kvm, pte_t *ptep)
{
	bool empty = kvm_page_empty(ptep);

	return empty;
}
/*
 * Index of @addr in the stage-2 PGD: the address shifted down by
 * stage2_pgdir_shift() and masked with (stage2_pgd_ptrs() - 1).
 */
static inline unsigned long stage2_pgd_index(struct kvm *kvm, phys_addr_t addr)
{
	unsigned long idx = addr >> stage2_pgdir_shift(kvm);

	return idx & (stage2_pgd_ptrs(kvm) - 1);
}
static inline phys_addr_t
stage2_pgd_addr_end(struct kvm *kvm, phys_addr_t addr, phys_addr_t end)
{
@ -256,13 +50,4 @@ stage2_pgd_addr_end(struct kvm *kvm, phys_addr_t addr, phys_addr_t end)
return (boundary - 1 < end - 1) ? boundary : end;
}
/*
* Level values for the ARMv8.4-TTL extension, mapping PUD/PMD/PTE and
* the architectural page-table level.
*/
#define S2_NO_LEVEL_HINT 0
#define S2_PUD_LEVEL 1
#define S2_PMD_LEVEL 2
#define S2_PTE_LEVEL 3
#endif /* __ARM64_S2_PGTABLE_H_ */

View file

@ -159,6 +159,21 @@ struct kvm_sync_regs {
struct kvm_arch_memory_slot {
};
/*
* PMU filter structure. Describe a range of events with a particular
* action. To be used with KVM_ARM_VCPU_PMU_V3_FILTER.
*/
struct kvm_pmu_event_filter {
/* Presumably the first event number of the range - confirm against the KVM API docs. */
__u16 base_event;
/* Presumably the number of events in the range - confirm against the KVM API docs. */
__u16 nevents;
/* Values for the action field below. */
#define KVM_PMU_EVENT_ALLOW 0
#define KVM_PMU_EVENT_DENY 1
__u8 action;
/* Padding bytes. NOTE(review): confirm whether userspace must zero them. */
__u8 pad[3];
};
/* for KVM_GET/SET_VCPU_EVENTS */
struct kvm_vcpu_events {
struct {
@ -338,6 +353,7 @@ struct kvm_vcpu_events {
#define KVM_ARM_VCPU_PMU_V3_CTRL 0
#define KVM_ARM_VCPU_PMU_V3_IRQ 0
#define KVM_ARM_VCPU_PMU_V3_INIT 1
#define KVM_ARM_VCPU_PMU_V3_FILTER 2
#define KVM_ARM_VCPU_TIMER_CTRL 1
#define KVM_ARM_VCPU_TIMER_IRQ_VTIMER 0
#define KVM_ARM_VCPU_TIMER_IRQ_PTIMER 1

View file

@ -61,14 +61,11 @@ __efistub__ctype = _ctype;
* memory mappings.
*/
#define KVM_NVHE_ALIAS(sym) __kvm_nvhe_##sym = sym;
/* Alternative callbacks for init-time patching of nVHE hyp code. */
KVM_NVHE_ALIAS(kvm_patch_vector_branch);
KVM_NVHE_ALIAS(kvm_update_va_mask);
/* Global kernel state accessed by nVHE hyp code. */
KVM_NVHE_ALIAS(kvm_host_data);
KVM_NVHE_ALIAS(kvm_vgic_global_state);
/* Kernel constant needed to compute idmap addresses. */

View file

@ -946,7 +946,6 @@ asmlinkage void do_notify_resume(struct pt_regs *regs,
do_signal(regs);
if (thread_flags & _TIF_NOTIFY_RESUME) {
clear_thread_flag(TIF_NOTIFY_RESUME);
tracehook_notify_resume(regs);
rseq_handle_notify_resume(NULL, regs);
}

View file

@ -10,6 +10,7 @@
#include <asm-generic/vmlinux.lds.h>
#include <asm/cache.h>
#include <asm/hyp_image.h>
#include <asm/kernel-pgtable.h>
#include <asm/memory.h>
#include <asm/page.h>
@ -22,12 +23,23 @@ ENTRY(_text)
jiffies = jiffies_64;
/*
 * Linker-script fragments for KVM hyp code. Both expand to nothing when
 * CONFIG_KVM is not set.
 */
#ifdef CONFIG_KVM
/* 8-byte aligned __kvm_ex_table input, bracketed by start/stop symbols. */
#define HYPERVISOR_EXTABLE \
. = ALIGN(SZ_8); \
__start___kvm_ex_table = .; \
*(__kvm_ex_table) \
__stop___kvm_ex_table = .;
/* Page-aligned output section collecting the hyp .data..percpu input sections. */
#define HYPERVISOR_PERCPU_SECTION \
. = ALIGN(PAGE_SIZE); \
HYP_SECTION_NAME(.data..percpu) : { \
*(HYP_SECTION_NAME(.data..percpu)) \
}
#else /* CONFIG_KVM */
#define HYPERVISOR_EXTABLE
#define HYPERVISOR_PERCPU_SECTION
#endif
#define HYPERVISOR_TEXT \
/* \
* Align to 4 KB so that \
@ -196,6 +208,7 @@ SECTIONS
}
PERCPU_SECTION(L1_CACHE_BYTES)
HYPERVISOR_PERCPU_SECTION
.rela.dyn : ALIGN(8) {
*(.rela .rela*)

View file

@ -13,7 +13,7 @@ obj-$(CONFIG_KVM) += hyp/
kvm-y := $(KVM)/kvm_main.o $(KVM)/coalesced_mmio.o $(KVM)/eventfd.o \
$(KVM)/vfio.o $(KVM)/irqchip.o \
arm.o mmu.o mmio.o psci.o perf.o hypercalls.o pvtime.o \
inject_fault.o regmap.o va_layout.o hyp.o handle_exit.o \
inject_fault.o regmap.o va_layout.o handle_exit.o \
guest.o debug.o reset.o sys_regs.o \
vgic-sys-reg-v3.o fpsimd.o pmu.o \
aarch32.o arch_timer.o \

View file

@ -46,8 +46,10 @@
__asm__(".arch_extension virt");
#endif
DEFINE_PER_CPU(kvm_host_data_t, kvm_host_data);
DECLARE_KVM_HYP_PER_CPU(unsigned long, kvm_hyp_vector);
static DEFINE_PER_CPU(unsigned long, kvm_arm_hyp_stack_page);
unsigned long kvm_arm_hyp_percpu_base[NR_CPUS];
/* The VMID used in the VTTBR */
static atomic64_t kvm_vmid_gen = ATOMIC64_INIT(1);
@ -145,6 +147,8 @@ void kvm_arch_destroy_vm(struct kvm *kvm)
{
int i;
bitmap_free(kvm->arch.pmu_filter);
kvm_vgic_destroy(kvm);
for (i = 0; i < KVM_MAX_VCPUS; ++i) {
@ -286,7 +290,7 @@ void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
if (vcpu->arch.has_run_once && unlikely(!irqchip_in_kernel(vcpu->kvm)))
static_branch_dec(&userspace_irqchip_in_use);
kvm_mmu_free_memory_caches(vcpu);
kvm_mmu_free_memory_cache(&vcpu->arch.mmu_page_cache);
kvm_timer_vcpu_terminate(vcpu);
kvm_pmu_vcpu_destroy(vcpu);
@ -1259,6 +1263,19 @@ long kvm_arch_vm_ioctl(struct file *filp,
}
}
/*
 * Size in bytes of the nVHE hyp per-CPU region: the distance between the
 * nVHE __per_cpu_start and __per_cpu_end symbols.
 */
static unsigned long nvhe_percpu_size(void)
{
	unsigned long end = (unsigned long)CHOOSE_NVHE_SYM(__per_cpu_end);
	unsigned long start = (unsigned long)CHOOSE_NVHE_SYM(__per_cpu_start);

	return end - start;
}
/*
 * Page allocation order for the nVHE hyp per-CPU region. A zero-sized
 * region yields order 0 without calling get_order().
 */
static unsigned long nvhe_percpu_order(void)
{
	unsigned long size = nvhe_percpu_size();

	if (!size)
		return 0;

	return get_order(size);
}
static int kvm_map_vectors(void)
{
/*
@ -1299,6 +1316,7 @@ static void cpu_init_hyp_mode(void)
unsigned long hyp_stack_ptr;
unsigned long vector_ptr;
unsigned long tpidr_el2;
struct arm_smccc_res res;
/* Switch from the HYP stub to our own HYP init vector */
__hyp_set_vectors(kvm_get_idmap_vector());
@ -1308,12 +1326,13 @@ static void cpu_init_hyp_mode(void)
* kernel's mapping to the linear mapping, and store it in tpidr_el2
* so that we can use adr_l to access per-cpu variables in EL2.
*/
tpidr_el2 = ((unsigned long)this_cpu_ptr(&kvm_host_data) -
(unsigned long)kvm_ksym_ref(&kvm_host_data));
tpidr_el2 = (unsigned long)this_cpu_ptr_nvhe_sym(__per_cpu_start) -
(unsigned long)kvm_ksym_ref(CHOOSE_NVHE_SYM(__per_cpu_start));
pgd_ptr = kvm_mmu_get_httbr();
hyp_stack_ptr = __this_cpu_read(kvm_arm_hyp_stack_page) + PAGE_SIZE;
vector_ptr = (unsigned long)kvm_get_hyp_vector();
hyp_stack_ptr = kern_hyp_va(hyp_stack_ptr);
vector_ptr = (unsigned long)kern_hyp_va(kvm_ksym_ref(__kvm_hyp_host_vector));
/*
* Call initialization code, and switch to the full blown HYP code.
@ -1322,7 +1341,9 @@ static void cpu_init_hyp_mode(void)
* cpus_have_const_cap() wrapper.
*/
BUG_ON(!system_capabilities_finalized());
__kvm_call_hyp((void *)pgd_ptr, hyp_stack_ptr, vector_ptr, tpidr_el2);
arm_smccc_1_1_hvc(KVM_HOST_SMCCC_FUNC(__kvm_hyp_init),
pgd_ptr, tpidr_el2, hyp_stack_ptr, vector_ptr, &res);
WARN_ON(res.a0 != SMCCC_RET_SUCCESS);
/*
* Disabling SSBD on a non-VHE system requires us to enable SSBS
@ -1342,10 +1363,12 @@ static void cpu_hyp_reset(void)
static void cpu_hyp_reinit(void)
{
kvm_init_host_cpu_context(&this_cpu_ptr(&kvm_host_data)->host_ctxt);
kvm_init_host_cpu_context(&this_cpu_ptr_hyp_sym(kvm_host_data)->host_ctxt);
cpu_hyp_reset();
*this_cpu_ptr_hyp_sym(kvm_hyp_vector) = (unsigned long)kvm_get_hyp_vector();
if (is_kernel_in_hyp_mode())
kvm_timer_init_vhe();
else
@ -1496,8 +1519,10 @@ static void teardown_hyp_mode(void)
int cpu;
free_hyp_pgds();
for_each_possible_cpu(cpu)
for_each_possible_cpu(cpu) {
free_page(per_cpu(kvm_arm_hyp_stack_page, cpu));
free_pages(kvm_arm_hyp_percpu_base[cpu], nvhe_percpu_order());
}
}
/**
@ -1530,6 +1555,24 @@ static int init_hyp_mode(void)
per_cpu(kvm_arm_hyp_stack_page, cpu) = stack_page;
}
/*
* Allocate and initialize pages for Hypervisor-mode percpu regions.
*/
for_each_possible_cpu(cpu) {
struct page *page;
void *page_addr;
page = alloc_pages(GFP_KERNEL, nvhe_percpu_order());
if (!page) {
err = -ENOMEM;
goto out_err;
}
page_addr = page_address(page);
memcpy(page_addr, CHOOSE_NVHE_SYM(__per_cpu_start), nvhe_percpu_size());
kvm_arm_hyp_percpu_base[cpu] = (unsigned long)page_addr;
}
/*
* Map the Hyp-code called directly from the host
*/
@ -1574,14 +1617,17 @@ static int init_hyp_mode(void)
}
}
/*
* Map Hyp percpu pages
*/
for_each_possible_cpu(cpu) {
kvm_host_data_t *cpu_data;
char *percpu_begin = (char *)kvm_arm_hyp_percpu_base[cpu];
char *percpu_end = percpu_begin + nvhe_percpu_size();
cpu_data = per_cpu_ptr(&kvm_host_data, cpu);
err = create_hyp_mappings(cpu_data, cpu_data + 1, PAGE_HYP);
err = create_hyp_mappings(percpu_begin, percpu_end, PAGE_HYP);
if (err) {
kvm_err("Cannot map host CPU state: %d\n", err);
kvm_err("Cannot map hyp percpu region\n");
goto out_err;
}
}

View file

@ -1,34 +0,0 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (C) 2012,2013 - ARM Ltd
* Author: Marc Zyngier <marc.zyngier@arm.com>
*/
#include <linux/linkage.h>
#include <asm/alternative.h>
#include <asm/assembler.h>
#include <asm/cpufeature.h>
/*
* u64 __kvm_call_hyp(void *hypfn, ...);
*
* This is not really a variadic function in the classic C-way and care must
* be taken when calling this to ensure parameters are passed in registers
* only, since the stack will change between the caller and the callee.
*
* Call the function with the first argument containing a pointer to the
* function you wish to call in Hyp mode, and subsequent arguments will be
* passed as x0, x1, and x2 (a maximum of 3 arguments in addition to the
* function pointer can be passed). The function being called must be mapped
* in Hyp mode (see init_hyp_mode in arch/arm/kvm/arm.c). Return values are
* passed in x0.
*
* A function pointer with a value less than 0xfff has a special meaning,
* and is used to implement hyp stubs in the same way as in
* arch/arm64/kernel/hyp_stub.S.
*/
SYM_FUNC_START(__kvm_call_hyp)
hvc #0
ret
SYM_FUNC_END(__kvm_call_hyp)

View file

@ -10,4 +10,4 @@ subdir-ccflags-y := -I$(incdir) \
-DDISABLE_BRANCH_PROFILING \
$(DISABLE_STACKLEAK_PLUGIN)
obj-$(CONFIG_KVM) += vhe/ nvhe/ smccc_wa.o
obj-$(CONFIG_KVM) += vhe/ nvhe/ pgtable.o smccc_wa.o

View file

@ -7,7 +7,6 @@
#include <linux/linkage.h>
#include <asm/alternative.h>
#include <asm/asm-offsets.h>
#include <asm/assembler.h>
#include <asm/fpsimdmacros.h>
#include <asm/kvm.h>
@ -16,66 +15,28 @@
#include <asm/kvm_mmu.h>
#include <asm/kvm_ptrauth.h>
#define CPU_XREG_OFFSET(x) (CPU_USER_PT_REGS + 8*x)
#define CPU_SP_EL0_OFFSET (CPU_XREG_OFFSET(30) + 8)
.text
/*
* We treat x18 as callee-saved as the host may use it as a platform
* register (e.g. for shadow call stack).
*/
.macro save_callee_saved_regs ctxt
str x18, [\ctxt, #CPU_XREG_OFFSET(18)]
stp x19, x20, [\ctxt, #CPU_XREG_OFFSET(19)]
stp x21, x22, [\ctxt, #CPU_XREG_OFFSET(21)]
stp x23, x24, [\ctxt, #CPU_XREG_OFFSET(23)]
stp x25, x26, [\ctxt, #CPU_XREG_OFFSET(25)]
stp x27, x28, [\ctxt, #CPU_XREG_OFFSET(27)]
stp x29, lr, [\ctxt, #CPU_XREG_OFFSET(29)]
.endm
.macro restore_callee_saved_regs ctxt
// We require \ctxt is not x18-x28
ldr x18, [\ctxt, #CPU_XREG_OFFSET(18)]
ldp x19, x20, [\ctxt, #CPU_XREG_OFFSET(19)]
ldp x21, x22, [\ctxt, #CPU_XREG_OFFSET(21)]
ldp x23, x24, [\ctxt, #CPU_XREG_OFFSET(23)]
ldp x25, x26, [\ctxt, #CPU_XREG_OFFSET(25)]
ldp x27, x28, [\ctxt, #CPU_XREG_OFFSET(27)]
ldp x29, lr, [\ctxt, #CPU_XREG_OFFSET(29)]
.endm
.macro save_sp_el0 ctxt, tmp
mrs \tmp, sp_el0
str \tmp, [\ctxt, #CPU_SP_EL0_OFFSET]
.endm
.macro restore_sp_el0 ctxt, tmp
ldr \tmp, [\ctxt, #CPU_SP_EL0_OFFSET]
msr sp_el0, \tmp
.endm
/*
* u64 __guest_enter(struct kvm_vcpu *vcpu,
* struct kvm_cpu_context *host_ctxt);
* u64 __guest_enter(struct kvm_vcpu *vcpu);
*/
SYM_FUNC_START(__guest_enter)
// x0: vcpu
// x1: host context
// x2-x17: clobbered by macros
// x1-x17: clobbered by macros
// x29: guest context
// Store the host regs
adr_this_cpu x1, kvm_hyp_ctxt, x2
// Store the hyp regs
save_callee_saved_regs x1
// Save the host's sp_el0
// Save hyp's sp_el0
save_sp_el0 x1, x2
// Now the host state is stored if we have a pending RAS SError it must
// affect the host. If any asynchronous exception is pending we defer
// the guest entry. The DSB isn't necessary before v8.2 as any SError
// would be fatal.
// Now the hyp state is stored if we have a pending RAS SError it must
// affect the host or hyp. If any asynchronous exception is pending we
// defer the guest entry. The DSB isn't necessary before v8.2 as any
// SError would be fatal.
alternative_if ARM64_HAS_RAS_EXTN
dsb nshst
isb
@ -86,6 +47,8 @@ alternative_else_nop_endif
ret
1:
set_loaded_vcpu x0, x1, x2
add x29, x0, #VCPU_CONTEXT
// Macro ptrauth_switch_to_guest format:
@ -116,6 +79,26 @@ alternative_else_nop_endif
eret
sb
SYM_INNER_LABEL(__guest_exit_panic, SYM_L_GLOBAL)
// x2-x29,lr: vcpu regs
// vcpu x0-x1 on the stack
// If the hyp context is loaded, go straight to hyp_panic
get_loaded_vcpu x0, x1
cbz x0, hyp_panic
// The hyp context is saved so make sure it is restored to allow
// hyp_panic to run at hyp and, subsequently, panic to run in the host.
// This makes use of __guest_exit to avoid duplication but sets the
// return address to tail call into hyp_panic. As a side effect, the
// current state is saved to the guest context but it will only be
// accurate if the guest had been completely restored.
adr_this_cpu x0, kvm_hyp_ctxt, x1
adr x1, hyp_panic
str x1, [x0, #CPU_XREG_OFFSET(30)]
get_vcpu_ptr x1, x0
SYM_INNER_LABEL(__guest_exit, SYM_L_GLOBAL)
// x0: return code
// x1: vcpu
@ -148,21 +131,23 @@ SYM_INNER_LABEL(__guest_exit, SYM_L_GLOBAL)
// Store the guest's sp_el0
save_sp_el0 x1, x2
get_host_ctxt x2, x3
adr_this_cpu x2, kvm_hyp_ctxt, x3
// Macro ptrauth_switch_to_guest format:
// ptrauth_switch_to_host(guest cxt, host cxt, tmp1, tmp2, tmp3)
// Macro ptrauth_switch_to_hyp format:
// ptrauth_switch_to_hyp(guest cxt, host cxt, tmp1, tmp2, tmp3)
// The below macro to save/restore keys is not implemented in C code
// as it may cause Pointer Authentication key signing mismatch errors
// when this feature is enabled for kernel code.
ptrauth_switch_to_host x1, x2, x3, x4, x5
ptrauth_switch_to_hyp x1, x2, x3, x4, x5
// Restore the hosts's sp_el0
// Restore hyp's sp_el0
restore_sp_el0 x2, x3
// Now restore the host regs
// Now restore the hyp regs
restore_callee_saved_regs x2
set_loaded_vcpu xzr, x1, x2
alternative_if ARM64_HAS_RAS_EXTN
// If we have the RAS extensions we can consume a pending error
// without an unmask-SError and isb. The ESB-instruction consumed any

View file

@ -12,7 +12,6 @@
#include <asm/cpufeature.h>
#include <asm/kvm_arm.h>
#include <asm/kvm_asm.h>
#include <asm/kvm_mmu.h>
#include <asm/mmu.h>
.macro save_caller_saved_regs_vect
@ -41,20 +40,6 @@
.text
.macro do_el2_call
/*
* Shuffle the parameters before calling the function
* pointed to in x0. Assumes parameters in x[1,2,3].
*/
str lr, [sp, #-16]!
mov lr, x0
mov x0, x1
mov x1, x2
mov x2, x3
blr lr
ldr lr, [sp], #16
.endm
el1_sync: // Guest trapped into EL2
mrs x0, esr_el2
@ -63,44 +48,6 @@ el1_sync: // Guest trapped into EL2
ccmp x0, #ESR_ELx_EC_HVC32, #4, ne
b.ne el1_trap
#ifdef __KVM_NVHE_HYPERVISOR__
mrs x1, vttbr_el2 // If vttbr is valid, the guest
cbnz x1, el1_hvc_guest // called HVC
/* Here, we're pretty sure the host called HVC. */
ldp x0, x1, [sp], #16
/* Check for a stub HVC call */
cmp x0, #HVC_STUB_HCALL_NR
b.hs 1f
/*
* Compute the idmap address of __kvm_handle_stub_hvc and
* jump there. Since we use kimage_voffset, do not use the
* HYP VA for __kvm_handle_stub_hvc, but the kernel VA instead
* (by loading it from the constant pool).
*
* Preserve x0-x4, which may contain stub parameters.
*/
ldr x5, =__kvm_handle_stub_hvc
ldr_l x6, kimage_voffset
/* x5 = __pa(x5) */
sub x5, x5, x6
br x5
1:
/*
* Perform the EL2 call
*/
kern_hyp_va x0
do_el2_call
eret
sb
#endif /* __KVM_NVHE_HYPERVISOR__ */
el1_hvc_guest:
/*
* Fastest possible path for ARM_SMCCC_ARCH_WORKAROUND_1.
* The workaround has already been applied on the host,
@ -169,24 +116,7 @@ el2_error:
eret
sb
#ifdef __KVM_NVHE_HYPERVISOR__
SYM_FUNC_START(__hyp_do_panic)
mov lr, #(PSR_F_BIT | PSR_I_BIT | PSR_A_BIT | PSR_D_BIT |\
PSR_MODE_EL1h)
msr spsr_el2, lr
ldr lr, =panic
msr elr_el2, lr
eret
sb
SYM_FUNC_END(__hyp_do_panic)
#endif
SYM_CODE_START(__hyp_panic)
get_host_ctxt x0, x1
b hyp_panic
SYM_CODE_END(__hyp_panic)
.macro invalid_vector label, target = __hyp_panic
.macro invalid_vector label, target = __guest_exit_panic
.align 2
SYM_CODE_START(\label)
b \target
@ -198,7 +128,6 @@ SYM_CODE_END(\label)
invalid_vector el2t_irq_invalid
invalid_vector el2t_fiq_invalid
invalid_vector el2t_error_invalid
invalid_vector el2h_sync_invalid
invalid_vector el2h_irq_invalid
invalid_vector el2h_fiq_invalid
invalid_vector el1_fiq_invalid
@ -228,10 +157,9 @@ check_preamble_length 661b, 662b
.macro invalid_vect target
.align 7
661:
b \target
nop
stp x0, x1, [sp, #-16]!
662:
ldp x0, x1, [sp], #16
b \target
check_preamble_length 661b, 662b

View file

@ -135,7 +135,7 @@ static inline void __debug_switch_to_guest_common(struct kvm_vcpu *vcpu)
if (!(vcpu->arch.flags & KVM_ARM64_DEBUG_DIRTY))
return;
host_ctxt = &__hyp_this_cpu_ptr(kvm_host_data)->host_ctxt;
host_ctxt = &this_cpu_ptr(&kvm_host_data)->host_ctxt;
guest_ctxt = &vcpu->arch.ctxt;
host_dbg = &vcpu->arch.host_debug_state.regs;
guest_dbg = kern_hyp_va(vcpu->arch.debug_ptr);
@ -154,7 +154,7 @@ static inline void __debug_switch_to_host_common(struct kvm_vcpu *vcpu)
if (!(vcpu->arch.flags & KVM_ARM64_DEBUG_DIRTY))
return;
host_ctxt = &__hyp_this_cpu_ptr(kvm_host_data)->host_ctxt;
host_ctxt = &this_cpu_ptr(&kvm_host_data)->host_ctxt;
guest_ctxt = &vcpu->arch.ctxt;
host_dbg = &vcpu->arch.host_debug_state.regs;
guest_dbg = kern_hyp_va(vcpu->arch.debug_ptr);

View file

@ -126,11 +126,6 @@ static inline void ___deactivate_traps(struct kvm_vcpu *vcpu)
}
}
static inline void __activate_vm(struct kvm_s2_mmu *mmu)
{
__load_guest_stage2(mmu);
}
static inline bool __translate_far_to_hpfar(u64 far, u64 *hpfar)
{
u64 par, tmp;
@ -377,6 +372,8 @@ static inline bool esr_is_ptrauth_trap(u32 esr)
ctxt_sys_reg(ctxt, key ## KEYHI_EL1) = __val; \
} while(0)
DECLARE_PER_CPU(struct kvm_cpu_context, kvm_hyp_ctxt);
static inline bool __hyp_handle_ptrauth(struct kvm_vcpu *vcpu)
{
struct kvm_cpu_context *ctxt;
@ -386,7 +383,7 @@ static inline bool __hyp_handle_ptrauth(struct kvm_vcpu *vcpu)
!esr_is_ptrauth_trap(kvm_vcpu_get_esr(vcpu)))
return false;
ctxt = &__hyp_this_cpu_ptr(kvm_host_data)->host_ctxt;
ctxt = this_cpu_ptr(&kvm_hyp_ctxt);
__ptrauth_save_key(ctxt, APIA);
__ptrauth_save_key(ctxt, APIB);
__ptrauth_save_key(ctxt, APDA);
@ -481,14 +478,13 @@ exit:
static inline void __kvm_unexpected_el2_exception(void)
{
extern char __guest_exit_panic[];
unsigned long addr, fixup;
struct kvm_cpu_context *host_ctxt;
struct exception_table_entry *entry, *end;
unsigned long elr_el2 = read_sysreg(elr_el2);
entry = hyp_symbol_addr(__start___kvm_ex_table);
end = hyp_symbol_addr(__stop___kvm_ex_table);
host_ctxt = &__hyp_this_cpu_ptr(kvm_host_data)->host_ctxt;
while (entry < end) {
addr = (unsigned long)&entry->insn + entry->insn;
@ -503,7 +499,8 @@ static inline void __kvm_unexpected_el2_exception(void)
return;
}
hyp_panic(host_ctxt);
/* Trigger a panic after restoring the hyp context. */
write_sysreg(__guest_exit_panic, elr_el2);
}
#endif /* __ARM64_KVM_HYP_SWITCH_H__ */

2
arch/arm64/kvm/hyp/nvhe/.gitignore vendored Normal file
View file

@ -0,0 +1,2 @@
# SPDX-License-Identifier: GPL-2.0-only
hyp.lds

View file

@ -6,44 +6,50 @@
asflags-y := -D__KVM_NVHE_HYPERVISOR__
ccflags-y := -D__KVM_NVHE_HYPERVISOR__
obj-y := timer-sr.o sysreg-sr.o debug-sr.o switch.o tlb.o hyp-init.o
obj-y := timer-sr.o sysreg-sr.o debug-sr.o switch.o tlb.o hyp-init.o host.o hyp-main.o
obj-y += ../vgic-v3-sr.o ../aarch32.o ../vgic-v2-cpuif-proxy.o ../entry.o \
../fpsimd.o ../hyp-entry.o
obj-y := $(patsubst %.o,%.hyp.o,$(obj-y))
extra-y := $(patsubst %.hyp.o,%.hyp.tmp.o,$(obj-y))
##
## Build rules for compiling nVHE hyp code
## Output of this folder is `kvm_nvhe.o`, a partially linked object
## file containing all nVHE hyp code and data.
##
$(obj)/%.hyp.tmp.o: $(src)/%.c FORCE
hyp-obj := $(patsubst %.o,%.nvhe.o,$(obj-y))
obj-y := kvm_nvhe.o
extra-y := $(hyp-obj) kvm_nvhe.tmp.o hyp.lds
# 1) Compile all source files to `.nvhe.o` object files. The file extension
# avoids file name clashes for files shared with VHE.
$(obj)/%.nvhe.o: $(src)/%.c FORCE
$(call if_changed_rule,cc_o_c)
$(obj)/%.hyp.tmp.o: $(src)/%.S FORCE
$(obj)/%.nvhe.o: $(src)/%.S FORCE
$(call if_changed_rule,as_o_S)
$(obj)/%.hyp.o: $(obj)/%.hyp.tmp.o FORCE
# 2) Compile linker script.
$(obj)/hyp.lds: $(src)/hyp.lds.S FORCE
$(call if_changed_dep,cpp_lds_S)
# 3) Partially link all '.nvhe.o' files and apply the linker script.
# Prefixes names of ELF sections with '.hyp', eg. '.hyp.text'.
# Note: The following rule assumes that the 'ld' rule puts LDFLAGS before
# the list of dependencies to form '-T $(obj)/hyp.lds'. This is to
# keep the dependency on the target while avoiding an error from
# GNU ld if the linker script is passed to it twice.
LDFLAGS_kvm_nvhe.tmp.o := -r -T
$(obj)/kvm_nvhe.tmp.o: $(obj)/hyp.lds $(addprefix $(obj)/,$(hyp-obj)) FORCE
$(call if_changed,ld)
# 4) Produce the final 'kvm_nvhe.o', ready to be linked into 'vmlinux'.
# Prefixes names of ELF symbols with '__kvm_nvhe_'.
$(obj)/kvm_nvhe.o: $(obj)/kvm_nvhe.tmp.o FORCE
$(call if_changed,hypcopy)
# Disable reordering functions by GCC (enabled at -O2).
# This pass puts functions into '.text.*' sections to aid the linker
# in optimizing ELF layout. See HYPCOPY comment below for more info.
ccflags-y += $(call cc-option,-fno-reorder-functions)
# The HYPCOPY command uses `objcopy` to prefix all ELF symbol names
# and relevant ELF section names to avoid clashes with VHE code/data.
#
# Hyp code is assumed to be in the '.text' section of the input object
# files (with the exception of specialized sections such as
# '.hyp.idmap.text'). This assumption may be broken by a compiler that
# divides code into sections like '.text.unlikely' so as to optimize
# ELF layout. HYPCOPY checks that no such sections exist in the input
# using `objdump`, otherwise they would be linked together with other
# kernel code and not memory-mapped correctly at runtime.
# to avoid clashes with VHE code/data.
quiet_cmd_hypcopy = HYPCOPY $@
cmd_hypcopy = \
if $(OBJDUMP) -h $< | grep -F '.text.'; then \
echo "$@: function reordering not supported in nVHE hyp code" >&2; \
/bin/false; \
fi; \
$(OBJCOPY) --prefix-symbols=__kvm_nvhe_ \
--rename-section=.text=.hyp.text \
$< $@
cmd_hypcopy = $(OBJCOPY) --prefix-symbols=__kvm_nvhe_ $< $@
# Remove ftrace and Shadow Call Stack CFLAGS.
# This is equivalent to the 'notrace' and '__noscs' annotations.

View file

@ -0,0 +1,187 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (C) 2020 - Google Inc
* Author: Andrew Scull <ascull@google.com>
*/
#include <linux/linkage.h>
#include <asm/assembler.h>
#include <asm/kvm_asm.h>
#include <asm/kvm_mmu.h>
.text
/*
 * __host_exit - common path for exceptions taken from the host into hyp.
 *
 * Stores the host's x0-x29 and lr into the context returned by
 * get_host_ctxt, calls handle_trap() with that context in x0, then reloads
 * the registers from the context (which handle_trap() may have updated) and
 * returns to the host with eret.
 *
 * Two inner labels are branched to by __hyp_do_panic:
 *  - __host_enter_for_panic reloads x8-x29 and lr but leaves x0-x7 alone;
 *  - __host_enter_without_restoring only executes the eret.
 */
SYM_FUNC_START(__host_exit)
/* Free up x0/x1 so they can be used to locate the host context */
stp x0, x1, [sp, #-16]!
get_host_ctxt x0, x1
/* NOTE(review): presumably sets PSTATE.PAN on CPUs with ARM64_HAS_PAN - confirm */
ALTERNATIVE(nop, SET_PSTATE_PAN(1), ARM64_HAS_PAN, CONFIG_ARM64_PAN)
/* Store the host regs x2 and x3 */
stp x2, x3, [x0, #CPU_XREG_OFFSET(2)]
/* Retrieve the host regs x0-x1 from the stack */
ldp x2, x3, [sp], #16 // x0, x1
/* Store the host regs x0-x1 and x4-x17 */
stp x2, x3, [x0, #CPU_XREG_OFFSET(0)]
stp x4, x5, [x0, #CPU_XREG_OFFSET(4)]
stp x6, x7, [x0, #CPU_XREG_OFFSET(6)]
stp x8, x9, [x0, #CPU_XREG_OFFSET(8)]
stp x10, x11, [x0, #CPU_XREG_OFFSET(10)]
stp x12, x13, [x0, #CPU_XREG_OFFSET(12)]
stp x14, x15, [x0, #CPU_XREG_OFFSET(14)]
stp x16, x17, [x0, #CPU_XREG_OFFSET(16)]
/* Store the host regs x18-x29, lr */
save_callee_saved_regs x0
/* Save the host context pointer in x29 across the function call */
mov x29, x0
/* x0 still holds the host context pointer: handle_trap(host_ctxt) */
bl handle_trap
/* Restore host regs x0-x17 */
ldp x0, x1, [x29, #CPU_XREG_OFFSET(0)]
ldp x2, x3, [x29, #CPU_XREG_OFFSET(2)]
ldp x4, x5, [x29, #CPU_XREG_OFFSET(4)]
ldp x6, x7, [x29, #CPU_XREG_OFFSET(6)]
/* x0-x7 are used for panic arguments, so the panic path enters below them */
__host_enter_for_panic:
ldp x8, x9, [x29, #CPU_XREG_OFFSET(8)]
ldp x10, x11, [x29, #CPU_XREG_OFFSET(10)]
ldp x12, x13, [x29, #CPU_XREG_OFFSET(12)]
ldp x14, x15, [x29, #CPU_XREG_OFFSET(14)]
ldp x16, x17, [x29, #CPU_XREG_OFFSET(16)]
/* Restore host regs x18-x29, lr */
restore_callee_saved_regs x29
/* Do not touch any register after this! */
__host_enter_without_restoring:
eret
sb
SYM_FUNC_END(__host_exit)
/*
 * void __noreturn __hyp_do_panic(bool restore_host, u64 spsr, u64 elr, u64 par);
 *
 * Leave hyp by "returning" into the host's panic() with __hyp_panic_string
 * as the format string in x0. On entry x0-x3 hold the C arguments above; the
 * remaining format arguments are read here from system registers.
 * If restore_host is zero the host context is not reloaded before the eret.
 */
SYM_FUNC_START(__hyp_do_panic)
/*
 * Load the format arguments into x1-7:
 * x1 = spsr and x2 = elr (already in place), x3 = esr_el2, x4 = far_el2,
 * x5 = hpfar_el2, x6 = par, x7 = vcpu pointer.
 */
mov x6, x3
get_vcpu_ptr x7, x3
mrs x3, esr_el2
mrs x4, far_el2
mrs x5, hpfar_el2
/* Prepare and exit to the host's panic function. */
mov lr, #(PSR_F_BIT | PSR_I_BIT | PSR_A_BIT | PSR_D_BIT |\
PSR_MODE_EL1h)
msr spsr_el2, lr
ldr lr, =panic
msr elr_el2, lr
/*
 * Set the panic format string and enter the host, conditionally
 * restoring the host context.
 */
/* Test restore_host before x0 is overwritten; ldr leaves the flags intact */
cmp x0, xzr
ldr x0, =__hyp_panic_string
b.eq __host_enter_without_restoring
b __host_enter_for_panic
SYM_FUNC_END(__hyp_do_panic)
/*
 * host_el1_sync_vect - vector entry for synchronous exceptions from the host.
 *
 * Anything that is not an HVC64, and any HVC64 whose x0 is at or above
 * HVC_STUB_HCALL_NR, is passed to __host_exit. The remaining HVC64 calls are
 * hyp-stub calls and are forwarded to __kvm_handle_stub_hvc through its
 * idmap address. The trailing check fails the build if the expansion does
 * not fit in 0x80 bytes.
 */
.macro host_el1_sync_vect
.align 7
.L__vect_start\@:
/* Borrow x0 to inspect the exception class, then restore x0/x1 */
stp x0, x1, [sp, #-16]!
mrs x0, esr_el2
lsr x0, x0, #ESR_ELx_EC_SHIFT
cmp x0, #ESR_ELx_EC_HVC64
/* ldp does not alter the flags set by the cmp above */
ldp x0, x1, [sp], #16
b.ne __host_exit
/* Check for a stub HVC call */
cmp x0, #HVC_STUB_HCALL_NR
b.hs __host_exit
/*
 * Compute the idmap address of __kvm_handle_stub_hvc and
 * jump there. Since we use kimage_voffset, do not use the
 * HYP VA for __kvm_handle_stub_hvc, but the kernel VA instead
 * (by loading it from the constant pool).
 *
 * Preserve x0-x4, which may contain stub parameters.
 */
ldr x5, =__kvm_handle_stub_hvc
ldr_l x6, kimage_voffset
/* x5 = __pa(x5) */
sub x5, x5, x6
br x5
.L__vect_end\@:
.if ((.L__vect_end\@ - .L__vect_start\@) > 0x80)
.error "host_el1_sync_vect larger than vector entry"
.endif
.endm
/*
 * invalid_host_el2_vect - vector entry for unexpected exceptions taken at EL2.
 *
 * Branches to __guest_exit_panic with x0/x1 still saved on the stack when
 * get_loaded_vcpu reports a loaded vcpu; otherwise drops the saved pair and
 * branches to hyp_panic.
 */
.macro invalid_host_el2_vect
.align 7
/* If a guest is loaded, panic out of it. */
stp x0, x1, [sp, #-16]!
get_loaded_vcpu x0, x1
cbnz x0, __guest_exit_panic
/* No vcpu loaded: discard the x0/x1 pair pushed above */
add sp, sp, #16
/*
 * The panic may not be clean if the exception is taken before the host
 * context has been saved by __host_exit or after the hyp context has
 * been partially clobbered by __host_enter.
 */
b hyp_panic
.endm
/*
 * invalid_host_el1_vect - vector entry for unexpected exceptions from the host.
 *
 * Sets up the arguments of __hyp_do_panic(restore_host, spsr, elr, par) in
 * x0-x3 and branches to it. restore_host is false, so the host registers
 * are not reloaded from the saved host context.
 */
.macro invalid_host_el1_vect
.align 7
mov x0, xzr /* restore_host = false */
mrs x1, spsr_el2
mrs x2, elr_el2
mrs x3, par_el1
b __hyp_do_panic
.endm
/*
 * The host vector does not use an ESB instruction in order to avoid consuming
 * SErrors that should only be consumed by the host. Guest entry is deferred by
 * __guest_enter if there are any pending asynchronous exceptions so hyp will
 * always return to the host without having consumed host SErrors.
 *
 * CONFIG_KVM_INDIRECT_VECTORS is not applied to the host vectors because the
 * host knows about the EL2 vectors already, and there is no point in hiding
 * them.
 *
 * Only the "Synchronous 64-bit EL1" slot has a real handler; every other
 * slot expands to one of the invalid_* macros and ends in a panic.
 */
.align 11
SYM_CODE_START(__kvm_hyp_host_vector)
invalid_host_el2_vect // Synchronous EL2t
invalid_host_el2_vect // IRQ EL2t
invalid_host_el2_vect // FIQ EL2t
invalid_host_el2_vect // Error EL2t
invalid_host_el2_vect // Synchronous EL2h
invalid_host_el2_vect // IRQ EL2h
invalid_host_el2_vect // FIQ EL2h
invalid_host_el2_vect // Error EL2h
host_el1_sync_vect // Synchronous 64-bit EL1
invalid_host_el1_vect // IRQ 64-bit EL1
invalid_host_el1_vect // FIQ 64-bit EL1
invalid_host_el1_vect // Error 64-bit EL1
invalid_host_el1_vect // Synchronous 32-bit EL1
invalid_host_el1_vect // IRQ 32-bit EL1
invalid_host_el1_vect // FIQ 32-bit EL1
invalid_host_el1_vect // Error 32-bit EL1
SYM_CODE_END(__kvm_hyp_host_vector)

View file

@ -4,11 +4,13 @@
* Author: Marc Zyngier <marc.zyngier@arm.com>
*/
#include <linux/arm-smccc.h>
#include <linux/linkage.h>
#include <asm/alternative.h>
#include <asm/assembler.h>
#include <asm/kvm_arm.h>
#include <asm/kvm_asm.h>
#include <asm/kvm_mmu.h>
#include <asm/pgtable-hwdef.h>
#include <asm/sysreg.h>
@ -44,27 +46,37 @@ __invalid:
b .
/*
* x0: HYP pgd
* x1: HYP stack
* x2: HYP vectors
* x3: per-CPU offset
* x0: SMCCC function ID
* x1: HYP pgd
* x2: per-CPU offset
* x3: HYP stack
* x4: HYP vectors
*/
__do_hyp_init:
/* Check for a stub HVC call */
cmp x0, #HVC_STUB_HCALL_NR
b.lo __kvm_handle_stub_hvc
phys_to_ttbr x4, x0
alternative_if ARM64_HAS_CNP
orr x4, x4, #TTBR_CNP_BIT
alternative_else_nop_endif
msr ttbr0_el2, x4
/* Set tpidr_el2 for use by HYP to free a register */
msr tpidr_el2, x2
mrs x4, tcr_el1
mov_q x5, TCR_EL2_MASK
and x4, x4, x5
mov x5, #TCR_EL2_RES1
orr x4, x4, x5
mov x2, #KVM_HOST_SMCCC_FUNC(__kvm_hyp_init)
cmp x0, x2
b.eq 1f
mov x0, #SMCCC_RET_NOT_SUPPORTED
eret
1: phys_to_ttbr x0, x1
alternative_if ARM64_HAS_CNP
orr x0, x0, #TTBR_CNP_BIT
alternative_else_nop_endif
msr ttbr0_el2, x0
mrs x0, tcr_el1
mov_q x1, TCR_EL2_MASK
and x0, x0, x1
mov x1, #TCR_EL2_RES1
orr x0, x0, x1
/*
* The ID map may be configured to use an extended virtual address
@ -80,18 +92,18 @@ alternative_else_nop_endif
*
* So use the same T0SZ value we use for the ID map.
*/
ldr_l x5, idmap_t0sz
bfi x4, x5, TCR_T0SZ_OFFSET, TCR_TxSZ_WIDTH
ldr_l x1, idmap_t0sz
bfi x0, x1, TCR_T0SZ_OFFSET, TCR_TxSZ_WIDTH
/*
* Set the PS bits in TCR_EL2.
*/
tcr_compute_pa_size x4, #TCR_EL2_PS_SHIFT, x5, x6
tcr_compute_pa_size x0, #TCR_EL2_PS_SHIFT, x1, x2
msr tcr_el2, x4
msr tcr_el2, x0
mrs x4, mair_el1
msr mair_el2, x4
mrs x0, mair_el1
msr mair_el2, x0
isb
/* Invalidate the stale TLBs from Bootloader */
@ -103,25 +115,22 @@ alternative_else_nop_endif
* as well as the EE bit on BE. Drop the A flag since the compiler
* is allowed to generate unaligned accesses.
*/
mov_q x4, (SCTLR_EL2_RES1 | (SCTLR_ELx_FLAGS & ~SCTLR_ELx_A))
CPU_BE( orr x4, x4, #SCTLR_ELx_EE)
mov_q x0, (SCTLR_EL2_RES1 | (SCTLR_ELx_FLAGS & ~SCTLR_ELx_A))
CPU_BE( orr x0, x0, #SCTLR_ELx_EE)
alternative_if ARM64_HAS_ADDRESS_AUTH
mov_q x5, (SCTLR_ELx_ENIA | SCTLR_ELx_ENIB | \
mov_q x1, (SCTLR_ELx_ENIA | SCTLR_ELx_ENIB | \
SCTLR_ELx_ENDA | SCTLR_ELx_ENDB)
orr x4, x4, x5
orr x0, x0, x1
alternative_else_nop_endif
msr sctlr_el2, x4
msr sctlr_el2, x0
isb
/* Set the stack and new vectors */
kern_hyp_va x1
mov sp, x1
msr vbar_el2, x2
/* Set tpidr_el2 for use by HYP */
msr tpidr_el2, x3
mov sp, x3
msr vbar_el2, x4
/* Hello, World! */
mov x0, #SMCCC_RET_SUCCESS
eret
SYM_CODE_END(__kvm_hyp_init)

View file

@ -0,0 +1,117 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright (C) 2020 - Google Inc
* Author: Andrew Scull <ascull@google.com>
*/
#include <hyp/switch.h>
#include <asm/kvm_asm.h>
#include <asm/kvm_emulate.h>
#include <asm/kvm_host.h>
#include <asm/kvm_hyp.h>
#include <asm/kvm_mmu.h>
#include <kvm/arm_hypercalls.h>
static void handle_host_hcall(unsigned long func_id,
struct kvm_cpu_context *host_ctxt)
{
unsigned long ret = 0;
switch (func_id) {
case KVM_HOST_SMCCC_FUNC(__kvm_vcpu_run): {
unsigned long r1 = host_ctxt->regs.regs[1];
struct kvm_vcpu *vcpu = (struct kvm_vcpu *)r1;
ret = __kvm_vcpu_run(kern_hyp_va(vcpu));
break;
}
case KVM_HOST_SMCCC_FUNC(__kvm_flush_vm_context):
__kvm_flush_vm_context();
break;
case KVM_HOST_SMCCC_FUNC(__kvm_tlb_flush_vmid_ipa): {
unsigned long r1 = host_ctxt->regs.regs[1];
struct kvm_s2_mmu *mmu = (struct kvm_s2_mmu *)r1;
phys_addr_t ipa = host_ctxt->regs.regs[2];
int level = host_ctxt->regs.regs[3];
__kvm_tlb_flush_vmid_ipa(kern_hyp_va(mmu), ipa, level);
break;
}
case KVM_HOST_SMCCC_FUNC(__kvm_tlb_flush_vmid): {
unsigned long r1 = host_ctxt->regs.regs[1];
struct kvm_s2_mmu *mmu = (struct kvm_s2_mmu *)r1;
__kvm_tlb_flush_vmid(kern_hyp_va(mmu));
break;
}
case KVM_HOST_SMCCC_FUNC(__kvm_tlb_flush_local_vmid): {
unsigned long r1 = host_ctxt->regs.regs[1];
struct kvm_s2_mmu *mmu = (struct kvm_s2_mmu *)r1;
__kvm_tlb_flush_local_vmid(kern_hyp_va(mmu));
break;
}
case KVM_HOST_SMCCC_FUNC(__kvm_timer_set_cntvoff): {
u64 cntvoff = host_ctxt->regs.regs[1];
__kvm_timer_set_cntvoff(cntvoff);
break;
}
case KVM_HOST_SMCCC_FUNC(__kvm_enable_ssbs):
__kvm_enable_ssbs();
break;
case KVM_HOST_SMCCC_FUNC(__vgic_v3_get_ich_vtr_el2):
ret = __vgic_v3_get_ich_vtr_el2();
break;
case KVM_HOST_SMCCC_FUNC(__vgic_v3_read_vmcr):
ret = __vgic_v3_read_vmcr();
break;
case KVM_HOST_SMCCC_FUNC(__vgic_v3_write_vmcr): {
u32 vmcr = host_ctxt->regs.regs[1];
__vgic_v3_write_vmcr(vmcr);
break;
}
case KVM_HOST_SMCCC_FUNC(__vgic_v3_init_lrs):
__vgic_v3_init_lrs();
break;
case KVM_HOST_SMCCC_FUNC(__kvm_get_mdcr_el2):
ret = __kvm_get_mdcr_el2();
break;
case KVM_HOST_SMCCC_FUNC(__vgic_v3_save_aprs): {
unsigned long r1 = host_ctxt->regs.regs[1];
struct vgic_v3_cpu_if *cpu_if = (struct vgic_v3_cpu_if *)r1;
__vgic_v3_save_aprs(kern_hyp_va(cpu_if));
break;
}
case KVM_HOST_SMCCC_FUNC(__vgic_v3_restore_aprs): {
unsigned long r1 = host_ctxt->regs.regs[1];
struct vgic_v3_cpu_if *cpu_if = (struct vgic_v3_cpu_if *)r1;
__vgic_v3_restore_aprs(kern_hyp_va(cpu_if));
break;
}
default:
/* Invalid host HVC. */
host_ctxt->regs.regs[0] = SMCCC_RET_NOT_SUPPORTED;
return;
}
host_ctxt->regs.regs[0] = SMCCC_RET_SUCCESS;
host_ctxt->regs.regs[1] = ret;
}
/*
 * C entry point for exceptions taken from the host, called with the saved
 * host register context.
 *
 * Only HVC64 is handled: the function ID is read from regs[0] of @host_ctxt
 * and dispatched by handle_host_hcall(). Any other exception class ends in
 * hyp_panic().
 */
void handle_trap(struct kvm_cpu_context *host_ctxt)
{
	u64 esr = read_sysreg_el2(SYS_ESR);

	if (ESR_ELx_EC(esr) != ESR_ELx_EC_HVC64)
		hyp_panic();

	handle_host_hcall(host_ctxt->regs.regs[0], host_ctxt);
}

View file

@ -0,0 +1,19 @@
/* SPDX-License-Identifier: GPL-2.0 */
/*
* Copyright (C) 2020 Google LLC.
* Written by David Brazdil <dbrazdil@google.com>
*
* Linker script used for partial linking of nVHE EL2 object files.
*/
#include <asm/hyp_image.h>
#include <asm-generic/vmlinux.lds.h>
#include <asm/cache.h>
#include <asm/memory.h>
/*
 * Output sections of the partially linked nVHE hyp object: the hyp text
 * section and a per-CPU data section whose inputs are gathered by
 * PERCPU_INPUT() with L1_CACHE_BYTES as its argument.
 *
 * NOTE(review): HYP_SECTION()/HYP_SECTION_NAME() are defined in
 * <asm/hyp_image.h>; presumably they add the '.hyp' section-name prefix
 * mentioned in the nVHE Makefile - confirm.
 */
SECTIONS {
HYP_SECTION(.text)
HYP_SECTION_NAME(.data..percpu) : {
PERCPU_INPUT(L1_CACHE_BYTES)
}
}

View file

@ -27,6 +27,11 @@
#include <asm/processor.h>
#include <asm/thread_info.h>
/* Non-VHE specific context */
DEFINE_PER_CPU(struct kvm_host_data, kvm_host_data);
DEFINE_PER_CPU(struct kvm_cpu_context, kvm_hyp_ctxt);
DEFINE_PER_CPU(unsigned long, kvm_hyp_vector);
static void __activate_traps(struct kvm_vcpu *vcpu)
{
u64 val;
@ -42,6 +47,7 @@ static void __activate_traps(struct kvm_vcpu *vcpu)
}
write_sysreg(val, cptr_el2);
write_sysreg(__this_cpu_read(kvm_hyp_vector), vbar_el2);
if (cpus_have_final_cap(ARM64_WORKAROUND_SPECULATIVE_AT)) {
struct kvm_cpu_context *ctxt = &vcpu->arch.ctxt;
@ -60,6 +66,7 @@ static void __activate_traps(struct kvm_vcpu *vcpu)
static void __deactivate_traps(struct kvm_vcpu *vcpu)
{
extern char __kvm_hyp_host_vector[];
u64 mdcr_el2;
___deactivate_traps(vcpu);
@ -91,9 +98,10 @@ static void __deactivate_traps(struct kvm_vcpu *vcpu)
write_sysreg(mdcr_el2, mdcr_el2);
write_sysreg(HCR_HOST_NVHE_FLAGS, hcr_el2);
write_sysreg(CPTR_EL2_DEFAULT, cptr_el2);
write_sysreg(__kvm_hyp_host_vector, vbar_el2);
}
static void __deactivate_vm(struct kvm_vcpu *vcpu)
static void __load_host_stage2(void)
{
write_sysreg(0, vttbr_el2);
}
@ -173,9 +181,7 @@ int __kvm_vcpu_run(struct kvm_vcpu *vcpu)
pmr_sync();
}
vcpu = kern_hyp_va(vcpu);
host_ctxt = &__hyp_this_cpu_ptr(kvm_host_data)->host_ctxt;
host_ctxt = &this_cpu_ptr(&kvm_host_data)->host_ctxt;
host_ctxt->__hyp_running_vcpu = vcpu;
guest_ctxt = &vcpu->arch.ctxt;
@ -194,7 +200,7 @@ int __kvm_vcpu_run(struct kvm_vcpu *vcpu)
__sysreg32_restore_state(vcpu);
__sysreg_restore_state_nvhe(guest_ctxt);
__activate_vm(kern_hyp_va(vcpu->arch.hw_mmu));
__load_guest_stage2(kern_hyp_va(vcpu->arch.hw_mmu));
__activate_traps(vcpu);
__hyp_vgic_restore_state(vcpu);
@ -204,7 +210,7 @@ int __kvm_vcpu_run(struct kvm_vcpu *vcpu)
do {
/* Jump in the fire! */
exit_code = __guest_enter(vcpu, host_ctxt);
exit_code = __guest_enter(vcpu);
/* And we're baaack! */
} while (fixup_guest_exit(vcpu, &exit_code));
@ -215,7 +221,7 @@ int __kvm_vcpu_run(struct kvm_vcpu *vcpu)
__hyp_vgic_save_state(vcpu);
__deactivate_traps(vcpu);
__deactivate_vm(vcpu);
__load_host_stage2();
__sysreg_restore_state_nvhe(host_ctxt);
@ -235,35 +241,31 @@ int __kvm_vcpu_run(struct kvm_vcpu *vcpu)
if (system_uses_irq_prio_masking())
gic_write_pmr(GIC_PRIO_IRQOFF);
host_ctxt->__hyp_running_vcpu = NULL;
return exit_code;
}
void __noreturn hyp_panic(struct kvm_cpu_context *host_ctxt)
void __noreturn hyp_panic(void)
{
u64 spsr = read_sysreg_el2(SYS_SPSR);
u64 elr = read_sysreg_el2(SYS_ELR);
u64 par = read_sysreg(par_el1);
struct kvm_vcpu *vcpu = host_ctxt->__hyp_running_vcpu;
unsigned long str_va;
bool restore_host = true;
struct kvm_cpu_context *host_ctxt;
struct kvm_vcpu *vcpu;
if (read_sysreg(vttbr_el2)) {
host_ctxt = &this_cpu_ptr(&kvm_host_data)->host_ctxt;
vcpu = host_ctxt->__hyp_running_vcpu;
if (vcpu) {
__timer_disable_traps(vcpu);
__deactivate_traps(vcpu);
__deactivate_vm(vcpu);
__load_host_stage2();
__sysreg_restore_state_nvhe(host_ctxt);
}
/*
* Force the panic string to be loaded from the literal pool,
* making sure it is a kernel address and not a PC-relative
* reference.
*/
asm volatile("ldr %0, =%1" : "=r" (str_va) : "S" (__hyp_panic_string));
__hyp_do_panic(str_va,
spsr, elr,
read_sysreg(esr_el2), read_sysreg_el2(SYS_FAR),
read_sysreg(hpfar_el2), par, vcpu);
__hyp_do_panic(restore_host, spsr, elr, par);
unreachable();
}

View file

@ -61,7 +61,6 @@ void __kvm_tlb_flush_vmid_ipa(struct kvm_s2_mmu *mmu,
dsb(ishst);
/* Switch to requested VMID */
mmu = kern_hyp_va(mmu);
__tlb_switch_to_guest(mmu, &cxt);
/*
@ -115,7 +114,6 @@ void __kvm_tlb_flush_vmid(struct kvm_s2_mmu *mmu)
dsb(ishst);
/* Switch to requested VMID */
mmu = kern_hyp_va(mmu);
__tlb_switch_to_guest(mmu, &cxt);
__tlbi(vmalls12e1is);

View file

@ -0,0 +1,892 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
* Stand-alone page-table allocator for hyp stage-1 and guest stage-2.
* No bombay mix was harmed in the writing of this file.
*
* Copyright (C) 2020 Google LLC
* Author: Will Deacon <will@kernel.org>
*/
#include <linux/bitfield.h>
#include <asm/kvm_pgtable.h>
/*
 * Levels are numbered 0 .. KVM_PGTABLE_MAX_LEVELS - 1; an entry at the last
 * level is never treated as a table (see kvm_pte_table()).
 */
#define KVM_PGTABLE_MAX_LEVELS 4U
/* Bit 0: the entry is valid. */
#define KVM_PTE_VALID BIT(0)
/*
 * Bit 1: entry type. PAGE and TABLE share the encoding 1; the level of the
 * entry tells them apart (PAGE is only used at the last level).
 */
#define KVM_PTE_TYPE BIT(1)
#define KVM_PTE_TYPE_BLOCK 0
#define KVM_PTE_TYPE_PAGE 1
#define KVM_PTE_TYPE_TABLE 1
/* Address bits [47:PAGE_SHIFT] are stored in place. */
#define KVM_PTE_ADDR_MASK GENMASK(47, PAGE_SHIFT)
/* With PAGE_SHIFT == 16, entry bits [15:12] carry address bits [51:48]. */
#define KVM_PTE_ADDR_51_48 GENMASK(15, 12)
/* Lower attribute field of a leaf entry, bits [11:2]. */
#define KVM_PTE_LEAF_ATTR_LO GENMASK(11, 2)
/* Lower attributes as laid out for stage-1 entries. */
#define KVM_PTE_LEAF_ATTR_LO_S1_ATTRIDX GENMASK(4, 2)
#define KVM_PTE_LEAF_ATTR_LO_S1_AP GENMASK(7, 6)
#define KVM_PTE_LEAF_ATTR_LO_S1_AP_RO 3
#define KVM_PTE_LEAF_ATTR_LO_S1_AP_RW 1
#define KVM_PTE_LEAF_ATTR_LO_S1_SH GENMASK(9, 8)
#define KVM_PTE_LEAF_ATTR_LO_S1_SH_IS 3
#define KVM_PTE_LEAF_ATTR_LO_S1_AF BIT(10)
/* Lower attributes as laid out for stage-2 entries. */
#define KVM_PTE_LEAF_ATTR_LO_S2_MEMATTR GENMASK(5, 2)
#define KVM_PTE_LEAF_ATTR_LO_S2_S2AP_R BIT(6)
#define KVM_PTE_LEAF_ATTR_LO_S2_S2AP_W BIT(7)
#define KVM_PTE_LEAF_ATTR_LO_S2_SH GENMASK(9, 8)
#define KVM_PTE_LEAF_ATTR_LO_S2_SH_IS 3
#define KVM_PTE_LEAF_ATTR_LO_S2_AF BIT(10)
/* Upper attribute field of a leaf entry, bits [63:51]. */
#define KVM_PTE_LEAF_ATTR_HI GENMASK(63, 51)
/* Bit 54 is the XN bit for both stage-1 and stage-2 entries. */
#define KVM_PTE_LEAF_ATTR_HI_S1_XN BIT(54)
#define KVM_PTE_LEAF_ATTR_HI_S2_XN BIT(54)
/* State carried through a single page-table walk. */
struct kvm_pgtable_walk_data {
/* Page-table being walked. */
struct kvm_pgtable *pgt;
/* Supplies the callback, its argument and the visit flags. */
struct kvm_pgtable_walker *walker;
/* Current address of the walk; advanced as entries are visited. */
u64 addr;
/* End address, handed to the walker callback on every visit. */
u64 end;
};
/*
 * Return the shift (log2 of the granule size) for a page-table entry at
 * @level, as given by ARM64_HW_PGTABLE_LEVEL_SHIFT().
 */
static u64 kvm_granule_shift(u32 level)
{
	u64 shift;

	/* Assumes KVM_PGTABLE_MAX_LEVELS is 4 */
	shift = ARM64_HW_PGTABLE_LEVEL_SHIFT(level);

	return shift;
}
/* Return the number of bytes covered by a single entry at @level. */
static u64 kvm_granule_size(u32 level)
{
	u64 shift = kvm_granule_shift(level);

	return BIT(shift);
}
/*
 * Decide whether [@addr, @end) can be mapped to @phys with a single block
 * entry at @level.
 *
 * Returns true only if the level may hold a block, at least one whole
 * granule remains before @end, and both @addr and @phys are aligned to the
 * granule size.
 */
static bool kvm_block_mapping_supported(u64 addr, u64 end, u64 phys, u32 level)
{
	u64 block_size;

	/*
	 * Reject invalid block mappings and don't bother with 4TB mappings for
	 * 52-bit PAs.
	 */
	if (level == 0)
		return false;
	if (level == 1 && PAGE_SIZE != SZ_4K)
		return false;

	block_size = kvm_granule_size(level);
	if (end - addr < block_size)
		return false;

	if (!IS_ALIGNED(addr, block_size))
		return false;

	return IS_ALIGNED(phys, block_size);
}
/*
 * Return the index of the entry for the walk's current address within a
 * table at @level. The index is PAGE_SHIFT - 3 bits wide.
 */
static u32 kvm_pgtable_idx(struct kvm_pgtable_walk_data *data, u32 level)
{
	const u64 nr_entries = BIT(PAGE_SHIFT - 3);
	u64 idx = data->addr >> kvm_granule_shift(level);

	return idx & (nr_entries - 1);
}
/*
 * Return the index of the PGD page that covers @addr: @addr is truncated to
 * the table's ia_bits and shifted by the granule shift of the level above
 * the start level.
 */
static u32 __kvm_pgd_page_idx(struct kvm_pgtable *pgt, u64 addr)
{
	u32 parent_level = pgt->start_level - 1; /* May underflow */
	u64 ia_mask = BIT(pgt->ia_bits) - 1;
	u64 masked = addr & ia_mask;

	return masked >> kvm_granule_shift(parent_level);
}
static u32 kvm_pgd_page_idx(struct kvm_pgtable_walk_data *data)
{
return __kvm_pgd_page_idx(data->pgt, data->addr);
}
/*
 * Return the number of PGD pages needed for a table with @ia_bits of input
 * address and the given @start_level: one more than the PGD page index of
 * the highest possible address.
 */
static u32 kvm_pgd_pages(u32 ia_bits, u32 start_level)
{
	struct kvm_pgtable scratch = {
		.start_level	= start_level,
		.ia_bits	= ia_bits,
	};
	u32 last_idx = __kvm_pgd_page_idx(&scratch, ~0ULL);

	return last_idx + 1;
}
/* Return true if the valid bit of @pte is set. */
static bool kvm_pte_valid(kvm_pte_t pte)
{
	return (pte & KVM_PTE_VALID) != 0;
}
/*
 * Return true if @pte, found at @level, points to a next-level table:
 * it must not be at the last level, must be valid, and must carry the
 * table type encoding.
 */
static bool kvm_pte_table(kvm_pte_t pte, u32 level)
{
	bool last_level = (level == KVM_PGTABLE_MAX_LEVELS - 1);

	return !last_level && kvm_pte_valid(pte) &&
	       FIELD_GET(KVM_PTE_TYPE, pte) == KVM_PTE_TYPE_TABLE;
}
/*
 * Extract the physical address stored in @pte. With PAGE_SHIFT == 16 the
 * address bits [51:48] are additionally recovered from entry bits [15:12].
 */
static u64 kvm_pte_to_phys(kvm_pte_t pte)
{
	u64 pa = pte & KVM_PTE_ADDR_MASK;

	if (PAGE_SHIFT != 16)
		return pa;

	return pa | (FIELD_GET(KVM_PTE_ADDR_51_48, pte) << 48);
}
/*
 * Encode physical address @pa into the address field of an entry. With
 * PAGE_SHIFT == 16 the address bits [51:48] are additionally placed in
 * entry bits [15:12].
 */
static kvm_pte_t kvm_phys_to_pte(u64 pa)
{
	kvm_pte_t pte = pa & KVM_PTE_ADDR_MASK;

	if (PAGE_SHIFT != 16)
		return pte;

	return pte | FIELD_PREP(KVM_PTE_ADDR_51_48, pa >> 48);
}
/*
 * Return the virtual address, obtained with __va(), of the physical address
 * stored in @pte.
 */
static kvm_pte_t *kvm_pte_follow(kvm_pte_t pte)
{
	u64 pa = kvm_pte_to_phys(pte);

	return __va(pa);
}
/*
 * Clear the valid bit of the entry at @ptep, leaving every other bit as it
 * was. The update is written with WRITE_ONCE().
 */
static void kvm_set_invalid_pte(kvm_pte_t *ptep)
{
	kvm_pte_t cleared = *ptep & ~KVM_PTE_VALID;

	WRITE_ONCE(*ptep, cleared);
}
/*
 * Make *@ptep a valid table entry pointing at @childp. Warns if the entry
 * being overwritten was still valid. The new entry is published with
 * release semantics.
 */
static void kvm_set_table_pte(kvm_pte_t *ptep, kvm_pte_t *childp)
{
	kvm_pte_t prev = *ptep;
	kvm_pte_t table = kvm_phys_to_pte(__pa(childp));

	table |= FIELD_PREP(KVM_PTE_TYPE, KVM_PTE_TYPE_TABLE);
	table |= KVM_PTE_VALID;

	WARN_ON(kvm_pte_valid(prev));
	smp_store_release(ptep, table);
}
/*
 * Try to install a valid leaf entry mapping @pa with attributes @attr at
 * *@ptep. The entry type is "page" at the last level and "block" otherwise.
 *
 * Returns true if the entry was written, or if an identical valid entry was
 * already present. Returns false, without writing, if a different valid
 * entry is present.
 */
static bool kvm_set_valid_leaf_pte(kvm_pte_t *ptep, u64 pa, kvm_pte_t attr,
				   u32 level)
{
	kvm_pte_t old = *ptep;
	kvm_pte_t new = kvm_phys_to_pte(pa);
	u64 type;

	if (level == KVM_PGTABLE_MAX_LEVELS - 1)
		type = KVM_PTE_TYPE_PAGE;
	else
		type = KVM_PTE_TYPE_BLOCK;

	new |= attr & (KVM_PTE_LEAF_ATTR_LO | KVM_PTE_LEAF_ATTR_HI);
	new |= FIELD_PREP(KVM_PTE_TYPE, type);
	new |= KVM_PTE_VALID;

	/* Tolerate KVM recreating the exact same mapping. */
	if (kvm_pte_valid(old))
		return old == new;

	smp_store_release(ptep, new);
	return true;
}
/*
 * Invoke the walker's callback for the entry @ptep, passing along the end
 * of the walk range and the walker's private argument.
 */
static int kvm_pgtable_visitor_cb(struct kvm_pgtable_walk_data *data, u64 addr,
				  u32 level, kvm_pte_t *ptep,
				  enum kvm_pgtable_walk_flags flag)
{
	return data->walker->cb(addr, data->end, level, ptep, flag,
				data->walker->arg);
}
/* Forward declaration: the visit and walk helpers call each other. */
static int __kvm_pgtable_walk(struct kvm_pgtable_walk_data *data,
kvm_pte_t *pgtable, u32 level);
/*
 * Visit the single entry @ptep at @level. Depending on the walker's flags,
 * the callback is invoked before descending into a table (TABLE_PRE), on a
 * non-table entry (LEAF) and after returning from a table (TABLE_POST).
 *
 * Returns 0 on success, or the first non-zero value returned by a callback
 * or by the nested walk.
 */
static inline int __kvm_pgtable_visit(struct kvm_pgtable_walk_data *data,
kvm_pte_t *ptep, u32 level)
{
int ret = 0;
u64 addr = data->addr;
kvm_pte_t *childp, pte = *ptep;
bool table = kvm_pte_table(pte, level);
enum kvm_pgtable_walk_flags flags = data->walker->flags;
/*
 * Note that 'pte' is not re-read after the TABLE_PRE callback, so the
 * descent below follows the entry as it was before that callback ran.
 */
if (table && (flags & KVM_PGTABLE_WALK_TABLE_PRE)) {
ret = kvm_pgtable_visitor_cb(data, addr, level, ptep,
KVM_PGTABLE_WALK_TABLE_PRE);
}
if (!table && (flags & KVM_PGTABLE_WALK_LEAF)) {
ret = kvm_pgtable_visitor_cb(data, addr, level, ptep,
KVM_PGTABLE_WALK_LEAF);
/* The leaf callback may have rewritten the entry: look at it again. */
pte = *ptep;
table = kvm_pte_table(pte, level);
}
if (ret)
goto out;
/* Not a table (even after the leaf callback): step over this entry. */
if (!table) {
data->addr += kvm_granule_size(level);
goto out;
}
/* Descend; the nested walk advances data->addr as it goes. */
childp = kvm_pte_follow(pte);
ret = __kvm_pgtable_walk(data, childp, level + 1);
if (ret)
goto out;
/* 'addr' is still the address this entry was first visited at. */
if (flags & KVM_PGTABLE_WALK_TABLE_POST) {
ret = kvm_pgtable_visitor_cb(data, addr, level, ptep,
KVM_PGTABLE_WALK_TABLE_POST);
}
out:
return ret;
}
/*
 * Walk the entries of the table @pgtable at @level, starting at the entry
 * that translates the current walk address and stopping at the end of the
 * table, at the end of the walk range, or on the first error.
 *
 * Returns 0 on success, -EINVAL if @level is out of range, or the error
 * returned while visiting an entry.
 */
static int __kvm_pgtable_walk(struct kvm_pgtable_walk_data *data,
			      kvm_pte_t *pgtable, u32 level)
{
	int ret = 0;
	u32 idx;

	if (WARN_ON_ONCE(level >= KVM_PGTABLE_MAX_LEVELS))
		return -EINVAL;

	idx = kvm_pgtable_idx(data, level);
	while (idx < PTRS_PER_PTE && data->addr < data->end) {
		ret = __kvm_pgtable_visit(data, &pgtable[idx], level);
		if (ret)
			break;
		idx++;
	}

	return ret;
}
/*
 * Top-level walk: iterate over the PGD pages covering [addr, end) and walk
 * each one from the table's start level.
 *
 * Returns -ERANGE if the range exceeds the table's input-address space,
 * -EINVAL if no PGD has been allocated, otherwise the result of the walk.
 */
static int _kvm_pgtable_walk(struct kvm_pgtable_walk_data *data)
{
	struct kvm_pgtable *pgt = data->pgt;
	u64 limit = BIT(pgt->ia_bits);
	int ret = 0;
	u32 idx;

	if (data->addr > limit || data->end > limit)
		return -ERANGE;

	if (!pgt->pgd)
		return -EINVAL;

	idx = kvm_pgd_page_idx(data);
	while (data->addr < data->end) {
		kvm_pte_t *pgd_page = &pgt->pgd[idx * PTRS_PER_PTE];

		ret = __kvm_pgtable_walk(data, pgd_page, pgt->start_level);
		if (ret)
			break;
		idx++;
	}

	return ret;
}
/*
 * kvm_pgtable_walk() - Walk a page-table range, invoking @walker's callback.
 * @pgt:	Page-table to walk.
 * @addr:	Start of the range; rounded down to a page boundary.
 * @size:	Size of the range; the end (aligned start + @size) is rounded
 *		up to a page boundary.
 * @walker:	Callback, flags and private argument for the walk.
 *
 * Return: 0 on success, or a negative error code from the walk.
 */
int kvm_pgtable_walk(struct kvm_pgtable *pgt, u64 addr, u64 size,
		     struct kvm_pgtable_walker *walker)
{
	/*
	 * Compute the aligned start up front rather than reading
	 * 'walk_data.addr' from within walk_data's own initializer: the
	 * evaluation order of initializer expressions is not specified, so
	 * that read is not guaranteed to observe the initialized member.
	 */
	u64 start = ALIGN_DOWN(addr, PAGE_SIZE);
	struct kvm_pgtable_walk_data walk_data = {
		.pgt	= pgt,
		.addr	= start,
		.end	= PAGE_ALIGN(start + size),
		.walker	= walker,
	};

	return _kvm_pgtable_walk(&walk_data);
}
/* Walker state for kvm_pgtable_hyp_map(). */
struct hyp_map_data {
/* Physical address for the next leaf; advanced as leaves are installed. */
u64 phys;
/* Leaf attribute bits, computed by hyp_map_set_prot_attr(). */
kvm_pte_t attr;
};
/*
 * Translate @prot into leaf attribute bits and store them in @data->attr.
 *
 * Returns 0 on success, or -EINVAL if @prot is not readable, or is
 * executable together with either writable or device. @data is left
 * untouched on failure.
 */
static int hyp_map_set_prot_attr(enum kvm_pgtable_prot prot,
				 struct hyp_map_data *data)
{
	bool device = prot & KVM_PGTABLE_PROT_DEVICE;
	bool writable = prot & KVM_PGTABLE_PROT_W;
	bool executable = prot & KVM_PGTABLE_PROT_X;
	u32 sh = KVM_PTE_LEAF_ATTR_LO_S1_SH_IS;
	u32 mtype, ap;
	kvm_pte_t attr;

	if (!(prot & KVM_PGTABLE_PROT_R))
		return -EINVAL;

	if (executable && (writable || device))
		return -EINVAL;

	mtype = device ? MT_DEVICE_nGnRE : MT_NORMAL;
	ap = writable ? KVM_PTE_LEAF_ATTR_LO_S1_AP_RW :
			KVM_PTE_LEAF_ATTR_LO_S1_AP_RO;

	attr = FIELD_PREP(KVM_PTE_LEAF_ATTR_LO_S1_ATTRIDX, mtype);
	if (!executable)
		attr |= KVM_PTE_LEAF_ATTR_HI_S1_XN;
	attr |= FIELD_PREP(KVM_PTE_LEAF_ATTR_LO_S1_AP, ap);
	attr |= FIELD_PREP(KVM_PTE_LEAF_ATTR_LO_S1_SH, sh);
	attr |= KVM_PTE_LEAF_ATTR_LO_S1_AF;

	data->attr = attr;
	return 0;
}
/*
 * Install a leaf at @ptep if a mapping at @level is possible for the
 * current range, and advance @data->phys past it. Warns if a different
 * valid entry was already present. Returns false if no leaf can be used at
 * this level.
 */
static bool hyp_map_walker_try_leaf(u64 addr, u64 end, u32 level,
				    kvm_pte_t *ptep, struct hyp_map_data *data)
{
	u64 phys = data->phys;

	if (!kvm_block_mapping_supported(addr, end, phys, level))
		return false;

	WARN_ON(!kvm_set_valid_leaf_pte(ptep, phys, data->attr, level));
	data->phys = phys + kvm_granule_size(level);
	return true;
}
/*
 * LEAF callback for kvm_pgtable_hyp_map(): install a leaf mapping at this
 * level if possible, otherwise hang a freshly zeroed table off @ptep so the
 * walk can descend.
 *
 * Returns 0 on success, -EINVAL if no leaf fits at the last level, or
 * -ENOMEM if the table page cannot be allocated.
 */
static int hyp_map_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep,
			  enum kvm_pgtable_walk_flags flag, void * const arg)
{
	struct hyp_map_data *data = arg;
	kvm_pte_t *table;

	if (hyp_map_walker_try_leaf(addr, end, level, ptep, data))
		return 0;

	if (WARN_ON(level == KVM_PGTABLE_MAX_LEVELS - 1))
		return -EINVAL;

	table = (kvm_pte_t *)get_zeroed_page(GFP_KERNEL);
	if (!table)
		return -ENOMEM;

	kvm_set_table_pte(ptep, table);
	return 0;
}
/*
 * kvm_pgtable_hyp_map() - Map [@addr, @addr + @size) to @phys in a hyp
 * page-table with protection @prot.
 *
 * @phys is rounded down to a page boundary. Returns -EINVAL if @prot is
 * rejected by hyp_map_set_prot_attr() (in which case nothing is walked),
 * otherwise the result of the page-table walk.
 */
int kvm_pgtable_hyp_map(struct kvm_pgtable *pgt, u64 addr, u64 size, u64 phys,
enum kvm_pgtable_prot prot)
{
int ret;
struct hyp_map_data map_data = {
.phys = ALIGN_DOWN(phys, PAGE_SIZE),
};
struct kvm_pgtable_walker walker = {
.cb = hyp_map_walker,
.flags = KVM_PGTABLE_WALK_LEAF,
.arg = &map_data,
};
ret = hyp_map_set_prot_attr(prot, &map_data);
if (ret)
return ret;
ret = kvm_pgtable_walk(pgt, addr, size, &walker);
/* The barriers are issued whether or not the walk succeeded. */
dsb(ishst);
isb();
return ret;
}
/*
 * kvm_pgtable_hyp_init() - Allocate a zeroed PGD page for a hyp page-table
 * covering @va_bits of virtual address space and initialise @pgt.
 *
 * Return: 0 on success, -ENOMEM if the PGD page cannot be allocated (in
 * which case @pgt->pgd is NULL and the other fields are not written).
 */
int kvm_pgtable_hyp_init(struct kvm_pgtable *pgt, u32 va_bits)
{
	u64 levels = ARM64_HW_PGTABLE_LEVELS(va_bits);

	pgt->pgd = (kvm_pte_t *)get_zeroed_page(GFP_KERNEL);
	if (pgt->pgd == NULL)
		return -ENOMEM;

	pgt->mmu		= NULL;
	pgt->ia_bits		= va_bits;
	pgt->start_level	= KVM_PGTABLE_MAX_LEVELS - levels;

	return 0;
}
/* TABLE_POST callback: free the table page that *@ptep points at. */
static int hyp_free_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep,
			   enum kvm_pgtable_walk_flags flag, void * const arg)
{
	kvm_pte_t *table = kvm_pte_follow(*ptep);

	free_page((unsigned long)table);
	return 0;
}
/*
 * kvm_pgtable_hyp_destroy() - Free every table page reachable from @pgt,
 * then the PGD page itself, and reset @pgt->pgd to NULL. Warns if the walk
 * reports an error.
 */
void kvm_pgtable_hyp_destroy(struct kvm_pgtable *pgt)
{
	struct kvm_pgtable_walker walker = {
		.flags	= KVM_PGTABLE_WALK_TABLE_POST,
		.cb	= hyp_free_walker,
	};
	int err;

	err = kvm_pgtable_walk(pgt, 0, BIT(pgt->ia_bits), &walker);
	WARN_ON(err);

	free_page((unsigned long)pgt->pgd);
	pgt->pgd = NULL;
}
/* Walker state for kvm_pgtable_stage2_map(). */
struct stage2_map_data {
/* Physical address for the next leaf; advanced as leaves are installed. */
u64 phys;
/* Leaf attribute bits, computed by stage2_map_set_prot_attr(). */
kvm_pte_t attr;
/* Table entry being replaced by a block mapping, or NULL if none. */
kvm_pte_t *anchor;
/* Passed to the TLB invalidation hypercalls. */
struct kvm_s2_mmu *mmu;
/* Source of new table pages; may be NULL, in which case none can be added. */
struct kvm_mmu_memory_cache *memcache;
};
/*
 * Translate @prot into stage-2 leaf attribute bits and store them in
 * @data->attr.
 *
 * Returns 0 on success, or -EINVAL for an executable device mapping, in
 * which case @data is left untouched.
 */
static int stage2_map_set_prot_attr(enum kvm_pgtable_prot prot,
				    struct stage2_map_data *data)
{
	bool device = prot & KVM_PGTABLE_PROT_DEVICE;
	bool executable = prot & KVM_PGTABLE_PROT_X;
	u32 sh = KVM_PTE_LEAF_ATTR_LO_S2_SH_IS;
	kvm_pte_t attr;

	if (executable && device)
		return -EINVAL;

	if (device)
		attr = PAGE_S2_MEMATTR(DEVICE_nGnRE);
	else
		attr = PAGE_S2_MEMATTR(NORMAL);

	if (!executable)
		attr |= KVM_PTE_LEAF_ATTR_HI_S2_XN;

	if (prot & KVM_PGTABLE_PROT_R)
		attr |= KVM_PTE_LEAF_ATTR_LO_S2_S2AP_R;

	if (prot & KVM_PGTABLE_PROT_W)
		attr |= KVM_PTE_LEAF_ATTR_LO_S2_S2AP_W;

	attr |= FIELD_PREP(KVM_PTE_LEAF_ATTR_LO_S2_SH, sh);
	attr |= KVM_PTE_LEAF_ATTR_LO_S2_AF;

	data->attr = attr;
	return 0;
}
/*
 * Install a leaf at @ptep if a mapping at @level is possible for the
 * current range, replacing any existing valid leaf with break-before-make,
 * and advance @data->phys past it.
 *
 * Returns true if a leaf is now installed (the caller then takes a
 * reference on the table page), false if no leaf can be used at this level.
 */
static bool stage2_map_walker_try_leaf(u64 addr, u64 end, u32 level,
				       kvm_pte_t *ptep,
				       struct stage2_map_data *data)
{
	u64 granule = kvm_granule_size(level), phys = data->phys;

	if (!kvm_block_mapping_supported(addr, end, phys, level))
		return false;

	/*
	 * An entry that is already valid already holds a reference on its
	 * table page. The caller takes a reference for every leaf we report
	 * as installed, so drop the existing one here; otherwise updating
	 * (or re-creating) a valid mapping would leak a reference and the
	 * table page could never be freed.
	 */
	if (kvm_pte_valid(*ptep))
		put_page(virt_to_page(ptep));

	if (kvm_set_valid_leaf_pte(ptep, phys, data->attr, level))
		goto out;

	/* There's an existing valid leaf entry, so perform break-before-make */
	kvm_set_invalid_pte(ptep);
	kvm_call_hyp(__kvm_tlb_flush_vmid_ipa, data->mmu, addr, level);
	kvm_set_valid_leaf_pte(ptep, phys, data->attr, level);
out:
	data->phys += granule;
	return true;
}
/*
 * TABLE_PRE callback for the stage-2 map walker: if the table entry at
 * @ptep could be replaced by a single block mapping for the range being
 * mapped, invalidate it and record it as the anchor. Always returns 0.
 */
static int stage2_map_walk_table_pre(u64 addr, u64 end, u32 level,
kvm_pte_t *ptep,
struct stage2_map_data *data)
{
/* An anchor has already been chosen higher up; nothing to do here. */
if (data->anchor)
return 0;
/* Keep the table if a block at this level cannot be used for the range. */
if (!kvm_block_mapping_supported(addr, end, data->phys, level))
return 0;
kvm_set_invalid_pte(ptep);
/*
 * NOTE(review): only @addr is handed to the TLB invalidation (with a level
 * argument of 0). Confirm this also covers translations cached for the
 * leaf entries beneath the table being detached.
 */
kvm_call_hyp(__kvm_tlb_flush_vmid_ipa, data->mmu, addr, 0);
data->anchor = ptep;
return 0;
}
/*
 * LEAF callback for the stage-2 map walker.
 *
 * With an anchor set, only drops the reference held for a valid entry.
 * Otherwise installs a leaf mapping at this level if possible, or else
 * replaces the entry with a new table taken from the memcache.
 *
 * Returns 0 on success, -EINVAL if no leaf fits at the last level, or
 * -ENOMEM if there is no memcache or it cannot supply a page.
 */
static int stage2_map_walk_leaf(u64 addr, u64 end, u32 level, kvm_pte_t *ptep,
struct stage2_map_data *data)
{
kvm_pte_t *childp, pte = *ptep;
/* Page holding the entry itself; references are counted on it. */
struct page *page = virt_to_page(ptep);
if (data->anchor) {
if (kvm_pte_valid(pte))
put_page(page);
return 0;
}
if (stage2_map_walker_try_leaf(addr, end, level, ptep, data))
goto out_get_page;
if (WARN_ON(level == KVM_PGTABLE_MAX_LEVELS - 1))
return -EINVAL;
if (!data->memcache)
return -ENOMEM;
childp = kvm_mmu_memory_cache_alloc(data->memcache);
if (!childp)
return -ENOMEM;
/*
* If we've run into an existing block mapping then replace it with
* a table. Accesses beyond 'end' that fall within the new table
* will be mapped lazily.
*/
if (kvm_pte_valid(pte)) {
kvm_set_invalid_pte(ptep);
kvm_call_hyp(__kvm_tlb_flush_vmid_ipa, data->mmu, addr, level);
/* Drop the old entry's reference; a new one is taken below. */
put_page(page);
}
kvm_set_table_pte(ptep, childp);
out_get_page:
/* One reference per newly installed leaf or table entry. */
get_page(page);
return 0;
}
/*
 * TABLE_POST callback for the stage-2 map walker. Does nothing unless an
 * anchor is set; otherwise frees the table page referenced by @ptep and
 * drops the reference held for the entry. On reaching the anchor itself,
 * clears the anchor and runs the leaf path on it.
 *
 * Returns 0, or the result of stage2_map_walk_leaf() for the anchor entry.
 */
static int stage2_map_walk_table_post(u64 addr, u64 end, u32 level,
kvm_pte_t *ptep,
struct stage2_map_data *data)
{
int ret = 0;
if (!data->anchor)
return 0;
free_page((unsigned long)kvm_pte_follow(*ptep));
put_page(virt_to_page(ptep));
if (data->anchor == ptep) {
/* Clear the anchor first so the leaf path performs a real mapping. */
data->anchor = NULL;
ret = stage2_map_walk_leaf(addr, end, level, ptep, data);
}
return ret;
}
/*
 * Walker callback for kvm_pgtable_stage2_map(). It is registered for all
 * three walk flags and dispatches on the one it is called with:
 *
 * - TABLE_PRE runs on table entries on the way down and looks for a table
 *   that could be replaced by a block entry for this mapping. When it finds
 *   one, it records the entry as the 'anchor' in 'struct stage2_map_data'
 *   and invalidates it before the walk descends into the now detached
 *   table.
 *
 * - LEAF behaves according to the anchor. Without an anchor no block
 *   mapping is being installed higher up, so the mapping is performed at
 *   the existing leaves. With an anchor set, references to all valid leaves
 *   are dropped so that the pages beneath the anchor can be freed.
 *
 * - TABLE_POST does nothing without an anchor. Otherwise it frees the
 *   page-table pages on the way back up, and on revisiting the anchor
 *   entry installs the block entry there and resets the anchor to NULL.
 *
 * Any other flag value yields -EINVAL.
 */
static int stage2_map_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep,
			     enum kvm_pgtable_walk_flags flag, void * const arg)
{
	struct stage2_map_data *data = arg;
	int ret = -EINVAL;

	switch (flag) {
	case KVM_PGTABLE_WALK_TABLE_PRE:
		ret = stage2_map_walk_table_pre(addr, end, level, ptep, data);
		break;
	case KVM_PGTABLE_WALK_LEAF:
		ret = stage2_map_walk_leaf(addr, end, level, ptep, data);
		break;
	case KVM_PGTABLE_WALK_TABLE_POST:
		ret = stage2_map_walk_table_post(addr, end, level, ptep, data);
		break;
	}

	return ret;
}
/*
 * kvm_pgtable_stage2_map() - Map [@addr, @addr + @size) to @phys in a
 * stage-2 page-table with protection @prot.
 *
 * @phys is rounded down to a page boundary. New table pages, when needed,
 * are taken from @mc; if @mc is NULL the walk fails with -ENOMEM as soon as
 * one is required. Returns -EINVAL if @prot is rejected by
 * stage2_map_set_prot_attr() (nothing is walked in that case), otherwise
 * the result of the page-table walk.
 */
int kvm_pgtable_stage2_map(struct kvm_pgtable *pgt, u64 addr, u64 size,
u64 phys, enum kvm_pgtable_prot prot,
struct kvm_mmu_memory_cache *mc)
{
int ret;
struct stage2_map_data map_data = {
.phys = ALIGN_DOWN(phys, PAGE_SIZE),
.mmu = pgt->mmu,
.memcache = mc,
};
/* The map walker needs all three callbacks; see stage2_map_walker(). */
struct kvm_pgtable_walker walker = {
.cb = stage2_map_walker,
.flags = KVM_PGTABLE_WALK_TABLE_PRE |
KVM_PGTABLE_WALK_LEAF |
KVM_PGTABLE_WALK_TABLE_POST,
.arg = &map_data,
};
ret = stage2_map_set_prot_attr(prot, &map_data);
if (ret)
return ret;
ret = kvm_pgtable_walk(pgt, addr, size, &walker);
/* The barrier is issued whether or not the walk succeeded. */
dsb(ishst);
return ret;
}
/*
 * Flush the data cache for [@addr, @addr + @size), unless the CPUs have the
 * ARM64_HAS_STAGE2_FWB capability, in which case nothing is done.
 */
static void stage2_flush_dcache(void *addr, u64 size)
{
	if (!cpus_have_const_cap(ARM64_HAS_STAGE2_FWB))
		__flush_dcache_area(addr, size);
}
/*
 * Tell whether the stage-2 leaf @pte maps memory with the NORMAL memory
 * attributes.
 *
 * PAGE_S2_MEMATTR() yields a value that is already positioned within the
 * descriptor (stage2_map_set_prot_attr() ORs it straight into the
 * attributes), so the field must be masked in place. Extracting it with
 * FIELD_GET() would shift it down to bit 0 and the comparison would then
 * fail to match, silently skipping cache maintenance for cacheable
 * mappings.
 */
static bool stage2_pte_cacheable(kvm_pte_t pte)
{
	u64 memattr = pte & KVM_PTE_LEAF_ATTR_LO_S2_MEMATTR;

	return memattr == PAGE_S2_MEMATTR(NORMAL);
}
/*
 * LEAF / TABLE_POST callback for kvm_pgtable_stage2_unmap(): invalidate a
 * valid entry, invalidate the TLB for it and drop the reference it held.
 * A table entry is only torn down (and its page freed) when the page count
 * of the table page is exactly 1. Always returns 0.
 */
static int stage2_unmap_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep,
enum kvm_pgtable_walk_flags flag,
void * const arg)
{
struct kvm_s2_mmu *mmu = arg;
kvm_pte_t pte = *ptep, *childp = NULL;
bool need_flush = false;
if (!kvm_pte_valid(pte))
return 0;
if (kvm_pte_table(pte, level)) {
childp = kvm_pte_follow(pte);
/* Leave the table in place while anything else still references it. */
if (page_count(virt_to_page(childp)) != 1)
return 0;
} else if (stage2_pte_cacheable(pte)) {
need_flush = true;
}
/*
* This is similar to the map() path in that we unmap the entire
* block entry and rely on the remaining portions being faulted
* back lazily.
*/
kvm_set_invalid_pte(ptep);
kvm_call_hyp(__kvm_tlb_flush_vmid_ipa, mmu, addr, level);
put_page(virt_to_page(ptep));
/* 'pte' is the value read before invalidation, so it still has the address. */
if (need_flush) {
stage2_flush_dcache(kvm_pte_follow(pte),
kvm_granule_size(level));
}
if (childp)
free_page((unsigned long)childp);
return 0;
}
/*
 * kvm_pgtable_stage2_unmap() - Remove the stage-2 mappings covering
 * [@addr, @addr + @size).
 *
 * Return: the result of the page-table walk.
 */
int kvm_pgtable_stage2_unmap(struct kvm_pgtable *pgt, u64 addr, u64 size)
{
	struct kvm_pgtable_walker walker = {
		.flags	= KVM_PGTABLE_WALK_LEAF | KVM_PGTABLE_WALK_TABLE_POST,
		.cb	= stage2_unmap_walker,
		.arg	= pgt->mmu,
	};

	return kvm_pgtable_walk(pgt, addr, size, &walker);
}
/* Walker state for stage2_update_leaf_attrs(). */
struct stage2_attr_data {
/* Attribute bits to set in each valid leaf. */
kvm_pte_t attr_set;
/* Attribute bits to clear in each valid leaf. */
kvm_pte_t attr_clr;
/* Value of the last valid leaf visited, before modification. */
kvm_pte_t pte;
/* Level of the last valid leaf visited. */
u32 level;
};
/*
 * LEAF callback for stage2_update_leaf_attrs(): record the level and the
 * original value of a valid entry, then apply the requested attribute
 * changes, writing the entry back only if it actually changed. Invalid
 * entries are skipped. Always returns 0.
 */
static int stage2_attr_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep,
			      enum kvm_pgtable_walk_flags flag,
			      void * const arg)
{
	struct stage2_attr_data *data = arg;
	kvm_pte_t old = *ptep;
	kvm_pte_t new;

	if (!kvm_pte_valid(old))
		return 0;

	data->level = level;
	data->pte = old;

	new = (old & ~data->attr_clr) | data->attr_set;

	/*
	 * We may race with the CPU trying to set the access flag here,
	 * but worst-case the access flag update gets lost and will be
	 * set on the next access instead.
	 */
	if (new != old)
		WRITE_ONCE(*ptep, new);

	return 0;
}
/*
 * Set @attr_set and clear @attr_clr (both restricted to the leaf attribute
 * bits) in every valid leaf covering [@addr, @addr + @size).
 *
 * On success, and when the pointers are non-NULL, *@orig_pte receives the
 * pre-update value of the last valid leaf visited and *@level its level;
 * both are 0 if no valid leaf was found. On failure neither is written.
 *
 * Returns 0 on success or the error from the page-table walk.
 */
static int stage2_update_leaf_attrs(struct kvm_pgtable *pgt, u64 addr,
				    u64 size, kvm_pte_t attr_set,
				    kvm_pte_t attr_clr, kvm_pte_t *orig_pte,
				    u32 *level)
{
	kvm_pte_t attr_mask = KVM_PTE_LEAF_ATTR_LO | KVM_PTE_LEAF_ATTR_HI;
	struct stage2_attr_data data = {
		.attr_clr	= attr_clr & attr_mask,
		.attr_set	= attr_set & attr_mask,
	};
	struct kvm_pgtable_walker walker = {
		.flags	= KVM_PGTABLE_WALK_LEAF,
		.cb	= stage2_attr_walker,
		.arg	= &data,
	};
	int ret;

	ret = kvm_pgtable_walk(pgt, addr, size, &walker);
	if (!ret) {
		if (orig_pte)
			*orig_pte = data.pte;
		if (level)
			*level = data.level;
	}

	return ret;
}
/*
 * kvm_pgtable_stage2_wrprotect() - Clear the stage-2 write permission bit
 * in every valid leaf covering [@addr, @addr + @size).
 *
 * Return: 0 on success or the error from the page-table walk.
 */
int kvm_pgtable_stage2_wrprotect(struct kvm_pgtable *pgt, u64 addr, u64 size)
{
	kvm_pte_t clr = KVM_PTE_LEAF_ATTR_LO_S2_S2AP_W;

	return stage2_update_leaf_attrs(pgt, addr, size, 0, clr, NULL, NULL);
}
/*
 * kvm_pgtable_stage2_mkyoung() - Set the access flag in the leaf mapping
 * @addr.
 *
 * Return: the entry as it was before the update, or 0 if no valid leaf was
 * found or the walk failed.
 */
kvm_pte_t kvm_pgtable_stage2_mkyoung(struct kvm_pgtable *pgt, u64 addr)
{
	kvm_pte_t old = 0;

	stage2_update_leaf_attrs(pgt, addr, 1, KVM_PTE_LEAF_ATTR_LO_S2_AF, 0,
				 &old, NULL);
	dsb(ishst);

	return old;
}
/*
 * kvm_pgtable_stage2_mkold() - Clear the access flag in the leaf mapping
 * @addr.
 *
 * No TLB invalidation is performed here: that is left to the core code,
 * see the '->clear_flush_young()' callback on the KVM mmu notifier.
 *
 * Return: the entry as it was before the update, or 0 if no valid leaf was
 * found or the walk failed.
 */
kvm_pte_t kvm_pgtable_stage2_mkold(struct kvm_pgtable *pgt, u64 addr)
{
	kvm_pte_t old = 0;

	stage2_update_leaf_attrs(pgt, addr, 1, 0, KVM_PTE_LEAF_ATTR_LO_S2_AF,
				 &old, NULL);

	return old;
}
/*
 * kvm_pgtable_stage2_is_young() - Report whether the leaf mapping @addr has
 * its access flag set. The entry is looked up with an empty attribute
 * update, so nothing is modified.
 *
 * Return: true if a valid leaf was found with the access flag set.
 */
bool kvm_pgtable_stage2_is_young(struct kvm_pgtable *pgt, u64 addr)
{
	kvm_pte_t found = 0;

	stage2_update_leaf_attrs(pgt, addr, 1, 0, 0, &found, NULL);

	return (found & KVM_PTE_LEAF_ATTR_LO_S2_AF) != 0;
}
/*
 * kvm_pgtable_stage2_relax_perms() - Grant the permissions in @prot to the
 * leaf mapping @addr: read and write add the matching permission bits,
 * execute clears the execute-never bit. Permissions are never removed.
 * On success the TLB entry for @addr is invalidated.
 *
 * Return: 0 on success or the error from the page-table walk.
 */
int kvm_pgtable_stage2_relax_perms(struct kvm_pgtable *pgt, u64 addr,
				   enum kvm_pgtable_prot prot)
{
	kvm_pte_t set = 0, clr = 0;
	u32 level;
	int ret;

	if (prot & KVM_PGTABLE_PROT_R)
		set |= KVM_PTE_LEAF_ATTR_LO_S2_S2AP_R;

	if (prot & KVM_PGTABLE_PROT_W)
		set |= KVM_PTE_LEAF_ATTR_LO_S2_S2AP_W;

	if (prot & KVM_PGTABLE_PROT_X)
		clr |= KVM_PTE_LEAF_ATTR_HI_S2_XN;

	ret = stage2_update_leaf_attrs(pgt, addr, 1, set, clr, NULL, &level);
	if (ret)
		return ret;

	kvm_call_hyp(__kvm_tlb_flush_vmid_ipa, pgt->mmu, addr, level);
	return 0;
}
/*
 * LEAF callback for kvm_pgtable_stage2_flush(): flush the data cache for
 * the memory behind a valid, cacheable leaf. Always returns 0.
 */
static int stage2_flush_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep,
			       enum kvm_pgtable_walk_flags flag,
			       void * const arg)
{
	kvm_pte_t pte = *ptep;

	if (kvm_pte_valid(pte) && stage2_pte_cacheable(pte))
		stage2_flush_dcache(kvm_pte_follow(pte),
				    kvm_granule_size(level));

	return 0;
}
/*
 * kvm_pgtable_stage2_flush() - Flush the data cache for the memory mapped
 * by valid, cacheable leaves covering [@addr, @addr + @size). Nothing is
 * walked when the CPUs have the ARM64_HAS_STAGE2_FWB capability.
 *
 * Return: 0 on success or the error from the page-table walk.
 */
int kvm_pgtable_stage2_flush(struct kvm_pgtable *pgt, u64 addr, u64 size)
{
	struct kvm_pgtable_walker walker = {
		.flags	= KVM_PGTABLE_WALK_LEAF,
		.cb	= stage2_flush_walker,
	};

	if (!cpus_have_const_cap(ARM64_HAS_STAGE2_FWB))
		return kvm_pgtable_walk(pgt, addr, size, &walker);

	return 0;
}
/*
 * kvm_pgtable_stage2_init() - Allocate zeroed PGD pages for a stage-2
 * page-table sized from @kvm's VTCR value, and initialise @pgt.
 *
 * Return: 0 on success, -ENOMEM if the PGD cannot be allocated (in which
 * case @pgt->pgd is NULL and the other fields are not written).
 */
int kvm_pgtable_stage2_init(struct kvm_pgtable *pgt, struct kvm *kvm)
{
size_t pgd_sz;
u64 vtcr = kvm->arch.vtcr;
/* Input-address width and start level are both derived from VTCR. */
u32 ia_bits = VTCR_EL2_IPA(vtcr);
u32 sl0 = FIELD_GET(VTCR_EL2_SL0_MASK, vtcr);
u32 start_level = VTCR_EL2_TGRAN_SL0_BASE - sl0;
/* The PGD may span several pages, hence the exact-size allocation. */
pgd_sz = kvm_pgd_pages(ia_bits, start_level) * PAGE_SIZE;
pgt->pgd = alloc_pages_exact(pgd_sz, GFP_KERNEL | __GFP_ZERO);
if (!pgt->pgd)
return -ENOMEM;
pgt->ia_bits = ia_bits;
pgt->start_level = start_level;
pgt->mmu = &kvm->arch.mmu;
/* Ensure zeroed PGD pages are visible to the hardware walker */
dsb(ishst);
return 0;
}
/*
 * LEAF / TABLE_POST callback for kvm_pgtable_stage2_destroy(): drop the
 * reference held for a valid entry and, if the entry is a table, free the
 * table page it points at. Always returns 0.
 */
static int stage2_free_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep,
			      enum kvm_pgtable_walk_flags flag,
			      void * const arg)
{
	kvm_pte_t pte = *ptep;

	if (kvm_pte_valid(pte)) {
		put_page(virt_to_page(ptep));

		if (kvm_pte_table(pte, level))
			free_page((unsigned long)kvm_pte_follow(pte));
	}

	return 0;
}
/*
 * kvm_pgtable_stage2_destroy() - Tear down a stage-2 page-table: release
 * every valid entry and table page across the whole input-address range,
 * then free the PGD pages and reset @pgt->pgd to NULL. Warns if the walk
 * reports an error.
 */
void kvm_pgtable_stage2_destroy(struct kvm_pgtable *pgt)
{
size_t pgd_sz;
struct kvm_pgtable_walker walker = {
.cb = stage2_free_walker,
.flags = KVM_PGTABLE_WALK_LEAF |
KVM_PGTABLE_WALK_TABLE_POST,
};
WARN_ON(kvm_pgtable_walk(pgt, 0, BIT(pgt->ia_bits), &walker));
/* Same size computation as at allocation time in kvm_pgtable_stage2_init(). */
pgd_sz = kvm_pgd_pages(pgt->ia_bits, pgt->start_level) * PAGE_SIZE;
free_pages_exact(pgt->pgd, pgd_sz);
pgt->pgd = NULL;
}

View file

@ -28,6 +28,11 @@
const char __hyp_panic_string[] = "HYP panic:\nPS:%08llx PC:%016llx ESR:%08llx\nFAR:%016llx HPFAR:%016llx PAR:%016llx\nVCPU:%p\n";
/* VHE specific context */
DEFINE_PER_CPU(struct kvm_host_data, kvm_host_data);
DEFINE_PER_CPU(struct kvm_cpu_context, kvm_hyp_ctxt);
DEFINE_PER_CPU(unsigned long, kvm_hyp_vector);
static void __activate_traps(struct kvm_vcpu *vcpu)
{
u64 val;
@ -59,7 +64,7 @@ static void __activate_traps(struct kvm_vcpu *vcpu)
write_sysreg(val, cpacr_el1);
write_sysreg(kvm_get_hyp_vector(), vbar_el1);
write_sysreg(__this_cpu_read(kvm_hyp_vector), vbar_el1);
}
NOKPROBE_SYMBOL(__activate_traps);
@ -108,7 +113,7 @@ static int __kvm_vcpu_run_vhe(struct kvm_vcpu *vcpu)
struct kvm_cpu_context *guest_ctxt;
u64 exit_code;
host_ctxt = &__hyp_this_cpu_ptr(kvm_host_data)->host_ctxt;
host_ctxt = &this_cpu_ptr(&kvm_host_data)->host_ctxt;
host_ctxt->__hyp_running_vcpu = vcpu;
guest_ctxt = &vcpu->arch.ctxt;
@ -120,12 +125,12 @@ static int __kvm_vcpu_run_vhe(struct kvm_vcpu *vcpu)
* HCR_EL2.TGE.
*
* We have already configured the guest's stage 1 translation in
* kvm_vcpu_load_sysregs_vhe above. We must now call __activate_vm
* before __activate_traps, because __activate_vm configures
* stage 2 translation, and __activate_traps clear HCR_EL2.TGE
* (among other things).
* kvm_vcpu_load_sysregs_vhe above. We must now call
* __load_guest_stage2 before __activate_traps, because
* __load_guest_stage2 configures stage 2 translation, and
* __activate_traps clear HCR_EL2.TGE (among other things).
*/
__activate_vm(vcpu->arch.hw_mmu);
__load_guest_stage2(vcpu->arch.hw_mmu);
__activate_traps(vcpu);
sysreg_restore_guest_state_vhe(guest_ctxt);
@ -133,7 +138,7 @@ static int __kvm_vcpu_run_vhe(struct kvm_vcpu *vcpu)
do {
/* Jump in the fire! */
exit_code = __guest_enter(vcpu, host_ctxt);
exit_code = __guest_enter(vcpu);
/* And we're baaack! */
} while (fixup_guest_exit(vcpu, &exit_code));
@ -188,10 +193,12 @@ int __kvm_vcpu_run(struct kvm_vcpu *vcpu)
return ret;
}
static void __hyp_call_panic(u64 spsr, u64 elr, u64 par,
struct kvm_cpu_context *host_ctxt)
static void __hyp_call_panic(u64 spsr, u64 elr, u64 par)
{
struct kvm_cpu_context *host_ctxt;
struct kvm_vcpu *vcpu;
host_ctxt = &this_cpu_ptr(&kvm_host_data)->host_ctxt;
vcpu = host_ctxt->__hyp_running_vcpu;
__deactivate_traps(vcpu);
@ -204,13 +211,13 @@ static void __hyp_call_panic(u64 spsr, u64 elr, u64 par,
}
NOKPROBE_SYMBOL(__hyp_call_panic);
void __noreturn hyp_panic(struct kvm_cpu_context *host_ctxt)
void __noreturn hyp_panic(void)
{
u64 spsr = read_sysreg_el2(SYS_SPSR);
u64 elr = read_sysreg_el2(SYS_ELR);
u64 par = read_sysreg(par_el1);
__hyp_call_panic(spsr, elr, par, host_ctxt);
__hyp_call_panic(spsr, elr, par);
unreachable();
}

View file

@ -66,7 +66,7 @@ void kvm_vcpu_load_sysregs_vhe(struct kvm_vcpu *vcpu)
struct kvm_cpu_context *guest_ctxt = &vcpu->arch.ctxt;
struct kvm_cpu_context *host_ctxt;
host_ctxt = &__hyp_this_cpu_ptr(kvm_host_data)->host_ctxt;
host_ctxt = &this_cpu_ptr(&kvm_host_data)->host_ctxt;
__sysreg_save_user_state(host_ctxt);
/*
@ -100,7 +100,7 @@ void kvm_vcpu_put_sysregs_vhe(struct kvm_vcpu *vcpu)
struct kvm_cpu_context *guest_ctxt = &vcpu->arch.ctxt;
struct kvm_cpu_context *host_ctxt;
host_ctxt = &__hyp_this_cpu_ptr(kvm_host_data)->host_ctxt;
host_ctxt = &this_cpu_ptr(&kvm_host_data)->host_ctxt;
deactivate_traps_vhe_put();
__sysreg_save_el1_state(guest_ctxt);

View file

@ -202,6 +202,7 @@ void kvm_inject_pabt(struct kvm_vcpu *vcpu, unsigned long addr)
/**
* kvm_inject_undefined - inject an undefined instruction into the guest
* @vcpu: The vCPU in which to inject the exception
*
* It is assumed that this code is called from the VCPU thread and that the
* VCPU therefore is not currently executing guest code.

File diff suppressed because it is too large Load diff

View file

@ -20,6 +20,21 @@ static void kvm_pmu_stop_counter(struct kvm_vcpu *vcpu, struct kvm_pmc *pmc);
#define PERF_ATTR_CFG1_KVM_PMU_CHAINED 0x1
/*
 * Mask covering the event-number bits for the PMU version recorded for
 * @kvm: 10 bits for version 1, 16 bits for versions 4, 5 and 6. Any other
 * version triggers a one-time warning and yields 0.
 */
static u32 kvm_pmu_event_mask(struct kvm *kvm)
{
	u32 mask = 0;

	switch (kvm->arch.pmuver) {
	case 1:			/* ARMv8.0 */
		mask = GENMASK(9, 0);
		break;
	case 4:			/* ARMv8.1 */
	case 5:			/* ARMv8.4 */
	case 6:			/* ARMv8.5 */
		mask = GENMASK(15, 0);
		break;
	default:		/* Shouldn't be here, just for sanity */
		WARN_ONCE(1, "Unknown PMU version %d\n", kvm->arch.pmuver);
		break;
	}

	return mask;
}
/**
* kvm_pmu_idx_is_64bit - determine if select_idx is a 64bit counter
* @vcpu: The vcpu pointer
@ -100,7 +115,7 @@ static bool kvm_pmu_idx_has_chain_evtype(struct kvm_vcpu *vcpu, u64 select_idx)
return false;
reg = PMEVTYPER0_EL0 + select_idx;
eventsel = __vcpu_sys_reg(vcpu, reg) & ARMV8_PMU_EVTYPE_EVENT;
eventsel = __vcpu_sys_reg(vcpu, reg) & kvm_pmu_event_mask(vcpu->kvm);
return eventsel == ARMV8_PMUV3_PERFCTR_CHAIN;
}
@ -516,7 +531,7 @@ void kvm_pmu_software_increment(struct kvm_vcpu *vcpu, u64 val)
/* PMSWINC only applies to ... SW_INC! */
type = __vcpu_sys_reg(vcpu, PMEVTYPER0_EL0 + i);
type &= ARMV8_PMU_EVTYPE_EVENT;
type &= kvm_pmu_event_mask(vcpu->kvm);
if (type != ARMV8_PMUV3_PERFCTR_SW_INCR)
continue;
@ -599,11 +614,21 @@ static void kvm_pmu_create_perf_event(struct kvm_vcpu *vcpu, u64 select_idx)
data = __vcpu_sys_reg(vcpu, reg);
kvm_pmu_stop_counter(vcpu, pmc);
eventsel = data & ARMV8_PMU_EVTYPE_EVENT;
if (pmc->idx == ARMV8_PMU_CYCLE_IDX)
eventsel = ARMV8_PMUV3_PERFCTR_CPU_CYCLES;
else
eventsel = data & kvm_pmu_event_mask(vcpu->kvm);
/* Software increment event does't need to be backed by a perf event */
if (eventsel == ARMV8_PMUV3_PERFCTR_SW_INCR &&
pmc->idx != ARMV8_PMU_CYCLE_IDX)
/* Software increment event doesn't need to be backed by a perf event */
if (eventsel == ARMV8_PMUV3_PERFCTR_SW_INCR)
return;
/*
* If we have a filter in place and that the event isn't allowed, do
* not install a perf event either.
*/
if (vcpu->kvm->arch.pmu_filter &&
!test_bit(eventsel, vcpu->kvm->arch.pmu_filter))
return;
memset(&attr, 0, sizeof(struct perf_event_attr));
@ -615,8 +640,7 @@ static void kvm_pmu_create_perf_event(struct kvm_vcpu *vcpu, u64 select_idx)
attr.exclude_kernel = data & ARMV8_PMU_EXCLUDE_EL1 ? 1 : 0;
attr.exclude_hv = 1; /* Don't count EL2 events */
attr.exclude_host = 1; /* Don't count host events */
attr.config = (pmc->idx == ARMV8_PMU_CYCLE_IDX) ?
ARMV8_PMUV3_PERFCTR_CPU_CYCLES : eventsel;
attr.config = eventsel;
counter = kvm_pmu_get_pair_counter_value(vcpu, pmc);
@ -700,17 +724,95 @@ static void kvm_pmu_update_pmc_chained(struct kvm_vcpu *vcpu, u64 select_idx)
void kvm_pmu_set_counter_event_type(struct kvm_vcpu *vcpu, u64 data,
u64 select_idx)
{
u64 reg, event_type = data & ARMV8_PMU_EVTYPE_MASK;
u64 reg, mask;
mask = ARMV8_PMU_EVTYPE_MASK;
mask &= ~ARMV8_PMU_EVTYPE_EVENT;
mask |= kvm_pmu_event_mask(vcpu->kvm);
reg = (select_idx == ARMV8_PMU_CYCLE_IDX)
? PMCCFILTR_EL0 : PMEVTYPER0_EL0 + select_idx;
__vcpu_sys_reg(vcpu, reg) = event_type;
__vcpu_sys_reg(vcpu, reg) = data & mask;
kvm_pmu_update_pmc_chained(vcpu, select_idx);
kvm_pmu_create_perf_event(vcpu, select_idx);
}
/*
 * Probe the PMU version by creating a throwaway perf event and reading the
 * version from the arm_pmu backing it.
 *
 * Returns the PMU version, or 0xf if the event cannot be created, has no
 * PMU attached, or the PMU reports a version of 0.
 */
static int kvm_pmu_probe_pmuver(void)
{
struct perf_event_attr attr = { };
struct perf_event *event;
struct arm_pmu *pmu;
/* 0xf is the "could not determine" value returned on every failure path. */
int pmuver = 0xf;
/*
* Create a dummy event that only counts user cycles. As we'll never
* leave this function with the event being live, it will never
* count anything. But it allows us to probe some of the PMU
* details. Yes, this is terrible.
*/
attr.type = PERF_TYPE_RAW;
attr.size = sizeof(attr);
attr.pinned = 1;
attr.disabled = 0;
attr.exclude_user = 0;
attr.exclude_kernel = 1;
attr.exclude_hv = 1;
attr.exclude_host = 1;
attr.config = ARMV8_PMUV3_PERFCTR_CPU_CYCLES;
attr.sample_period = GENMASK(63, 0);
event = perf_event_create_kernel_counter(&attr, -1, current,
kvm_pmu_perf_overflow, &attr);
if (IS_ERR(event)) {
pr_err_once("kvm: pmu event creation failed %ld\n",
PTR_ERR(event));
return 0xf;
}
if (event->pmu) {
pmu = to_arm_pmu(event->pmu);
if (pmu->pmuver)
pmuver = pmu->pmuver;
}
/* The event was only needed for the lookup above; tear it down again. */
perf_event_disable(event);
perf_event_release_kernel(event);
return pmuver;
}
/*
 * kvm_pmu_get_pmceid() - Value of PMCEID0_EL0 or PMCEID1_EL0 to expose to
 * the guest, with events rejected by the VM's event filter masked out.
 * @vcpu:	The vcpu pointer.
 * @pmceid1:	false to read PMCEID0_EL0, true to read PMCEID1_EL0.
 *
 * Bits [31:0] of the result are filtered by events 'base'..'base + 31' and
 * bits [63:32] by events '0x4000 + base'..'0x4000 + base + 31', where base
 * is 0 for PMCEID0 and 32 for PMCEID1.
 *
 * Return: the hardware register value if no filter is installed, otherwise
 * that value ANDed with the filter-derived mask.
 */
u64 kvm_pmu_get_pmceid(struct kvm_vcpu *vcpu, bool pmceid1)
{
	unsigned long *bmap = vcpu->kvm->arch.pmu_filter;
	u64 val, mask = 0;
	int base, i, nr_events;

	if (!pmceid1) {
		val = read_sysreg(pmceid0_el0);
		base = 0;
	} else {
		val = read_sysreg(pmceid1_el0);
		base = 32;
	}

	if (!bmap)
		return val;

	/* The filter bitmap is allocated with exactly this many bits. */
	nr_events = kvm_pmu_event_mask(vcpu->kvm) + 1;

	for (i = 0; i < 32; i += 8) {
		u64 byte;

		byte = bitmap_get_value8(bmap, base + i);
		mask |= byte << i;

		/*
		 * Only consult the 0x4000+ event range when the bitmap is
		 * large enough to hold it. With a 10-bit event space the
		 * bitmap has 1024 bits, so reading there would run past the
		 * end of the allocation; the upper half of the mask then
		 * stays 0 and those events are not advertised.
		 */
		if (nr_events >= (0x4000 + base + 32)) {
			byte = bitmap_get_value8(bmap, 0x4000 + base + i);
			mask |= byte << (32 + i);
		}
	}

	return val & mask;
}
bool kvm_arm_support_pmu_v3(void)
{
/*
@ -756,15 +858,6 @@ int kvm_arm_pmu_v3_enable(struct kvm_vcpu *vcpu)
static int kvm_arm_pmu_v3_init(struct kvm_vcpu *vcpu)
{
if (!kvm_arm_support_pmu_v3())
return -ENODEV;
if (!test_bit(KVM_ARM_VCPU_PMU_V3, vcpu->arch.features))
return -ENXIO;
if (vcpu->arch.pmu.created)
return -EBUSY;
if (irqchip_in_kernel(vcpu->kvm)) {
int ret;
@ -820,6 +913,19 @@ static bool pmu_irq_is_valid(struct kvm *kvm, int irq)
int kvm_arm_pmu_v3_set_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr)
{
if (!kvm_arm_support_pmu_v3() ||
!test_bit(KVM_ARM_VCPU_PMU_V3, vcpu->arch.features))
return -ENODEV;
if (vcpu->arch.pmu.created)
return -EBUSY;
if (!vcpu->kvm->arch.pmuver)
vcpu->kvm->arch.pmuver = kvm_pmu_probe_pmuver();
if (vcpu->kvm->arch.pmuver == 0xf)
return -ENODEV;
switch (attr->attr) {
case KVM_ARM_VCPU_PMU_V3_IRQ: {
int __user *uaddr = (int __user *)(long)attr->addr;
@ -828,9 +934,6 @@ int kvm_arm_pmu_v3_set_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr)
if (!irqchip_in_kernel(vcpu->kvm))
return -EINVAL;
if (!test_bit(KVM_ARM_VCPU_PMU_V3, vcpu->arch.features))
return -ENODEV;
if (get_user(irq, uaddr))
return -EFAULT;
@ -848,6 +951,53 @@ int kvm_arm_pmu_v3_set_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr)
vcpu->arch.pmu.irq_num = irq;
return 0;
}
case KVM_ARM_VCPU_PMU_V3_FILTER: {
struct kvm_pmu_event_filter __user *uaddr;
struct kvm_pmu_event_filter filter;
int nr_events;
nr_events = kvm_pmu_event_mask(vcpu->kvm) + 1;
uaddr = (struct kvm_pmu_event_filter __user *)(long)attr->addr;
if (copy_from_user(&filter, uaddr, sizeof(filter)))
return -EFAULT;
if (((u32)filter.base_event + filter.nevents) > nr_events ||
(filter.action != KVM_PMU_EVENT_ALLOW &&
filter.action != KVM_PMU_EVENT_DENY))
return -EINVAL;
mutex_lock(&vcpu->kvm->lock);
if (!vcpu->kvm->arch.pmu_filter) {
vcpu->kvm->arch.pmu_filter = bitmap_alloc(nr_events, GFP_KERNEL);
if (!vcpu->kvm->arch.pmu_filter) {
mutex_unlock(&vcpu->kvm->lock);
return -ENOMEM;
}
/*
* The default depends on the first applied filter.
* If it allows events, the default is to deny.
* Conversely, if the first filter denies a set of
* events, the default is to allow.
*/
if (filter.action == KVM_PMU_EVENT_ALLOW)
bitmap_zero(vcpu->kvm->arch.pmu_filter, nr_events);
else
bitmap_fill(vcpu->kvm->arch.pmu_filter, nr_events);
}
if (filter.action == KVM_PMU_EVENT_ALLOW)
bitmap_set(vcpu->kvm->arch.pmu_filter, filter.base_event, filter.nevents);
else
bitmap_clear(vcpu->kvm->arch.pmu_filter, filter.base_event, filter.nevents);
mutex_unlock(&vcpu->kvm->lock);
return 0;
}
case KVM_ARM_VCPU_PMU_V3_INIT:
return kvm_arm_pmu_v3_init(vcpu);
}
@ -884,6 +1034,7 @@ int kvm_arm_pmu_v3_has_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr)
switch (attr->attr) {
case KVM_ARM_VCPU_PMU_V3_IRQ:
case KVM_ARM_VCPU_PMU_V3_INIT:
case KVM_ARM_VCPU_PMU_V3_FILTER:
if (kvm_arm_support_pmu_v3() &&
test_bit(KVM_ARM_VCPU_PMU_V3, vcpu->arch.features))
return 0;

View file

@ -31,9 +31,9 @@ static bool kvm_pmu_switch_needed(struct perf_event_attr *attr)
*/
void kvm_set_pmu_events(u32 set, struct perf_event_attr *attr)
{
struct kvm_host_data *ctx = this_cpu_ptr(&kvm_host_data);
struct kvm_host_data *ctx = this_cpu_ptr_hyp_sym(kvm_host_data);
if (!kvm_pmu_switch_needed(attr))
if (!ctx || !kvm_pmu_switch_needed(attr))
return;
if (!attr->exclude_host)
@ -47,7 +47,10 @@ void kvm_set_pmu_events(u32 set, struct perf_event_attr *attr)
*/
void kvm_clr_pmu_events(u32 clr)
{
struct kvm_host_data *ctx = this_cpu_ptr(&kvm_host_data);
struct kvm_host_data *ctx = this_cpu_ptr_hyp_sym(kvm_host_data);
if (!ctx)
return;
ctx->pmu_events.events_host &= ~clr;
ctx->pmu_events.events_guest &= ~clr;
@ -173,7 +176,7 @@ void kvm_vcpu_pmu_restore_guest(struct kvm_vcpu *vcpu)
return;
preempt_disable();
host = this_cpu_ptr(&kvm_host_data);
host = this_cpu_ptr_hyp_sym(kvm_host_data);
events_guest = host->pmu_events.events_guest;
events_host = host->pmu_events.events_host;
@ -193,7 +196,7 @@ void kvm_vcpu_pmu_restore_host(struct kvm_vcpu *vcpu)
if (!has_vhe())
return;
host = this_cpu_ptr(&kvm_host_data);
host = this_cpu_ptr_hyp_sym(kvm_host_data);
events_guest = host->pmu_events.events_guest;
events_host = host->pmu_events.events_host;

View file

@ -335,7 +335,7 @@ u32 get_kvm_ipa_limit(void)
int kvm_set_ipa_limit(void)
{
unsigned int ipa_max, pa_max, va_max, parange, tgran_2;
unsigned int parange, tgran_2;
u64 mmfr0;
mmfr0 = read_sanitised_ftr_reg(SYS_ID_AA64MMFR0_EL1);
@ -372,39 +372,11 @@ int kvm_set_ipa_limit(void)
break;
}
pa_max = id_aa64mmfr0_parange_to_phys_shift(parange);
/* Clamp the IPA limit to the PA size supported by the kernel */
ipa_max = (pa_max > PHYS_MASK_SHIFT) ? PHYS_MASK_SHIFT : pa_max;
/*
* Since our stage2 table is dependent on the stage1 page table code,
* we must always honor the following condition:
*
* Number of levels in Stage1 >= Number of levels in Stage2.
*
* So clamp the ipa limit further down to limit the number of levels.
* Since we can concatenate upto 16 tables at entry level, we could
* go upto 4bits above the maximum VA addressable with the current
* number of levels.
*/
va_max = PGDIR_SHIFT + PAGE_SHIFT - 3;
va_max += 4;
if (va_max < ipa_max)
ipa_max = va_max;
/*
* If the final limit is lower than the real physical address
* limit of the CPUs, report the reason.
*/
if (ipa_max < pa_max)
pr_info("kvm: Limiting the IPA size due to kernel %s Address limit\n",
(va_max < pa_max) ? "Virtual" : "Physical");
WARN(ipa_max < KVM_PHYS_SHIFT,
"KVM IPA limit (%d bit) is smaller than default size\n", ipa_max);
kvm_ipa_limit = ipa_max;
kvm_info("IPA Size Limit: %dbits\n", kvm_ipa_limit);
kvm_ipa_limit = id_aa64mmfr0_parange_to_phys_shift(parange);
WARN(kvm_ipa_limit < KVM_PHYS_SHIFT,
"KVM IPA Size Limit (%d bits) is smaller than default size\n",
kvm_ipa_limit);
kvm_info("IPA Size Limit: %d bits\n", kvm_ipa_limit);
return 0;
}

View file

@ -769,10 +769,7 @@ static bool access_pmceid(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
if (pmu_access_el0_disabled(vcpu))
return false;
if (!(p->Op2 & 1))
pmceid = read_sysreg(pmceid0_el0);
else
pmceid = read_sysreg(pmceid1_el0);
pmceid = kvm_pmu_get_pmceid(vcpu, (p->Op2 & 1));
p->regval = pmceid;

View file

@ -260,34 +260,14 @@ static int vgic_debug_show(struct seq_file *s, void *v)
return 0;
}
static const struct seq_operations vgic_debug_seq_ops = {
static const struct seq_operations vgic_debug_sops = {
.start = vgic_debug_start,
.next = vgic_debug_next,
.stop = vgic_debug_stop,
.show = vgic_debug_show
};
static int debug_open(struct inode *inode, struct file *file)
{
int ret;
ret = seq_open(file, &vgic_debug_seq_ops);
if (!ret) {
struct seq_file *seq;
/* seq_open will have modified file->private_data */
seq = file->private_data;
seq->private = inode->i_private;
}
return ret;
};
static const struct file_operations vgic_debug_fops = {
.owner = THIS_MODULE,
.open = debug_open,
.read = seq_read,
.llseek = seq_lseek,
.release = seq_release
};
DEFINE_SEQ_ATTRIBUTE(vgic_debug);
void vgic_debug_init(struct kvm *kvm)
{

View file

@ -662,7 +662,7 @@ void vgic_v3_load(struct kvm_vcpu *vcpu)
if (likely(cpu_if->vgic_sre))
kvm_call_hyp(__vgic_v3_write_vmcr, cpu_if->vgic_vmcr);
kvm_call_hyp(__vgic_v3_restore_aprs, kern_hyp_va(cpu_if));
kvm_call_hyp(__vgic_v3_restore_aprs, cpu_if);
if (has_vhe())
__vgic_v3_activate_traps(cpu_if);
@ -686,7 +686,7 @@ void vgic_v3_put(struct kvm_vcpu *vcpu)
vgic_v3_vmcr_sync(vcpu);
kvm_call_hyp(__vgic_v3_save_aprs, kern_hyp_va(cpu_if));
kvm_call_hyp(__vgic_v3_save_aprs, cpu_if);
if (has_vhe())
__vgic_v3_deactivate_traps(cpu_if);

View file

@ -316,8 +316,6 @@ asmlinkage void do_notify_resume(struct pt_regs *regs, u32 thread_info_flags,
if (thread_info_flags & (1 << TIF_SIGPENDING))
do_signal(regs, syscall);
if (thread_info_flags & (1 << TIF_NOTIFY_RESUME)) {
clear_thread_flag(TIF_NOTIFY_RESUME);
if (thread_info_flags & (1 << TIF_NOTIFY_RESUME))
tracehook_notify_resume(regs);
}
}

View file

@ -261,7 +261,6 @@ asmlinkage void do_notify_resume(struct pt_regs *regs,
do_signal(regs);
if (thread_info_flags & _TIF_NOTIFY_RESUME) {
clear_thread_flag(TIF_NOTIFY_RESUME);
tracehook_notify_resume(regs);
rseq_handle_notify_resume(NULL, regs);
}

View file

@ -282,8 +282,6 @@ asmlinkage void do_notify_resume(struct pt_regs *regs, u32 thread_info_flags)
if (thread_info_flags & _TIF_SIGPENDING)
do_signal(regs);
if (thread_info_flags & _TIF_NOTIFY_RESUME) {
clear_thread_flag(TIF_NOTIFY_RESUME);
if (thread_info_flags & _TIF_NOTIFY_RESUME)
tracehook_notify_resume(regs);
}
}

View file

@ -180,7 +180,6 @@ int do_work_pending(struct pt_regs *regs, u32 thread_info_flags)
}
if (thread_info_flags & _TIF_NOTIFY_RESUME) {
clear_thread_flag(TIF_NOTIFY_RESUME);
tracehook_notify_resume(regs);
return 1;
}

View file

@ -176,7 +176,7 @@ do_notify_resume_user(sigset_t *unused, struct sigscratch *scr, long in_syscall)
ia64_do_signal(scr, in_syscall);
}
if (test_and_clear_thread_flag(TIF_NOTIFY_RESUME)) {
if (test_thread_flag(TIF_NOTIFY_RESUME)) {
local_irq_enable(); /* force interrupt enable */
tracehook_notify_resume(&scr->pt);
}

View file

@ -1136,6 +1136,6 @@ void do_notify_resume(struct pt_regs *regs)
if (test_thread_flag(TIF_SIGPENDING))
do_signal(regs);
if (test_and_clear_thread_flag(TIF_NOTIFY_RESUME))
if (test_thread_flag(TIF_NOTIFY_RESUME))
tracehook_notify_resume(regs);
}

View file

@ -316,6 +316,6 @@ asmlinkage void do_notify_resume(struct pt_regs *regs, int in_syscall)
if (test_thread_flag(TIF_SIGPENDING))
do_signal(regs, in_syscall);
if (test_and_clear_thread_flag(TIF_NOTIFY_RESUME))
if (test_thread_flag(TIF_NOTIFY_RESUME))
tracehook_notify_resume(regs);
}

View file

@ -341,7 +341,7 @@ struct kvm_mips_tlb {
#define KVM_MIPS_GUEST_TLB_SIZE 64
struct kvm_vcpu_arch {
void *guest_ebase;
int (*vcpu_run)(struct kvm_run *run, struct kvm_vcpu *vcpu);
int (*vcpu_run)(struct kvm_vcpu *vcpu);
/* Host registers preserved across guest mode execution */
unsigned long host_stack;
@ -852,7 +852,7 @@ int kvm_mips_emulation_init(struct kvm_mips_callbacks **install_callbacks);
/* Debug: dump vcpu state */
int kvm_arch_vcpu_dump_regs(struct kvm_vcpu *vcpu);
extern int kvm_mips_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu);
extern int kvm_mips_handle_exit(struct kvm_vcpu *vcpu);
/* Building of entry/exception code */
int kvm_mips_entry_setup(void);

View file

@ -907,7 +907,6 @@ asmlinkage void do_notify_resume(struct pt_regs *regs, void *unused,
do_signal(regs);
if (thread_info_flags & _TIF_NOTIFY_RESUME) {
clear_thread_flag(TIF_NOTIFY_RESUME);
tracehook_notify_resume(regs);
rseq_handle_notify_resume(NULL, regs);
}

View file

@ -205,7 +205,7 @@ static inline void build_set_exc_base(u32 **p, unsigned int reg)
* Assemble the start of the vcpu_run function to run a guest VCPU. The function
* conforms to the following prototype:
*
* int vcpu_run(struct kvm_run *run, struct kvm_vcpu *vcpu);
* int vcpu_run(struct kvm_vcpu *vcpu);
*
* The exit from the guest and return to the caller is handled by the code
* generated by kvm_mips_build_ret_to_host().
@ -218,8 +218,7 @@ void *kvm_mips_build_vcpu_run(void *addr)
unsigned int i;
/*
* A0: run
* A1: vcpu
* A0: vcpu
*/
/* k0/k1 not being used in host kernel context */
@ -238,10 +237,10 @@ void *kvm_mips_build_vcpu_run(void *addr)
kvm_mips_build_save_scratch(&p, V1, K1);
/* VCPU scratch register has pointer to vcpu */
UASM_i_MTC0(&p, A1, scratch_vcpu[0], scratch_vcpu[1]);
UASM_i_MTC0(&p, A0, scratch_vcpu[0], scratch_vcpu[1]);
/* Offset into vcpu->arch */
UASM_i_ADDIU(&p, K1, A1, offsetof(struct kvm_vcpu, arch));
UASM_i_ADDIU(&p, K1, A0, offsetof(struct kvm_vcpu, arch));
/*
* Save the host stack to VCPU, used for exception processing
@ -645,10 +644,7 @@ void *kvm_mips_build_exit(void *addr)
/* Now that context has been saved, we can use other registers */
/* Restore vcpu */
UASM_i_MFC0(&p, S1, scratch_vcpu[0], scratch_vcpu[1]);
/* Restore run (vcpu->run) */
UASM_i_LW(&p, S0, offsetof(struct kvm_vcpu, run), S1);
UASM_i_MFC0(&p, S0, scratch_vcpu[0], scratch_vcpu[1]);
/*
* Save Host level EPC, BadVaddr and Cause to VCPU, useful to process
@ -810,7 +806,6 @@ void *kvm_mips_build_exit(void *addr)
* with this in the kernel
*/
uasm_i_move(&p, A0, S0);
uasm_i_move(&p, A1, S1);
UASM_i_LA(&p, T9, (unsigned long)kvm_mips_handle_exit);
uasm_i_jalr(&p, RA, T9);
UASM_i_ADDIU(&p, SP, SP, -CALLFRAME_SIZ);
@ -852,7 +847,7 @@ static void *kvm_mips_build_ret_from_exit(void *addr)
* guest, reload k1
*/
uasm_i_move(&p, K1, S1);
uasm_i_move(&p, K1, S0);
UASM_i_ADDIU(&p, K1, K1, offsetof(struct kvm_vcpu, arch));
/*
@ -886,8 +881,8 @@ static void *kvm_mips_build_ret_to_guest(void *addr)
{
u32 *p = addr;
/* Put the saved pointer to vcpu (s1) back into the scratch register */
UASM_i_MTC0(&p, S1, scratch_vcpu[0], scratch_vcpu[1]);
/* Put the saved pointer to vcpu (s0) back into the scratch register */
UASM_i_MTC0(&p, S0, scratch_vcpu[0], scratch_vcpu[1]);
/* Load up the Guest EBASE to minimize the window where BEV is set */
UASM_i_LW(&p, T0, offsetof(struct kvm_vcpu_arch, guest_ebase), K1);

View file

@ -1199,8 +1199,9 @@ static void kvm_mips_set_c0_status(void)
/*
* Return value is in the form (errcode<<2 | RESUME_FLAG_HOST | RESUME_FLAG_NV)
*/
int kvm_mips_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu)
int kvm_mips_handle_exit(struct kvm_vcpu *vcpu)
{
struct kvm_run *run = vcpu->run;
u32 cause = vcpu->arch.host_cp0_cause;
u32 exccode = (cause >> CAUSEB_EXCCODE) & 0x1f;
u32 __user *opc = (u32 __user *) vcpu->arch.pc;

View file

@ -1241,7 +1241,7 @@ static int kvm_trap_emul_vcpu_run(struct kvm_vcpu *vcpu)
*/
kvm_mips_suspend_mm(cpu);
r = vcpu->arch.vcpu_run(vcpu->run, vcpu);
r = vcpu->arch.vcpu_run(vcpu);
/* We may have migrated while handling guest exits */
cpu = smp_processor_id();

View file

@ -3266,7 +3266,7 @@ static int kvm_vz_vcpu_run(struct kvm_vcpu *vcpu)
kvm_vz_vcpu_load_tlb(vcpu, cpu);
kvm_vz_vcpu_load_wired(vcpu);
r = vcpu->arch.vcpu_run(vcpu->run, vcpu);
r = vcpu->arch.vcpu_run(vcpu);
kvm_vz_vcpu_save_wired(vcpu);

View file

@ -379,8 +379,6 @@ do_notify_resume(struct pt_regs *regs, unsigned int thread_flags)
if (thread_flags & _TIF_SIGPENDING)
do_signal(regs);
if (thread_flags & _TIF_NOTIFY_RESUME) {
clear_thread_flag(TIF_NOTIFY_RESUME);
if (thread_flags & _TIF_NOTIFY_RESUME)
tracehook_notify_resume(regs);
}
}

View file

@ -317,7 +317,7 @@ asmlinkage int do_notify_resume(struct pt_regs *regs)
*/
return restart;
}
} else if (test_and_clear_thread_flag(TIF_NOTIFY_RESUME))
} else if (test_thread_flag(TIF_NOTIFY_RESUME))
tracehook_notify_resume(regs);
return 0;

View file

@ -311,7 +311,6 @@ do_work_pending(struct pt_regs *regs, unsigned int thread_flags, int syscall)
}
syscall = 0;
} else {
clear_thread_flag(TIF_NOTIFY_RESUME);
tracehook_notify_resume(regs);
}
}

View file

@ -606,8 +606,6 @@ void do_notify_resume(struct pt_regs *regs, long in_syscall)
if (test_thread_flag(TIF_SIGPENDING))
do_signal(regs, in_syscall);
if (test_thread_flag(TIF_NOTIFY_RESUME)) {
clear_thread_flag(TIF_NOTIFY_RESUME);
if (test_thread_flag(TIF_NOTIFY_RESUME))
tracehook_notify_resume(regs);
}
}

View file

@ -326,6 +326,7 @@ struct kvm_arch {
#endif
#ifdef CONFIG_KVM_XICS
struct kvmppc_xics *xics;
struct kvmppc_xics *xics_device;
struct kvmppc_xive *xive; /* Current XIVE device in use */
struct {
struct kvmppc_xive *native;

Some files were not shown because too many files have changed in this diff Show more