-----BEGIN PGP SIGNATURE-----
iQIzBAABCAAdFiEEZH8oZUiU471FcZm+ONu9yGCSaT4FAmPCczkACgkQONu9yGCS
aT4tnRAAsW8h/ohmhP+O2lQ9Ekw6s9VB6KB4aJzLhQXqIZlrzk2DP3CiLxQ7DkFc
AcHwFYq+sERo8O7dK6pbCW0zNvLUpbK2wJhwMHujJfSUFboXX85NR6u90U67pBKS
p+yVkDSx8LNc7c676xQ7ey5rO1K2fQQ266gexjI9WOkjIFOfplVkZ7tkvt51VwAD
mNvOQsZdCE6xs+T3t9YMOtAx3wW8vl1wW3QDWCHznQwOJiMEjfNEOUY/+xELnnWz
DVONWPHTFNKQHZwIuVUFZdNuORq5WXoIaMZdaEFkhuOtRMnQ9l+wi8iMxX8zkgBn
Ji/dPu3GdAZsJU4/rXHkY2AeZV1oJc3NLYmevoRZirTqMQCqyM+blif7Rf34kBi7
6jlGPWOjyMNe58shfHmjWTl/u4pMDoYOnm5XO+1oL+2Xg8QWCucqAlfPFB3uMh6n
aL4ymzo5QRy1+tx8v7o1NOjnGWydvYn3O4fwJVkYTFaJZZr9EI7gpTmEBw/gwfuf
4TH6BC++Ai/8vgKUhpdnWuTrjomWalPTcPBgQYG4gD7ak2TM1rmgMaCl/THUe36R
zPC8m2sIXKeI4zGi8TeqTgaRvFSKJnuEmVo5OPkG98fZkjekCzWHp0q1+PG2Ecy8
Mu2/AOnwb1aSfOJh2Qajoke/Wed0U6qszop8C/jPRh8D1uTmRbI=
=sOTj
-----END PGP SIGNATURE-----
Merge 5.10.163 into android12-5.10-lts
Changes in 5.10.163
usb: musb: remove extra check in musb_gadget_vbus_draw
arm64: dts: qcom: ipq6018-cp01-c1: use BLSPI1 pins
arm64: dts: qcom: msm8996: fix GPU OPP table
ARM: dts: qcom: apq8064: fix coresight compatible
arm64: dts: qcom: sdm630: fix UART1 pin bias
arm64: dts: qcom: sdm845-cheza: fix AP suspend pin bias
arm64: dts: qcom: msm8916: Drop MSS fallback compatible
objtool, kcsan: Add volatile read/write instrumentation to whitelist
ARM: dts: stm32: Drop stm32mp15xc.dtsi from Avenger96
ARM: dts: stm32: Fix AV96 WLAN regulator gpio property
drivers: soc: ti: knav_qmss_queue: Mark knav_acc_firmwares as static
soc: qcom: llcc: make irq truly optional
soc: qcom: apr: make code more reusable
soc: qcom: apr: Add check for idr_alloc and of_property_read_string_index
arm: dts: spear600: Fix clcd interrupt
soc: ti: knav_qmss_queue: Use pm_runtime_resume_and_get instead of pm_runtime_get_sync
soc: ti: knav_qmss_queue: Fix PM disable depth imbalance in knav_queue_probe
soc: ti: smartreflex: Fix PM disable depth imbalance in omap_sr_probe
perf: arm_dsu: Fix hotplug callback leak in dsu_pmu_init()
perf/smmuv3: Fix hotplug callback leak in arm_smmu_pmu_init()
arm64: dts: ti: k3-am65-main: Drop dma-coherent in crypto node
arm64: dts: ti: k3-j721e-main: Drop dma-coherent in crypto node
arm64: dts: mt2712e: Fix unit_address_vs_reg warning for oscillators
arm64: dts: mt2712e: Fix unit address for pinctrl node
arm64: dts: mt2712-evb: Fix vproc fixed regulators unit names
arm64: dts: mt2712-evb: Fix usb vbus regulators unit names
arm64: dts: mediatek: pumpkin-common: Fix devicetree warnings
arm64: dts: mediatek: mt6797: Fix 26M oscillator unit name
ARM: dts: dove: Fix assigned-addresses for every PCIe Root Port
ARM: dts: armada-370: Fix assigned-addresses for every PCIe Root Port
ARM: dts: armada-xp: Fix assigned-addresses for every PCIe Root Port
ARM: dts: armada-375: Fix assigned-addresses for every PCIe Root Port
ARM: dts: armada-38x: Fix assigned-addresses for every PCIe Root Port
ARM: dts: armada-39x: Fix assigned-addresses for every PCIe Root Port
ARM: dts: turris-omnia: Add ethernet aliases
ARM: dts: turris-omnia: Add switch port 6 node
arm64: dts: armada-3720-turris-mox: Add missing interrupt for RTC
pstore/ram: Fix error return code in ramoops_probe()
ARM: mmp: fix timer_read delay
pstore: Avoid kcore oops by vmap()ing with VM_IOREMAP
tpm/tpm_ftpm_tee: Fix error handling in ftpm_mod_init()
tpm/tpm_crb: Fix error message in __crb_relinquish_locality()
sched/fair: Cleanup task_util and capacity type
sched/uclamp: Fix relationship between uclamp and migration margin
cpuidle: dt: Return the correct numbers of parsed idle states
alpha: fix syscall entry in !AUDITSYSCALL case
PM: hibernate: Fix mistake in kerneldoc comment
fs: don't audit the capability check in simple_xattr_list()
cpufreq: qcom-hw: Fix memory leak in qcom_cpufreq_hw_read_lut()
selftests/ftrace: event_triggers: wait longer for test_event_enable
perf: Fix possible memleak in pmu_dev_alloc()
lib/debugobjects: fix stat count and optimize debug_objects_mem_init
platform/x86: huawei-wmi: fix return value calculation
timerqueue: Use rb_entry_safe() in timerqueue_getnext()
proc: fixup uptime selftest
lib/fonts: fix undefined behavior in bit shift for get_default_font
ocfs2: fix memory leak in ocfs2_stack_glue_init()
MIPS: vpe-mt: fix possible memory leak while module exiting
MIPS: vpe-cmp: fix possible memory leak while module exiting
selftests/efivarfs: Add checking of the test return value
PNP: fix name memory leak in pnp_alloc_dev()
perf/x86/intel/uncore: Fix reference count leak in hswep_has_limit_sbox()
perf/x86/intel/uncore: Fix reference count leak in snr_uncore_mmio_map()
perf/x86/intel/uncore: Fix reference count leak in __uncore_imc_init_box()
platform/chrome: cros_usbpd_notify: Fix error handling in cros_usbpd_notify_init()
irqchip: gic-pm: Use pm_runtime_resume_and_get() in gic_probe()
EDAC/i10nm: fix refcount leak in pci_get_dev_wrapper()
nfsd: don't call nfsd_file_put from client states seqfile display
genirq/irqdesc: Don't try to remove non-existing sysfs files
cpufreq: amd_freq_sensitivity: Add missing pci_dev_put()
libfs: add DEFINE_SIMPLE_ATTRIBUTE_SIGNED for signed value
lib/notifier-error-inject: fix error when writing -errno to debugfs file
docs: fault-injection: fix non-working usage of negative values
debugfs: fix error when writing negative value to atomic_t debugfs file
ocfs2: ocfs2_mount_volume does cleanup job before return error
ocfs2: rewrite error handling of ocfs2_fill_super
ocfs2: fix memory leak in ocfs2_mount_volume()
rapidio: fix possible name leaks when rio_add_device() fails
rapidio: rio: fix possible name leak in rio_register_mport()
clocksource/drivers/sh_cmt: Make sure channel clock supply is enabled
clocksource/drivers/sh_cmt: Access registers according to spec
futex: Move to kernel/futex/
futex: Resend potentially swallowed owner death notification
cpu/hotplug: Make target_store() a nop when target == state
clocksource/drivers/timer-ti-dm: Fix missing clk_disable_unprepare in dmtimer_systimer_init_clock()
ACPICA: Fix use-after-free in acpi_ut_copy_ipackage_to_ipackage()
uprobes/x86: Allow to probe a NOP instruction with 0x66 prefix
x86/xen: Fix memory leak in xen_smp_intr_init{_pv}()
x86/xen: Fix memory leak in xen_init_lock_cpu()
xen/privcmd: Fix a possible warning in privcmd_ioctl_mmap_resource()
PM: runtime: Improve path in rpm_idle() when no callback
PM: runtime: Do not call __rpm_callback() from rpm_idle()
platform/x86: mxm-wmi: fix memleak in mxm_wmi_call_mx[ds|mx]()
platform/x86: intel_scu_ipc: fix possible name leak in __intel_scu_ipc_register()
MIPS: BCM63xx: Add check for NULL for clk in clk_enable
MIPS: OCTEON: warn only once if deprecated link status is being used
fs: sysv: Fix sysv_nblocks() returns wrong value
rapidio: fix possible UAF when kfifo_alloc() fails
eventfd: change int to __u64 in eventfd_signal() ifndef CONFIG_EVENTFD
relay: fix type mismatch when allocating memory in relay_create_buf()
hfs: Fix OOB Write in hfs_asc2mac
rapidio: devices: fix missing put_device in mport_cdev_open
wifi: ath9k: hif_usb: fix memory leak of urbs in ath9k_hif_usb_dealloc_tx_urbs()
wifi: ath9k: hif_usb: Fix use-after-free in ath9k_hif_usb_reg_in_cb()
wifi: rtl8xxxu: Fix reading the vendor of combo chips
drm/bridge: adv7533: remove dynamic lane switching from adv7533 bridge
libbpf: Fix use-after-free in btf_dump_name_dups
libbpf: Fix null-pointer dereference in find_prog_by_sec_insn()
pata_ixp4xx_cf: Fix unsigned comparison with less than zero
media: coda: jpeg: Add check for kmalloc
media: i2c: ad5820: Fix error path
venus: pm_helpers: Fix error check in vcodec_domains_get()
media: exynos4-is: Use v4l2_async_notifier_add_fwnode_remote_subdev
media: exynos4-is: don't rely on the v4l2_async_subdev internals
can: kvaser_usb: do not increase tx statistics when sending error message frames
can: kvaser_usb: kvaser_usb_leaf: Get capabilities from device
can: kvaser_usb: kvaser_usb_leaf: Rename {leaf,usbcan}_cmd_error_event to {leaf,usbcan}_cmd_can_error_event
can: kvaser_usb: kvaser_usb_leaf: Handle CMD_ERROR_EVENT
can: kvaser_usb_leaf: Set Warning state even without bus errors
can: kvaser_usb_leaf: Fix improved state not being reported
can: kvaser_usb_leaf: Fix wrong CAN state after stopping
can: kvaser_usb_leaf: Fix bogus restart events
can: kvaser_usb: Add struct kvaser_usb_busparams
can: kvaser_usb: Compare requested bittiming parameters with actual parameters in do_set_{,data}_bittiming
drm/rockchip: lvds: fix PM usage counter unbalance in poweron
clk: renesas: r9a06g032: Repair grave increment error
spi: Update reference to struct spi_controller
drm/panel/panel-sitronix-st7701: Remove panel on DSI attach failure
ima: Fix fall-through warnings for Clang
ima: Handle -ESTALE returned by ima_filter_rule_match()
drm/msm/hdmi: switch to drm_bridge_connector
drm/msm/hdmi: drop unused GPIO support
bpf: Fix slot type check in check_stack_write_var_off
media: vivid: fix compose size exceed boundary
media: platform: exynos4-is: fix return value check in fimc_md_probe()
bpf: propagate precision in ALU/ALU64 operations
bpf: Check the other end of slot_type for STACK_SPILL
bpf: propagate precision across all frames, not just the last one
clk: qcom: gcc-sm8250: Use retention mode for USB GDSCs
mtd: Fix device name leak when register device failed in add_mtd_device()
Input: joystick - fix Kconfig warning for JOYSTICK_ADC
wifi: rsi: Fix handling of 802.3 EAPOL frames sent via control port
media: camss: Clean up received buffers on failed start of streaming
net, proc: Provide PROC_FS=n fallback for proc_create_net_single_write()
rxrpc: Fix ack.bufferSize to be 0 when generating an ack
drm/radeon: Add the missed acpi_put_table() to fix memory leak
drm/mediatek: Modify dpi power on/off sequence.
ASoC: pxa: fix null-pointer dereference in filter()
regulator: core: fix unbalanced of node refcount in regulator_dev_lookup()
amdgpu/pm: prevent array underflow in vega20_odn_edit_dpm_table()
drm/fourcc: Add packed 10bit YUV 4:2:0 format
drm/fourcc: Fix vsub/hsub for Q410 and Q401
integrity: Fix memory leakage in keyring allocation error path
ima: Fix misuse of dereference of pointer in template_desc_init_fields()
wifi: ath10k: Fix return value in ath10k_pci_init()
mtd: lpddr2_nvm: Fix possible null-ptr-deref
Input: elants_i2c - properly handle the reset GPIO when power is off
media: vidtv: Fix use-after-free in vidtv_bridge_dvb_init()
media: solo6x10: fix possible memory leak in solo_sysfs_init()
media: platform: exynos4-is: Fix error handling in fimc_md_init()
media: videobuf-dma-contig: use dma_mmap_coherent
inet: add READ_ONCE(sk->sk_bound_dev_if) in inet_csk_bind_conflict()
bpf: Move skb->len == 0 checks into __bpf_redirect
HID: hid-sensor-custom: set fixed size for custom attributes
ALSA: pcm: fix undefined behavior in bit shift for SNDRV_PCM_RATE_KNOT
ALSA: seq: fix undefined behavior in bit shift for SNDRV_SEQ_FILTER_USE_EVENT
regulator: core: use kfree_const() to free space conditionally
clk: rockchip: Fix memory leak in rockchip_clk_register_pll()
drm/amdgpu: fix pci device refcount leak
bonding: fix link recovery in mode 2 when updelay is nonzero
mtd: maps: pxa2xx-flash: fix memory leak in probe
drbd: fix an invalid memory access caused by incorrect use of list iterator
ASoC: qcom: Add checks for devm_kcalloc
media: vimc: Fix wrong function called when vimc_init() fails
media: imon: fix a race condition in send_packet()
clk: imx: replace osc_hdmi with dummy
pinctrl: pinconf-generic: add missing of_node_put()
media: dvb-core: Fix ignored return value in dvb_register_frontend()
media: dvb-usb: az6027: fix null-ptr-deref in az6027_i2c_xfer()
media: s5p-mfc: Add variant data for MFC v7 hardware for Exynos 3250 SoC
drm/tegra: Add missing clk_disable_unprepare() in tegra_dc_probe()
ASoC: dt-bindings: wcd9335: fix reset line polarity in example
ASoC: mediatek: mtk-btcvsd: Add checks for write and read of mtk_btcvsd_snd
NFSv4.2: Clear FATTR4_WORD2_SECURITY_LABEL when done decoding
NFSv4.2: Fix a memory stomp in decode_attr_security_label
NFSv4.2: Fix initialisation of struct nfs4_label
NFSv4: Fix a deadlock between nfs4_open_recover_helper() and delegreturn
NFS: Fix an Oops in nfs_d_automount()
ALSA: asihpi: fix missing pci_disable_device()
wifi: iwlwifi: mvm: fix double free on tx path.
ASoC: mediatek: mt8173: Fix debugfs registration for components
ASoC: mediatek: mt8173: Enable IRQ when pdata is ready
drm/amd/pm/smu11: BACO is supported when it's in BACO state
drm/radeon: Fix PCI device refcount leak in radeon_atrm_get_bios()
drm/amdgpu: Fix PCI device refcount leak in amdgpu_atrm_get_bios()
ASoC: pcm512x: Fix PM disable depth imbalance in pcm512x_probe
netfilter: conntrack: set icmpv6 redirects as RELATED
bpf, sockmap: Fix repeated calls to sock_put() when msg has more_data
bpf, sockmap: Fix data loss caused by using apply_bytes on ingress redirect
bonding: uninitialized variable in bond_miimon_inspect()
spi: spidev: mask SPI_CS_HIGH in SPI_IOC_RD_MODE
wifi: mac80211: fix memory leak in ieee80211_if_add()
wifi: cfg80211: Fix not unregister reg_pdev when load_builtin_regdb_keys() fails
wifi: mt76: fix coverity overrun-call in mt76_get_txpower()
regulator: core: fix module refcount leak in set_supply()
clk: qcom: clk-krait: fix wrong div2 functions
hsr: Add a rcu-read lock to hsr_forward_skb().
net: hsr: generate supervision frame without HSR/PRP tag
hsr: Disable netpoll.
hsr: Synchronize sending frames to have always incremented outgoing seq nr.
hsr: Synchronize sequence number updates.
configfs: fix possible memory leak in configfs_create_dir()
regulator: core: fix resource leak in regulator_register()
hwmon: (jc42) Convert register access and caching to regmap/regcache
hwmon: (jc42) Restore the min/max/critical temperatures on resume
bpf, sockmap: fix race in sock_map_free()
ALSA: pcm: Set missing stop_operating flag at undoing trigger start
media: saa7164: fix missing pci_disable_device()
ALSA: mts64: fix possible null-ptr-deref in snd_mts64_interrupt
xprtrdma: Fix regbuf data not freed in rpcrdma_req_create()
SUNRPC: Fix missing release socket in rpc_sockname()
NFSv4.x: Fail client initialisation if state manager thread can't run
mmc: alcor: fix return value check of mmc_add_host()
mmc: moxart: fix return value check of mmc_add_host()
mmc: mxcmmc: fix return value check of mmc_add_host()
mmc: pxamci: fix return value check of mmc_add_host()
mmc: rtsx_usb_sdmmc: fix return value check of mmc_add_host()
mmc: toshsd: fix return value check of mmc_add_host()
mmc: vub300: fix return value check of mmc_add_host()
mmc: wmt-sdmmc: fix return value check of mmc_add_host()
mmc: atmel-mci: fix return value check of mmc_add_host()
mmc: omap_hsmmc: fix return value check of mmc_add_host()
mmc: meson-gx: fix return value check of mmc_add_host()
mmc: via-sdmmc: fix return value check of mmc_add_host()
mmc: wbsd: fix return value check of mmc_add_host()
mmc: mmci: fix return value check of mmc_add_host()
media: c8sectpfe: Add of_node_put() when breaking out of loop
media: coda: Add check for coda_iram_alloc
media: coda: Add check for kmalloc
clk: samsung: Fix memory leak in _samsung_clk_register_pll()
spi: spi-gpio: Don't set MOSI as an input if not 3WIRE mode
wifi: rtl8xxxu: Add __packed to struct rtl8723bu_c2h
wifi: rtl8xxxu: Fix the channel width reporting
wifi: brcmfmac: Fix error return code in brcmf_sdio_download_firmware()
blktrace: Fix output non-blktrace event when blk_classic option enabled
clk: socfpga: clk-pll: Remove unused variable 'rc'
clk: socfpga: use clk_hw_register for a5/c5
clk: socfpga: Fix memory leak in socfpga_gate_init()
net: vmw_vsock: vmci: Check memcpy_from_msg()
net: defxx: Fix missing err handling in dfx_init()
net: stmmac: selftests: fix potential memleak in stmmac_test_arpoffload()
drivers: net: qlcnic: Fix potential memory leak in qlcnic_sriov_init()
of: overlay: fix null pointer dereferencing in find_dup_cset_node_entry() and find_dup_cset_prop()
ethernet: s2io: don't call dev_kfree_skb() under spin_lock_irqsave()
net: farsync: Fix kmemleak when rmmods farsync
net/tunnel: wait until all sk_user_data reader finish before releasing the sock
net: apple: mace: don't call dev_kfree_skb() under spin_lock_irqsave()
net: apple: bmac: don't call dev_kfree_skb() under spin_lock_irqsave()
net: emaclite: don't call dev_kfree_skb() under spin_lock_irqsave()
net: ethernet: dnet: don't call dev_kfree_skb() under spin_lock_irqsave()
hamradio: don't call dev_kfree_skb() under spin_lock_irqsave()
net: amd: lance: don't call dev_kfree_skb() under spin_lock_irqsave()
net: amd-xgbe: Fix logic around active and passive cables
net: amd-xgbe: Check only the minimum speed for active/passive cables
can: tcan4x5x: Remove invalid write in clear_interrupts
net: lan9303: Fix read error execution path
ntb_netdev: Use dev_kfree_skb_any() in interrupt context
sctp: sysctl: make extra pointers netns aware
Bluetooth: btusb: don't call kfree_skb() under spin_lock_irqsave()
Bluetooth: hci_qca: don't call kfree_skb() under spin_lock_irqsave()
Bluetooth: hci_ll: don't call kfree_skb() under spin_lock_irqsave()
Bluetooth: hci_h5: don't call kfree_skb() under spin_lock_irqsave()
Bluetooth: hci_bcsp: don't call kfree_skb() under spin_lock_irqsave()
Bluetooth: hci_core: don't call kfree_skb() under spin_lock_irqsave()
Bluetooth: RFCOMM: don't call kfree_skb() under spin_lock_irqsave()
stmmac: fix potential division by 0
apparmor: fix a memleak in multi_transaction_new()
apparmor: fix lockdep warning when removing a namespace
apparmor: Fix abi check to include v8 abi
crypto: sun8i-ss - use dma_addr instead of u32
crypto: nitrox - avoid double free on error path in nitrox_sriov_init()
scsi: core: Fix a race between scsi_done() and scsi_timeout()
apparmor: Use pointer to struct aa_label for lbs_cred
PCI: dwc: Fix n_fts[] array overrun
RDMA/core: Fix order of nldev_exit call
PCI: pci-epf-test: Register notifier if only core_init_notifier is enabled
f2fs: Fix the race condition of resize flag between resizefs
crypto: rockchip - do not do custom power management
crypto: rockchip - do not store mode globally
crypto: rockchip - add fallback for cipher
crypto: rockchip - add fallback for ahash
crypto: rockchip - better handle cipher key
crypto: rockchip - remove non-aligned handling
crypto: rockchip - delete unneeded variable initialization
crypto: rockchip - rework by using crypto_engine
apparmor: Fix memleak in alloc_ns()
f2fs: fix normal discard process
RDMA/siw: Fix immediate work request flush to completion queue
RDMA/nldev: Return "-EAGAIN" if the cm_id isn't from expected port
RDMA/siw: Set defined status for work completion with undefined status
scsi: scsi_debug: Fix a warning in resp_write_scat()
crypto: ccree - Remove debugfs when platform_driver_register failed
crypto: cryptd - Use request context instead of stack for sub-request
crypto: hisilicon/qm - add missing pci_dev_put() in q_num_set()
RDMA/hns: Replacing 'dseg_len' by macros in fill_ext_sge_inl_data()
RDMA/hns: Fix ext_sge num error when post send
PCI: Check for alloc failure in pci_request_irq()
RDMA/hfi: Decrease PCI device reference count in error path
crypto: ccree - Make cc_debugfs_global_fini() available for module init function
RDMA/hns: fix memory leak in hns_roce_alloc_mr()
RDMA/rxe: Fix NULL-ptr-deref in rxe_qp_do_cleanup() when socket create failed
scsi: hpsa: Fix possible memory leak in hpsa_init_one()
crypto: tcrypt - Fix multibuffer skcipher speed test mem leak
padata: Always leave BHs disabled when running ->parallel()
padata: Fix list iterator in padata_do_serial()
scsi: mpt3sas: Fix possible resource leaks in mpt3sas_transport_port_add()
scsi: hpsa: Fix error handling in hpsa_add_sas_host()
scsi: hpsa: Fix possible memory leak in hpsa_add_sas_device()
scsi: scsi_debug: Fix a warning in resp_verify()
scsi: scsi_debug: Fix a warning in resp_report_zones()
scsi: fcoe: Fix possible name leak when device_register() fails
scsi: scsi_debug: Fix possible name leak in sdebug_add_host_helper()
scsi: ipr: Fix WARNING in ipr_init()
scsi: fcoe: Fix transport not detached when fcoe_if_init() fails
scsi: snic: Fix possible UAF in snic_tgt_create()
RDMA/nldev: Add checks for nla_nest_start() in fill_stat_counter_qps()
f2fs: avoid victim selection from previous victim section
RDMA/nldev: Fix failure to send large messages
crypto: amlogic - Remove kcalloc without check
crypto: omap-sham - Use pm_runtime_resume_and_get() in omap_sham_probe()
riscv/mm: add arch hook arch_clear_hugepage_flags
RDMA/hfi1: Fix error return code in parse_platform_config()
RDMA/srp: Fix error return code in srp_parse_options()
orangefs: Fix sysfs not cleanup when dev init failed
RDMA/hns: Fix PBL page MTR find
RDMA/hns: Fix page size cap from firmware
crypto: img-hash - Fix variable dereferenced before check 'hdev->req'
hwrng: amd - Fix PCI device refcount leak
hwrng: geode - Fix PCI device refcount leak
IB/IPoIB: Fix queue count inconsistency for PKEY child interfaces
drivers: dio: fix possible memory leak in dio_init()
serial: tegra: Read DMA status before terminating
class: fix possible memory leak in __class_register()
vfio: platform: Do not pass return buffer to ACPI _RST method
uio: uio_dmem_genirq: Fix missing unlock in irq configuration
uio: uio_dmem_genirq: Fix deadlock between irq config and handling
usb: fotg210-udc: Fix ages old endianness issues
staging: vme_user: Fix possible UAF in tsi148_dma_list_add
usb: typec: Check for ops->exit instead of ops->enter in altmode_exit
usb: typec: tcpci: fix of node refcount leak in tcpci_register_port()
usb: typec: tipd: Fix spurious fwnode_handle_put in error path
serial: amba-pl011: avoid SBSA UART accessing DMACR register
serial: pl011: Do not clear RX FIFO & RX interrupt in unthrottle.
serial: pch: Fix PCI device refcount leak in pch_request_dma()
tty: serial: clean up stop-tx part in altera_uart_tx_chars()
tty: serial: altera_uart_{r,t}x_chars() need only uart_port
serial: altera_uart: fix locking in polling mode
serial: sunsab: Fix error handling in sunsab_init()
test_firmware: fix memory leak in test_firmware_init()
misc: ocxl: fix possible name leak in ocxl_file_register_afu()
ocxl: fix pci device refcount leak when calling get_function_0()
misc: tifm: fix possible memory leak in tifm_7xx1_switch_media()
misc: sgi-gru: fix use-after-free error in gru_set_context_option, gru_fault and gru_handle_user_call_os
firmware: raspberrypi: fix possible memory leak in rpi_firmware_probe()
cxl: fix possible null-ptr-deref in cxl_guest_init_afu|adapter()
cxl: fix possible null-ptr-deref in cxl_pci_init_afu|adapter()
iio: temperature: ltc2983: make bulk write buffer DMA-safe
genirq: Add IRQF_NO_AUTOEN for request_irq/nmi()
iio:imu:adis: Use IRQF_NO_AUTOEN instead of irq request then disable
iio: adis: handle devices that cannot unmask the drdy pin
iio: adis: stylistic changes
iio:imu:adis: Move exports into IIO_ADISLIB namespace
iio: adis: add '__adis_enable_irq()' implementation
counter: stm32-lptimer-cnt: fix the check on arr and cmp registers update
usb: roles: fix of node refcount leak in usb_role_switch_is_parent()
usb: gadget: f_hid: optional SETUP/SET_REPORT mode
usb: gadget: f_hid: fix f_hidg lifetime vs cdev
usb: gadget: f_hid: fix refcount leak on error path
drivers: mcb: fix resource leak in mcb_probe()
mcb: mcb-parse: fix error handling in chameleon_parse_gdd()
chardev: fix error handling in cdev_device_add()
i2c: pxa-pci: fix missing pci_disable_device() on error in ce4100_i2c_probe
staging: rtl8192u: Fix use after free in ieee80211_rx()
staging: rtl8192e: Fix potential use-after-free in rtllib_rx_Monitor()
vme: Fix error not caught in fake_init()
gpiolib: Get rid of redundant 'else'
gpiolib: cdev: fix NULL-pointer dereferences
i2c: mux: reg: check return value after calling platform_get_resource()
i2c: ismt: Fix an out-of-bounds bug in ismt_access()
usb: storage: Add check for kcalloc
tracing/hist: Fix issue of losing command info in error_log
samples: vfio-mdev: Fix missing pci_disable_device() in mdpy_fb_probe()
thermal/drivers/imx8mm_thermal: Validate temperature range
fbdev: ssd1307fb: Drop optional dependency
fbdev: pm2fb: fix missing pci_disable_device()
fbdev: via: Fix error in via_core_init()
fbdev: vermilion: decrease reference count in error path
fbdev: uvesafb: Fixes an error handling path in uvesafb_probe()
HSI: omap_ssi_core: fix unbalanced pm_runtime_disable()
HSI: omap_ssi_core: fix possible memory leak in ssi_probe()
power: supply: fix residue sysfs file in error handle route of __power_supply_register()
perf trace: Return error if a system call doesn't exist
perf trace: Use macro RAW_SYSCALL_ARGS_NUM to replace number
perf trace: Handle failure when trace point folder is missing
perf symbol: correction while adjusting symbol
HSI: omap_ssi_core: Fix error handling in ssi_init()
power: supply: fix null pointer dereferencing in power_supply_get_battery_info
RDMA/siw: Fix pointer cast warning
iommu/sun50i: Fix reset release
iommu/sun50i: Consider all fault sources for reset
iommu/sun50i: Fix R/W permission check
iommu/sun50i: Fix flush size
phy: usb: s2 WoL wakeup_count not incremented for USB->Eth devices
include/uapi/linux/swab: Fix potentially missing __always_inline
pwm: tegra: Improve required rate calculation
dmaengine: idxd: Fix crc_val field for completion record
rtc: rtc-cmos: Do not check ACPI_FADT_LOW_POWER_S0
rtc: cmos: Fix event handler registration ordering issue
rtc: cmos: Fix wake alarm breakage
rtc: cmos: fix build on non-ACPI platforms
rtc: cmos: Call cmos_wake_setup() from cmos_do_probe()
rtc: cmos: Call rtc_wake_setup() from cmos_do_probe()
rtc: cmos: Eliminate forward declarations of some functions
rtc: cmos: Rename ACPI-related functions
rtc: cmos: Disable ACPI RTC event on removal
rtc: snvs: Allow a time difference on clock register read
rtc: pcf85063: Fix reading alarm
iommu/amd: Fix pci device refcount leak in ppr_notifier()
iommu/fsl_pamu: Fix resource leak in fsl_pamu_probe()
macintosh: fix possible memory leak in macio_add_one_device()
macintosh/macio-adb: check the return value of ioremap()
powerpc/52xx: Fix a resource leak in an error handling path
cxl: Fix refcount leak in cxl_calc_capp_routing
powerpc/xmon: Enable breakpoints on 8xx
powerpc/xmon: Fix -Wswitch-unreachable warning in bpt_cmds
powerpc/xive: add missing iounmap() in error path in xive_spapr_populate_irq_data()
kbuild: remove unneeded mkdir for external modules_install
kbuild: unify modules(_install) for in-tree and external modules
kbuild: refactor single builds of *.ko
powerpc/perf: callchain validate kernel stack pointer bounds
powerpc/83xx/mpc832x_rdb: call platform_device_put() in error case in of_fsl_spi_probe()
powerpc/hv-gpci: Fix hv_gpci event list
selftests/powerpc: Fix resource leaks
iommu/sun50i: Remove IOMMU_DOMAIN_IDENTITY
pwm: sifive: Call pwm_sifive_update_clock() while mutex is held
remoteproc: sysmon: fix memory leak in qcom_add_sysmon_subdev()
remoteproc: qcom_q6v5_pas: disable wakeup on probe fail or remove
remoteproc: qcom_q6v5_pas: detach power domains on remove
remoteproc: qcom_q6v5_pas: Fix missing of_node_put() in adsp_alloc_memory_region()
powerpc/eeh: Drop redundant spinlock initialization
powerpc/pseries/eeh: use correct API for error log size
netfilter: flowtable: really fix NAT IPv6 offload
rtc: st-lpc: Add missing clk_disable_unprepare in st_rtc_probe()
rtc: pic32: Move devm_rtc_allocate_device earlier in pic32_rtc_probe()
rtc: pcf85063: fix pcf85063_clkout_control
NFSD: Remove spurious cb_setup_err tracepoint
nfsd: under NFSv4.1, fix double svc_xprt_put on rpc_create failure
net: macsec: fix net device access prior to holding a lock
mISDN: hfcsusb: don't call dev_kfree_skb/kfree_skb() under spin_lock_irqsave()
mISDN: hfcpci: don't call dev_kfree_skb/kfree_skb() under spin_lock_irqsave()
mISDN: hfcmulti: don't call dev_kfree_skb/kfree_skb() under spin_lock_irqsave()
nfc: pn533: Clear nfc_target before being used
r6040: Fix kmemleak in probe and remove
net: switch to storing KCOV handle directly in sk_buff
net: add inline function skb_csum_is_sctp
net: igc: use skb_csum_is_sctp instead of protocol check
net: add a helper to avoid issues with HW TX timestamping and SO_TXTIME
igc: Enhance Qbv scheduling by using first flag bit
igc: Use strict cycles for Qbv scheduling
igc: Add checking for basetime less than zero
igc: recalculate Qbv end_time by considering cycle time
igc: Lift TAPRIO schedule restriction
igc: Set Qbv start_time and end_time to end_time if not being configured in GCL
rtc: mxc_v2: Add missing clk_disable_unprepare()
selftests: devlink: fix the fd redirect in dummy_reporter_test
openvswitch: Fix flow lookup to use unmasked key
skbuff: Account for tail adjustment during pull operations
mailbox: zynq-ipi: fix error handling while device_register() fails
net_sched: reject TCF_EM_SIMPLE case for complex ematch module
rxrpc: Fix missing unlock in rxrpc_do_sendmsg()
myri10ge: Fix an error handling path in myri10ge_probe()
net: stream: purge sk_error_queue in sk_stream_kill_queues()
rcu: Fix __this_cpu_read() lockdep warning in rcu_force_quiescent_state()
arm64: make is_ttbrX_addr() noinstr-safe
video: hyperv_fb: Avoid taking busy spinlock on panic path
x86/hyperv: Remove unregister syscore call from Hyper-V cleanup
binfmt_misc: fix shift-out-of-bounds in check_special_flags
fs: jfs: fix shift-out-of-bounds in dbAllocAG
udf: Avoid double brelse() in udf_rename()
fs: jfs: fix shift-out-of-bounds in dbDiscardAG
ACPICA: Fix error code path in acpi_ds_call_control_method()
nilfs2: fix shift-out-of-bounds/overflow in nilfs_sb2_bad_offset()
nilfs2: fix shift-out-of-bounds due to too large exponent of block size
acct: fix potential integer overflow in encode_comp_t()
hfs: fix OOB Read in __hfs_brec_find
drm/etnaviv: add missing quirks for GC300
brcmfmac: return error when getting invalid max_flowrings from dongle
wifi: ath9k: verify the expected usb_endpoints are present
wifi: ar5523: Fix use-after-free on ar5523_cmd() timed out
ASoC: codecs: rt298: Add quirk for KBL-R RVP platform
ipmi: fix memleak when unload ipmi driver
drm/amd/display: prevent memory leak
qed (gcc13): use u16 for fid to be big enough
bpf: make sure skb->len != 0 when redirecting to a tunneling device
net: ethernet: ti: Fix return type of netcp_ndo_start_xmit()
hamradio: baycom_epp: Fix return type of baycom_send_packet()
wifi: brcmfmac: Fix potential shift-out-of-bounds in brcmf_fw_alloc_request()
igb: Do not free q_vector unless new one was allocated
drm/amdgpu: Fix type of second parameter in trans_msg() callback
drm/amdgpu: Fix type of second parameter in odn_edit_dpm_table() callback
s390/ctcm: Fix return type of ctc{mp,}m_tx()
s390/netiucv: Fix return type of netiucv_tx()
s390/lcs: Fix return type of lcs_start_xmit()
drm/msm: Use drm_mode_copy()
drm/rockchip: Use drm_mode_copy()
drm/sti: Use drm_mode_copy()
drivers/md/md-bitmap: check the return value of md_bitmap_get_counter()
md/raid1: stop mdx_raid1 thread when raid1 array run failed
drm/amd/display: fix array index out of bound error in bios parser
net: add atomic_long_t to net_device_stats fields
mrp: introduce active flags to prevent UAF when applicant uninit
ppp: associate skb with a device at tx
bpf: Prevent decl_tag from being referenced in func_proto arg
ethtool: avoiding integer overflow in ethtool_phys_id()
media: dvb-frontends: fix leak of memory fw
media: dvbdev: adopts refcnt to avoid UAF
media: dvb-usb: fix memory leak in dvb_usb_adapter_init()
blk-mq: fix possible memleak when register 'hctx' failed
libbpf: Avoid enum forward-declarations in public API in C++ mode
regulator: core: fix use_count leakage when handling boot-on
mmc: f-sdh30: Add quirks for broken timeout clock capability
mmc: renesas_sdhi: better reset from HS400 mode
media: si470x: Fix use-after-free in si470x_int_in_callback()
clk: st: Fix memory leak in st_of_quadfs_setup()
hugetlbfs: fix null-ptr-deref in hugetlbfs_parse_param()
drm/fsl-dcu: Fix return type of fsl_dcu_drm_connector_mode_valid()
drm/sti: Fix return type of sti_{dvo,hda,hdmi}_connector_mode_valid()
orangefs: Fix kmemleak in orangefs_prepare_debugfs_help_string()
orangefs: Fix kmemleak in orangefs_{kernel,client}_debug_init()
hwmon: (jc42) Fix missing unlock on error in jc42_write()
ALSA/ASoC: hda: move/rename snd_hdac_ext_stop_streams to hdac_stream.c
ALSA: hda: add snd_hdac_stop_streams() helper
ASoC: Intel: Skylake: Fix driver hang during shutdown
ASoC: mediatek: mt8173-rt5650-rt5514: fix refcount leak in mt8173_rt5650_rt5514_dev_probe()
ASoC: audio-graph-card: fix refcount leak of cpu_ep in __graph_for_each_link()
ASoC: rockchip: pdm: Add missing clk_disable_unprepare() in rockchip_pdm_runtime_resume()
ASoC: wm8994: Fix potential deadlock
ASoC: rockchip: spdif: Add missing clk_disable_unprepare() in rk_spdif_runtime_resume()
ASoC: rt5670: Remove unbalanced pm_runtime_put()
LoadPin: Ignore the "contents" argument of the LSM hooks
pstore: Switch pmsg_lock to an rt_mutex to avoid priority inversion
perf debug: Set debug_peo_args and redirect_to_stderr variable to correct values in perf_quiet_option()
afs: Fix lost servers_outstanding count
pstore: Make sure CONFIG_PSTORE_PMSG selects CONFIG_RT_MUTEXES
ima: Simplify ima_lsm_copy_rule
ALSA: usb-audio: add the quirk for KT0206 device
ALSA: hda/realtek: Add quirk for Lenovo TianYi510Pro-14IOB
ALSA: hda/hdmi: Add HP Device 0x8711 to force connect list
usb: dwc3: Fix race between dwc3_set_mode and __dwc3_set_mode
usb: dwc3: core: defer probe on ulpi_read_id timeout
HID: wacom: Ensure bootloader PID is usable in hidraw mode
HID: mcp2221: don't connect hidraw
reiserfs: Add missing calls to reiserfs_security_free()
iio: adc: ad_sigma_delta: do not use internal iio_dev lock
iio: adc128s052: add proper .data members in adc128_of_match table
regulator: core: fix deadlock on regulator enable
gcov: add support for checksum field
ovl: fix use inode directly in rcu-walk mode
media: dvbdev: fix build warning due to comments
media: dvbdev: fix refcnt bug
pwm: tegra: Fix 32 bit build
usb: dwc3: qcom: Fix memory leak in dwc3_qcom_interconnect_init
cifs: fix oops during encryption
nvme-pci: fix doorbell buffer value endianness
nvme-pci: fix mempool alloc size
nvme-pci: fix page size checks
ata: ahci: Fix PCS quirk application for suspend
nvme: fix the NVME_CMD_EFFECTS_CSE_MASK definition
nvmet: don't defer passthrough commands with trivial effects to the workqueue
objtool: Fix SEGFAULT
powerpc/rtas: avoid device tree lookups in rtas_os_term()
powerpc/rtas: avoid scheduling in rtas_os_term()
HID: multitouch: fix Asus ExpertBook P2 P2451FA trackpoint
HID: plantronics: Additional PIDs for double volume key presses quirk
pstore/zone: Use GFP_ATOMIC to allocate zone buffer
hfsplus: fix bug causing custom uid and gid being unable to be assigned with mount
binfmt: Fix error return code in load_elf_fdpic_binary()
ovl: Use ovl mounter's fsuid and fsgid in ovl_link()
ALSA: line6: correct midi status byte when receiving data from podxt
ALSA: line6: fix stack overflow in line6_midi_transmit
pnode: terminate at peers of source
md: fix a crash in mempool_free
mm, compaction: fix fast_isolate_around() to stay within boundaries
f2fs: should put a page when checking the summary info
mmc: vub300: fix warning - do not call blocking ops when !TASK_RUNNING
tpm: acpi: Call acpi_put_table() to fix memory leak
tpm: tpm_crb: Add the missed acpi_put_table() to fix memory leak
tpm: tpm_tis: Add the missed acpi_put_table() to fix memory leak
SUNRPC: Don't leak netobj memory when gss_read_proxy_verf() fails
kcsan: Instrument memcpy/memset/memmove with newer Clang
ASoC: Intel/SOF: use set_stream() instead of set_tdm_slots() for HDAudio
ASoC/SoundWire: dai: expand 'stream' concept beyond SoundWire
net/mlx5e: Fix nullptr in mlx5e_tc_add_fdb_flow()
wifi: rtlwifi: remove always-true condition pointed out by GCC 12
wifi: rtlwifi: 8192de: correct checking of IQK reload
torture: Exclude "NOHZ tick-stop error" from fatal errors
rcu: Prevent lockdep-RCU splats on lock acquisition/release
net/af_packet: add VLAN support for AF_PACKET SOCK_RAW GSO
net/af_packet: make sure to pull mac header
media: stv0288: use explicitly signed char
soc: qcom: Select REMAP_MMIO for LLCC driver
kest.pl: Fix grub2 menu handling for rebooting
ktest.pl minconfig: Unset configs instead of just removing them
jbd2: use the correct print format
arm64: dts: qcom: sdm845-db845c: correct SPI2 pins drive strength
mmc: sdhci-sprd: Disable CLK_AUTO when the clock is less than 400K
btrfs: fix resolving backrefs for inline extent followed by prealloc
ARM: ux500: do not directly dereference __iomem
arm64: dts: qcom: sdm850-lenovo-yoga-c630: correct I2C12 pins drive strength
selftests: Use optional USERCFLAGS and USERLDFLAGS
PM/devfreq: governor: Add a private governor_data for governor
cpufreq: Init completion before kobject_init_and_add()
ALSA: patch_realtek: Fix Dell Inspiron Plus 16
ALSA: hda/realtek: Apply dual codec fixup for Dell Latitude laptops
dm cache: Fix ABBA deadlock between shrink_slab and dm_cache_metadata_abort
dm thin: Fix ABBA deadlock between shrink_slab and dm_pool_abort_metadata
dm thin: Use last transaction's pmd->root when commit failed
dm thin: resume even if in FAIL mode
dm thin: Fix UAF in run_timer_softirq()
dm integrity: Fix UAF in dm_integrity_dtr()
dm clone: Fix UAF in clone_dtr()
dm cache: Fix UAF in destroy()
dm cache: set needs_check flag after aborting metadata
tracing/hist: Fix out-of-bound write on 'action_data.var_ref_idx'
perf/core: Call LSM hook after copying perf_event_attr
KVM: nVMX: Inject #GP, not #UD, if "generic" VMXON CR0/CR4 check fails
x86/microcode/intel: Do not retry microcode reloading on the APs
ftrace/x86: Add back ftrace_expected for ftrace bug reports
x86/kprobes: Fix kprobes instruction boudary check with CONFIG_RETHUNK
tracing/hist: Fix wrong return value in parse_action_params()
tracing: Fix infinite loop in tracing_read_pipe on overflowed print_trace_line
staging: media: tegra-video: fix chan->mipi value on error
ARM: 9256/1: NWFPE: avoid compiler-generated __aeabi_uldivmod
media: dvb-core: Fix double free in dvb_register_device()
media: dvb-core: Fix UAF due to refcount races at releasing
cifs: fix confusing debug message
cifs: fix missing display of three mount options
rtc: ds1347: fix value written to century register
md/bitmap: Fix bitmap chunk size overflow issues
efi: Add iMac Pro 2017 to uefi skip cert quirk
wifi: wilc1000: sdio: fix module autoloading
ASoC: jz4740-i2s: Handle independent FIFO flush bits
ipmi: fix long wait in unload when IPMI disconnect
mtd: spi-nor: Check for zero erase size in spi_nor_find_best_erase_type()
ima: Fix a potential NULL pointer access in ima_restore_measurement_list
ipmi: fix use after free in _ipmi_destroy_user()
PCI: Fix pci_device_is_present() for VFs by checking PF
PCI/sysfs: Fix double free in error path
crypto: n2 - add missing hash statesize
driver core: Fix bus_type.match() error handling in __driver_attach()
iommu/amd: Fix ivrs_acpihid cmdline parsing code
remoteproc: core: Do pm_relax when in RPROC_OFFLINE state
parisc: led: Fix potential null-ptr-deref in start_task()
device_cgroup: Roll back to original exceptions after copy failure
drm/connector: send hotplug uevent on connector cleanup
drm/vmwgfx: Validate the box size for the snooped cursor
drm/i915/dsi: fix VBT send packet port selection for dual link DSI
drm/ingenic: Fix missing platform_driver_unregister() call in ingenic_drm_init()
ext4: silence the warning when evicting inode with dioread_nolock
ext4: add inode table check in __ext4_get_inode_loc to aovid possible infinite loop
ext4: fix use-after-free in ext4_orphan_cleanup
ext4: fix undefined behavior in bit shift for ext4_check_flag_values
ext4: add EXT4_IGET_BAD flag to prevent unexpected bad inode
ext4: add helper to check quota inums
ext4: fix bug_on in __es_tree_search caused by bad quota inode
ext4: fix reserved cluster accounting in __es_remove_extent()
ext4: check and assert if marking an no_delete evicting inode dirty
ext4: fix bug_on in __es_tree_search caused by bad boot loader inode
ext4: init quota for 'old.inode' in 'ext4_rename'
ext4: fix delayed allocation bug in ext4_clu_mapped for bigalloc + inline
ext4: fix corruption when online resizing a 1K bigalloc fs
ext4: fix error code return to user-space in ext4_get_branch()
ext4: avoid BUG_ON when creating xattrs
ext4: fix inode leak in ext4_xattr_inode_create() on an error path
ext4: initialize quota before expanding inode in setproject ioctl
ext4: avoid unaccounted block allocation when expanding inode
ext4: allocate extended attribute value in vmalloc area
drm/amdgpu: handle polaris10/11 overlap asics (v2)
drm/amdgpu: make display pinning more flexible (v2)
ARM: renumber bits related to _TIF_WORK_MASK
perf/x86/intel/uncore: Generalize I/O stacks to PMON mapping procedure
perf/x86/intel/uncore: Clear attr_update properly
btrfs: replace strncpy() with strscpy()
x86/mce: Get rid of msr_ops
x86/MCE/AMD: Clear DFR errors found in THR handler
media: s5p-mfc: Fix to handle reference queue during finishing
media: s5p-mfc: Clear workbit to handle error condition
media: s5p-mfc: Fix in register read and write for H264
perf probe: Use dwarf_attr_integrate as generic DWARF attr accessor
perf probe: Fix to get the DW_AT_decl_file and DW_AT_call_file as unsinged data
x86/kprobes: Convert to insn_decode()
x86/kprobes: Fix optprobe optimization check with CONFIG_RETHUNK
staging: media: tegra-video: fix device_node use after free
ravb: Fix "failed to switch device to config mode" message during unbind
riscv/stacktrace: Fix stack output without ra on the stack top
riscv: stacktrace: Fixup ftrace_graph_ret_addr retp argument
ext4: goto right label 'failed_mount3a'
ext4: correct inconsistent error msg in nojournal mode
mm/highmem: Lift memcpy_[to|from]_page to core
ext4: use memcpy_to_page() in pagecache_write()
fs: ext4: initialize fsdata in pagecache_write()
ext4: move functions in super.c
ext4: simplify ext4 error translation
ext4: fix various seppling typos
ext4: fix leaking uninitialized memory in fast-commit journal
ext4: use kmemdup() to replace kmalloc + memcpy
mbcache: don't reclaim used entries
mbcache: add functions to delete entry if unused
ext4: remove EA inode entry from mbcache on inode eviction
ext4: unindent codeblock in ext4_xattr_block_set()
ext4: fix race when reusing xattr blocks
mbcache: automatically delete entries from cache on freeing
ext4: fix deadlock due to mbcache entry corruption
SUNRPC: ensure the matching upcall is in-flight upon downcall
bpf: pull before calling skb_postpull_rcsum()
drm/panfrost: Fix GEM handle creation ref-counting
vmxnet3: correctly report csum_level for encapsulated packet
veth: Fix race with AF_XDP exposing old or uninitialized descriptors
nfsd: shut down the NFSv4 state objects before the filecache
net: hns3: add interrupts re-initialization while doing VF FLR
net: sched: fix memory leak in tcindex_set_parms
qlcnic: prevent ->dcb use-after-free on qlcnic_dcb_enable() failure
nfc: Fix potential resource leaks
vhost/vsock: Fix error handling in vhost_vsock_init()
vringh: fix range used in iotlb_translate()
vhost: fix range used in translate_desc()
net/mlx5: Add forgotten cleanup calls into mlx5_init_once() error path
net/mlx5: Avoid recovery in probe flows
net/mlx5e: IPoIB, Don't allow CQE compression to be turned on by default
net/mlx5e: Fix hw mtu initializing at XDP SQ allocation
net: amd-xgbe: add missed tasklet_kill
net: phy: xgmiitorgmii: Fix refcount leak in xgmiitorgmii_probe
RDMA/mlx5: Fix validation of max_rd_atomic caps for DC
drm/meson: Reduce the FIFO lines held when AFBC is not used
filelock: new helper: vfs_inode_has_locks
ceph: switch to vfs_inode_has_locks() to fix file lock bug
gpio: sifive: Fix refcount leak in sifive_gpio_probe
net: sched: atm: dont intepret cls results when asked to drop
net: sched: cbq: dont intepret cls results when asked to drop
netfilter: ipset: fix hash:net,port,net hang with /0 subnet
netfilter: ipset: Rework long task execution when adding/deleting entries
perf tools: Fix resources leak in perf_data__open_dir()
drivers/net/bonding/bond_3ad: return when there's no aggregator
usb: rndis_host: Secure rndis_query check against int overflow
drm/i915: unpin on error in intel_vgpu_shadow_mm_pin()
caif: fix memory leak in cfctrl_linkup_request()
udf: Fix extension of the last extent in the file
ASoC: Intel: bytcr_rt5640: Add quirk for the Advantech MICA-071 tablet
nvme: fix multipath crash caused by flush request when blktrace is enabled
x86/bugs: Flush IBP in ib_prctl_set()
nfsd: fix handling of readdir in v4root vs. mount upcall timeout
fbdev: matroxfb: G200eW: Increase max memory from 1 MB to 16 MB
riscv: uaccess: fix type of 0 variable on error in get_user()
drm/i915/gvt: fix gvt debugfs destroy
drm/i915/gvt: fix vgpu debugfs clean in remove
ext4: don't allow journal inode to have encrypt flag
selftests: set the BUILD variable to absolute path
hfs/hfsplus: use WARN_ON for sanity check
hfs/hfsplus: avoid WARN_ON() for sanity check, use proper error handling
mbcache: Avoid nesting of cache->c_list_lock under bit locks
efi: random: combine bootloader provided RNG seed with RNG protocol output
io_uring: Fix unsigned 'res' comparison with zero in io_fixup_rw_res()
parisc: Align parisc MADV_XXX constants with all other architectures
ext4: disable fast-commit of encrypted dir operations
ext4: don't set up encryption key during jbd2 transaction
fsl_lpuart: Don't enable interrupts too early
serial: fixup backport of "serial: Deassert Transmit Enable on probe in driver-specific way"
mptcp: mark ops structures as ro_after_init
mptcp: remove MPTCP 'ifdef' in TCP SYN cookies
mptcp: dedicated request sock for subflow in v6
mptcp: use proper req destructor for IPv6
net: sched: disallow noqueue for qdisc classes
net/ulp: prevent ULP without clone op from entering the LISTEN status
ALSA: pcm: Move rwsem lock inside snd_ctl_elem_read to prevent UAF
ALSA: hda/hdmi: Add a HP device 0x8715 to force connect list
ALSA: hda - Enable headset mic on another Dell laptop with ALC3254
Linux 5.10.163
Change-Id: I9026971760be8484f1e1fa607f9f91243cc87785
Signed-off-by: Greg Kroah-Hartman <gregkh@google.com>
2934 lines
71 KiB
C
2934 lines
71 KiB
C
/* CPU control.
|
|
* (C) 2001, 2002, 2003, 2004 Rusty Russell
|
|
*
|
|
* This code is licenced under the GPL.
|
|
*/
|
|
#include <linux/sched/mm.h>
|
|
#include <linux/proc_fs.h>
|
|
#include <linux/smp.h>
|
|
#include <linux/init.h>
|
|
#include <linux/notifier.h>
|
|
#include <linux/sched/signal.h>
|
|
#include <linux/sched/hotplug.h>
|
|
#include <linux/sched/isolation.h>
|
|
#include <linux/sched/task.h>
|
|
#include <linux/sched/smt.h>
|
|
#include <linux/unistd.h>
|
|
#include <linux/cpu.h>
|
|
#include <linux/oom.h>
|
|
#include <linux/rcupdate.h>
|
|
#include <linux/export.h>
|
|
#include <linux/bug.h>
|
|
#include <linux/kthread.h>
|
|
#include <linux/stop_machine.h>
|
|
#include <linux/mutex.h>
|
|
#include <linux/gfp.h>
|
|
#include <linux/suspend.h>
|
|
#include <linux/lockdep.h>
|
|
#include <linux/tick.h>
|
|
#include <linux/irq.h>
|
|
#include <linux/nmi.h>
|
|
#include <linux/smpboot.h>
|
|
#include <linux/relay.h>
|
|
#include <linux/slab.h>
|
|
#include <linux/scs.h>
|
|
#include <linux/percpu-rwsem.h>
|
|
#include <linux/cpuset.h>
|
|
#include <linux/random.h>
|
|
#include <uapi/linux/sched/types.h>
|
|
|
|
#include <trace/events/power.h>
|
|
#define CREATE_TRACE_POINTS
|
|
#include <trace/events/cpuhp.h>
|
|
|
|
#undef CREATE_TRACE_POINTS
|
|
#include <trace/hooks/sched.h>
|
|
#include <trace/hooks/cpu.h>
|
|
|
|
#include "smpboot.h"
|
|
|
|
/**
|
|
* cpuhp_cpu_state - Per cpu hotplug state storage
|
|
* @state: The current cpu state
|
|
* @target: The target state
|
|
* @thread: Pointer to the hotplug thread
|
|
* @should_run: Thread should execute
|
|
* @rollback: Perform a rollback
|
|
* @single: Single callback invocation
|
|
* @bringup: Single callback bringup or teardown selector
|
|
* @cb_state: The state for a single callback (install/uninstall)
|
|
* @result: Result of the operation
|
|
* @done_up: Signal completion to the issuer of the task for cpu-up
|
|
* @done_down: Signal completion to the issuer of the task for cpu-down
|
|
*/
|
|
struct cpuhp_cpu_state {
|
|
enum cpuhp_state state;
|
|
enum cpuhp_state target;
|
|
enum cpuhp_state fail;
|
|
#ifdef CONFIG_SMP
|
|
struct task_struct *thread;
|
|
bool should_run;
|
|
bool rollback;
|
|
bool single;
|
|
bool bringup;
|
|
struct hlist_node *node;
|
|
struct hlist_node *last;
|
|
enum cpuhp_state cb_state;
|
|
int result;
|
|
struct completion done_up;
|
|
struct completion done_down;
|
|
#endif
|
|
};
|
|
|
|
/* Per-CPU hotplug bookkeeping; no failure injection state is armed initially. */
static DEFINE_PER_CPU(struct cpuhp_cpu_state, cpuhp_state) = {
	.fail	= CPUHP_INVALID,
};
|
|
|
|
#ifdef CONFIG_SMP
/* Mask consulted by cpu_smt_allowed() to see whether a CPU was booted before. */
cpumask_t cpus_booted_once_mask;
#endif
|
|
|
|
#if defined(CONFIG_LOCKDEP) && defined(CONFIG_SMP)
|
|
static struct lockdep_map cpuhp_state_up_map =
|
|
STATIC_LOCKDEP_MAP_INIT("cpuhp_state-up", &cpuhp_state_up_map);
|
|
static struct lockdep_map cpuhp_state_down_map =
|
|
STATIC_LOCKDEP_MAP_INIT("cpuhp_state-down", &cpuhp_state_down_map);
|
|
|
|
|
|
static inline void cpuhp_lock_acquire(bool bringup)
|
|
{
|
|
lock_map_acquire(bringup ? &cpuhp_state_up_map : &cpuhp_state_down_map);
|
|
}
|
|
|
|
static inline void cpuhp_lock_release(bool bringup)
|
|
{
|
|
lock_map_release(bringup ? &cpuhp_state_up_map : &cpuhp_state_down_map);
|
|
}
|
|
#else
|
|
|
|
static inline void cpuhp_lock_acquire(bool bringup) { }
|
|
static inline void cpuhp_lock_release(bool bringup) { }
|
|
|
|
#endif
|
|
|
|
/**
|
|
* cpuhp_step - Hotplug state machine step
|
|
* @name: Name of the step
|
|
* @startup: Startup function of the step
|
|
* @teardown: Teardown function of the step
|
|
* @cant_stop: Bringup/teardown can't be stopped at this step
|
|
*/
|
|
struct cpuhp_step {
|
|
const char *name;
|
|
union {
|
|
int (*single)(unsigned int cpu);
|
|
int (*multi)(unsigned int cpu,
|
|
struct hlist_node *node);
|
|
} startup;
|
|
union {
|
|
int (*single)(unsigned int cpu);
|
|
int (*multi)(unsigned int cpu,
|
|
struct hlist_node *node);
|
|
} teardown;
|
|
struct hlist_head list;
|
|
bool cant_stop;
|
|
bool multi_instance;
|
|
};
|
|
|
|
static DEFINE_MUTEX(cpuhp_state_mutex);
|
|
static struct cpuhp_step cpuhp_hp_states[];
|
|
|
|
static struct cpuhp_step *cpuhp_get_step(enum cpuhp_state state)
|
|
{
|
|
return cpuhp_hp_states + state;
|
|
}
|
|
|
|
/**
|
|
* cpuhp_invoke_callback _ Invoke the callbacks for a given state
|
|
* @cpu: The cpu for which the callback should be invoked
|
|
* @state: The state to do callbacks for
|
|
* @bringup: True if the bringup callback should be invoked
|
|
* @node: For multi-instance, do a single entry callback for install/remove
|
|
* @lastp: For multi-instance rollback, remember how far we got
|
|
*
|
|
* Called from cpu hotplug and from the state register machinery.
|
|
*/
|
|
static int cpuhp_invoke_callback(unsigned int cpu, enum cpuhp_state state,
|
|
bool bringup, struct hlist_node *node,
|
|
struct hlist_node **lastp)
|
|
{
|
|
struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
|
|
struct cpuhp_step *step = cpuhp_get_step(state);
|
|
int (*cbm)(unsigned int cpu, struct hlist_node *node);
|
|
int (*cb)(unsigned int cpu);
|
|
int ret, cnt;
|
|
|
|
if (st->fail == state) {
|
|
st->fail = CPUHP_INVALID;
|
|
|
|
if (!(bringup ? step->startup.single : step->teardown.single))
|
|
return 0;
|
|
|
|
return -EAGAIN;
|
|
}
|
|
|
|
if (!step->multi_instance) {
|
|
WARN_ON_ONCE(lastp && *lastp);
|
|
cb = bringup ? step->startup.single : step->teardown.single;
|
|
if (!cb)
|
|
return 0;
|
|
trace_cpuhp_enter(cpu, st->target, state, cb);
|
|
ret = cb(cpu);
|
|
trace_cpuhp_exit(cpu, st->state, state, ret);
|
|
return ret;
|
|
}
|
|
cbm = bringup ? step->startup.multi : step->teardown.multi;
|
|
if (!cbm)
|
|
return 0;
|
|
|
|
/* Single invocation for instance add/remove */
|
|
if (node) {
|
|
WARN_ON_ONCE(lastp && *lastp);
|
|
trace_cpuhp_multi_enter(cpu, st->target, state, cbm, node);
|
|
ret = cbm(cpu, node);
|
|
trace_cpuhp_exit(cpu, st->state, state, ret);
|
|
return ret;
|
|
}
|
|
|
|
/* State transition. Invoke on all instances */
|
|
cnt = 0;
|
|
hlist_for_each(node, &step->list) {
|
|
if (lastp && node == *lastp)
|
|
break;
|
|
|
|
trace_cpuhp_multi_enter(cpu, st->target, state, cbm, node);
|
|
ret = cbm(cpu, node);
|
|
trace_cpuhp_exit(cpu, st->state, state, ret);
|
|
if (ret) {
|
|
if (!lastp)
|
|
goto err;
|
|
|
|
*lastp = node;
|
|
return ret;
|
|
}
|
|
cnt++;
|
|
}
|
|
if (lastp)
|
|
*lastp = NULL;
|
|
return 0;
|
|
err:
|
|
/* Rollback the instances if one failed */
|
|
cbm = !bringup ? step->startup.multi : step->teardown.multi;
|
|
if (!cbm)
|
|
return ret;
|
|
|
|
hlist_for_each(node, &step->list) {
|
|
if (!cnt--)
|
|
break;
|
|
|
|
trace_cpuhp_multi_enter(cpu, st->target, state, cbm, node);
|
|
ret = cbm(cpu, node);
|
|
trace_cpuhp_exit(cpu, st->state, state, ret);
|
|
/*
|
|
* Rollback must not fail,
|
|
*/
|
|
WARN_ON_ONCE(ret);
|
|
}
|
|
return ret;
|
|
}
|
|
|
|
#ifdef CONFIG_SMP
|
|
static bool cpuhp_is_ap_state(enum cpuhp_state state)
|
|
{
|
|
/*
|
|
* The extra check for CPUHP_TEARDOWN_CPU is only for documentation
|
|
* purposes as that state is handled explicitly in cpu_down.
|
|
*/
|
|
return state > CPUHP_BRINGUP_CPU && state != CPUHP_TEARDOWN_CPU;
|
|
}
|
|
|
|
/* Block on the completion which matches the hotplug direction. */
static inline void wait_for_ap_thread(struct cpuhp_cpu_state *st, bool bringup)
{
	if (bringup)
		wait_for_completion(&st->done_up);
	else
		wait_for_completion(&st->done_down);
}
|
|
|
|
/* Fire the completion which matches the hotplug direction. */
static inline void complete_ap_thread(struct cpuhp_cpu_state *st, bool bringup)
{
	if (bringup)
		complete(&st->done_up);
	else
		complete(&st->done_down);
}
|
|
|
|
/*
|
|
* The former STARTING/DYING states, ran with IRQs disabled and must not fail.
|
|
*/
|
|
static bool cpuhp_is_atomic_state(enum cpuhp_state state)
|
|
{
|
|
return CPUHP_AP_IDLE_DEAD <= state && state < CPUHP_AP_ONLINE;
|
|
}
|
|
|
|
/* Serializes the updates to cpu_online_mask, cpu_present_mask */
|
|
static DEFINE_MUTEX(cpu_add_remove_lock);
|
|
bool cpuhp_tasks_frozen;
|
|
EXPORT_SYMBOL_GPL(cpuhp_tasks_frozen);
|
|
|
|
/*
|
|
* The following two APIs (cpu_maps_update_begin/done) must be used when
|
|
* attempting to serialize the updates to cpu_online_mask & cpu_present_mask.
|
|
*/
|
|
void cpu_maps_update_begin(void)
|
|
{
|
|
mutex_lock(&cpu_add_remove_lock);
|
|
}
|
|
EXPORT_SYMBOL_GPL(cpu_maps_update_begin);
|
|
|
|
void cpu_maps_update_done(void)
|
|
{
|
|
mutex_unlock(&cpu_add_remove_lock);
|
|
}
|
|
EXPORT_SYMBOL_GPL(cpu_maps_update_done);
|
|
|
|
/*
 * Non-zero makes cpu_up and cpu_down return -EBUSY without doing anything.
 * Only to be modified with cpu_add_remove_lock held.
 */
static int cpu_hotplug_disabled;
|
|
|
|
#ifdef CONFIG_HOTPLUG_CPU
|
|
|
|
DEFINE_STATIC_PERCPU_RWSEM(cpu_hotplug_lock);
|
|
|
|
void cpus_read_lock(void)
|
|
{
|
|
percpu_down_read(&cpu_hotplug_lock);
|
|
}
|
|
EXPORT_SYMBOL_GPL(cpus_read_lock);
|
|
|
|
int cpus_read_trylock(void)
|
|
{
|
|
return percpu_down_read_trylock(&cpu_hotplug_lock);
|
|
}
|
|
EXPORT_SYMBOL_GPL(cpus_read_trylock);
|
|
|
|
void cpus_read_unlock(void)
|
|
{
|
|
percpu_up_read(&cpu_hotplug_lock);
|
|
}
|
|
EXPORT_SYMBOL_GPL(cpus_read_unlock);
|
|
|
|
void cpus_write_lock(void)
|
|
{
|
|
percpu_down_write(&cpu_hotplug_lock);
|
|
}
|
|
|
|
void cpus_write_unlock(void)
|
|
{
|
|
percpu_up_write(&cpu_hotplug_lock);
|
|
}
|
|
|
|
void lockdep_assert_cpus_held(void)
|
|
{
|
|
/*
|
|
* We can't have hotplug operations before userspace starts running,
|
|
* and some init codepaths will knowingly not take the hotplug lock.
|
|
* This is all valid, so mute lockdep until it makes sense to report
|
|
* unheld locks.
|
|
*/
|
|
if (system_state < SYSTEM_RUNNING)
|
|
return;
|
|
|
|
percpu_rwsem_assert_held(&cpu_hotplug_lock);
|
|
}
|
|
|
|
static void lockdep_acquire_cpus_lock(void)
|
|
{
|
|
rwsem_acquire(&cpu_hotplug_lock.dep_map, 0, 0, _THIS_IP_);
|
|
}
|
|
|
|
static void lockdep_release_cpus_lock(void)
|
|
{
|
|
rwsem_release(&cpu_hotplug_lock.dep_map, _THIS_IP_);
|
|
}
|
|
|
|
/*
|
|
* Wait for currently running CPU hotplug operations to complete (if any) and
|
|
* disable future CPU hotplug (from sysfs). The 'cpu_add_remove_lock' protects
|
|
* the 'cpu_hotplug_disabled' flag. The same lock is also acquired by the
|
|
* hotplug path before performing hotplug operations. So acquiring that lock
|
|
* guarantees mutual exclusion from any currently running hotplug operations.
|
|
*/
|
|
void cpu_hotplug_disable(void)
|
|
{
|
|
cpu_maps_update_begin();
|
|
cpu_hotplug_disabled++;
|
|
cpu_maps_update_done();
|
|
}
|
|
EXPORT_SYMBOL_GPL(cpu_hotplug_disable);
|
|
|
|
static void __cpu_hotplug_enable(void)
|
|
{
|
|
if (WARN_ONCE(!cpu_hotplug_disabled, "Unbalanced cpu hotplug enable\n"))
|
|
return;
|
|
cpu_hotplug_disabled--;
|
|
}
|
|
|
|
/* Undo one cpu_hotplug_disable() call under cpu_add_remove_lock. */
void cpu_hotplug_enable(void)
{
	cpu_maps_update_begin();
	__cpu_hotplug_enable();
	cpu_maps_update_done();
}
EXPORT_SYMBOL_GPL(cpu_hotplug_enable);
|
|
|
|
#else
|
|
|
|
/* No cpu_hotplug_lock without CONFIG_HOTPLUG_CPU, so nothing to annotate. */
static void lockdep_acquire_cpus_lock(void) { }

static void lockdep_release_cpus_lock(void) { }
|
|
|
|
#endif /* CONFIG_HOTPLUG_CPU */
|
|
|
|
/*
|
|
* Architectures that need SMT-specific errata handling during SMT hotplug
|
|
* should override this.
|
|
*/
|
|
void __weak arch_smt_update(void) { }
|
|
|
|
#ifdef CONFIG_HOTPLUG_SMT
|
|
enum cpuhp_smt_control cpu_smt_control __read_mostly = CPU_SMT_ENABLED;
|
|
|
|
void __init cpu_smt_disable(bool force)
|
|
{
|
|
if (!cpu_smt_possible())
|
|
return;
|
|
|
|
if (force) {
|
|
pr_info("SMT: Force disabled\n");
|
|
cpu_smt_control = CPU_SMT_FORCE_DISABLED;
|
|
} else {
|
|
pr_info("SMT: disabled\n");
|
|
cpu_smt_control = CPU_SMT_DISABLED;
|
|
}
|
|
}
|
|
|
|
/*
|
|
* The decision whether SMT is supported can only be done after the full
|
|
* CPU identification. Called from architecture code.
|
|
*/
|
|
void __init cpu_smt_check_topology(void)
|
|
{
|
|
if (!topology_smt_supported())
|
|
cpu_smt_control = CPU_SMT_NOT_SUPPORTED;
|
|
}
|
|
|
|
static int __init smt_cmdline_disable(char *str)
|
|
{
|
|
cpu_smt_disable(str && !strcmp(str, "force"));
|
|
return 0;
|
|
}
|
|
early_param("nosmt", smt_cmdline_disable);
|
|
|
|
static inline bool cpu_smt_allowed(unsigned int cpu)
|
|
{
|
|
if (cpu_smt_control == CPU_SMT_ENABLED)
|
|
return true;
|
|
|
|
if (topology_is_primary_thread(cpu))
|
|
return true;
|
|
|
|
/*
|
|
* On x86 it's required to boot all logical CPUs at least once so
|
|
* that the init code can get a chance to set CR4.MCE on each
|
|
* CPU. Otherwise, a broadcasted MCE observing CR4.MCE=0b on any
|
|
* core will shutdown the machine.
|
|
*/
|
|
return !cpumask_test_cpu(cpu, &cpus_booted_once_mask);
|
|
}
|
|
|
|
/* Returns true if SMT is not supported of forcefully (irreversibly) disabled */
|
|
bool cpu_smt_possible(void)
|
|
{
|
|
return cpu_smt_control != CPU_SMT_FORCE_DISABLED &&
|
|
cpu_smt_control != CPU_SMT_NOT_SUPPORTED;
|
|
}
|
|
EXPORT_SYMBOL_GPL(cpu_smt_possible);
|
|
#else
|
|
/* Without CONFIG_HOTPLUG_SMT every CPU is allowed. */
static inline bool cpu_smt_allowed(unsigned int cpu)
{
	return true;
}
|
|
#endif
|
|
|
|
static inline enum cpuhp_state
|
|
cpuhp_set_state(struct cpuhp_cpu_state *st, enum cpuhp_state target)
|
|
{
|
|
enum cpuhp_state prev_state = st->state;
|
|
|
|
st->rollback = false;
|
|
st->last = NULL;
|
|
|
|
st->target = target;
|
|
st->single = false;
|
|
st->bringup = st->state < target;
|
|
|
|
return prev_state;
|
|
}
|
|
|
|
static inline void
|
|
cpuhp_reset_state(struct cpuhp_cpu_state *st, enum cpuhp_state prev_state)
|
|
{
|
|
st->rollback = true;
|
|
|
|
/*
|
|
* If we have st->last we need to undo partial multi_instance of this
|
|
* state first. Otherwise start undo at the previous state.
|
|
*/
|
|
if (!st->last) {
|
|
if (st->bringup)
|
|
st->state--;
|
|
else
|
|
st->state++;
|
|
}
|
|
|
|
st->target = prev_state;
|
|
st->bringup = !st->bringup;
|
|
}
|
|
|
|
/* Regular hotplug invocation of the AP hotplug thread */
|
|
static void __cpuhp_kick_ap(struct cpuhp_cpu_state *st)
|
|
{
|
|
if (!st->single && st->state == st->target)
|
|
return;
|
|
|
|
st->result = 0;
|
|
/*
|
|
* Make sure the above stores are visible before should_run becomes
|
|
* true. Paired with the mb() above in cpuhp_thread_fun()
|
|
*/
|
|
smp_mb();
|
|
st->should_run = true;
|
|
wake_up_process(st->thread);
|
|
wait_for_ap_thread(st, st->bringup);
|
|
}
|
|
|
|
static int cpuhp_kick_ap(struct cpuhp_cpu_state *st, enum cpuhp_state target)
|
|
{
|
|
enum cpuhp_state prev_state;
|
|
int ret;
|
|
|
|
prev_state = cpuhp_set_state(st, target);
|
|
__cpuhp_kick_ap(st);
|
|
if ((ret = st->result)) {
|
|
cpuhp_reset_state(st, prev_state);
|
|
__cpuhp_kick_ap(st);
|
|
}
|
|
|
|
return ret;
|
|
}
|
|
|
|
static int bringup_wait_for_ap(unsigned int cpu)
|
|
{
|
|
struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
|
|
|
|
/* Wait for the CPU to reach CPUHP_AP_ONLINE_IDLE */
|
|
wait_for_ap_thread(st, true);
|
|
if (WARN_ON_ONCE((!cpu_online(cpu))))
|
|
return -ECANCELED;
|
|
|
|
/* Unpark the hotplug thread of the target cpu */
|
|
kthread_unpark(st->thread);
|
|
|
|
/*
|
|
* SMT soft disabling on X86 requires to bring the CPU out of the
|
|
* BIOS 'wait for SIPI' state in order to set the CR4.MCE bit. The
|
|
* CPU marked itself as booted_once in notify_cpu_starting() so the
|
|
* cpu_smt_allowed() check will now return false if this is not the
|
|
* primary sibling.
|
|
*/
|
|
if (!cpu_smt_allowed(cpu))
|
|
return -ECANCELED;
|
|
|
|
if (st->target <= CPUHP_AP_ONLINE_IDLE)
|
|
return 0;
|
|
|
|
return cpuhp_kick_ap(st, st->target);
|
|
}
|
|
|
|
/*
 * Start @cpu through the architecture code and wait for it to come up.
 * Returns the __cpu_up() error, or the result of bringup_wait_for_ap().
 */
static int bringup_cpu(unsigned int cpu)
{
	struct task_struct *idle_task = idle_thread_get(cpu);
	int err;

	/* Reset stale stack state from the last time this CPU was online. */
	scs_task_reset(idle_task);
	kasan_unpoison_task_stack(idle_task);

	/*
	 * Some architectures walk the irq descriptors to set up the vector
	 * space for the cpu which comes online, so irq alloc/free must be
	 * prevented across the bringup.
	 */
	irq_lock_sparse();

	/* Arch-specific enabling code. */
	err = __cpu_up(cpu, idle_task);
	irq_unlock_sparse();

	return err ? err : bringup_wait_for_ap(cpu);
}
|
|
|
|
static int finish_cpu(unsigned int cpu)
|
|
{
|
|
struct task_struct *idle = idle_thread_get(cpu);
|
|
struct mm_struct *mm = idle->active_mm;
|
|
|
|
/*
|
|
* idle_task_exit() will have switched to &init_mm, now
|
|
* clean up any remaining active_mm state.
|
|
*/
|
|
if (mm != &init_mm)
|
|
idle->active_mm = &init_mm;
|
|
mmdrop(mm);
|
|
return 0;
|
|
}
|
|
|
|
/*
|
|
* Hotplug state machine related functions
|
|
*/
|
|
|
|
static void undo_cpu_up(unsigned int cpu, struct cpuhp_cpu_state *st)
|
|
{
|
|
for (st->state--; st->state > st->target; st->state--)
|
|
cpuhp_invoke_callback(cpu, st->state, false, NULL, NULL);
|
|
}
|
|
|
|
static inline bool can_rollback_cpu(struct cpuhp_cpu_state *st)
|
|
{
|
|
if (IS_ENABLED(CONFIG_HOTPLUG_CPU))
|
|
return true;
|
|
/*
|
|
* When CPU hotplug is disabled, then taking the CPU down is not
|
|
* possible because takedown_cpu() and the architecture and
|
|
* subsystem specific mechanisms are not available. So the CPU
|
|
* which would be completely unplugged again needs to stay around
|
|
* in the current state.
|
|
*/
|
|
return st->state <= CPUHP_BRINGUP_CPU;
|
|
}
|
|
|
|
static int cpuhp_up_callbacks(unsigned int cpu, struct cpuhp_cpu_state *st,
|
|
enum cpuhp_state target)
|
|
{
|
|
enum cpuhp_state prev_state = st->state;
|
|
int ret = 0;
|
|
|
|
while (st->state < target) {
|
|
st->state++;
|
|
ret = cpuhp_invoke_callback(cpu, st->state, true, NULL, NULL);
|
|
if (ret) {
|
|
if (can_rollback_cpu(st)) {
|
|
st->target = prev_state;
|
|
undo_cpu_up(cpu, st);
|
|
}
|
|
break;
|
|
}
|
|
}
|
|
return ret;
|
|
}
|
|
|
|
/*
|
|
* The cpu hotplug threads manage the bringup and teardown of the cpus
|
|
*/
|
|
static void cpuhp_create(unsigned int cpu)
|
|
{
|
|
struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
|
|
|
|
init_completion(&st->done_up);
|
|
init_completion(&st->done_down);
|
|
}
|
|
|
|
static int cpuhp_should_run(unsigned int cpu)
|
|
{
|
|
struct cpuhp_cpu_state *st = this_cpu_ptr(&cpuhp_state);
|
|
|
|
return st->should_run;
|
|
}
|
|
|
|
/*
|
|
* Execute teardown/startup callbacks on the plugged cpu. Also used to invoke
|
|
* callbacks when a state gets [un]installed at runtime.
|
|
*
|
|
* Each invocation of this function by the smpboot thread does a single AP
|
|
* state callback.
|
|
*
|
|
* It has 3 modes of operation:
|
|
* - single: runs st->cb_state
|
|
* - up: runs ++st->state, while st->state < st->target
|
|
* - down: runs st->state--, while st->state > st->target
|
|
*
|
|
* When complete or on error, should_run is cleared and the completion is fired.
|
|
*/
|
|
static void cpuhp_thread_fun(unsigned int cpu)
|
|
{
|
|
struct cpuhp_cpu_state *st = this_cpu_ptr(&cpuhp_state);
|
|
bool bringup = st->bringup;
|
|
enum cpuhp_state state;
|
|
|
|
if (WARN_ON_ONCE(!st->should_run))
|
|
return;
|
|
|
|
/*
|
|
* ACQUIRE for the cpuhp_should_run() load of ->should_run. Ensures
|
|
* that if we see ->should_run we also see the rest of the state.
|
|
*/
|
|
smp_mb();
|
|
|
|
/*
|
|
* The BP holds the hotplug lock, but we're now running on the AP,
|
|
* ensure that anybody asserting the lock is held, will actually find
|
|
* it so.
|
|
*/
|
|
lockdep_acquire_cpus_lock();
|
|
cpuhp_lock_acquire(bringup);
|
|
|
|
if (st->single) {
|
|
state = st->cb_state;
|
|
st->should_run = false;
|
|
} else {
|
|
if (bringup) {
|
|
st->state++;
|
|
state = st->state;
|
|
st->should_run = (st->state < st->target);
|
|
WARN_ON_ONCE(st->state > st->target);
|
|
} else {
|
|
state = st->state;
|
|
st->state--;
|
|
st->should_run = (st->state > st->target);
|
|
WARN_ON_ONCE(st->state < st->target);
|
|
}
|
|
}
|
|
|
|
WARN_ON_ONCE(!cpuhp_is_ap_state(state));
|
|
|
|
if (cpuhp_is_atomic_state(state)) {
|
|
local_irq_disable();
|
|
st->result = cpuhp_invoke_callback(cpu, state, bringup, st->node, &st->last);
|
|
local_irq_enable();
|
|
|
|
/*
|
|
* STARTING/DYING must not fail!
|
|
*/
|
|
WARN_ON_ONCE(st->result);
|
|
} else {
|
|
st->result = cpuhp_invoke_callback(cpu, state, bringup, st->node, &st->last);
|
|
}
|
|
|
|
if (st->result) {
|
|
/*
|
|
* If we fail on a rollback, we're up a creek without no
|
|
* paddle, no way forward, no way back. We loose, thanks for
|
|
* playing.
|
|
*/
|
|
WARN_ON_ONCE(st->rollback);
|
|
st->should_run = false;
|
|
}
|
|
|
|
cpuhp_lock_release(bringup);
|
|
lockdep_release_cpus_lock();
|
|
|
|
if (!st->should_run)
|
|
complete_ap_thread(st, bringup);
|
|
}
|
|
|
|
/* Invoke a single callback on a remote cpu */
|
|
static int
|
|
cpuhp_invoke_ap_callback(int cpu, enum cpuhp_state state, bool bringup,
|
|
struct hlist_node *node)
|
|
{
|
|
struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
|
|
int ret;
|
|
|
|
if (!cpu_online(cpu))
|
|
return 0;
|
|
|
|
cpuhp_lock_acquire(false);
|
|
cpuhp_lock_release(false);
|
|
|
|
cpuhp_lock_acquire(true);
|
|
cpuhp_lock_release(true);
|
|
|
|
/*
|
|
* If we are up and running, use the hotplug thread. For early calls
|
|
* we invoke the thread function directly.
|
|
*/
|
|
if (!st->thread)
|
|
return cpuhp_invoke_callback(cpu, state, bringup, node, NULL);
|
|
|
|
st->rollback = false;
|
|
st->last = NULL;
|
|
|
|
st->node = node;
|
|
st->bringup = bringup;
|
|
st->cb_state = state;
|
|
st->single = true;
|
|
|
|
__cpuhp_kick_ap(st);
|
|
|
|
/*
|
|
* If we failed and did a partial, do a rollback.
|
|
*/
|
|
if ((ret = st->result) && st->last) {
|
|
st->rollback = true;
|
|
st->bringup = !bringup;
|
|
|
|
__cpuhp_kick_ap(st);
|
|
}
|
|
|
|
/*
|
|
* Clean up the leftovers so the next hotplug operation wont use stale
|
|
* data.
|
|
*/
|
|
st->node = st->last = NULL;
|
|
return ret;
|
|
}
|
|
|
|
static int cpuhp_kick_ap_work(unsigned int cpu)
|
|
{
|
|
struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
|
|
enum cpuhp_state prev_state = st->state;
|
|
int ret;
|
|
|
|
cpuhp_lock_acquire(false);
|
|
cpuhp_lock_release(false);
|
|
|
|
cpuhp_lock_acquire(true);
|
|
cpuhp_lock_release(true);
|
|
|
|
trace_cpuhp_enter(cpu, st->target, prev_state, cpuhp_kick_ap_work);
|
|
ret = cpuhp_kick_ap(st, st->target);
|
|
trace_cpuhp_exit(cpu, st->state, prev_state, ret);
|
|
|
|
return ret;
|
|
}
|
|
|
|
static struct smp_hotplug_thread cpuhp_threads = {
|
|
.store = &cpuhp_state.thread,
|
|
.create = &cpuhp_create,
|
|
.thread_should_run = cpuhp_should_run,
|
|
.thread_fn = cpuhp_thread_fun,
|
|
.thread_comm = "cpuhp/%u",
|
|
.selfparking = true,
|
|
};
|
|
|
|
/*
 * Register the per-cpu hotplug threads (a registration failure is a BUG)
 * and unpark the thread of the CPU this runs on.
 */
void __init cpuhp_threads_init(void)
{
	int err = smpboot_register_percpu_thread(&cpuhp_threads);

	BUG_ON(err);
	kthread_unpark(this_cpu_read(cpuhp_state.thread));
}
|
|
|
|
/*
|
|
*
|
|
* Serialize hotplug trainwrecks outside of the cpu_hotplug_lock
|
|
* protected region.
|
|
*
|
|
* The operation is still serialized against concurrent CPU hotplug via
|
|
* cpu_add_remove_lock, i.e. CPU map protection. But it is _not_
|
|
* serialized against other hotplug related activity like adding or
|
|
* removing of state callbacks and state instances, which invoke either the
|
|
* startup or the teardown callback of the affected state.
|
|
*
|
|
* This is required for subsystems which are unfixable vs. CPU hotplug and
|
|
* evade lock inversion problems by scheduling work which has to be
|
|
* completed _before_ cpu_up()/_cpu_down() returns.
|
|
*
|
|
* Don't even think about adding anything to this for any new code or even
|
|
* drivers. Its only purpose is to keep existing lock order trainwrecks
|
|
* working.
|
|
*
|
|
* For cpu_down() there might be valid reasons to finish cleanups which are
|
|
* not required to be done under cpu_hotplug_lock, but that's a different
|
|
* story and would be not invoked via this.
|
|
*/
|
|
static void cpu_up_down_serialize_trainwrecks(bool tasks_frozen)
{
	/*
	 * cpusets hand hotplug operations over to a worker to "solve" lock
	 * order problems. That worker has to be waited for, except when
	 * tasks are frozen (suspend, hibernate), where the wait would never
	 * finish.
	 *
	 * Without the wait the hotplug operation would return with
	 * inconsistent state, which can even become visible in user space
	 * when a new CPU is brought up: the CPU plug uevent gets delivered
	 * and user space reacting to it fails to move tasks to the newly
	 * plugged CPU until the work has finished, because until then the
	 * new CPU is not assignable in cpusets/cgroups. On unplug this is
	 * not necessarily a visible issue, but the state is inconsistent
	 * all the same, and that is the real problem which needs to be
	 * "fixed". The transient state between scheduling the work and
	 * returning from waiting for it cannot be prevented by this.
	 */
	if (tasks_frozen)
		return;

	cpuset_wait_for_hotplug();
}
|
|
|
|
#ifdef CONFIG_HOTPLUG_CPU
|
|
/* Default definition, used unless arch_clear_mm_cpumask_cpu is already defined. */
#if !defined(arch_clear_mm_cpumask_cpu)
#define arch_clear_mm_cpumask_cpu(cpu, mm) cpumask_clear_cpu(cpu, mm_cpumask(mm))
#endif
|
|
|
|
/**
|
|
* clear_tasks_mm_cpumask - Safely clear tasks' mm_cpumask for a CPU
|
|
* @cpu: a CPU id
|
|
*
|
|
* This function walks all processes, finds a valid mm struct for each one and
|
|
* then clears a corresponding bit in mm's cpumask. While this all sounds
|
|
* trivial, there are various non-obvious corner cases, which this function
|
|
* tries to solve in a safe manner.
|
|
*
|
|
* Also note that the function uses a somewhat relaxed locking scheme, so it may
|
|
* be called only for an already offlined CPU.
|
|
*/
|
|
void clear_tasks_mm_cpumask(int cpu)
|
|
{
|
|
struct task_struct *p;
|
|
|
|
/*
|
|
* This function is called after the cpu is taken down and marked
|
|
* offline, so its not like new tasks will ever get this cpu set in
|
|
* their mm mask. -- Peter Zijlstra
|
|
* Thus, we may use rcu_read_lock() here, instead of grabbing
|
|
* full-fledged tasklist_lock.
|
|
*/
|
|
WARN_ON(cpu_online(cpu));
|
|
rcu_read_lock();
|
|
for_each_process(p) {
|
|
struct task_struct *t;
|
|
|
|
/*
|
|
* Main thread might exit, but other threads may still have
|
|
* a valid mm. Find one.
|
|
*/
|
|
t = find_lock_task_mm(p);
|
|
if (!t)
|
|
continue;
|
|
arch_clear_mm_cpumask_cpu(cpu, t->mm);
|
|
task_unlock(t);
|
|
}
|
|
rcu_read_unlock();
|
|
}
|
|
|
|
/* Take this CPU down. */
|
|
static int take_cpu_down(void *_param)
|
|
{
|
|
struct cpuhp_cpu_state *st = this_cpu_ptr(&cpuhp_state);
|
|
enum cpuhp_state target = max((int)st->target, CPUHP_AP_OFFLINE);
|
|
int err, cpu = smp_processor_id();
|
|
int ret;
|
|
|
|
/* Ensure this CPU doesn't handle any more interrupts. */
|
|
err = __cpu_disable();
|
|
if (err < 0)
|
|
return err;
|
|
|
|
/*
|
|
* We get here while we are in CPUHP_TEARDOWN_CPU state and we must not
|
|
* do this step again.
|
|
*/
|
|
WARN_ON(st->state != CPUHP_TEARDOWN_CPU);
|
|
st->state--;
|
|
/* Invoke the former CPU_DYING callbacks */
|
|
for (; st->state > target; st->state--) {
|
|
ret = cpuhp_invoke_callback(cpu, st->state, false, NULL, NULL);
|
|
/*
|
|
* DYING must not fail!
|
|
*/
|
|
WARN_ON_ONCE(ret);
|
|
}
|
|
|
|
/* Give up timekeeping duties */
|
|
tick_handover_do_timer();
|
|
/* Remove CPU from timer broadcasting */
|
|
tick_offline_cpu(cpu);
|
|
/* Park the stopper thread */
|
|
stop_machine_park(cpu);
|
|
return 0;
|
|
}
|
|
|
|
static int takedown_cpu(unsigned int cpu)
|
|
{
|
|
struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
|
|
int err;
|
|
|
|
/* Park the smpboot threads */
|
|
kthread_park(per_cpu_ptr(&cpuhp_state, cpu)->thread);
|
|
|
|
/*
|
|
* Prevent irq alloc/free while the dying cpu reorganizes the
|
|
* interrupt affinities.
|
|
*/
|
|
irq_lock_sparse();
|
|
|
|
/*
|
|
* So now all preempt/rcu users must observe !cpu_active().
|
|
*/
|
|
err = stop_machine_cpuslocked(take_cpu_down, NULL, cpumask_of(cpu));
|
|
if (err) {
|
|
/* CPU refused to die */
|
|
irq_unlock_sparse();
|
|
/* Unpark the hotplug thread so we can rollback there */
|
|
kthread_unpark(per_cpu_ptr(&cpuhp_state, cpu)->thread);
|
|
return err;
|
|
}
|
|
BUG_ON(cpu_online(cpu));
|
|
|
|
/*
|
|
* The teardown callback for CPUHP_AP_SCHED_STARTING will have removed
|
|
* all runnable tasks from the CPU, there's only the idle task left now
|
|
* that the migration thread is done doing the stop_machine thing.
|
|
*
|
|
* Wait for the stop thread to go away.
|
|
*/
|
|
wait_for_ap_thread(st, false);
|
|
BUG_ON(st->state != CPUHP_AP_IDLE_DEAD);
|
|
|
|
/* Interrupts are moved away from the dying cpu, reenable alloc/free */
|
|
irq_unlock_sparse();
|
|
|
|
hotplug_cpu__broadcast_tick_pull(cpu);
|
|
/* This actually kills the CPU. */
|
|
__cpu_die(cpu);
|
|
|
|
tick_cleanup_dead_cpu(cpu);
|
|
rcutree_migrate_callbacks(cpu);
|
|
return 0;
|
|
}
|
|
|
|
static void cpuhp_complete_idle_dead(void *arg)
|
|
{
|
|
struct cpuhp_cpu_state *st = arg;
|
|
|
|
complete_ap_thread(st, false);
|
|
}
|
|
|
|
void cpuhp_report_idle_dead(void)
|
|
{
|
|
struct cpuhp_cpu_state *st = this_cpu_ptr(&cpuhp_state);
|
|
|
|
BUG_ON(st->state != CPUHP_AP_OFFLINE);
|
|
rcu_report_dead(smp_processor_id());
|
|
st->state = CPUHP_AP_IDLE_DEAD;
|
|
/*
|
|
* We cannot call complete after rcu_report_dead() so we delegate it
|
|
* to an online cpu.
|
|
*/
|
|
smp_call_function_single(cpumask_first(cpu_online_mask),
|
|
cpuhp_complete_idle_dead, st, 0);
|
|
}
|
|
|
|
static void undo_cpu_down(unsigned int cpu, struct cpuhp_cpu_state *st)
|
|
{
|
|
for (st->state++; st->state < st->target; st->state++)
|
|
cpuhp_invoke_callback(cpu, st->state, true, NULL, NULL);
|
|
}
|
|
|
|
static int cpuhp_down_callbacks(unsigned int cpu, struct cpuhp_cpu_state *st,
|
|
enum cpuhp_state target)
|
|
{
|
|
enum cpuhp_state prev_state = st->state;
|
|
int ret = 0;
|
|
|
|
for (; st->state > target; st->state--) {
|
|
ret = cpuhp_invoke_callback(cpu, st->state, false, NULL, NULL);
|
|
if (ret) {
|
|
st->target = prev_state;
|
|
if (st->state < prev_state)
|
|
undo_cpu_down(cpu, st);
|
|
break;
|
|
}
|
|
}
|
|
return ret;
|
|
}
|
|
|
|
/* Requires cpu_add_remove_lock to be held */
|
|
static int __ref _cpu_down(unsigned int cpu, int tasks_frozen,
|
|
enum cpuhp_state target)
|
|
{
|
|
struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
|
|
int prev_state, ret = 0;
|
|
|
|
if (num_active_cpus() == 1 && cpu_active(cpu))
|
|
return -EBUSY;
|
|
|
|
if (!cpu_present(cpu))
|
|
return -EINVAL;
|
|
|
|
cpus_write_lock();
|
|
|
|
cpuhp_tasks_frozen = tasks_frozen;
|
|
|
|
prev_state = cpuhp_set_state(st, target);
|
|
/*
|
|
* If the current CPU state is in the range of the AP hotplug thread,
|
|
* then we need to kick the thread.
|
|
*/
|
|
if (st->state > CPUHP_TEARDOWN_CPU) {
|
|
st->target = max((int)target, CPUHP_TEARDOWN_CPU);
|
|
ret = cpuhp_kick_ap_work(cpu);
|
|
/*
|
|
* The AP side has done the error rollback already. Just
|
|
* return the error code..
|
|
*/
|
|
if (ret)
|
|
goto out;
|
|
|
|
/*
|
|
* We might have stopped still in the range of the AP hotplug
|
|
* thread. Nothing to do anymore.
|
|
*/
|
|
if (st->state > CPUHP_TEARDOWN_CPU)
|
|
goto out;
|
|
|
|
st->target = target;
|
|
}
|
|
/*
|
|
* The AP brought itself down to CPUHP_TEARDOWN_CPU. So we need
|
|
* to do the further cleanups.
|
|
*/
|
|
ret = cpuhp_down_callbacks(cpu, st, target);
|
|
if (ret && st->state == CPUHP_TEARDOWN_CPU && st->state < prev_state) {
|
|
cpuhp_reset_state(st, prev_state);
|
|
__cpuhp_kick_ap(st);
|
|
}
|
|
|
|
out:
|
|
cpus_write_unlock();
|
|
/*
|
|
* Do post unplug cleanup. This is still protected against
|
|
* concurrent CPU hotplug via cpu_add_remove_lock.
|
|
*/
|
|
lockup_detector_cleanup();
|
|
arch_smt_update();
|
|
cpu_up_down_serialize_trainwrecks(tasks_frozen);
|
|
return ret;
|
|
}
|
|
|
|
static int cpu_down_maps_locked(unsigned int cpu, enum cpuhp_state target)
|
|
{
|
|
if (cpu_hotplug_disabled)
|
|
return -EBUSY;
|
|
return _cpu_down(cpu, 0, target);
|
|
}
|
|
|
|
/*
 * Wrap cpu_down_maps_locked() in a cpu_maps_update_begin()/
 * cpu_maps_update_done() section and return its result.
 */
static int cpu_down(unsigned int cpu, enum cpuhp_state target)
{
	int ret;

	cpu_maps_update_begin();
	ret = cpu_down_maps_locked(cpu, target);
	cpu_maps_update_done();

	return ret;
}
|
|
|
|
/**
|
|
* cpu_device_down - Bring down a cpu device
|
|
* @dev: Pointer to the cpu device to offline
|
|
*
|
|
* This function is meant to be used by device core cpu subsystem only.
|
|
*
|
|
* Other subsystems should use remove_cpu() instead.
|
|
*/
|
|
int cpu_device_down(struct device *dev)
|
|
{
|
|
return cpu_down(dev->id, CPUHP_OFFLINE);
|
|
}
|
|
|
|
/*
 * Offline the device of @cpu through device_offline() while holding the
 * device hotplug lock. Returns the result of device_offline().
 */
int remove_cpu(unsigned int cpu)
{
	struct device *cpu_dev;
	int ret;

	lock_device_hotplug();
	cpu_dev = get_cpu_device(cpu);
	ret = device_offline(cpu_dev);
	unlock_device_hotplug();

	return ret;
}
EXPORT_SYMBOL_GPL(remove_cpu);
|
|
|
|
extern bool dl_cpu_busy(unsigned int cpu);
|
|
|
|
int __pause_drain_rq(struct cpumask *cpus)
|
|
{
|
|
unsigned int cpu;
|
|
int err = 0;
|
|
|
|
/*
|
|
* Disabling preemption avoids that one of the stopper, started from
|
|
* sched_cpu_drain_rq(), blocks firing draining for the whole cpumask.
|
|
*/
|
|
preempt_disable();
|
|
for_each_cpu(cpu, cpus) {
|
|
err = sched_cpu_drain_rq(cpu);
|
|
if (err)
|
|
break;
|
|
}
|
|
preempt_enable();
|
|
|
|
return err;
|
|
}
|
|
|
|
void __wait_drain_rq(struct cpumask *cpus)
|
|
{
|
|
unsigned int cpu;
|
|
|
|
for_each_cpu(cpu, cpus)
|
|
sched_cpu_drain_rq_wait(cpu);
|
|
}
|
|
|
|
/* if rt task, set to cfs and return previous prio */
|
|
static int pause_reduce_prio(void)
|
|
{
|
|
int prev_prio = -1;
|
|
|
|
if (current->prio < MAX_RT_PRIO) {
|
|
struct sched_param param = { .sched_priority = 0 };
|
|
|
|
prev_prio = current->prio;
|
|
sched_setscheduler_nocheck(current, SCHED_NORMAL, ¶m);
|
|
}
|
|
|
|
return prev_prio;
|
|
}
|
|
|
|
/* if previous prio was set, restore */
|
|
static void pause_restore_prio(int prev_prio)
|
|
{
|
|
if (prev_prio >= 0 && prev_prio < MAX_RT_PRIO) {
|
|
struct sched_param param = { .sched_priority = MAX_RT_PRIO-1-prev_prio };
|
|
|
|
sched_setscheduler_nocheck(current, SCHED_FIFO, ¶m);
|
|
}
|
|
}
|
|
|
|
int pause_cpus(struct cpumask *cpus)
|
|
{
|
|
int err = 0;
|
|
int cpu;
|
|
u64 start_time = 0;
|
|
int prev_prio;
|
|
|
|
start_time = sched_clock();
|
|
|
|
cpu_maps_update_begin();
|
|
|
|
if (cpu_hotplug_disabled) {
|
|
err = -EBUSY;
|
|
goto err_cpu_maps_update;
|
|
}
|
|
|
|
/* Pausing an already inactive CPU isn't an error */
|
|
cpumask_and(cpus, cpus, cpu_active_mask);
|
|
|
|
for_each_cpu(cpu, cpus) {
|
|
if (!cpu_online(cpu) || dl_cpu_busy(cpu) ||
|
|
get_cpu_device(cpu)->offline_disabled == true) {
|
|
err = -EBUSY;
|
|
goto err_cpu_maps_update;
|
|
}
|
|
}
|
|
|
|
if (cpumask_weight(cpus) >= num_active_cpus()) {
|
|
err = -EBUSY;
|
|
goto err_cpu_maps_update;
|
|
}
|
|
|
|
if (cpumask_empty(cpus))
|
|
goto err_cpu_maps_update;
|
|
|
|
/*
|
|
* Lazy migration:
|
|
*
|
|
* We do care about how fast a CPU can go idle and stay this in this
|
|
* state. If we try to take the cpus_write_lock() here, we would have
|
|
* to wait for a few dozens of ms, as this function might schedule.
|
|
* However, we can, as a first step, flip the active mask and migrate
|
|
* anything currently on the run-queue, to give a chance to the paused
|
|
* CPUs to reach quickly an idle state. There's a risk meanwhile for
|
|
* another CPU to observe an out-of-date active_mask or to incompletely
|
|
* update a cpuset. Both problems would be resolved later in the slow
|
|
* path, which ensures active_mask synchronization, triggers a cpuset
|
|
* rebuild and migrate any task that would have escaped the lazy
|
|
* migration.
|
|
*/
|
|
for_each_cpu(cpu, cpus)
|
|
set_cpu_active(cpu, false);
|
|
err = __pause_drain_rq(cpus);
|
|
if (err) {
|
|
__wait_drain_rq(cpus);
|
|
for_each_cpu(cpu, cpus)
|
|
set_cpu_active(cpu, true);
|
|
goto err_cpu_maps_update;
|
|
}
|
|
|
|
prev_prio = pause_reduce_prio();
|
|
|
|
/*
|
|
* Slow path deactivation:
|
|
*
|
|
* Now that paused CPUs are most likely idle, we can go through a
|
|
* complete scheduler deactivation.
|
|
*
|
|
* The cpu_active_mask being already set and cpus_write_lock calling
|
|
* synchronize_rcu(), we know that all preempt-disabled and RCU users
|
|
* will observe the updated value.
|
|
*/
|
|
cpus_write_lock();
|
|
|
|
__wait_drain_rq(cpus);
|
|
|
|
cpuhp_tasks_frozen = 0;
|
|
|
|
if (sched_cpus_deactivate_nosync(cpus)) {
|
|
err = -EBUSY;
|
|
goto err_cpus_write_unlock;
|
|
}
|
|
|
|
err = __pause_drain_rq(cpus);
|
|
__wait_drain_rq(cpus);
|
|
if (err) {
|
|
for_each_cpu(cpu, cpus)
|
|
sched_cpu_activate(cpu);
|
|
goto err_cpus_write_unlock;
|
|
}
|
|
|
|
/*
|
|
* Even if living on the side of the regular HP path, pause is using
|
|
* one of the HP step (CPUHP_AP_ACTIVE). This should be reflected on the
|
|
* current state of the CPU.
|
|
*/
|
|
for_each_cpu(cpu, cpus) {
|
|
struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
|
|
|
|
st->state = CPUHP_AP_ACTIVE - 1;
|
|
st->target = st->state;
|
|
}
|
|
|
|
err_cpus_write_unlock:
|
|
cpus_write_unlock();
|
|
pause_restore_prio(prev_prio);
|
|
err_cpu_maps_update:
|
|
cpu_maps_update_done();
|
|
|
|
trace_cpuhp_pause(cpus, start_time, 1);
|
|
|
|
return err;
|
|
}
|
|
EXPORT_SYMBOL_GPL(pause_cpus);
|
|
|
|
int resume_cpus(struct cpumask *cpus)
|
|
{
|
|
unsigned int cpu;
|
|
int err = 0;
|
|
u64 start_time = 0;
|
|
int prev_prio;
|
|
|
|
start_time = sched_clock();
|
|
|
|
cpu_maps_update_begin();
|
|
|
|
if (cpu_hotplug_disabled) {
|
|
err = -EBUSY;
|
|
goto err_cpu_maps_update;
|
|
}
|
|
|
|
/* Resuming an already active CPU isn't an error */
|
|
cpumask_andnot(cpus, cpus, cpu_active_mask);
|
|
|
|
for_each_cpu(cpu, cpus) {
|
|
if (!cpu_online(cpu)) {
|
|
err = -EBUSY;
|
|
goto err_cpu_maps_update;
|
|
}
|
|
}
|
|
|
|
if (cpumask_empty(cpus))
|
|
goto err_cpu_maps_update;
|
|
|
|
for_each_cpu(cpu, cpus)
|
|
set_cpu_active(cpu, true);
|
|
|
|
trace_android_rvh_resume_cpus(cpus, &err);
|
|
if (err)
|
|
goto err_cpu_maps_update;
|
|
|
|
prev_prio = pause_reduce_prio();
|
|
|
|
/* Lazy Resume. Build domains through schedule a workqueue on
|
|
* resuming cpu. This is so that the resuming cpu can work more
|
|
* early, and cannot add additional load to other busy cpu.
|
|
*/
|
|
cpuset_update_active_cpus_affine(cpumask_first(cpus));
|
|
|
|
cpus_write_lock();
|
|
|
|
cpuhp_tasks_frozen = 0;
|
|
|
|
if (sched_cpus_activate(cpus)) {
|
|
err = -EBUSY;
|
|
goto err_cpus_write_unlock;
|
|
}
|
|
|
|
/*
|
|
* see pause_cpus.
|
|
*/
|
|
for_each_cpu(cpu, cpus) {
|
|
struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
|
|
|
|
st->state = CPUHP_ONLINE;
|
|
st->target = st->state;
|
|
}
|
|
|
|
err_cpus_write_unlock:
|
|
cpus_write_unlock();
|
|
pause_restore_prio(prev_prio);
|
|
err_cpu_maps_update:
|
|
cpu_maps_update_done();
|
|
|
|
trace_cpuhp_pause(cpus, start_time, 0);
|
|
|
|
return err;
|
|
}
|
|
EXPORT_SYMBOL_GPL(resume_cpus);
|
|
|
|
void smp_shutdown_nonboot_cpus(unsigned int primary_cpu)
|
|
{
|
|
unsigned int cpu;
|
|
int error;
|
|
|
|
cpu_maps_update_begin();
|
|
|
|
/*
|
|
* Make certain the cpu I'm about to reboot on is online.
|
|
*
|
|
* This is inline to what migrate_to_reboot_cpu() already do.
|
|
*/
|
|
if (!cpu_online(primary_cpu))
|
|
primary_cpu = cpumask_first(cpu_online_mask);
|
|
|
|
for_each_online_cpu(cpu) {
|
|
if (cpu == primary_cpu)
|
|
continue;
|
|
|
|
error = cpu_down_maps_locked(cpu, CPUHP_OFFLINE);
|
|
if (error) {
|
|
pr_err("Failed to offline CPU%d - error=%d",
|
|
cpu, error);
|
|
break;
|
|
}
|
|
}
|
|
|
|
/*
|
|
* Ensure all but the reboot CPU are offline.
|
|
*/
|
|
BUG_ON(num_online_cpus() > 1);
|
|
|
|
/*
|
|
* Make sure the CPUs won't be enabled by someone else after this
|
|
* point. Kexec will reboot to a new kernel shortly resetting
|
|
* everything along the way.
|
|
*/
|
|
cpu_hotplug_disabled++;
|
|
|
|
cpu_maps_update_done();
|
|
}
|
|
|
|
#else
|
|
#define takedown_cpu NULL
|
|
#endif /*CONFIG_HOTPLUG_CPU*/
|
|
|
|
/**
|
|
* notify_cpu_starting(cpu) - Invoke the callbacks on the starting CPU
|
|
* @cpu: cpu that just started
|
|
*
|
|
* It must be called by the arch code on the new cpu, before the new cpu
|
|
* enables interrupts and before the "boot" cpu returns from __cpu_up().
|
|
*/
|
|
void notify_cpu_starting(unsigned int cpu)
|
|
{
|
|
struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
|
|
enum cpuhp_state target = min((int)st->target, CPUHP_AP_ONLINE);
|
|
int ret;
|
|
|
|
rcu_cpu_starting(cpu); /* Enables RCU usage on this CPU. */
|
|
cpumask_set_cpu(cpu, &cpus_booted_once_mask);
|
|
while (st->state < target) {
|
|
st->state++;
|
|
ret = cpuhp_invoke_callback(cpu, st->state, true, NULL, NULL);
|
|
/*
|
|
* STARTING must not fail!
|
|
*/
|
|
WARN_ON_ONCE(ret);
|
|
}
|
|
}
|
|
|
|
/*
|
|
* Called from the idle task. Wake up the controlling task which brings the
|
|
* hotplug thread of the upcoming CPU up and then delegates the rest of the
|
|
* online bringup to the hotplug thread.
|
|
*/
|
|
void cpuhp_online_idle(enum cpuhp_state state)
|
|
{
|
|
struct cpuhp_cpu_state *st = this_cpu_ptr(&cpuhp_state);
|
|
|
|
/* Happens for the boot cpu */
|
|
if (state != CPUHP_AP_ONLINE_IDLE)
|
|
return;
|
|
|
|
/*
|
|
* Unpart the stopper thread before we start the idle loop (and start
|
|
* scheduling); this ensures the stopper task is always available.
|
|
*/
|
|
stop_machine_unpark(smp_processor_id());
|
|
|
|
st->state = CPUHP_AP_ONLINE_IDLE;
|
|
complete_ap_thread(st, true);
|
|
}
|
|
|
|
static int switch_to_rt_policy(void)
|
|
{
|
|
struct sched_param param = { .sched_priority = MAX_RT_PRIO - 1 };
|
|
unsigned int policy = current->policy;
|
|
|
|
if (policy == SCHED_NORMAL)
|
|
/* Switch to SCHED_FIFO from SCHED_NORMAL. */
|
|
return sched_setscheduler_nocheck(current, SCHED_FIFO, ¶m);
|
|
else
|
|
return 1;
|
|
}
|
|
|
|
static int switch_to_fair_policy(void)
|
|
{
|
|
struct sched_param param = { .sched_priority = 0 };
|
|
|
|
return sched_setscheduler_nocheck(current, SCHED_NORMAL, ¶m);
|
|
}
|
|
|
|
/* Requires cpu_add_remove_lock to be held */
|
|
static int _cpu_up(unsigned int cpu, int tasks_frozen, enum cpuhp_state target)
|
|
{
|
|
struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
|
|
struct task_struct *idle;
|
|
int ret = 0;
|
|
|
|
cpus_write_lock();
|
|
|
|
if (!cpu_present(cpu)) {
|
|
ret = -EINVAL;
|
|
goto out;
|
|
}
|
|
|
|
/*
|
|
* The caller of cpu_up() might have raced with another
|
|
* caller. Nothing to do.
|
|
*/
|
|
if (st->state >= target)
|
|
goto out;
|
|
|
|
if (st->state == CPUHP_OFFLINE) {
|
|
/* Let it fail before we try to bring the cpu up */
|
|
idle = idle_thread_get(cpu);
|
|
if (IS_ERR(idle)) {
|
|
ret = PTR_ERR(idle);
|
|
goto out;
|
|
}
|
|
}
|
|
|
|
cpuhp_tasks_frozen = tasks_frozen;
|
|
|
|
cpuhp_set_state(st, target);
|
|
/*
|
|
* If the current CPU state is in the range of the AP hotplug thread,
|
|
* then we need to kick the thread once more.
|
|
*/
|
|
if (st->state > CPUHP_BRINGUP_CPU) {
|
|
ret = cpuhp_kick_ap_work(cpu);
|
|
/*
|
|
* The AP side has done the error rollback already. Just
|
|
* return the error code..
|
|
*/
|
|
if (ret)
|
|
goto out;
|
|
}
|
|
|
|
/*
|
|
* Try to reach the target state. We max out on the BP at
|
|
* CPUHP_BRINGUP_CPU. After that the AP hotplug thread is
|
|
* responsible for bringing it up to the target state.
|
|
*/
|
|
target = min((int)target, CPUHP_BRINGUP_CPU);
|
|
ret = cpuhp_up_callbacks(cpu, st, target);
|
|
out:
|
|
cpus_write_unlock();
|
|
arch_smt_update();
|
|
cpu_up_down_serialize_trainwrecks(tasks_frozen);
|
|
return ret;
|
|
}
|
|
|
|
static int cpu_up(unsigned int cpu, enum cpuhp_state target)
|
|
{
|
|
int err = 0;
|
|
int switch_err;
|
|
|
|
if (!cpu_possible(cpu)) {
|
|
pr_err("can't online cpu %d because it is not configured as may-hotadd at boot time\n",
|
|
cpu);
|
|
#if defined(CONFIG_IA64)
|
|
pr_err("please check additional_cpus= boot parameter\n");
|
|
#endif
|
|
return -EINVAL;
|
|
}
|
|
|
|
trace_android_vh_cpu_up(cpu);
|
|
|
|
/*
|
|
* CPU hotplug operations consists of many steps and each step
|
|
* calls a callback of core kernel subsystem. CPU hotplug-in
|
|
* operation may get preempted by other CFS tasks and whole
|
|
* operation of cpu hotplug in CPU gets delayed. Switch the
|
|
* current task to SCHED_FIFO from SCHED_NORMAL, so that
|
|
* hotplug in operation may complete quickly in heavy loaded
|
|
* conditions and new CPU will start handle the workload.
|
|
*/
|
|
|
|
switch_err = switch_to_rt_policy();
|
|
|
|
err = try_online_node(cpu_to_node(cpu));
|
|
if (err)
|
|
goto switch_out;
|
|
|
|
cpu_maps_update_begin();
|
|
|
|
if (cpu_hotplug_disabled) {
|
|
err = -EBUSY;
|
|
goto out;
|
|
}
|
|
if (!cpu_smt_allowed(cpu)) {
|
|
err = -EPERM;
|
|
goto out;
|
|
}
|
|
|
|
err = _cpu_up(cpu, 0, target);
|
|
out:
|
|
cpu_maps_update_done();
|
|
switch_out:
|
|
if (!switch_err) {
|
|
switch_err = switch_to_fair_policy();
|
|
if (switch_err)
|
|
pr_err("Hotplug policy switch err=%d Task %s pid=%d\n",
|
|
switch_err, current->comm, current->pid);
|
|
}
|
|
|
|
return err;
|
|
}
|
|
|
|
/**
|
|
* cpu_device_up - Bring up a cpu device
|
|
* @dev: Pointer to the cpu device to online
|
|
*
|
|
* This function is meant to be used by device core cpu subsystem only.
|
|
*
|
|
* Other subsystems should use add_cpu() instead.
|
|
*/
|
|
int cpu_device_up(struct device *dev)
|
|
{
|
|
return cpu_up(dev->id, CPUHP_ONLINE);
|
|
}
|
|
|
|
/*
 * Online the device of @cpu through device_online() while holding the
 * device hotplug lock. Returns the result of device_online().
 */
int add_cpu(unsigned int cpu)
{
	struct device *cpu_dev;
	int ret;

	lock_device_hotplug();
	cpu_dev = get_cpu_device(cpu);
	ret = device_online(cpu_dev);
	unlock_device_hotplug();

	return ret;
}
EXPORT_SYMBOL_GPL(add_cpu);
|
|
|
|
/**
|
|
* bringup_hibernate_cpu - Bring up the CPU that we hibernated on
|
|
* @sleep_cpu: The cpu we hibernated on and should be brought up.
|
|
*
|
|
* On some architectures like arm64, we can hibernate on any CPU, but on
|
|
* wake up the CPU we hibernated on might be offline as a side effect of
|
|
* using maxcpus= for example.
|
|
*/
|
|
int bringup_hibernate_cpu(unsigned int sleep_cpu)
|
|
{
|
|
int ret;
|
|
|
|
if (!cpu_online(sleep_cpu)) {
|
|
pr_info("Hibernated on a CPU that is offline! Bringing CPU up.\n");
|
|
ret = cpu_up(sleep_cpu, CPUHP_ONLINE);
|
|
if (ret) {
|
|
pr_err("Failed to bring hibernate-CPU up!\n");
|
|
return ret;
|
|
}
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
void bringup_nonboot_cpus(unsigned int setup_max_cpus)
|
|
{
|
|
unsigned int cpu;
|
|
|
|
for_each_present_cpu(cpu) {
|
|
if (num_online_cpus() >= setup_max_cpus)
|
|
break;
|
|
if (!cpu_online(cpu))
|
|
cpu_up(cpu, CPUHP_ONLINE);
|
|
}
|
|
}
|
|
|
|
#ifdef CONFIG_PM_SLEEP_SMP
|
|
static cpumask_var_t frozen_cpus;
|
|
|
|
int freeze_secondary_cpus(int primary)
|
|
{
|
|
int cpu, error = 0;
|
|
|
|
cpu_maps_update_begin();
|
|
if (primary == -1) {
|
|
primary = cpumask_first(cpu_online_mask);
|
|
if (!housekeeping_cpu(primary, HK_FLAG_TIMER))
|
|
primary = housekeeping_any_cpu(HK_FLAG_TIMER);
|
|
} else {
|
|
if (!cpu_online(primary))
|
|
primary = cpumask_first(cpu_online_mask);
|
|
}
|
|
|
|
/*
|
|
* We take down all of the non-boot CPUs in one shot to avoid races
|
|
* with the userspace trying to use the CPU hotplug at the same time
|
|
*/
|
|
cpumask_clear(frozen_cpus);
|
|
|
|
pr_info("Disabling non-boot CPUs ...\n");
|
|
for_each_online_cpu(cpu) {
|
|
if (cpu == primary)
|
|
continue;
|
|
|
|
if (pm_wakeup_pending()) {
|
|
pr_info("Wakeup pending. Abort CPU freeze\n");
|
|
error = -EBUSY;
|
|
break;
|
|
}
|
|
|
|
trace_suspend_resume(TPS("CPU_OFF"), cpu, true);
|
|
error = _cpu_down(cpu, 1, CPUHP_OFFLINE);
|
|
trace_suspend_resume(TPS("CPU_OFF"), cpu, false);
|
|
if (!error)
|
|
cpumask_set_cpu(cpu, frozen_cpus);
|
|
else {
|
|
pr_err("Error taking CPU%d down: %d\n", cpu, error);
|
|
break;
|
|
}
|
|
}
|
|
|
|
if (!error)
|
|
BUG_ON(num_online_cpus() > 1);
|
|
else
|
|
pr_err("Non-boot CPUs are not disabled\n");
|
|
|
|
/*
|
|
* Make sure the CPUs won't be enabled by someone else. We need to do
|
|
* this even in case of failure as all freeze_secondary_cpus() users are
|
|
* supposed to do thaw_secondary_cpus() on the failure path.
|
|
*/
|
|
cpu_hotplug_disabled++;
|
|
|
|
cpu_maps_update_done();
|
|
return error;
|
|
}
|
|
|
|
/*
 * Weak default, intentionally empty. thaw_secondary_cpus() calls this
 * before it starts bringing the frozen CPUs back up.
 */
void __weak arch_thaw_secondary_cpus_begin(void)
{
	/* nothing to do by default */
}
|
|
|
|
/*
 * Weak default, intentionally empty. thaw_secondary_cpus() calls this
 * after it has tried to bring every frozen CPU back up.
 */
void __weak arch_thaw_secondary_cpus_end(void)
{
	/* nothing to do by default */
}
|
|
|
|
void thaw_secondary_cpus(void)
|
|
{
|
|
int cpu, error;
|
|
struct device *cpu_device;
|
|
|
|
/* Allow everyone to use the CPU hotplug again */
|
|
cpu_maps_update_begin();
|
|
__cpu_hotplug_enable();
|
|
if (cpumask_empty(frozen_cpus))
|
|
goto out;
|
|
|
|
pr_info("Enabling non-boot CPUs ...\n");
|
|
|
|
arch_thaw_secondary_cpus_begin();
|
|
|
|
for_each_cpu(cpu, frozen_cpus) {
|
|
trace_suspend_resume(TPS("CPU_ON"), cpu, true);
|
|
error = _cpu_up(cpu, 1, CPUHP_ONLINE);
|
|
trace_suspend_resume(TPS("CPU_ON"), cpu, false);
|
|
if (!error) {
|
|
pr_info("CPU%d is up\n", cpu);
|
|
cpu_device = get_cpu_device(cpu);
|
|
if (!cpu_device)
|
|
pr_err("%s: failed to get cpu%d device\n",
|
|
__func__, cpu);
|
|
else
|
|
kobject_uevent(&cpu_device->kobj, KOBJ_ONLINE);
|
|
continue;
|
|
}
|
|
pr_warn("Error taking CPU%d up: %d\n", cpu, error);
|
|
}
|
|
|
|
arch_thaw_secondary_cpus_end();
|
|
|
|
cpumask_clear(frozen_cpus);
|
|
out:
|
|
cpu_maps_update_done();
|
|
}
|
|
|
|
/*
 * Allocate the zeroed frozen_cpus mask. Registered as a core_initcall.
 * Returns 0 on success or -ENOMEM.
 */
static int __init alloc_frozen_cpus(void)
{
	bool allocated = alloc_cpumask_var(&frozen_cpus, GFP_KERNEL|__GFP_ZERO);

	return allocated ? 0 : -ENOMEM;
}
core_initcall(alloc_frozen_cpus);
|
|
|
|
/*
|
|
* When callbacks for CPU hotplug notifications are being executed, we must
|
|
* ensure that the state of the system with respect to the tasks being frozen
|
|
* or not, as reported by the notification, remains unchanged *throughout the
|
|
* duration* of the execution of the callbacks.
|
|
* Hence we need to prevent the freezer from racing with regular CPU hotplug.
|
|
*
|
|
* This synchronization is implemented by mutually excluding regular CPU
|
|
* hotplug and Suspend/Hibernate call paths by hooking onto the Suspend/
|
|
* Hibernate notifications.
|
|
*/
|
|
static int
|
|
cpu_hotplug_pm_callback(struct notifier_block *nb,
|
|
unsigned long action, void *ptr)
|
|
{
|
|
switch (action) {
|
|
|
|
case PM_SUSPEND_PREPARE:
|
|
case PM_HIBERNATION_PREPARE:
|
|
cpu_hotplug_disable();
|
|
break;
|
|
|
|
case PM_POST_SUSPEND:
|
|
case PM_POST_HIBERNATION:
|
|
cpu_hotplug_enable();
|
|
break;
|
|
|
|
default:
|
|
return NOTIFY_DONE;
|
|
}
|
|
|
|
return NOTIFY_OK;
|
|
}
|
|
|
|
|
|
/* Register cpu_hotplug_pm_callback() as a PM notifier at core_initcall time. */
static int __init cpu_hotplug_pm_sync_init(void)
{
	/*
	 * The x86 bsp_pm_callback relies on cpu_hotplug_pm_callback having
	 * already disabled CPU hotplug (to avoid a hotplug race), so
	 * cpu_hotplug_pm_callback must have the higher priority of the two.
	 */
	pm_notifier(cpu_hotplug_pm_callback, 0);

	return 0;
}
core_initcall(cpu_hotplug_pm_sync_init);
|
|
|
|
#endif /* CONFIG_PM_SLEEP_SMP */
|
|
|
|
/* CPU number of the boot processor; written by boot_cpu_init() on SMP builds. */
int __boot_cpu_id;
|
|
|
|
/*
 * Horrific hack because no more entries can be added to cpuhp_hp_states:
 * one startup callback that runs the perf step (when CONFIG_PERF_EVENTS is
 * set) followed by the random step for @cpu. The result of the perf step
 * is not propagated; this always returns 0.
 */
static int random_and_perf_prepare_fusion(unsigned int cpu)
{
#ifdef CONFIG_PERF_EVENTS
	perf_event_init_cpu(cpu);
#endif
	random_prepare_cpu(cpu);

	return 0;
}
|
|
/*
 * Combined online callback: runs the workqueue online step and then the
 * random online step for @cpu. Return values of both steps are ignored;
 * this always returns 0.
 */
static int random_and_workqueue_online_fusion(unsigned int cpu)
{
	workqueue_online_cpu(cpu);
	random_online_cpu(cpu);

	return 0;
}
|
|
|
|
#endif /* CONFIG_SMP */
|
|
|
|
/* Boot processor state steps */
|
|
static struct cpuhp_step cpuhp_hp_states[] = {
|
|
[CPUHP_OFFLINE] = {
|
|
.name = "offline",
|
|
.startup.single = NULL,
|
|
.teardown.single = NULL,
|
|
},
|
|
#ifdef CONFIG_SMP
|
|
[CPUHP_CREATE_THREADS]= {
|
|
.name = "threads:prepare",
|
|
.startup.single = smpboot_create_threads,
|
|
.teardown.single = NULL,
|
|
.cant_stop = true,
|
|
},
|
|
[CPUHP_PERF_PREPARE] = {
|
|
.name = "perf:prepare",
|
|
.startup.single = random_and_perf_prepare_fusion,
|
|
.teardown.single = perf_event_exit_cpu,
|
|
},
|
|
[CPUHP_WORKQUEUE_PREP] = {
|
|
.name = "workqueue:prepare",
|
|
.startup.single = workqueue_prepare_cpu,
|
|
.teardown.single = NULL,
|
|
},
|
|
[CPUHP_HRTIMERS_PREPARE] = {
|
|
.name = "hrtimers:prepare",
|
|
.startup.single = hrtimers_prepare_cpu,
|
|
.teardown.single = hrtimers_dead_cpu,
|
|
},
|
|
[CPUHP_SMPCFD_PREPARE] = {
|
|
.name = "smpcfd:prepare",
|
|
.startup.single = smpcfd_prepare_cpu,
|
|
.teardown.single = smpcfd_dead_cpu,
|
|
},
|
|
[CPUHP_RELAY_PREPARE] = {
|
|
.name = "relay:prepare",
|
|
.startup.single = relay_prepare_cpu,
|
|
.teardown.single = NULL,
|
|
},
|
|
[CPUHP_SLAB_PREPARE] = {
|
|
.name = "slab:prepare",
|
|
.startup.single = slab_prepare_cpu,
|
|
.teardown.single = slab_dead_cpu,
|
|
},
|
|
[CPUHP_RCUTREE_PREP] = {
|
|
.name = "RCU/tree:prepare",
|
|
.startup.single = rcutree_prepare_cpu,
|
|
.teardown.single = rcutree_dead_cpu,
|
|
},
|
|
/*
|
|
* On the tear-down path, timers_dead_cpu() must be invoked
|
|
* before blk_mq_queue_reinit_notify() from notify_dead(),
|
|
* otherwise a RCU stall occurs.
|
|
*/
|
|
[CPUHP_TIMERS_PREPARE] = {
|
|
.name = "timers:prepare",
|
|
.startup.single = timers_prepare_cpu,
|
|
.teardown.single = timers_dead_cpu,
|
|
},
|
|
/* Kicks the plugged cpu into life */
|
|
[CPUHP_BRINGUP_CPU] = {
|
|
.name = "cpu:bringup",
|
|
.startup.single = bringup_cpu,
|
|
.teardown.single = finish_cpu,
|
|
.cant_stop = true,
|
|
},
|
|
/* Final state before CPU kills itself */
|
|
[CPUHP_AP_IDLE_DEAD] = {
|
|
.name = "idle:dead",
|
|
},
|
|
/*
|
|
* Last state before CPU enters the idle loop to die. Transient state
|
|
* for synchronization.
|
|
*/
|
|
[CPUHP_AP_OFFLINE] = {
|
|
.name = "ap:offline",
|
|
.cant_stop = true,
|
|
},
|
|
/* First state is scheduler control. Interrupts are disabled */
|
|
[CPUHP_AP_SCHED_STARTING] = {
|
|
.name = "sched:starting",
|
|
.startup.single = sched_cpu_starting,
|
|
.teardown.single = sched_cpu_dying,
|
|
},
|
|
[CPUHP_AP_RCUTREE_DYING] = {
|
|
.name = "RCU/tree:dying",
|
|
.startup.single = NULL,
|
|
.teardown.single = rcutree_dying_cpu,
|
|
},
|
|
[CPUHP_AP_SMPCFD_DYING] = {
|
|
.name = "smpcfd:dying",
|
|
.startup.single = NULL,
|
|
.teardown.single = smpcfd_dying_cpu,
|
|
},
|
|
/* Entry state on starting. Interrupts enabled from here on. Transient
|
|
* state for synchronsization */
|
|
[CPUHP_AP_ONLINE] = {
|
|
.name = "ap:online",
|
|
},
|
|
/*
|
|
* Handled on controll processor until the plugged processor manages
|
|
* this itself.
|
|
*/
|
|
[CPUHP_TEARDOWN_CPU] = {
|
|
.name = "cpu:teardown",
|
|
.startup.single = NULL,
|
|
.teardown.single = takedown_cpu,
|
|
.cant_stop = true,
|
|
},
|
|
/* Handle smpboot threads park/unpark */
|
|
[CPUHP_AP_SMPBOOT_THREADS] = {
|
|
.name = "smpboot/threads:online",
|
|
.startup.single = smpboot_unpark_threads,
|
|
.teardown.single = smpboot_park_threads,
|
|
},
|
|
[CPUHP_AP_IRQ_AFFINITY_ONLINE] = {
|
|
.name = "irq/affinity:online",
|
|
.startup.single = irq_affinity_online_cpu,
|
|
.teardown.single = NULL,
|
|
},
|
|
[CPUHP_AP_PERF_ONLINE] = {
|
|
.name = "perf:online",
|
|
.startup.single = perf_event_init_cpu,
|
|
.teardown.single = perf_event_exit_cpu,
|
|
},
|
|
[CPUHP_AP_WATCHDOG_ONLINE] = {
|
|
.name = "lockup_detector:online",
|
|
.startup.single = lockup_detector_online_cpu,
|
|
.teardown.single = lockup_detector_offline_cpu,
|
|
},
|
|
[CPUHP_AP_WORKQUEUE_ONLINE] = {
|
|
.name = "workqueue:online",
|
|
.startup.single = random_and_workqueue_online_fusion,
|
|
.teardown.single = workqueue_offline_cpu,
|
|
},
|
|
[CPUHP_AP_RCUTREE_ONLINE] = {
|
|
.name = "RCU/tree:online",
|
|
.startup.single = rcutree_online_cpu,
|
|
.teardown.single = rcutree_offline_cpu,
|
|
},
|
|
#endif
|
|
/*
|
|
* The dynamically registered state space is here
|
|
*/
|
|
|
|
#ifdef CONFIG_SMP
|
|
/* Last state is scheduler control setting the cpu active */
|
|
[CPUHP_AP_ACTIVE] = {
|
|
.name = "sched:active",
|
|
.startup.single = sched_cpu_activate,
|
|
.teardown.single = sched_cpu_deactivate,
|
|
},
|
|
#endif
|
|
|
|
/* CPU is fully up and running. */
|
|
[CPUHP_ONLINE] = {
|
|
.name = "online",
|
|
.startup.single = NULL,
|
|
.teardown.single = NULL,
|
|
},
|
|
};
|
|
|
|
/* Sanity check for callbacks */
|
|
static int cpuhp_cb_check(enum cpuhp_state state)
|
|
{
|
|
if (state <= CPUHP_OFFLINE || state >= CPUHP_ONLINE)
|
|
return -EINVAL;
|
|
return 0;
|
|
}
|
|
|
|
/*
|
|
* Returns a free for dynamic slot assignment of the Online state. The states
|
|
* are protected by the cpuhp_slot_states mutex and an empty slot is identified
|
|
* by having no name assigned.
|
|
*/
|
|
static int cpuhp_reserve_state(enum cpuhp_state state)
|
|
{
|
|
enum cpuhp_state i, end;
|
|
struct cpuhp_step *step;
|
|
|
|
switch (state) {
|
|
case CPUHP_AP_ONLINE_DYN:
|
|
step = cpuhp_hp_states + CPUHP_AP_ONLINE_DYN;
|
|
end = CPUHP_AP_ONLINE_DYN_END;
|
|
break;
|
|
case CPUHP_BP_PREPARE_DYN:
|
|
step = cpuhp_hp_states + CPUHP_BP_PREPARE_DYN;
|
|
end = CPUHP_BP_PREPARE_DYN_END;
|
|
break;
|
|
default:
|
|
return -EINVAL;
|
|
}
|
|
|
|
for (i = state; i <= end; i++, step++) {
|
|
if (!step->name)
|
|
return i;
|
|
}
|
|
WARN(1, "No more dynamic states available for CPU hotplug\n");
|
|
return -ENOSPC;
|
|
}
|
|
|
|
static int cpuhp_store_callbacks(enum cpuhp_state state, const char *name,
|
|
int (*startup)(unsigned int cpu),
|
|
int (*teardown)(unsigned int cpu),
|
|
bool multi_instance)
|
|
{
|
|
/* (Un)Install the callbacks for further cpu hotplug operations */
|
|
struct cpuhp_step *sp;
|
|
int ret = 0;
|
|
|
|
/*
|
|
* If name is NULL, then the state gets removed.
|
|
*
|
|
* CPUHP_AP_ONLINE_DYN and CPUHP_BP_PREPARE_DYN are handed out on
|
|
* the first allocation from these dynamic ranges, so the removal
|
|
* would trigger a new allocation and clear the wrong (already
|
|
* empty) state, leaving the callbacks of the to be cleared state
|
|
* dangling, which causes wreckage on the next hotplug operation.
|
|
*/
|
|
if (name && (state == CPUHP_AP_ONLINE_DYN ||
|
|
state == CPUHP_BP_PREPARE_DYN)) {
|
|
ret = cpuhp_reserve_state(state);
|
|
if (ret < 0)
|
|
return ret;
|
|
state = ret;
|
|
}
|
|
sp = cpuhp_get_step(state);
|
|
if (name && sp->name)
|
|
return -EBUSY;
|
|
|
|
sp->startup.single = startup;
|
|
sp->teardown.single = teardown;
|
|
sp->name = name;
|
|
sp->multi_instance = multi_instance;
|
|
INIT_HLIST_HEAD(&sp->list);
|
|
return ret;
|
|
}
|
|
|
|
static void *cpuhp_get_teardown_cb(enum cpuhp_state state)
|
|
{
|
|
return cpuhp_get_step(state)->teardown.single;
|
|
}
|
|
|
|
/*
|
|
* Call the startup/teardown function for a step either on the AP or
|
|
* on the current CPU.
|
|
*/
|
|
static int cpuhp_issue_call(int cpu, enum cpuhp_state state, bool bringup,
|
|
struct hlist_node *node)
|
|
{
|
|
struct cpuhp_step *sp = cpuhp_get_step(state);
|
|
int ret;
|
|
|
|
/*
|
|
* If there's nothing to do, we done.
|
|
* Relies on the union for multi_instance.
|
|
*/
|
|
if ((bringup && !sp->startup.single) ||
|
|
(!bringup && !sp->teardown.single))
|
|
return 0;
|
|
/*
|
|
* The non AP bound callbacks can fail on bringup. On teardown
|
|
* e.g. module removal we crash for now.
|
|
*/
|
|
#ifdef CONFIG_SMP
|
|
if (cpuhp_is_ap_state(state))
|
|
ret = cpuhp_invoke_ap_callback(cpu, state, bringup, node);
|
|
else
|
|
ret = cpuhp_invoke_callback(cpu, state, bringup, node, NULL);
|
|
#else
|
|
ret = cpuhp_invoke_callback(cpu, state, bringup, node, NULL);
|
|
#endif
|
|
BUG_ON(ret && !bringup);
|
|
return ret;
|
|
}
|
|
|
|
/*
|
|
* Called from __cpuhp_setup_state on a recoverable failure.
|
|
*
|
|
* Note: The teardown callbacks for rollback are not allowed to fail!
|
|
*/
|
|
static void cpuhp_rollback_install(int failedcpu, enum cpuhp_state state,
|
|
struct hlist_node *node)
|
|
{
|
|
int cpu;
|
|
|
|
/* Roll back the already executed steps on the other cpus */
|
|
for_each_present_cpu(cpu) {
|
|
struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
|
|
int cpustate = st->state;
|
|
|
|
if (cpu >= failedcpu)
|
|
break;
|
|
|
|
/* Did we invoke the startup call on that cpu ? */
|
|
if (cpustate >= state)
|
|
cpuhp_issue_call(cpu, state, false, node);
|
|
}
|
|
}
|
|
|
|
int __cpuhp_state_add_instance_cpuslocked(enum cpuhp_state state,
|
|
struct hlist_node *node,
|
|
bool invoke)
|
|
{
|
|
struct cpuhp_step *sp;
|
|
int cpu;
|
|
int ret;
|
|
|
|
lockdep_assert_cpus_held();
|
|
|
|
sp = cpuhp_get_step(state);
|
|
if (sp->multi_instance == false)
|
|
return -EINVAL;
|
|
|
|
mutex_lock(&cpuhp_state_mutex);
|
|
|
|
if (!invoke || !sp->startup.multi)
|
|
goto add_node;
|
|
|
|
/*
|
|
* Try to call the startup callback for each present cpu
|
|
* depending on the hotplug state of the cpu.
|
|
*/
|
|
for_each_present_cpu(cpu) {
|
|
struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
|
|
int cpustate = st->state;
|
|
|
|
if (cpustate < state)
|
|
continue;
|
|
|
|
ret = cpuhp_issue_call(cpu, state, true, node);
|
|
if (ret) {
|
|
if (sp->teardown.multi)
|
|
cpuhp_rollback_install(cpu, state, node);
|
|
goto unlock;
|
|
}
|
|
}
|
|
add_node:
|
|
ret = 0;
|
|
hlist_add_head(node, &sp->list);
|
|
unlock:
|
|
mutex_unlock(&cpuhp_state_mutex);
|
|
return ret;
|
|
}
|
|
|
|
int __cpuhp_state_add_instance(enum cpuhp_state state, struct hlist_node *node,
|
|
bool invoke)
|
|
{
|
|
int ret;
|
|
|
|
cpus_read_lock();
|
|
ret = __cpuhp_state_add_instance_cpuslocked(state, node, invoke);
|
|
cpus_read_unlock();
|
|
return ret;
|
|
}
|
|
EXPORT_SYMBOL_GPL(__cpuhp_state_add_instance);
|
|
|
|
/**
|
|
* __cpuhp_setup_state_cpuslocked - Setup the callbacks for an hotplug machine state
|
|
* @state: The state to setup
|
|
* @invoke: If true, the startup function is invoked for cpus where
|
|
* cpu state >= @state
|
|
* @startup: startup callback function
|
|
* @teardown: teardown callback function
|
|
* @multi_instance: State is set up for multiple instances which get
|
|
* added afterwards.
|
|
*
|
|
* The caller needs to hold cpus read locked while calling this function.
|
|
* Returns:
|
|
* On success:
|
|
* Positive state number if @state is CPUHP_AP_ONLINE_DYN
|
|
* 0 for all other states
|
|
* On failure: proper (negative) error code
|
|
*/
|
|
int __cpuhp_setup_state_cpuslocked(enum cpuhp_state state,
|
|
const char *name, bool invoke,
|
|
int (*startup)(unsigned int cpu),
|
|
int (*teardown)(unsigned int cpu),
|
|
bool multi_instance)
|
|
{
|
|
int cpu, ret = 0;
|
|
bool dynstate;
|
|
|
|
lockdep_assert_cpus_held();
|
|
|
|
if (cpuhp_cb_check(state) || !name)
|
|
return -EINVAL;
|
|
|
|
mutex_lock(&cpuhp_state_mutex);
|
|
|
|
ret = cpuhp_store_callbacks(state, name, startup, teardown,
|
|
multi_instance);
|
|
|
|
dynstate = state == CPUHP_AP_ONLINE_DYN;
|
|
if (ret > 0 && dynstate) {
|
|
state = ret;
|
|
ret = 0;
|
|
}
|
|
|
|
if (ret || !invoke || !startup)
|
|
goto out;
|
|
|
|
/*
|
|
* Try to call the startup callback for each present cpu
|
|
* depending on the hotplug state of the cpu.
|
|
*/
|
|
for_each_present_cpu(cpu) {
|
|
struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
|
|
int cpustate = st->state;
|
|
|
|
if (cpustate < state)
|
|
continue;
|
|
|
|
ret = cpuhp_issue_call(cpu, state, true, NULL);
|
|
if (ret) {
|
|
if (teardown)
|
|
cpuhp_rollback_install(cpu, state, NULL);
|
|
cpuhp_store_callbacks(state, NULL, NULL, NULL, false);
|
|
goto out;
|
|
}
|
|
}
|
|
out:
|
|
mutex_unlock(&cpuhp_state_mutex);
|
|
/*
|
|
* If the requested state is CPUHP_AP_ONLINE_DYN, return the
|
|
* dynamically allocated state in case of success.
|
|
*/
|
|
if (!ret && dynstate)
|
|
return state;
|
|
return ret;
|
|
}
|
|
EXPORT_SYMBOL(__cpuhp_setup_state_cpuslocked);
|
|
|
|
/*
 * Same as __cpuhp_setup_state_cpuslocked(), but takes and releases the
 * cpus read lock around the call. Returns that function's result.
 */
int __cpuhp_setup_state(enum cpuhp_state state,
			const char *name, bool invoke,
			int (*startup)(unsigned int cpu),
			int (*teardown)(unsigned int cpu),
			bool multi_instance)
{
	int rc;

	cpus_read_lock();
	rc = __cpuhp_setup_state_cpuslocked(state, name, invoke, startup,
					    teardown, multi_instance);
	cpus_read_unlock();

	return rc;
}
EXPORT_SYMBOL(__cpuhp_setup_state);
|
|
|
|
/*
 * Remove instance @node from the multi-instance state @state. When @invoke
 * is set and the state has a teardown callback, that callback is run for
 * the instance on every present CPU that has reached @state.
 *
 * Returns 0 on success, -EINVAL if @state is not multi-instance. An invalid
 * @state is a BUG.
 */
int __cpuhp_state_remove_instance(enum cpuhp_state state,
				  struct hlist_node *node, bool invoke)
{
	struct cpuhp_step *step = cpuhp_get_step(state);
	int cpu;

	BUG_ON(cpuhp_cb_check(state));

	if (!step->multi_instance)
		return -EINVAL;

	cpus_read_lock();
	mutex_lock(&cpuhp_state_mutex);

	if (invoke && cpuhp_get_teardown_cb(state)) {
		/*
		 * Call the teardown callback for each present cpu depending
		 * on the hotplug state of the cpu. This function is not
		 * allowed to fail currently!
		 */
		for_each_present_cpu(cpu) {
			struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
			int reached = st->state;

			if (reached >= state)
				cpuhp_issue_call(cpu, state, false, node);
		}
	}

	hlist_del(node);
	mutex_unlock(&cpuhp_state_mutex);
	cpus_read_unlock();

	return 0;
}
EXPORT_SYMBOL_GPL(__cpuhp_state_remove_instance);
|
|
|
|
/**
|
|
* __cpuhp_remove_state_cpuslocked - Remove the callbacks for an hotplug machine state
|
|
* @state: The state to remove
|
|
* @invoke: If true, the teardown function is invoked for cpus where
|
|
* cpu state >= @state
|
|
*
|
|
* The caller needs to hold cpus read locked while calling this function.
|
|
* The teardown callback is currently not allowed to fail. Think
|
|
* about module removal!
|
|
*/
|
|
void __cpuhp_remove_state_cpuslocked(enum cpuhp_state state, bool invoke)
|
|
{
|
|
struct cpuhp_step *sp = cpuhp_get_step(state);
|
|
int cpu;
|
|
|
|
BUG_ON(cpuhp_cb_check(state));
|
|
|
|
lockdep_assert_cpus_held();
|
|
|
|
mutex_lock(&cpuhp_state_mutex);
|
|
if (sp->multi_instance) {
|
|
WARN(!hlist_empty(&sp->list),
|
|
"Error: Removing state %d which has instances left.\n",
|
|
state);
|
|
goto remove;
|
|
}
|
|
|
|
if (!invoke || !cpuhp_get_teardown_cb(state))
|
|
goto remove;
|
|
|
|
/*
|
|
* Call the teardown callback for each present cpu depending
|
|
* on the hotplug state of the cpu. This function is not
|
|
* allowed to fail currently!
|
|
*/
|
|
for_each_present_cpu(cpu) {
|
|
struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
|
|
int cpustate = st->state;
|
|
|
|
if (cpustate >= state)
|
|
cpuhp_issue_call(cpu, state, false, NULL);
|
|
}
|
|
remove:
|
|
cpuhp_store_callbacks(state, NULL, NULL, NULL, false);
|
|
mutex_unlock(&cpuhp_state_mutex);
|
|
}
|
|
EXPORT_SYMBOL(__cpuhp_remove_state_cpuslocked);
|
|
|
|
/*
 * Same as __cpuhp_remove_state_cpuslocked(), but takes and releases the
 * cpus read lock around the call.
 */
void __cpuhp_remove_state(enum cpuhp_state state, bool invoke)
{
	cpus_read_lock();
	__cpuhp_remove_state_cpuslocked(state, invoke);
	cpus_read_unlock();
}
EXPORT_SYMBOL(__cpuhp_remove_state);
|
|
|
|
#ifdef CONFIG_HOTPLUG_SMT
|
|
static void cpuhp_offline_cpu_device(unsigned int cpu)
|
|
{
|
|
struct device *dev = get_cpu_device(cpu);
|
|
|
|
dev->offline = true;
|
|
/* Tell user space about the state change */
|
|
kobject_uevent(&dev->kobj, KOBJ_OFFLINE);
|
|
}
|
|
|
|
static void cpuhp_online_cpu_device(unsigned int cpu)
|
|
{
|
|
struct device *dev = get_cpu_device(cpu);
|
|
|
|
dev->offline = false;
|
|
/* Tell user space about the state change */
|
|
kobject_uevent(&dev->kobj, KOBJ_ONLINE);
|
|
}
|
|
|
|
int cpuhp_smt_disable(enum cpuhp_smt_control ctrlval)
|
|
{
|
|
int cpu, ret = 0;
|
|
|
|
cpu_maps_update_begin();
|
|
for_each_online_cpu(cpu) {
|
|
if (topology_is_primary_thread(cpu))
|
|
continue;
|
|
ret = cpu_down_maps_locked(cpu, CPUHP_OFFLINE);
|
|
if (ret)
|
|
break;
|
|
/*
|
|
* As this needs to hold the cpu maps lock it's impossible
|
|
* to call device_offline() because that ends up calling
|
|
* cpu_down() which takes cpu maps lock. cpu maps lock
|
|
* needs to be held as this might race against in kernel
|
|
* abusers of the hotplug machinery (thermal management).
|
|
*
|
|
* So nothing would update device:offline state. That would
|
|
* leave the sysfs entry stale and prevent onlining after
|
|
* smt control has been changed to 'off' again. This is
|
|
* called under the sysfs hotplug lock, so it is properly
|
|
* serialized against the regular offline usage.
|
|
*/
|
|
cpuhp_offline_cpu_device(cpu);
|
|
}
|
|
if (!ret)
|
|
cpu_smt_control = ctrlval;
|
|
cpu_maps_update_done();
|
|
return ret;
|
|
}
|
|
|
|
int cpuhp_smt_enable(void)
|
|
{
|
|
int cpu, ret = 0;
|
|
|
|
cpu_maps_update_begin();
|
|
cpu_smt_control = CPU_SMT_ENABLED;
|
|
for_each_present_cpu(cpu) {
|
|
/* Skip online CPUs and CPUs on offline nodes */
|
|
if (cpu_online(cpu) || !node_online(cpu_to_node(cpu)))
|
|
continue;
|
|
ret = _cpu_up(cpu, 0, CPUHP_ONLINE);
|
|
if (ret)
|
|
break;
|
|
/* See comment in cpuhp_smt_disable() */
|
|
cpuhp_online_cpu_device(cpu);
|
|
}
|
|
cpu_maps_update_done();
|
|
return ret;
|
|
}
|
|
#endif
|
|
|
|
#if defined(CONFIG_SYSFS) && defined(CONFIG_HOTPLUG_CPU)
|
|
static ssize_t show_cpuhp_state(struct device *dev,
|
|
struct device_attribute *attr, char *buf)
|
|
{
|
|
struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, dev->id);
|
|
|
|
return sprintf(buf, "%d\n", st->state);
|
|
}
|
|
static DEVICE_ATTR(state, 0444, show_cpuhp_state, NULL);
|
|
|
|
/*
 * sysfs "target" store: parse a decimal hotplug state and move the CPU
 * towards it with cpu_up()/cpu_down().
 *
 * Without CONFIG_CPU_HOTPLUG_STATE_CONTROL only CPUHP_OFFLINE and
 * CPUHP_ONLINE are accepted. A state without a name or one flagged
 * cant_stop is rejected with -EINVAL.
 *
 * Returns @count on success or a negative error code.
 */
static ssize_t write_cpuhp_target(struct device *dev,
				  struct device_attribute *attr,
				  const char *buf, size_t count)
{
	struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, dev->id);
	struct cpuhp_step *step;
	int target, ret;

	ret = kstrtoint(buf, 10, &target);
	if (ret)
		return ret;

#ifdef CONFIG_CPU_HOTPLUG_STATE_CONTROL
	if (target < CPUHP_OFFLINE || target > CPUHP_ONLINE)
		return -EINVAL;
#else
	if (target != CPUHP_OFFLINE && target != CPUHP_ONLINE)
		return -EINVAL;
#endif

	ret = lock_device_hotplug_sysfs();
	if (ret)
		return ret;

	/* The step table is inspected under cpuhp_state_mutex */
	mutex_lock(&cpuhp_state_mutex);
	step = cpuhp_get_step(target);
	if (!step->name || step->cant_stop)
		ret = -EINVAL;
	mutex_unlock(&cpuhp_state_mutex);
	if (ret)
		goto out;

	if (st->state < target)
		ret = cpu_up(dev->id, target);
	else if (st->state > target)
		ret = cpu_down(dev->id, target);
	else if (WARN_ON(st->target != target))
		st->target = target;	/* already there: only resync target */
out:
	unlock_device_hotplug();
	return ret ? ret : count;
}
|
|
|
|
static ssize_t show_cpuhp_target(struct device *dev,
|
|
struct device_attribute *attr, char *buf)
|
|
{
|
|
struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, dev->id);
|
|
|
|
return sprintf(buf, "%d\n", st->target);
|
|
}
|
|
static DEVICE_ATTR(target, 0644, show_cpuhp_target, write_cpuhp_target);
|
|
|
|
|
|
/*
 * sysfs "fail" store: parse a decimal hotplug state and record it in the
 * CPU's ->fail field.
 *
 * Rejected with -EINVAL: values outside CPUHP_OFFLINE..CPUHP_ONLINE,
 * atomic (STARTING/DYING) states, and states without any callback.
 *
 * Returns @count on success or a negative error code.
 */
static ssize_t write_cpuhp_fail(struct device *dev,
				struct device_attribute *attr,
				const char *buf, size_t count)
{
	struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, dev->id);
	struct cpuhp_step *step;
	int fail, ret;

	ret = kstrtoint(buf, 10, &fail);
	if (ret)
		return ret;

	if (fail < CPUHP_OFFLINE || fail > CPUHP_ONLINE)
		return -EINVAL;

	/* Cannot fail STARTING/DYING callbacks. */
	if (cpuhp_is_atomic_state(fail))
		return -EINVAL;

	/* Cannot fail anything that doesn't have callbacks. */
	mutex_lock(&cpuhp_state_mutex);
	step = cpuhp_get_step(fail);
	if (!step->startup.single && !step->teardown.single)
		ret = -EINVAL;
	mutex_unlock(&cpuhp_state_mutex);
	if (ret)
		return ret;

	st->fail = fail;

	return count;
}
|
|
|
|
static ssize_t show_cpuhp_fail(struct device *dev,
|
|
struct device_attribute *attr, char *buf)
|
|
{
|
|
struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, dev->id);
|
|
|
|
return sprintf(buf, "%d\n", st->fail);
|
|
}
|
|
|
|
static DEVICE_ATTR(fail, 0644, show_cpuhp_fail, write_cpuhp_fail);
|
|
|
|
static struct attribute *cpuhp_cpu_attrs[] = {
|
|
&dev_attr_state.attr,
|
|
&dev_attr_target.attr,
|
|
&dev_attr_fail.attr,
|
|
NULL
|
|
};
|
|
|
|
static const struct attribute_group cpuhp_cpu_attr_group = {
|
|
.attrs = cpuhp_cpu_attrs,
|
|
.name = "hotplug",
|
|
NULL
|
|
};
|
|
|
|
static ssize_t show_cpuhp_states(struct device *dev,
|
|
struct device_attribute *attr, char *buf)
|
|
{
|
|
ssize_t cur, res = 0;
|
|
int i;
|
|
|
|
mutex_lock(&cpuhp_state_mutex);
|
|
for (i = CPUHP_OFFLINE; i <= CPUHP_ONLINE; i++) {
|
|
struct cpuhp_step *sp = cpuhp_get_step(i);
|
|
|
|
if (sp->name) {
|
|
cur = sprintf(buf, "%3d: %s\n", i, sp->name);
|
|
buf += cur;
|
|
res += cur;
|
|
}
|
|
}
|
|
mutex_unlock(&cpuhp_state_mutex);
|
|
return res;
|
|
}
|
|
static DEVICE_ATTR(states, 0444, show_cpuhp_states, NULL);
|
|
|
|
static struct attribute *cpuhp_cpu_root_attrs[] = {
|
|
&dev_attr_states.attr,
|
|
NULL
|
|
};
|
|
|
|
static const struct attribute_group cpuhp_cpu_root_attr_group = {
|
|
.attrs = cpuhp_cpu_root_attrs,
|
|
.name = "hotplug",
|
|
NULL
|
|
};
|
|
|
|
#ifdef CONFIG_HOTPLUG_SMT
|
|
|
|
static ssize_t
|
|
__store_smt_control(struct device *dev, struct device_attribute *attr,
|
|
const char *buf, size_t count)
|
|
{
|
|
int ctrlval, ret;
|
|
|
|
if (sysfs_streq(buf, "on"))
|
|
ctrlval = CPU_SMT_ENABLED;
|
|
else if (sysfs_streq(buf, "off"))
|
|
ctrlval = CPU_SMT_DISABLED;
|
|
else if (sysfs_streq(buf, "forceoff"))
|
|
ctrlval = CPU_SMT_FORCE_DISABLED;
|
|
else
|
|
return -EINVAL;
|
|
|
|
if (cpu_smt_control == CPU_SMT_FORCE_DISABLED)
|
|
return -EPERM;
|
|
|
|
if (cpu_smt_control == CPU_SMT_NOT_SUPPORTED)
|
|
return -ENODEV;
|
|
|
|
ret = lock_device_hotplug_sysfs();
|
|
if (ret)
|
|
return ret;
|
|
|
|
if (ctrlval != cpu_smt_control) {
|
|
switch (ctrlval) {
|
|
case CPU_SMT_ENABLED:
|
|
ret = cpuhp_smt_enable();
|
|
break;
|
|
case CPU_SMT_DISABLED:
|
|
case CPU_SMT_FORCE_DISABLED:
|
|
ret = cpuhp_smt_disable(ctrlval);
|
|
break;
|
|
}
|
|
}
|
|
|
|
unlock_device_hotplug();
|
|
return ret ? ret : count;
|
|
}
|
|
|
|
#else /* !CONFIG_HOTPLUG_SMT */
|
|
static ssize_t
|
|
__store_smt_control(struct device *dev, struct device_attribute *attr,
|
|
const char *buf, size_t count)
|
|
{
|
|
return -ENODEV;
|
|
}
|
|
#endif /* CONFIG_HOTPLUG_SMT */
|
|
|
|
static const char *smt_states[] = {
|
|
[CPU_SMT_ENABLED] = "on",
|
|
[CPU_SMT_DISABLED] = "off",
|
|
[CPU_SMT_FORCE_DISABLED] = "forceoff",
|
|
[CPU_SMT_NOT_SUPPORTED] = "notsupported",
|
|
[CPU_SMT_NOT_IMPLEMENTED] = "notimplemented",
|
|
};
|
|
|
|
static ssize_t
|
|
show_smt_control(struct device *dev, struct device_attribute *attr, char *buf)
|
|
{
|
|
const char *state = smt_states[cpu_smt_control];
|
|
|
|
return snprintf(buf, PAGE_SIZE - 2, "%s\n", state);
|
|
}
|
|
|
|
static ssize_t
|
|
store_smt_control(struct device *dev, struct device_attribute *attr,
|
|
const char *buf, size_t count)
|
|
{
|
|
return __store_smt_control(dev, attr, buf, count);
|
|
}
|
|
static DEVICE_ATTR(control, 0644, show_smt_control, store_smt_control);
|
|
|
|
static ssize_t
|
|
show_smt_active(struct device *dev, struct device_attribute *attr, char *buf)
|
|
{
|
|
return snprintf(buf, PAGE_SIZE - 2, "%d\n", sched_smt_active());
|
|
}
|
|
static DEVICE_ATTR(active, 0444, show_smt_active, NULL);
|
|
|
|
static struct attribute *cpuhp_smt_attrs[] = {
|
|
&dev_attr_control.attr,
|
|
&dev_attr_active.attr,
|
|
NULL
|
|
};
|
|
|
|
static const struct attribute_group cpuhp_smt_attr_group = {
|
|
.attrs = cpuhp_smt_attrs,
|
|
.name = "smt",
|
|
NULL
|
|
};
|
|
|
|
/*
 * Create the "smt" attribute group on the CPU subsystem root device.
 * Returns the result of sysfs_create_group().
 */
static int __init cpu_smt_sysfs_init(void)
{
	struct kobject *root = &cpu_subsys.dev_root->kobj;

	return sysfs_create_group(root, &cpuhp_smt_attr_group);
}
|
|
|
|
/*
 * Create the hotplug sysfs interface: the "smt" and "hotplug" groups on
 * the CPU subsystem root and a "hotplug" group on every possible CPU that
 * has a device. Stops at, and returns, the first error; groups created up
 * to that point are left in place. Returns 0 on success.
 */
static int __init cpuhp_sysfs_init(void)
{
	int cpu, err;

	err = cpu_smt_sysfs_init();
	if (err)
		return err;

	err = sysfs_create_group(&cpu_subsys.dev_root->kobj,
				 &cpuhp_cpu_root_attr_group);
	if (err)
		return err;

	for_each_possible_cpu(cpu) {
		struct device *cpu_dev = get_cpu_device(cpu);

		/* Possible CPUs without a device are skipped */
		if (!cpu_dev)
			continue;

		err = sysfs_create_group(&cpu_dev->kobj, &cpuhp_cpu_attr_group);
		if (err)
			return err;
	}

	return 0;
}
device_initcall(cpuhp_sysfs_init);
|
|
#endif /* CONFIG_SYSFS && CONFIG_HOTPLUG_CPU */
|
|
|
|
/*
|
|
* cpu_bit_bitmap[] is a special, "compressed" data structure that
|
|
* represents all NR_CPUS bits binary values of 1<<nr.
|
|
*
|
|
* It is used by cpumask_of() to get a constant address to a CPU
|
|
* mask value that has a single bit set only.
|
|
*/
|
|
|
|
/* cpu_bit_bitmap[0] is empty - so we can back into it */
|
|
#define MASK_DECLARE_1(x) [x+1][0] = (1UL << (x))
|
|
#define MASK_DECLARE_2(x) MASK_DECLARE_1(x), MASK_DECLARE_1(x+1)
|
|
#define MASK_DECLARE_4(x) MASK_DECLARE_2(x), MASK_DECLARE_2(x+2)
|
|
#define MASK_DECLARE_8(x) MASK_DECLARE_4(x), MASK_DECLARE_4(x+4)
|
|
|
|
const unsigned long cpu_bit_bitmap[BITS_PER_LONG+1][BITS_TO_LONGS(NR_CPUS)] = {
|
|
|
|
MASK_DECLARE_8(0), MASK_DECLARE_8(8),
|
|
MASK_DECLARE_8(16), MASK_DECLARE_8(24),
|
|
#if BITS_PER_LONG > 32
|
|
MASK_DECLARE_8(32), MASK_DECLARE_8(40),
|
|
MASK_DECLARE_8(48), MASK_DECLARE_8(56),
|
|
#endif
|
|
};
|
|
EXPORT_SYMBOL_GPL(cpu_bit_bitmap);
|
|
|
|
/* Constant NR_CPUS-bit bitmap initialized with CPU_BITS_ALL. */
const DECLARE_BITMAP(cpu_all_bits, NR_CPUS) = CPU_BITS_ALL;
EXPORT_SYMBOL(cpu_all_bits);
|
|
|
|
#ifdef CONFIG_INIT_ALL_POSSIBLE
|
|
struct cpumask __cpu_possible_mask __read_mostly
|
|
= {CPU_BITS_ALL};
|
|
#else
|
|
struct cpumask __cpu_possible_mask __read_mostly;
|
|
#endif
|
|
EXPORT_SYMBOL(__cpu_possible_mask);
|
|
|
|
struct cpumask __cpu_online_mask __read_mostly;
|
|
EXPORT_SYMBOL(__cpu_online_mask);
|
|
|
|
struct cpumask __cpu_present_mask __read_mostly;
|
|
EXPORT_SYMBOL(__cpu_present_mask);
|
|
|
|
struct cpumask __cpu_active_mask __read_mostly;
|
|
EXPORT_SYMBOL(__cpu_active_mask);
|
|
|
|
atomic_t __num_online_cpus __read_mostly;
|
|
EXPORT_SYMBOL(__num_online_cpus);
|
|
|
|
void init_cpu_present(const struct cpumask *src)
|
|
{
|
|
cpumask_copy(&__cpu_present_mask, src);
|
|
}
|
|
|
|
void init_cpu_possible(const struct cpumask *src)
|
|
{
|
|
cpumask_copy(&__cpu_possible_mask, src);
|
|
}
|
|
|
|
void init_cpu_online(const struct cpumask *src)
|
|
{
|
|
cpumask_copy(&__cpu_online_mask, src);
|
|
}
|
|
|
|
/*
 * Set or clear @cpu in the online mask and keep __num_online_cpus in step:
 * the counter only changes when the bit actually changed.
 */
void set_cpu_online(unsigned int cpu, bool online)
{
	/*
	 * atomic_inc/dec() is required to handle the horrid abuse of this
	 * function by the reboot and kexec code which invoke it from
	 * IPI/NMI broadcasts when shutting down CPUs. Invocation from
	 * regular CPU hotplug is properly serialized.
	 *
	 * Note, that the fact that __num_online_cpus is of type atomic_t
	 * does not protect readers which are not serialized against
	 * concurrent hotplug operations.
	 */
	if (online) {
		if (!cpumask_test_and_set_cpu(cpu, &__cpu_online_mask))
			atomic_inc(&__num_online_cpus);
		return;
	}

	if (cpumask_test_and_clear_cpu(cpu, &__cpu_online_mask))
		atomic_dec(&__num_online_cpus);
}
|
|
|
|
/*
|
|
* Activate the first processor.
|
|
*/
|
|
void __init boot_cpu_init(void)
|
|
{
|
|
int cpu = smp_processor_id();
|
|
|
|
/* Mark the boot cpu "present", "online" etc for SMP and UP case */
|
|
set_cpu_online(cpu, true);
|
|
set_cpu_active(cpu, true);
|
|
set_cpu_present(cpu, true);
|
|
set_cpu_possible(cpu, true);
|
|
|
|
#ifdef CONFIG_SMP
|
|
__boot_cpu_id = cpu;
|
|
#endif
|
|
}
|
|
|
|
/*
|
|
* Must be called _AFTER_ setting up the per_cpu areas
|
|
*/
|
|
void __init boot_cpu_hotplug_init(void)
|
|
{
|
|
#ifdef CONFIG_SMP
|
|
cpumask_set_cpu(smp_processor_id(), &cpus_booted_once_mask);
|
|
#endif
|
|
this_cpu_write(cpuhp_state.state, CPUHP_ONLINE);
|
|
}
|
|
|
|
/*
 * Settings of the global "mitigations=" cmdline option, which toggles
 * optional CPU mitigations.
 */
enum cpu_mitigations {
	CPU_MITIGATIONS_OFF,		/* mitigations=off */
	CPU_MITIGATIONS_AUTO,		/* mitigations=auto */
	CPU_MITIGATIONS_AUTO_NOSMT,	/* mitigations=auto,nosmt */
};
|
|
|
|
/* Selected mitigation mode; CPU_MITIGATIONS_AUTO unless "mitigations=" changes it. */
static enum cpu_mitigations cpu_mitigations __ro_after_init =
	CPU_MITIGATIONS_AUTO;
|
|
|
|
static int __init mitigations_parse_cmdline(char *arg)
|
|
{
|
|
if (!strcmp(arg, "off"))
|
|
cpu_mitigations = CPU_MITIGATIONS_OFF;
|
|
else if (!strcmp(arg, "auto"))
|
|
cpu_mitigations = CPU_MITIGATIONS_AUTO;
|
|
else if (!strcmp(arg, "auto,nosmt"))
|
|
cpu_mitigations = CPU_MITIGATIONS_AUTO_NOSMT;
|
|
else
|
|
pr_crit("Unsupported mitigations=%s, system may still be vulnerable\n",
|
|
arg);
|
|
|
|
return 0;
|
|
}
|
|
early_param("mitigations", mitigations_parse_cmdline);
|
|
|
|
/* mitigations=off */
|
|
bool cpu_mitigations_off(void)
|
|
{
|
|
return cpu_mitigations == CPU_MITIGATIONS_OFF;
|
|
}
|
|
EXPORT_SYMBOL_GPL(cpu_mitigations_off);
|
|
|
|
/* mitigations=auto,nosmt */
|
|
bool cpu_mitigations_auto_nosmt(void)
|
|
{
|
|
return cpu_mitigations == CPU_MITIGATIONS_AUTO_NOSMT;
|
|
}
|
|
EXPORT_SYMBOL_GPL(cpu_mitigations_auto_nosmt);
|