From 2b02985bf83e6da9d9165c5f2165af1b97d76edf Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Mon, 14 Dec 2020 16:04:20 -0600 Subject: [PATCH 001/103] objtool: Fix seg fault with Clang non-section symbols commit 44f6a7c0755d8dd453c70557e11687bb080a6f21 upstream. The Clang assembler likes to strip section symbols, which means objtool can't reference some text code by its section. This confuses objtool greatly, causing it to seg fault. The fix is similar to what was done before, for ORC reloc generation: e81e07244325 ("objtool: Support Clang non-section symbols in ORC generation") Factor out that code into a common helper and use it for static call reloc generation as well. Reported-by: Arnd Bergmann Signed-off-by: Josh Poimboeuf Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Nick Desaulniers Reviewed-by: Miroslav Benes Link: https://github.com/ClangBuiltLinux/linux/issues/1207 Link: https://lkml.kernel.org/r/ba6b6c0f0dd5acbba66e403955a967d9fdd1726a.1607983452.git.jpoimboe@redhat.com Signed-off-by: Greg Kroah-Hartman --- tools/objtool/check.c | 11 +++++++++-- tools/objtool/elf.c | 26 ++++++++++++++++++++++++++ tools/objtool/elf.h | 2 ++ tools/objtool/orc_gen.c | 29 +++++------------------------ 4 files changed, 42 insertions(+), 26 deletions(-) diff --git a/tools/objtool/check.c b/tools/objtool/check.c index 956383d5fa62..4bd30315eb62 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -467,13 +467,20 @@ static int create_static_call_sections(struct objtool_file *file) /* populate reloc for 'addr' */ reloc = malloc(sizeof(*reloc)); + if (!reloc) { perror("malloc"); return -1; } memset(reloc, 0, sizeof(*reloc)); - reloc->sym = insn->sec->sym; - reloc->addend = insn->offset; + + insn_to_reloc_sym_addend(insn->sec, insn->offset, reloc); + if (!reloc->sym) { + WARN_FUNC("static call tramp: missing containing symbol", + insn->sec, insn->offset); + return -1; + } + reloc->type = R_X86_64_PC32; reloc->offset = idx * sizeof(struct static_call_site); reloc->sec = reloc_sec; diff --git a/tools/objtool/elf.c b/tools/objtool/elf.c index f4f3e8d99593..d8421e1d06be 100644 --- a/tools/objtool/elf.c +++ b/tools/objtool/elf.c @@ -262,6 +262,32 @@ struct reloc *find_reloc_by_dest(const struct elf *elf, struct section *sec, uns return find_reloc_by_dest_range(elf, sec, offset, 1); } +void insn_to_reloc_sym_addend(struct section *sec, unsigned long offset, + struct reloc *reloc) +{ + if (sec->sym) { + reloc->sym = sec->sym; + reloc->addend = offset; + return; + } + + /* + * The Clang assembler strips section symbols, so we have to reference + * the function symbol instead: + */ + reloc->sym = find_symbol_containing(sec, offset); + if (!reloc->sym) { + /* + * Hack alert. This happens when we need to reference the NOP + * pad insn immediately after the function. + */ + reloc->sym = find_symbol_containing(sec, offset - 1); + } + + if (reloc->sym) + reloc->addend = offset - reloc->sym->offset; +} + static int read_sections(struct elf *elf) { Elf_Scn *s = NULL; diff --git a/tools/objtool/elf.h b/tools/objtool/elf.h index 807f8c670097..e6890cc70a25 100644 --- a/tools/objtool/elf.h +++ b/tools/objtool/elf.h @@ -140,6 +140,8 @@ struct reloc *find_reloc_by_dest(const struct elf *elf, struct section *sec, uns struct reloc *find_reloc_by_dest_range(const struct elf *elf, struct section *sec, unsigned long offset, unsigned int len); struct symbol *find_func_containing(struct section *sec, unsigned long offset); +void insn_to_reloc_sym_addend(struct section *sec, unsigned long offset, + struct reloc *reloc); int elf_rebuild_reloc_section(struct elf *elf, struct section *sec); #define for_each_sec(file, sec) \ diff --git a/tools/objtool/orc_gen.c b/tools/objtool/orc_gen.c index 235663b96adc..9ce68b385a1b 100644 --- a/tools/objtool/orc_gen.c +++ b/tools/objtool/orc_gen.c @@ -105,30 +105,11 @@ static int create_orc_entry(struct elf *elf, struct section *u_sec, struct secti } memset(reloc, 0, sizeof(*reloc)); - if (insn_sec->sym) { - reloc->sym = insn_sec->sym; - reloc->addend = insn_off; - } else { - /* - * The Clang assembler doesn't produce section symbols, so we - * have to reference the function symbol instead: - */ - reloc->sym = find_symbol_containing(insn_sec, insn_off); - if (!reloc->sym) { - /* - * Hack alert. This happens when we need to reference - * the NOP pad insn immediately after the function. - */ - reloc->sym = find_symbol_containing(insn_sec, - insn_off - 1); - } - if (!reloc->sym) { - WARN("missing symbol for insn at offset 0x%lx\n", - insn_off); - return -1; - } - - reloc->addend = insn_off - reloc->sym->offset; + insn_to_reloc_sym_addend(insn_sec, insn_off, reloc); + if (!reloc->sym) { + WARN("missing symbol for insn at offset 0x%lx", + insn_off); + return -1; } reloc->type = R_X86_64_PC32; From 039e0f627397c0de67438a0f8e55f459ddc7900c Mon Sep 17 00:00:00 2001 From: Palmer Dabbelt Date: Thu, 4 Feb 2021 19:41:12 -0800 Subject: [PATCH 002/103] Revert "dts: phy: add GPIO number and active state used for phy reset" commit 3da3cc1b5f47115b16b5ffeeb4bf09ec331b0164 upstream. VSC8541 phys need a special reset sequence, which the driver doesn't currentlny support. As a result enabling the reset via GPIO essentially guarnteees that the device won't work correctly. We've been relying on bootloaders to reset the device for years, with this revert we'll go back to doing so until we can sort out how to get the reset sequence into the kernel. This reverts commit a0fa9d727043da2238432471e85de0bdb8a8df65. Fixes: a0fa9d727043 ("dts: phy: add GPIO number and active state used for phy reset") Cc: stable@vger.kernel.org Signed-off-by: Palmer Dabbelt Signed-off-by: Greg Kroah-Hartman --- arch/riscv/boot/dts/sifive/hifive-unleashed-a00.dts | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/riscv/boot/dts/sifive/hifive-unleashed-a00.dts b/arch/riscv/boot/dts/sifive/hifive-unleashed-a00.dts index 24d75a146e02..60846e88ae4b 100644 --- a/arch/riscv/boot/dts/sifive/hifive-unleashed-a00.dts +++ b/arch/riscv/boot/dts/sifive/hifive-unleashed-a00.dts @@ -90,7 +90,6 @@ phy0: ethernet-phy@0 { compatible = "ethernet-phy-id0007.0771"; reg = <0>; - reset-gpios = <&gpio 12 GPIO_ACTIVE_LOW>; }; }; From e072d454faa45847a1544b47e88a1377a36ac720 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Mon, 8 Feb 2021 15:51:53 +0100 Subject: [PATCH 003/103] gpio: mxs: GPIO_MXS should not default to y unconditionally commit 97c6e28d388a5000d780d2a63c32f422827f5aa3 upstream. Merely enabling CONFIG_COMPILE_TEST should not enable additional code. To fix this, restrict the automatic enabling of GPIO_MXS to ARCH_MXS, and ask the user in case of compile-testing. Fixes: 6876ca311bfca5d7 ("gpio: mxs: add COMPILE_TEST support for GPIO_MXS") Cc: Signed-off-by: Geert Uytterhoeven Signed-off-by: Bartosz Golaszewski Signed-off-by: Greg Kroah-Hartman --- drivers/gpio/Kconfig | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpio/Kconfig b/drivers/gpio/Kconfig index f20ac3d69424..14751c7ccd1f 100644 --- a/drivers/gpio/Kconfig +++ b/drivers/gpio/Kconfig @@ -428,8 +428,9 @@ config GPIO_MXC select GENERIC_IRQ_CHIP config GPIO_MXS - def_bool y + bool "Freescale MXS GPIO support" if COMPILE_TEST depends on ARCH_MXS || COMPILE_TEST + default y if ARCH_MXS select GPIO_GENERIC select GENERIC_IRQ_CHIP From 10538b869a642109741bd9491a27ccb8d215027e Mon Sep 17 00:00:00 2001 From: Nikita Shubin Date: Tue, 9 Feb 2021 16:31:04 +0300 Subject: [PATCH 004/103] gpio: ep93xx: fix BUG_ON port F usage commit 8b81a7ab8055d01d827ef66374b126eeac3bd108 upstream. Two index spaces and ep93xx_gpio_port are confusing. Instead add a separate struct to store necessary data and remove ep93xx_gpio_port. - add struct to store IRQ related data for each IRQ capable chip - replace offset array with defined offsets - add IRQ registers offset for each IRQ capable chip into ep93xx_gpio_banks ------------[ cut here ]------------ kernel BUG at drivers/gpio/gpio-ep93xx.c:64! ---[ end trace 3f6544e133e9f5ae ]--- Fixes: fd935fc421e74 ("gpio: ep93xx: Do not pingpong irq numbers") Cc: Reviewed-by: Alexander Sverdlin Tested-by: Alexander Sverdlin Signed-off-by: Nikita Shubin Signed-off-by: Bartosz Golaszewski Signed-off-by: Greg Kroah-Hartman --- drivers/gpio/gpio-ep93xx.c | 186 ++++++++++++++++++++----------------- 1 file changed, 99 insertions(+), 87 deletions(-) diff --git a/drivers/gpio/gpio-ep93xx.c b/drivers/gpio/gpio-ep93xx.c index 226da8df6f10..64d6c2b4282e 100644 --- a/drivers/gpio/gpio-ep93xx.c +++ b/drivers/gpio/gpio-ep93xx.c @@ -25,6 +25,9 @@ /* Maximum value for gpio line identifiers */ #define EP93XX_GPIO_LINE_MAX 63 +/* Number of GPIO chips in EP93XX */ +#define EP93XX_GPIO_CHIP_NUM 8 + /* Maximum value for irq capable line identifiers */ #define EP93XX_GPIO_LINE_MAX_IRQ 23 @@ -34,74 +37,74 @@ */ #define EP93XX_GPIO_F_IRQ_BASE 80 +struct ep93xx_gpio_irq_chip { + u8 irq_offset; + u8 int_unmasked; + u8 int_enabled; + u8 int_type1; + u8 int_type2; + u8 int_debounce; +}; + +struct ep93xx_gpio_chip { + struct gpio_chip gc; + struct ep93xx_gpio_irq_chip *eic; +}; + struct ep93xx_gpio { void __iomem *base; - struct gpio_chip gc[8]; + struct ep93xx_gpio_chip gc[EP93XX_GPIO_CHIP_NUM]; }; +#define to_ep93xx_gpio_chip(x) container_of(x, struct ep93xx_gpio_chip, gc) + +static struct ep93xx_gpio_irq_chip *to_ep93xx_gpio_irq_chip(struct gpio_chip *gc) +{ + struct ep93xx_gpio_chip *egc = to_ep93xx_gpio_chip(gc); + + return egc->eic; +} + /************************************************************************* * Interrupt handling for EP93xx on-chip GPIOs *************************************************************************/ -static unsigned char gpio_int_unmasked[3]; -static unsigned char gpio_int_enabled[3]; -static unsigned char gpio_int_type1[3]; -static unsigned char gpio_int_type2[3]; -static unsigned char gpio_int_debounce[3]; +#define EP93XX_INT_TYPE1_OFFSET 0x00 +#define EP93XX_INT_TYPE2_OFFSET 0x04 +#define EP93XX_INT_EOI_OFFSET 0x08 +#define EP93XX_INT_EN_OFFSET 0x0c +#define EP93XX_INT_STATUS_OFFSET 0x10 +#define EP93XX_INT_RAW_STATUS_OFFSET 0x14 +#define EP93XX_INT_DEBOUNCE_OFFSET 0x18 -/* Port ordering is: A B F */ -static const u8 int_type1_register_offset[3] = { 0x90, 0xac, 0x4c }; -static const u8 int_type2_register_offset[3] = { 0x94, 0xb0, 0x50 }; -static const u8 eoi_register_offset[3] = { 0x98, 0xb4, 0x54 }; -static const u8 int_en_register_offset[3] = { 0x9c, 0xb8, 0x58 }; -static const u8 int_debounce_register_offset[3] = { 0xa8, 0xc4, 0x64 }; - -static void ep93xx_gpio_update_int_params(struct ep93xx_gpio *epg, unsigned port) +static void ep93xx_gpio_update_int_params(struct ep93xx_gpio *epg, + struct ep93xx_gpio_irq_chip *eic) { - BUG_ON(port > 2); + writeb_relaxed(0, epg->base + eic->irq_offset + EP93XX_INT_EN_OFFSET); - writeb_relaxed(0, epg->base + int_en_register_offset[port]); + writeb_relaxed(eic->int_type2, + epg->base + eic->irq_offset + EP93XX_INT_TYPE2_OFFSET); - writeb_relaxed(gpio_int_type2[port], - epg->base + int_type2_register_offset[port]); + writeb_relaxed(eic->int_type1, + epg->base + eic->irq_offset + EP93XX_INT_TYPE1_OFFSET); - writeb_relaxed(gpio_int_type1[port], - epg->base + int_type1_register_offset[port]); - - writeb(gpio_int_unmasked[port] & gpio_int_enabled[port], - epg->base + int_en_register_offset[port]); -} - -static int ep93xx_gpio_port(struct gpio_chip *gc) -{ - struct ep93xx_gpio *epg = gpiochip_get_data(gc); - int port = 0; - - while (port < ARRAY_SIZE(epg->gc) && gc != &epg->gc[port]) - port++; - - /* This should not happen but is there as a last safeguard */ - if (port == ARRAY_SIZE(epg->gc)) { - pr_crit("can't find the GPIO port\n"); - return 0; - } - - return port; + writeb_relaxed(eic->int_unmasked & eic->int_enabled, + epg->base + eic->irq_offset + EP93XX_INT_EN_OFFSET); } static void ep93xx_gpio_int_debounce(struct gpio_chip *gc, unsigned int offset, bool enable) { struct ep93xx_gpio *epg = gpiochip_get_data(gc); - int port = ep93xx_gpio_port(gc); + struct ep93xx_gpio_irq_chip *eic = to_ep93xx_gpio_irq_chip(gc); int port_mask = BIT(offset); if (enable) - gpio_int_debounce[port] |= port_mask; + eic->int_debounce |= port_mask; else - gpio_int_debounce[port] &= ~port_mask; + eic->int_debounce &= ~port_mask; - writeb(gpio_int_debounce[port], - epg->base + int_debounce_register_offset[port]); + writeb(eic->int_debounce, + epg->base + eic->irq_offset + EP93XX_INT_DEBOUNCE_OFFSET); } static void ep93xx_gpio_ab_irq_handler(struct irq_desc *desc) @@ -122,12 +125,12 @@ static void ep93xx_gpio_ab_irq_handler(struct irq_desc *desc) */ stat = readb(epg->base + EP93XX_GPIO_A_INT_STATUS); for_each_set_bit(offset, &stat, 8) - generic_handle_irq(irq_find_mapping(epg->gc[0].irq.domain, + generic_handle_irq(irq_find_mapping(epg->gc[0].gc.irq.domain, offset)); stat = readb(epg->base + EP93XX_GPIO_B_INT_STATUS); for_each_set_bit(offset, &stat, 8) - generic_handle_irq(irq_find_mapping(epg->gc[1].irq.domain, + generic_handle_irq(irq_find_mapping(epg->gc[1].gc.irq.domain, offset)); chained_irq_exit(irqchip, desc); @@ -153,52 +156,52 @@ static void ep93xx_gpio_f_irq_handler(struct irq_desc *desc) static void ep93xx_gpio_irq_ack(struct irq_data *d) { struct gpio_chip *gc = irq_data_get_irq_chip_data(d); + struct ep93xx_gpio_irq_chip *eic = to_ep93xx_gpio_irq_chip(gc); struct ep93xx_gpio *epg = gpiochip_get_data(gc); - int port = ep93xx_gpio_port(gc); int port_mask = BIT(d->irq & 7); if (irqd_get_trigger_type(d) == IRQ_TYPE_EDGE_BOTH) { - gpio_int_type2[port] ^= port_mask; /* switch edge direction */ - ep93xx_gpio_update_int_params(epg, port); + eic->int_type2 ^= port_mask; /* switch edge direction */ + ep93xx_gpio_update_int_params(epg, eic); } - writeb(port_mask, epg->base + eoi_register_offset[port]); + writeb(port_mask, epg->base + eic->irq_offset + EP93XX_INT_EOI_OFFSET); } static void ep93xx_gpio_irq_mask_ack(struct irq_data *d) { struct gpio_chip *gc = irq_data_get_irq_chip_data(d); + struct ep93xx_gpio_irq_chip *eic = to_ep93xx_gpio_irq_chip(gc); struct ep93xx_gpio *epg = gpiochip_get_data(gc); - int port = ep93xx_gpio_port(gc); int port_mask = BIT(d->irq & 7); if (irqd_get_trigger_type(d) == IRQ_TYPE_EDGE_BOTH) - gpio_int_type2[port] ^= port_mask; /* switch edge direction */ + eic->int_type2 ^= port_mask; /* switch edge direction */ - gpio_int_unmasked[port] &= ~port_mask; - ep93xx_gpio_update_int_params(epg, port); + eic->int_unmasked &= ~port_mask; + ep93xx_gpio_update_int_params(epg, eic); - writeb(port_mask, epg->base + eoi_register_offset[port]); + writeb(port_mask, epg->base + eic->irq_offset + EP93XX_INT_EOI_OFFSET); } static void ep93xx_gpio_irq_mask(struct irq_data *d) { struct gpio_chip *gc = irq_data_get_irq_chip_data(d); + struct ep93xx_gpio_irq_chip *eic = to_ep93xx_gpio_irq_chip(gc); struct ep93xx_gpio *epg = gpiochip_get_data(gc); - int port = ep93xx_gpio_port(gc); - gpio_int_unmasked[port] &= ~BIT(d->irq & 7); - ep93xx_gpio_update_int_params(epg, port); + eic->int_unmasked &= ~BIT(d->irq & 7); + ep93xx_gpio_update_int_params(epg, eic); } static void ep93xx_gpio_irq_unmask(struct irq_data *d) { struct gpio_chip *gc = irq_data_get_irq_chip_data(d); + struct ep93xx_gpio_irq_chip *eic = to_ep93xx_gpio_irq_chip(gc); struct ep93xx_gpio *epg = gpiochip_get_data(gc); - int port = ep93xx_gpio_port(gc); - gpio_int_unmasked[port] |= BIT(d->irq & 7); - ep93xx_gpio_update_int_params(epg, port); + eic->int_unmasked |= BIT(d->irq & 7); + ep93xx_gpio_update_int_params(epg, eic); } /* @@ -209,8 +212,8 @@ static void ep93xx_gpio_irq_unmask(struct irq_data *d) static int ep93xx_gpio_irq_type(struct irq_data *d, unsigned int type) { struct gpio_chip *gc = irq_data_get_irq_chip_data(d); + struct ep93xx_gpio_irq_chip *eic = to_ep93xx_gpio_irq_chip(gc); struct ep93xx_gpio *epg = gpiochip_get_data(gc); - int port = ep93xx_gpio_port(gc); int offset = d->irq & 7; int port_mask = BIT(offset); irq_flow_handler_t handler; @@ -219,32 +222,32 @@ static int ep93xx_gpio_irq_type(struct irq_data *d, unsigned int type) switch (type) { case IRQ_TYPE_EDGE_RISING: - gpio_int_type1[port] |= port_mask; - gpio_int_type2[port] |= port_mask; + eic->int_type1 |= port_mask; + eic->int_type2 |= port_mask; handler = handle_edge_irq; break; case IRQ_TYPE_EDGE_FALLING: - gpio_int_type1[port] |= port_mask; - gpio_int_type2[port] &= ~port_mask; + eic->int_type1 |= port_mask; + eic->int_type2 &= ~port_mask; handler = handle_edge_irq; break; case IRQ_TYPE_LEVEL_HIGH: - gpio_int_type1[port] &= ~port_mask; - gpio_int_type2[port] |= port_mask; + eic->int_type1 &= ~port_mask; + eic->int_type2 |= port_mask; handler = handle_level_irq; break; case IRQ_TYPE_LEVEL_LOW: - gpio_int_type1[port] &= ~port_mask; - gpio_int_type2[port] &= ~port_mask; + eic->int_type1 &= ~port_mask; + eic->int_type2 &= ~port_mask; handler = handle_level_irq; break; case IRQ_TYPE_EDGE_BOTH: - gpio_int_type1[port] |= port_mask; + eic->int_type1 |= port_mask; /* set initial polarity based on current input level */ if (gc->get(gc, offset)) - gpio_int_type2[port] &= ~port_mask; /* falling */ + eic->int_type2 &= ~port_mask; /* falling */ else - gpio_int_type2[port] |= port_mask; /* rising */ + eic->int_type2 |= port_mask; /* rising */ handler = handle_edge_irq; break; default: @@ -253,9 +256,9 @@ static int ep93xx_gpio_irq_type(struct irq_data *d, unsigned int type) irq_set_handler_locked(d, handler); - gpio_int_enabled[port] |= port_mask; + eic->int_enabled |= port_mask; - ep93xx_gpio_update_int_params(epg, port); + ep93xx_gpio_update_int_params(epg, eic); return 0; } @@ -276,17 +279,19 @@ struct ep93xx_gpio_bank { const char *label; int data; int dir; + int irq; int base; bool has_irq; bool has_hierarchical_irq; unsigned int irq_base; }; -#define EP93XX_GPIO_BANK(_label, _data, _dir, _base, _has_irq, _has_hier, _irq_base) \ +#define EP93XX_GPIO_BANK(_label, _data, _dir, _irq, _base, _has_irq, _has_hier, _irq_base) \ { \ .label = _label, \ .data = _data, \ .dir = _dir, \ + .irq = _irq, \ .base = _base, \ .has_irq = _has_irq, \ .has_hierarchical_irq = _has_hier, \ @@ -295,16 +300,16 @@ struct ep93xx_gpio_bank { static struct ep93xx_gpio_bank ep93xx_gpio_banks[] = { /* Bank A has 8 IRQs */ - EP93XX_GPIO_BANK("A", 0x00, 0x10, 0, true, false, 64), + EP93XX_GPIO_BANK("A", 0x00, 0x10, 0x90, 0, true, false, 64), /* Bank B has 8 IRQs */ - EP93XX_GPIO_BANK("B", 0x04, 0x14, 8, true, false, 72), - EP93XX_GPIO_BANK("C", 0x08, 0x18, 40, false, false, 0), - EP93XX_GPIO_BANK("D", 0x0c, 0x1c, 24, false, false, 0), - EP93XX_GPIO_BANK("E", 0x20, 0x24, 32, false, false, 0), + EP93XX_GPIO_BANK("B", 0x04, 0x14, 0xac, 8, true, false, 72), + EP93XX_GPIO_BANK("C", 0x08, 0x18, 0x00, 40, false, false, 0), + EP93XX_GPIO_BANK("D", 0x0c, 0x1c, 0x00, 24, false, false, 0), + EP93XX_GPIO_BANK("E", 0x20, 0x24, 0x00, 32, false, false, 0), /* Bank F has 8 IRQs */ - EP93XX_GPIO_BANK("F", 0x30, 0x34, 16, false, true, 0), - EP93XX_GPIO_BANK("G", 0x38, 0x3c, 48, false, false, 0), - EP93XX_GPIO_BANK("H", 0x40, 0x44, 56, false, false, 0), + EP93XX_GPIO_BANK("F", 0x30, 0x34, 0x4c, 16, false, true, 0), + EP93XX_GPIO_BANK("G", 0x38, 0x3c, 0x00, 48, false, false, 0), + EP93XX_GPIO_BANK("H", 0x40, 0x44, 0x00, 56, false, false, 0), }; static int ep93xx_gpio_set_config(struct gpio_chip *gc, unsigned offset, @@ -326,13 +331,14 @@ static int ep93xx_gpio_f_to_irq(struct gpio_chip *gc, unsigned offset) return EP93XX_GPIO_F_IRQ_BASE + offset; } -static int ep93xx_gpio_add_bank(struct gpio_chip *gc, +static int ep93xx_gpio_add_bank(struct ep93xx_gpio_chip *egc, struct platform_device *pdev, struct ep93xx_gpio *epg, struct ep93xx_gpio_bank *bank) { void __iomem *data = epg->base + bank->data; void __iomem *dir = epg->base + bank->dir; + struct gpio_chip *gc = &egc->gc; struct device *dev = &pdev->dev; struct gpio_irq_chip *girq; int err; @@ -347,6 +353,12 @@ static int ep93xx_gpio_add_bank(struct gpio_chip *gc, girq = &gc->irq; if (bank->has_irq || bank->has_hierarchical_irq) { gc->set_config = ep93xx_gpio_set_config; + egc->eic = devm_kcalloc(dev, 1, + sizeof(*egc->eic), + GFP_KERNEL); + if (!egc->eic) + return -ENOMEM; + egc->eic->irq_offset = bank->irq; girq->chip = &ep93xx_gpio_irq_chip; } @@ -415,7 +427,7 @@ static int ep93xx_gpio_probe(struct platform_device *pdev) return PTR_ERR(epg->base); for (i = 0; i < ARRAY_SIZE(ep93xx_gpio_banks); i++) { - struct gpio_chip *gc = &epg->gc[i]; + struct ep93xx_gpio_chip *gc = &epg->gc[i]; struct ep93xx_gpio_bank *bank = &ep93xx_gpio_banks[i]; if (ep93xx_gpio_add_bank(gc, pdev, epg, bank)) From d9b7ea4c818cc20bc1d2662a976a1c7d0cc72111 Mon Sep 17 00:00:00 2001 From: Nikita Shubin Date: Tue, 9 Feb 2021 16:31:05 +0300 Subject: [PATCH 005/103] gpio: ep93xx: Fix single irqchip with multi gpiochips commit 28dc10eb77a2db7681b08e3b109764bbe469e347 upstream. Fixes the following warnings which results in interrupts disabled on port B/F: gpio gpiochip1: (B): detected irqchip that is shared with multiple gpiochips: please fix the driver. gpio gpiochip5: (F): detected irqchip that is shared with multiple gpiochips: please fix the driver. - added separate irqchip for each interrupt capable gpiochip - provided unique names for each irqchip Fixes: d2b091961510 ("gpio: ep93xx: Pass irqchip when adding gpiochip") Cc: Signed-off-by: Nikita Shubin Tested-by: Alexander Sverdlin Signed-off-by: Bartosz Golaszewski Signed-off-by: Greg Kroah-Hartman --- drivers/gpio/gpio-ep93xx.c | 30 +++++++++++++++++++----------- 1 file changed, 19 insertions(+), 11 deletions(-) diff --git a/drivers/gpio/gpio-ep93xx.c b/drivers/gpio/gpio-ep93xx.c index 64d6c2b4282e..94d9fa0d6aa7 100644 --- a/drivers/gpio/gpio-ep93xx.c +++ b/drivers/gpio/gpio-ep93xx.c @@ -38,6 +38,7 @@ #define EP93XX_GPIO_F_IRQ_BASE 80 struct ep93xx_gpio_irq_chip { + struct irq_chip ic; u8 irq_offset; u8 int_unmasked; u8 int_enabled; @@ -263,15 +264,6 @@ static int ep93xx_gpio_irq_type(struct irq_data *d, unsigned int type) return 0; } -static struct irq_chip ep93xx_gpio_irq_chip = { - .name = "GPIO", - .irq_ack = ep93xx_gpio_irq_ack, - .irq_mask_ack = ep93xx_gpio_irq_mask_ack, - .irq_mask = ep93xx_gpio_irq_mask, - .irq_unmask = ep93xx_gpio_irq_unmask, - .irq_set_type = ep93xx_gpio_irq_type, -}; - /************************************************************************* * gpiolib interface for EP93xx on-chip GPIOs *************************************************************************/ @@ -331,6 +323,15 @@ static int ep93xx_gpio_f_to_irq(struct gpio_chip *gc, unsigned offset) return EP93XX_GPIO_F_IRQ_BASE + offset; } +static void ep93xx_init_irq_chip(struct device *dev, struct irq_chip *ic) +{ + ic->irq_ack = ep93xx_gpio_irq_ack; + ic->irq_mask_ack = ep93xx_gpio_irq_mask_ack; + ic->irq_mask = ep93xx_gpio_irq_mask; + ic->irq_unmask = ep93xx_gpio_irq_unmask; + ic->irq_set_type = ep93xx_gpio_irq_type; +} + static int ep93xx_gpio_add_bank(struct ep93xx_gpio_chip *egc, struct platform_device *pdev, struct ep93xx_gpio *epg, @@ -352,6 +353,8 @@ static int ep93xx_gpio_add_bank(struct ep93xx_gpio_chip *egc, girq = &gc->irq; if (bank->has_irq || bank->has_hierarchical_irq) { + struct irq_chip *ic; + gc->set_config = ep93xx_gpio_set_config; egc->eic = devm_kcalloc(dev, 1, sizeof(*egc->eic), @@ -359,7 +362,12 @@ static int ep93xx_gpio_add_bank(struct ep93xx_gpio_chip *egc, if (!egc->eic) return -ENOMEM; egc->eic->irq_offset = bank->irq; - girq->chip = &ep93xx_gpio_irq_chip; + ic = &egc->eic->ic; + ic->name = devm_kasprintf(dev, GFP_KERNEL, "gpio-irq-%s", bank->label); + if (!ic->name) + return -ENOMEM; + ep93xx_init_irq_chip(dev, ic); + girq->chip = ic; } if (bank->has_irq) { @@ -401,7 +409,7 @@ static int ep93xx_gpio_add_bank(struct ep93xx_gpio_chip *egc, gpio_irq = EP93XX_GPIO_F_IRQ_BASE + i; irq_set_chip_data(gpio_irq, &epg->gc[5]); irq_set_chip_and_handler(gpio_irq, - &ep93xx_gpio_irq_chip, + girq->chip, handle_level_irq); irq_clear_status_flags(gpio_irq, IRQ_NOREQUEST); } From a38c1ee16623d35186338dbb545843710ce098ab Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Fri, 5 Feb 2021 15:40:04 -0500 Subject: [PATCH 006/103] tracing: Do not count ftrace events in top level enable output commit 256cfdd6fdf70c6fcf0f7c8ddb0ebd73ce8f3bc9 upstream. The file /sys/kernel/tracing/events/enable is used to enable all events by echoing in "1", or disabling all events when echoing in "0". To know if all events are enabled, disabled, or some are enabled but not all of them, cating the file should show either "1" (all enabled), "0" (all disabled), or "X" (some enabled but not all of them). This works the same as the "enable" files in the individule system directories (like tracing/events/sched/enable). But when all events are enabled, the top level "enable" file shows "X". The reason is that its checking the "ftrace" events, which are special events that only exist for their format files. These include the format for the function tracer events, that are enabled when the function tracer is enabled, but not by the "enable" file. The check includes these events, which will always be disabled, and even though all true events are enabled, the top level "enable" file will show "X" instead of "1". To fix this, have the check test the event's flags to see if it has the "IGNORE_ENABLE" flag set, and if so, not test it. Cc: stable@vger.kernel.org Fixes: 553552ce1796c ("tracing: Combine event filter_active and enable into single flags field") Reported-by: "Yordan Karadzhov (VMware)" Signed-off-by: Steven Rostedt (VMware) Signed-off-by: Greg Kroah-Hartman --- kernel/trace/trace_events.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index 802f3e7d8b8b..ab3cb67b869e 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -1212,7 +1212,8 @@ system_enable_read(struct file *filp, char __user *ubuf, size_t cnt, mutex_lock(&event_mutex); list_for_each_entry(file, &tr->events, list) { call = file->event_call; - if (!trace_event_name(call) || !call->class || !call->class->reg) + if ((call->flags & TRACE_EVENT_FL_IGNORE_ENABLE) || + !trace_event_name(call) || !call->class || !call->class->reg) continue; if (system && strcmp(call->class->system, system->name) != 0) From 7c93d8cff582c459350d6f8906eea6e4cd60d959 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Wed, 10 Feb 2021 11:53:22 -0500 Subject: [PATCH 007/103] tracing: Check length before giving out the filter buffer commit b220c049d5196dd94d992dd2dc8cba1a5e6123bf upstream. When filters are used by trace events, a page is allocated on each CPU and used to copy the trace event fields to this page before writing to the ring buffer. The reason to use the filter and not write directly into the ring buffer is because a filter may discard the event and there's more overhead on discarding from the ring buffer than the extra copy. The problem here is that there is no check against the size being allocated when using this page. If an event asks for more than a page size while being filtered, it will get only a page, leading to the caller writing more that what was allocated. Check the length of the request, and if it is more than PAGE_SIZE minus the header default back to allocating from the ring buffer directly. The ring buffer may reject the event if its too big anyway, but it wont overflow. Link: https://lore.kernel.org/ath10k/1612839593-2308-1-git-send-email-wgong@codeaurora.org/ Cc: stable@vger.kernel.org Fixes: 0fc1b09ff1ff4 ("tracing: Use temp buffer when filtering events") Reported-by: Wen Gong Signed-off-by: Steven Rostedt (VMware) Signed-off-by: Greg Kroah-Hartman --- kernel/trace/trace.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 3119d68d012d..ee4be813ba85 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -2745,7 +2745,7 @@ trace_event_buffer_lock_reserve(struct trace_buffer **current_rb, (entry = this_cpu_read(trace_buffered_event))) { /* Try to use the per cpu buffer first */ val = this_cpu_inc_return(trace_buffered_event_cnt); - if (val == 1) { + if ((len < (PAGE_SIZE - sizeof(*entry))) && val == 1) { trace_event_setup(entry, type, flags, pc); entry->array[0] = len; return entry; From bef1f148812dd48786d4f276a664954f06c0b853 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Tue, 9 Feb 2021 04:19:17 +0200 Subject: [PATCH 008/103] drm/i915: Fix overlay frontbuffer tracking MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 5feba0e905c495a217aea9db4ea91093d8fe5dde upstream. We don't have a persistent fb holding a reference to the frontbuffer object, so every time we do the get+put we throw the frontbuffer object immediately away. And so the next time around we get a pristine frontbuffer object with bits==0 even for the old vma. This confuses the frontbuffer tracking code which understandably expects the old frontbuffer to have the overlay's bit set. Fix this by hanging on to the frontbuffer reference until the next flip. And just to make this a bit more clear let's track the frontbuffer explicitly instead of just grabbing it via the old vma. Cc: stable@vger.kernel.org Cc: Chris Wilson Cc: Joonas Lahtinen Closes: https://gitlab.freedesktop.org/drm/intel/-/issues/1136 Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20210209021918.16234-2-ville.syrjala@linux.intel.com Fixes: 8e7cb1799b4f ("drm/i915: Extract intel_frontbuffer active tracking") Reviewed-by: Chris Wilson (cherry picked from commit 553c23bdb4775130f333f07a51b047276bc53f79) Signed-off-by: Jani Nikula Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/i915/display/intel_overlay.c | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_overlay.c b/drivers/gpu/drm/i915/display/intel_overlay.c index 0095c8cac9b4..b73d51e766ce 100644 --- a/drivers/gpu/drm/i915/display/intel_overlay.c +++ b/drivers/gpu/drm/i915/display/intel_overlay.c @@ -182,6 +182,7 @@ struct intel_overlay { struct intel_crtc *crtc; struct i915_vma *vma; struct i915_vma *old_vma; + struct intel_frontbuffer *frontbuffer; bool active; bool pfit_active; u32 pfit_vscale_ratio; /* shifted-point number, (1<<12) == 1.0 */ @@ -282,21 +283,19 @@ static void intel_overlay_flip_prepare(struct intel_overlay *overlay, struct i915_vma *vma) { enum pipe pipe = overlay->crtc->pipe; - struct intel_frontbuffer *from = NULL, *to = NULL; + struct intel_frontbuffer *frontbuffer = NULL; drm_WARN_ON(&overlay->i915->drm, overlay->old_vma); - if (overlay->vma) - from = intel_frontbuffer_get(overlay->vma->obj); if (vma) - to = intel_frontbuffer_get(vma->obj); + frontbuffer = intel_frontbuffer_get(vma->obj); - intel_frontbuffer_track(from, to, INTEL_FRONTBUFFER_OVERLAY(pipe)); + intel_frontbuffer_track(overlay->frontbuffer, frontbuffer, + INTEL_FRONTBUFFER_OVERLAY(pipe)); - if (to) - intel_frontbuffer_put(to); - if (from) - intel_frontbuffer_put(from); + if (overlay->frontbuffer) + intel_frontbuffer_put(overlay->frontbuffer); + overlay->frontbuffer = frontbuffer; intel_frontbuffer_flip_prepare(overlay->i915, INTEL_FRONTBUFFER_OVERLAY(pipe)); From 89b0c20d80a1ab8aa155af287ff4af378862964f Mon Sep 17 00:00:00 2001 From: Julien Grall Date: Wed, 10 Feb 2021 17:06:54 +0000 Subject: [PATCH 009/103] arm/xen: Don't probe xenbus as part of an early initcall commit c4295ab0b485b8bc50d2264bcae2acd06f25caaf upstream. After Commit 3499ba8198cad ("xen: Fix event channel callback via INTX/GSI"), xenbus_probe() will be called too early on Arm. This will recent to a guest hang during boot. If the hang wasn't there, we would have ended up to call xenbus_probe() twice (the second time is in xenbus_probe_initcall()). We don't need to initialize xenbus_probe() early for Arm guest. Therefore, the call in xen_guest_init() is now removed. After this change, there is no more external caller for xenbus_probe(). So the function is turned to a static one. Interestingly there were two prototypes for it. Cc: stable@vger.kernel.org Fixes: 3499ba8198cad ("xen: Fix event channel callback via INTX/GSI") Reported-by: Ian Jackson Signed-off-by: Julien Grall Reviewed-by: David Woodhouse Reviewed-by: Stefano Stabellini Link: https://lore.kernel.org/r/20210210170654.5377-1-julien@xen.org Signed-off-by: Juergen Gross Signed-off-by: Greg Kroah-Hartman --- arch/arm/xen/enlighten.c | 2 -- drivers/xen/xenbus/xenbus.h | 1 - drivers/xen/xenbus/xenbus_probe.c | 2 +- include/xen/xenbus.h | 2 -- 4 files changed, 1 insertion(+), 6 deletions(-) diff --git a/arch/arm/xen/enlighten.c b/arch/arm/xen/enlighten.c index 5a957a9a0984..8ad576ecd0f1 100644 --- a/arch/arm/xen/enlighten.c +++ b/arch/arm/xen/enlighten.c @@ -370,8 +370,6 @@ static int __init xen_guest_init(void) return -ENOMEM; } gnttab_init(); - if (!xen_initial_domain()) - xenbus_probe(); /* * Making sure board specific code will not set up ops for diff --git a/drivers/xen/xenbus/xenbus.h b/drivers/xen/xenbus/xenbus.h index dc1537335414..2a93b7c9c159 100644 --- a/drivers/xen/xenbus/xenbus.h +++ b/drivers/xen/xenbus/xenbus.h @@ -115,7 +115,6 @@ int xenbus_probe_node(struct xen_bus_type *bus, const char *type, const char *nodename); int xenbus_probe_devices(struct xen_bus_type *bus); -void xenbus_probe(void); void xenbus_dev_changed(const char *node, struct xen_bus_type *bus); diff --git a/drivers/xen/xenbus/xenbus_probe.c b/drivers/xen/xenbus/xenbus_probe.c index 18ffd0551b54..8a75092bb148 100644 --- a/drivers/xen/xenbus/xenbus_probe.c +++ b/drivers/xen/xenbus/xenbus_probe.c @@ -683,7 +683,7 @@ void unregister_xenstore_notifier(struct notifier_block *nb) } EXPORT_SYMBOL_GPL(unregister_xenstore_notifier); -void xenbus_probe(void) +static void xenbus_probe(void) { xenstored_ready = 1; diff --git a/include/xen/xenbus.h b/include/xen/xenbus.h index 2c43b0ef1e4d..bf3cfc7c35d0 100644 --- a/include/xen/xenbus.h +++ b/include/xen/xenbus.h @@ -192,8 +192,6 @@ void xs_suspend_cancel(void); struct work_struct; -void xenbus_probe(void); - #define XENBUS_IS_ERR_READ(str) ({ \ if (!IS_ERR(str) && strlen(str) == 0) { \ kfree(str); \ From e72a65802a3e053f53ee97b55d6fb4a426cfffa9 Mon Sep 17 00:00:00 2001 From: Odin Ugedal Date: Sat, 16 Jan 2021 18:36:33 +0100 Subject: [PATCH 010/103] cgroup: fix psi monitor for root cgroup commit 385aac1519417b89cb91b77c22e4ca21db563cd0 upstream. Fix NULL pointer dereference when adding new psi monitor to the root cgroup. PSI files for root cgroup was introduced in df5ba5be742 by using system wide psi struct when reading, but file write/monitor was not properly fixed. Since the PSI config for the root cgroup isn't initialized, the current implementation tries to lock a NULL ptr, resulting in a crash. Can be triggered by running this as root: $ tee /sys/fs/cgroup/cpu.pressure <<< "some 10000 1000000" Signed-off-by: Odin Ugedal Reviewed-by: Suren Baghdasaryan Acked-by: Dan Schatzberg Fixes: df5ba5be7425 ("kernel/sched/psi.c: expose pressure metrics on root cgroup") Acked-by: Johannes Weiner Cc: stable@vger.kernel.org # 5.2+ Signed-off-by: Tejun Heo Signed-off-by: Greg Kroah-Hartman --- kernel/cgroup/cgroup.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c index e41c21819ba0..5d1fdf7c3ec6 100644 --- a/kernel/cgroup/cgroup.c +++ b/kernel/cgroup/cgroup.c @@ -3567,6 +3567,7 @@ static ssize_t cgroup_pressure_write(struct kernfs_open_file *of, char *buf, { struct psi_trigger *new; struct cgroup *cgrp; + struct psi_group *psi; cgrp = cgroup_kn_lock_live(of->kn, false); if (!cgrp) @@ -3575,7 +3576,8 @@ static ssize_t cgroup_pressure_write(struct kernfs_open_file *of, char *buf, cgroup_get(cgrp); cgroup_kn_unlock(of->kn); - new = psi_trigger_create(&cgrp->psi, buf, nbytes, res); + psi = cgroup_ino(cgrp) == 1 ? &psi_system : &cgrp->psi; + new = psi_trigger_create(psi, buf, nbytes, res); if (IS_ERR(new)) { cgroup_put(cgrp); return PTR_ERR(new); From e11345ed78b9a3e6d581341dfa6833a49cfab60b Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Wed, 3 Feb 2021 14:03:50 -0500 Subject: [PATCH 011/103] Revert "drm/amd/display: Update NV1x SR latency values" commit cf050f96e0970a557601953ed7269d07a7885078 upstream. This reverts commit 4a3dea8932d3b1199680d2056dd91d31d94d70b7. This causes blank screens for some users. Bug: https://gitlab.freedesktop.org/drm/amd/-/issues/1482 Cc: Alvin Lee Cc: Jun Lei Cc: Rodrigo Siqueira Reviewed-by: Rodrigo Siqueira Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c index a92f6e4b2eb8..121643ddb719 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c @@ -297,8 +297,8 @@ static struct _vcs_dpi_soc_bounding_box_st dcn2_0_soc = { }, }, .num_states = 5, - .sr_exit_time_us = 11.6, - .sr_enter_plus_exit_time_us = 13.9, + .sr_exit_time_us = 8.6, + .sr_enter_plus_exit_time_us = 10.9, .urgent_latency_us = 4.0, .urgent_latency_pixel_data_only_us = 4.0, .urgent_latency_pixel_mixed_with_vm_data_us = 4.0, From 5a36371f8bb5e3c4d0597beafa8a4490a9d195e7 Mon Sep 17 00:00:00 2001 From: Imre Deak Date: Mon, 8 Feb 2021 17:43:03 +0200 Subject: [PATCH 012/103] drm/i915/tgl+: Make sure TypeC FIA is powered up when initializing it MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 2f51312bebb77962a518b4c6de777dd378b6110a upstream. The TypeC FIA can be powered down if the TC-COLD power state is allowed, so block the TC-COLD state when initializing the FIA. Note that this isn't needed on ICL where the FIA is never modular and which has no generic way to block TC-COLD (except for platforms with a legacy TypeC port and on those too only via these legacy ports, not via a DP-alt/TBT port). Cc: # v5.10+ Cc: José Roberto de Souza Reported-by: Paul Menzel Closes: https://gitlab.freedesktop.org/drm/intel/-/issues/3027 Signed-off-by: Imre Deak Link: https://patchwork.freedesktop.org/patch/msgid/20210208154303.6839-1-imre.deak@intel.com Reviewed-by: Jos� Roberto de Souza (cherry picked from commit f48993e5d26b079e8c80fff002499a213dbdb1b4) Signed-off-by: Jani Nikula Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/i915/display/intel_tc.c | 67 ++++++++++++++----------- 1 file changed, 37 insertions(+), 30 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_tc.c b/drivers/gpu/drm/i915/display/intel_tc.c index 8f67aef18b2d..1d81da31796e 100644 --- a/drivers/gpu/drm/i915/display/intel_tc.c +++ b/drivers/gpu/drm/i915/display/intel_tc.c @@ -23,36 +23,6 @@ static const char *tc_port_mode_name(enum tc_port_mode mode) return names[mode]; } -static void -tc_port_load_fia_params(struct drm_i915_private *i915, - struct intel_digital_port *dig_port) -{ - enum port port = dig_port->base.port; - enum tc_port tc_port = intel_port_to_tc(i915, port); - u32 modular_fia; - - if (INTEL_INFO(i915)->display.has_modular_fia) { - modular_fia = intel_uncore_read(&i915->uncore, - PORT_TX_DFLEXDPSP(FIA1)); - drm_WARN_ON(&i915->drm, modular_fia == 0xffffffff); - modular_fia &= MODULAR_FIA_MASK; - } else { - modular_fia = 0; - } - - /* - * Each Modular FIA instance houses 2 TC ports. In SOC that has more - * than two TC ports, there are multiple instances of Modular FIA. - */ - if (modular_fia) { - dig_port->tc_phy_fia = tc_port / 2; - dig_port->tc_phy_fia_idx = tc_port % 2; - } else { - dig_port->tc_phy_fia = FIA1; - dig_port->tc_phy_fia_idx = tc_port; - } -} - static enum intel_display_power_domain tc_cold_get_power_domain(struct intel_digital_port *dig_port) { @@ -646,6 +616,43 @@ void intel_tc_port_put_link(struct intel_digital_port *dig_port) mutex_unlock(&dig_port->tc_lock); } +static bool +tc_has_modular_fia(struct drm_i915_private *i915, struct intel_digital_port *dig_port) +{ + intel_wakeref_t wakeref; + u32 val; + + if (!INTEL_INFO(i915)->display.has_modular_fia) + return false; + + wakeref = tc_cold_block(dig_port); + val = intel_uncore_read(&i915->uncore, PORT_TX_DFLEXDPSP(FIA1)); + tc_cold_unblock(dig_port, wakeref); + + drm_WARN_ON(&i915->drm, val == 0xffffffff); + + return val & MODULAR_FIA_MASK; +} + +static void +tc_port_load_fia_params(struct drm_i915_private *i915, struct intel_digital_port *dig_port) +{ + enum port port = dig_port->base.port; + enum tc_port tc_port = intel_port_to_tc(i915, port); + + /* + * Each Modular FIA instance houses 2 TC ports. In SOC that has more + * than two TC ports, there are multiple instances of Modular FIA. + */ + if (tc_has_modular_fia(i915, dig_port)) { + dig_port->tc_phy_fia = tc_port / 2; + dig_port->tc_phy_fia_idx = tc_port % 2; + } else { + dig_port->tc_phy_fia = FIA1; + dig_port->tc_phy_fia_idx = tc_port; + } +} + void intel_tc_port_init(struct intel_digital_port *dig_port, bool is_legacy) { struct drm_i915_private *i915 = to_i915(dig_port->base.base.dev); From deae1e6365cd5b32415dc5b4f8ee22f7c0fa4ae3 Mon Sep 17 00:00:00 2001 From: Imre Deak Date: Mon, 1 Feb 2021 14:01:42 +0200 Subject: [PATCH 013/103] drm/dp_mst: Don't report ports connected if nothing is attached to them commit 873e5bb9fbd99e4a26c448b5c7af942a6d7aa60d upstream. Reporting a port as connected if nothing is attached to them leads to any i2c transactions on this port trying to use an uninitialized i2c adapter, fix this. Let's account for this case even if branch devices have no good reason to report a port as plugged with their peer device type set to 'none'. Fixes: db1a07956968 ("drm/dp_mst: Handle SST-only branch device case") References: https://gitlab.freedesktop.org/drm/intel/-/issues/2987 References: https://gitlab.freedesktop.org/drm/intel/-/issues/1963 Cc: Wayne Lin Cc: Lyude Paul Cc: # v5.5+ Cc: intel-gfx@lists.freedesktop.org Signed-off-by: Imre Deak Reviewed-by: Lyude Paul Reported-by: Thiago Macieira Link: https://patchwork.freedesktop.org/patch/msgid/20210201120145.350258-1-imre.deak@intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/drm_dp_mst_topology.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/drm_dp_mst_topology.c b/drivers/gpu/drm/drm_dp_mst_topology.c index 7749b0ceabba..17bdad95978a 100644 --- a/drivers/gpu/drm/drm_dp_mst_topology.c +++ b/drivers/gpu/drm/drm_dp_mst_topology.c @@ -4224,6 +4224,7 @@ drm_dp_mst_detect_port(struct drm_connector *connector, switch (port->pdt) { case DP_PEER_DEVICE_NONE: + break; case DP_PEER_DEVICE_MST_BRANCHING: if (!port->mcs) ret = connector_status_connected; From 285b57595d407bcc8f36c898b37cdce12ae1ff48 Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Mon, 18 Jan 2021 10:28:44 -0700 Subject: [PATCH 014/103] dmaengine: move channel device_node deletion to driver commit e594443196d6e0ef3d3b30320c49b3a4d4f9a547 upstream. Channel device_node deletion is managed by the device driver rather than the dmaengine core. The deletion was accidentally introduced when making channel unregister dynamic. It causes xilinx_dma module to crash on unload as reported by Radhey. Remove chan->device_node delete in dmaengine and also fix up idxd driver. [ 42.142705] Internal error: Oops: 96000044 [#1] SMP [ 42.147566] Modules linked in: xilinx_dma(-) clk_xlnx_clock_wizard uio_pdrv_genirq [ 42.155139] CPU: 1 PID: 2075 Comm: rmmod Not tainted 5.10.1-00026-g3a2e6dd7a05-dirty #192 [ 42.163302] Hardware name: Enclustra XU5 SOM (DT) [ 42.167992] pstate: 40000005 (nZcv daif -PAN -UAO -TCO BTYPE=--) [ 42.173996] pc : xilinx_dma_chan_remove+0x74/0xa0 [xilinx_dma] [ 42.179815] lr : xilinx_dma_chan_remove+0x70/0xa0 [xilinx_dma] [ 42.185636] sp : ffffffc01112bca0 [ 42.188935] x29: ffffffc01112bca0 x28: ffffff80402ea640 xilinx_dma_chan_remove+0x74/0xa0: __list_del at ./include/linux/list.h:112 (inlined by) __list_del_entry at./include/linux/list.h:135 (inlined by) list_del at ./include/linux/list.h:146 (inlined by) xilinx_dma_chan_remove at drivers/dma/xilinx/xilinx_dma.c:2546 Fixes: e81274cd6b52 ("dmaengine: add support to dynamic register/unregister of channels") Reported-by: Radhey Shyam Pandey Signed-off-by: Dave Jiang Tested-by: Radhey Shyam Pandey Link: https://lore.kernel.org/r/161099092469.2495902.5064826526660062342.stgit@djiang5-desk3.ch.intel.com Signed-off-by: Vinod Koul Cc: stable@vger.kernel.org # 5.9+ Signed-off-by: Greg Kroah-Hartman --- drivers/dma/dmaengine.c | 1 - drivers/dma/idxd/dma.c | 5 ++++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/dma/dmaengine.c b/drivers/dma/dmaengine.c index 962cbb5e5f7f..fe6a460c4373 100644 --- a/drivers/dma/dmaengine.c +++ b/drivers/dma/dmaengine.c @@ -1110,7 +1110,6 @@ static void __dma_async_device_channel_unregister(struct dma_device *device, "%s called while %d clients hold a reference\n", __func__, chan->client_count); mutex_lock(&dma_list_mutex); - list_del(&chan->device_node); device->chancnt--; chan->dev->chan = NULL; mutex_unlock(&dma_list_mutex); diff --git a/drivers/dma/idxd/dma.c b/drivers/dma/idxd/dma.c index 0c892cbd72e0..8b14ba0bae1c 100644 --- a/drivers/dma/idxd/dma.c +++ b/drivers/dma/idxd/dma.c @@ -214,5 +214,8 @@ int idxd_register_dma_channel(struct idxd_wq *wq) void idxd_unregister_dma_channel(struct idxd_wq *wq) { - dma_async_device_channel_unregister(&wq->idxd->dma_dev, &wq->dma_chan); + struct dma_chan *chan = &wq->dma_chan; + + dma_async_device_channel_unregister(&wq->idxd->dma_dev, chan); + list_del(&chan->device_node); } From b03a0d5cc26dcc4a62b6e4198cbc7d16ae6227b3 Mon Sep 17 00:00:00 2001 From: Seth Forshee Date: Tue, 9 Feb 2021 13:42:14 -0800 Subject: [PATCH 015/103] tmpfs: disallow CONFIG_TMPFS_INODE64 on s390 commit b85a7a8bb5736998b8a681937a9749b350c17988 upstream. Currently there is an assumption in tmpfs that 64-bit architectures also have a 64-bit ino_t. This is not true on s390 which has a 32-bit ino_t. With CONFIG_TMPFS_INODE64=y tmpfs mounts will get 64-bit inode numbers and display "inode64" in the mount options, but passing the "inode64" mount option will fail. This leads to the following behavior: # mkdir mnt # mount -t tmpfs nodev mnt # mount -o remount,rw mnt mount: /home/ubuntu/mnt: mount point not mounted or bad option. As mount sees "inode64" in the mount options and thus passes it in the options for the remount. So prevent CONFIG_TMPFS_INODE64 from being selected on s390. Link: https://lkml.kernel.org/r/20210205230620.518245-1-seth.forshee@canonical.com Fixes: ea3271f7196c ("tmpfs: support 64-bit inums per-sb") Signed-off-by: Seth Forshee Acked-by: Hugh Dickins Cc: Chris Down Cc: Hugh Dickins Cc: Amir Goldstein Cc: Heiko Carstens Cc: Vasily Gorbik Cc: Christian Borntraeger Cc: [5.9+] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- fs/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/Kconfig b/fs/Kconfig index aa4c12282301..3347ec7bd837 100644 --- a/fs/Kconfig +++ b/fs/Kconfig @@ -203,7 +203,7 @@ config TMPFS_XATTR config TMPFS_INODE64 bool "Use 64-bit ino_t by default in tmpfs" - depends on TMPFS && 64BIT + depends on TMPFS && 64BIT && !S390 default n help tmpfs has historically used only inode numbers as wide as an unsigned From 8c5864d21e010ddd3ec1c10b56b223e2c9fcfabf Mon Sep 17 00:00:00 2001 From: Seth Forshee Date: Tue, 9 Feb 2021 13:42:17 -0800 Subject: [PATCH 016/103] tmpfs: disallow CONFIG_TMPFS_INODE64 on alpha commit ad69c389ec110ea54f8b0c0884b255340ef1c736 upstream. As with s390, alpha is a 64-bit architecture with a 32-bit ino_t. With CONFIG_TMPFS_INODE64=y tmpfs mounts will get 64-bit inode numbers and display "inode64" in the mount options, whereas passing "inode64" in the mount options will fail. This leads to erroneous behaviours such as this: # mkdir mnt # mount -t tmpfs nodev mnt # mount -o remount,rw mnt mount: /home/ubuntu/mnt: mount point not mounted or bad option. Prevent CONFIG_TMPFS_INODE64 from being selected on alpha. Link: https://lkml.kernel.org/r/20210208215726.608197-1-seth.forshee@canonical.com Fixes: ea3271f7196c ("tmpfs: support 64-bit inums per-sb") Signed-off-by: Seth Forshee Acked-by: Hugh Dickins Cc: Chris Down Cc: Amir Goldstein Cc: Richard Henderson Cc: Ivan Kokshaysky Cc: Matt Turner Cc: [5.9+] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- fs/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/Kconfig b/fs/Kconfig index 3347ec7bd837..da524c4d7b7e 100644 --- a/fs/Kconfig +++ b/fs/Kconfig @@ -203,7 +203,7 @@ config TMPFS_XATTR config TMPFS_INODE64 bool "Use 64-bit ino_t by default in tmpfs" - depends on TMPFS && 64BIT && !S390 + depends on TMPFS && 64BIT && !(S390 || ALPHA) default n help tmpfs has historically used only inode numbers as wide as an unsigned From 8e25e1eef8b9bffe101d3b32e9aad508cbe641a2 Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Tue, 8 Dec 2020 16:08:02 +0200 Subject: [PATCH 017/103] soc: ti: omap-prm: Fix boot time errors for rst_map_012 bits 0 and 1 [ Upstream commit 7078a5ba7a58e5db07583b176f8a03e0b8714731 ] We have rst_map_012 used for various accelerators like dsp, ipu and iva. For these use cases, we have rstctrl bit 2 control the subsystem module reset, and have and bits 0 and 1 control the accelerator specific features. If the bootloader, or kexec boot, has left any accelerator specific reset bits deasserted, deasserting bit 2 reset will potentially enable an accelerator with unconfigured MMU and no firmware. And we may get spammed with a lot by warnings on boot with "Data Access in User mode during Functional access", or depending on the accelerator, the system can also just hang. This issue can be quite easily reproduced by setting a rst_map_012 type rstctrl register to 0 or 4 in the bootloader, and booting the system. Let's just assert all reset bits for rst_map_012 type resets. So far it looks like the other rstctrl types don't need this. If it turns out that the other type rstctrl bits also need reset on init, we need to add an instance specific reset mask for the bits to avoid resetting unwanted bits. Reported-by: Carl Philipp Klemm Cc: Philipp Zabel Cc: Santosh Shilimkar Cc: Suman Anna Cc: Tero Kristo Tested-by: Carl Philipp Klemm Signed-off-by: Tony Lindgren Signed-off-by: Sasha Levin --- drivers/soc/ti/omap_prm.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/drivers/soc/ti/omap_prm.c b/drivers/soc/ti/omap_prm.c index 4d41dc3cdce1..c8b14b3a171f 100644 --- a/drivers/soc/ti/omap_prm.c +++ b/drivers/soc/ti/omap_prm.c @@ -552,6 +552,7 @@ static int omap_prm_reset_init(struct platform_device *pdev, const struct omap_rst_map *map; struct ti_prm_platform_data *pdata = dev_get_platdata(&pdev->dev); char buf[32]; + u32 v; /* * Check if we have controllable resets. If either rstctrl is non-zero @@ -599,6 +600,16 @@ static int omap_prm_reset_init(struct platform_device *pdev, map++; } + /* Quirk handling to assert rst_map_012 bits on reset and avoid errors */ + if (prm->data->rstmap == rst_map_012) { + v = readl_relaxed(reset->prm->base + reset->prm->data->rstctrl); + if ((v & reset->mask) != reset->mask) { + dev_dbg(&pdev->dev, "Asserting all resets: %08x\n", v); + writel_relaxed(reset->mask, reset->prm->base + + reset->prm->data->rstctrl); + } + } + return devm_reset_controller_register(&pdev->dev, &reset->rcdev); } From 413a2353be6b420b35998de364f79cad24061da7 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Sat, 15 Aug 2020 13:51:12 +0100 Subject: [PATCH 018/103] arm64: dts: rockchip: Fix PCIe DT properties on rk3399 [ Upstream commit 43f20b1c6140896916f4e91aacc166830a7ba849 ] It recently became apparent that the lack of a 'device_type = "pci"' in the PCIe root complex node for rk3399 is a violation of the PCI binding, as documented in IEEE Std 1275-1994. Changes to the kernel's parsing of the DT made such violation fatal, as drivers cannot probe the controller anymore. Add the missing property makes the PCIe node compliant. While we are at it, drop the pointless linux,pci-domain property, which only makes sense when there are multiple host bridges. Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20200815125112.462652-3-maz@kernel.org Signed-off-by: Heiko Stuebner Signed-off-by: Sasha Levin --- arch/arm64/boot/dts/rockchip/rk3399.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/boot/dts/rockchip/rk3399.dtsi b/arch/arm64/boot/dts/rockchip/rk3399.dtsi index 7a9a7aca86c6..5df535ad4bbc 100644 --- a/arch/arm64/boot/dts/rockchip/rk3399.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3399.dtsi @@ -234,6 +234,7 @@ reg = <0x0 0xf8000000 0x0 0x2000000>, <0x0 0xfd000000 0x0 0x1000000>; reg-names = "axi-base", "apb-base"; + device_type = "pci"; #address-cells = <3>; #size-cells = <2>; #interrupt-cells = <1>; @@ -252,7 +253,6 @@ <0 0 0 2 &pcie0_intc 1>, <0 0 0 3 &pcie0_intc 2>, <0 0 0 4 &pcie0_intc 3>; - linux,pci-domain = <0>; max-link-speed = <1>; msi-map = <0x0 &its 0x0 0x1000>; phys = <&pcie_phy 0>, <&pcie_phy 1>, From 6c152ac1b687fc5868ac2011ee1e3597f2213c08 Mon Sep 17 00:00:00 2001 From: Bjorn Andersson Date: Mon, 21 Dec 2020 16:11:03 -0800 Subject: [PATCH 019/103] arm64: dts: qcom: sdm845: Reserve LPASS clocks in gcc [ Upstream commit 93f2a11580a9732c1d90f9e01a7e9facc825658f ] The GCC_LPASS_Q6_AXI_CLK and GCC_LPASS_SWAY_CLK clocks may not be touched on a typical UEFI based SDM845 device, but when the kernel is built with CONFIG_SDM_LPASSCC_845 this happens, unless they are marked as protected-clocks in the DT. This was done for the MTP and the Pocophone, but not for DB845c and the Lenovo Yoga C630 - causing these to fail to boot if the LPASS clock controller is enabled (which it typically isn't). Tested-by: Vinod Koul #on db845c Reviewed-by: Vinod Koul Link: https://lore.kernel.org/r/20201222001103.3112306-1-bjorn.andersson@linaro.org Signed-off-by: Bjorn Andersson Signed-off-by: Sasha Levin --- arch/arm64/boot/dts/qcom/sdm845-db845c.dts | 4 +++- arch/arm64/boot/dts/qcom/sdm850-lenovo-yoga-c630.dts | 4 +++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/arch/arm64/boot/dts/qcom/sdm845-db845c.dts b/arch/arm64/boot/dts/qcom/sdm845-db845c.dts index 7cc236575ee2..c0b93813ea9a 100644 --- a/arch/arm64/boot/dts/qcom/sdm845-db845c.dts +++ b/arch/arm64/boot/dts/qcom/sdm845-db845c.dts @@ -415,7 +415,9 @@ &gcc { protected-clocks = , , - ; + , + , + ; }; &gpu { diff --git a/arch/arm64/boot/dts/qcom/sdm850-lenovo-yoga-c630.dts b/arch/arm64/boot/dts/qcom/sdm850-lenovo-yoga-c630.dts index d70aae77a6e8..888dc23a530e 100644 --- a/arch/arm64/boot/dts/qcom/sdm850-lenovo-yoga-c630.dts +++ b/arch/arm64/boot/dts/qcom/sdm850-lenovo-yoga-c630.dts @@ -245,7 +245,9 @@ &gcc { protected-clocks = , , - ; + , + , + ; }; &gpu { From 697091f92727bc9be5e81a42ecc361dce00a3374 Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Sun, 10 Jan 2021 21:09:11 +0200 Subject: [PATCH 020/103] ARM: OMAP2+: Fix suspcious RCU usage splats for omap_enter_idle_coupled [ Upstream commit 06862d789ddde8a99c1e579e934ca17c15a84755 ] We get suspcious RCU usage splats with cpuidle in several places in omap_enter_idle_coupled() with the kernel debug options enabled: RCU used illegally from extended quiescent state! ... (_raw_spin_lock_irqsave) (omap_enter_idle_coupled+0x17c/0x2d8) (omap_enter_idle_coupled) (cpuidle_enter_state) (cpuidle_enter_state_coupled) (cpuidle_enter) Let's use RCU_NONIDLE to suppress these splats. Things got changed around with commit 1098582a0f6c ("sched,idle,rcu: Push rcu_idle deeper into the idle path") that started triggering these warnings. For the tick_broadcast related calls, ideally we'd just switch over to using CPUIDLE_FLAG_TIMER_STOP for omap_enter_idle_coupled() to have the generic cpuidle code handle the tick_broadcast related calls for us and then just drop the tick_broadcast calls here. But we're currently missing the call in the common cpuidle code for tick_broadcast_enable() that CPU1 hotplug needs as described in earlier commit 50d6b3cf9403 ("ARM: OMAP2+: fix lack of timer interrupts on CPU1 after hotplug"). Cc: Daniel Lezcano Cc: Paul E. McKenney Cc: Russell King Acked-by: Paul E. McKenney Signed-off-by: Tony Lindgren Signed-off-by: Sasha Levin --- arch/arm/mach-omap2/cpuidle44xx.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/arch/arm/mach-omap2/cpuidle44xx.c b/arch/arm/mach-omap2/cpuidle44xx.c index c8d317fafe2e..de37027ad758 100644 --- a/arch/arm/mach-omap2/cpuidle44xx.c +++ b/arch/arm/mach-omap2/cpuidle44xx.c @@ -151,10 +151,10 @@ static int omap_enter_idle_coupled(struct cpuidle_device *dev, (cx->mpu_logic_state == PWRDM_POWER_OFF); /* Enter broadcast mode for periodic timers */ - tick_broadcast_enable(); + RCU_NONIDLE(tick_broadcast_enable()); /* Enter broadcast mode for one-shot timers */ - tick_broadcast_enter(); + RCU_NONIDLE(tick_broadcast_enter()); /* * Call idle CPU PM enter notifier chain so that @@ -166,7 +166,7 @@ static int omap_enter_idle_coupled(struct cpuidle_device *dev, if (dev->cpu == 0) { pwrdm_set_logic_retst(mpu_pd, cx->mpu_logic_state); - omap_set_pwrdm_state(mpu_pd, cx->mpu_state); + RCU_NONIDLE(omap_set_pwrdm_state(mpu_pd, cx->mpu_state)); /* * Call idle CPU cluster PM enter notifier chain @@ -178,7 +178,7 @@ static int omap_enter_idle_coupled(struct cpuidle_device *dev, index = 0; cx = state_ptr + index; pwrdm_set_logic_retst(mpu_pd, cx->mpu_logic_state); - omap_set_pwrdm_state(mpu_pd, cx->mpu_state); + RCU_NONIDLE(omap_set_pwrdm_state(mpu_pd, cx->mpu_state)); mpuss_can_lose_context = 0; } } @@ -194,9 +194,9 @@ static int omap_enter_idle_coupled(struct cpuidle_device *dev, mpuss_can_lose_context) gic_dist_disable(); - clkdm_deny_idle(cpu_clkdm[1]); - omap_set_pwrdm_state(cpu_pd[1], PWRDM_POWER_ON); - clkdm_allow_idle(cpu_clkdm[1]); + RCU_NONIDLE(clkdm_deny_idle(cpu_clkdm[1])); + RCU_NONIDLE(omap_set_pwrdm_state(cpu_pd[1], PWRDM_POWER_ON)); + RCU_NONIDLE(clkdm_allow_idle(cpu_clkdm[1])); if (IS_PM44XX_ERRATUM(PM_OMAP4_ROM_SMP_BOOT_ERRATUM_GICD) && mpuss_can_lose_context) { @@ -222,7 +222,7 @@ static int omap_enter_idle_coupled(struct cpuidle_device *dev, cpu_pm_exit(); cpu_pm_out: - tick_broadcast_exit(); + RCU_NONIDLE(tick_broadcast_exit()); fail: cpuidle_coupled_parallel_barrier(dev, &abort_barrier); From d33b28e01e5c4f350ce2be041574b07575a90ed2 Mon Sep 17 00:00:00 2001 From: Johan Jonker Date: Sun, 17 Jan 2021 19:16:53 +0100 Subject: [PATCH 021/103] arm64: dts: rockchip: remove interrupt-names property from rk3399 vdec node [ Upstream commit 94a5400f8b966c91c49991bae41c2ef911b935ac ] A test with the command below gives this error: /arch/arm64/boot/dts/rockchip/rk3399-evb.dt.yaml: video-codec@ff660000: 'interrupt-names' does not match any of the regexes: 'pinctrl-[0-9]+' The rkvdec driver gets it irq with help of the platform_get_irq() function, so remove the interrupt-names property from the rk3399 vdec node. make ARCH=arm64 dtbs_check DT_SCHEMA_FILES=Documentation/devicetree/bindings/ media/rockchip,vdec.yaml Signed-off-by: Johan Jonker Link: https://lore.kernel.org/r/20210117181653.24886-1-jbx6244@gmail.com Signed-off-by: Heiko Stuebner Signed-off-by: Sasha Levin --- arch/arm64/boot/dts/rockchip/rk3399.dtsi | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/arm64/boot/dts/rockchip/rk3399.dtsi b/arch/arm64/boot/dts/rockchip/rk3399.dtsi index 5df535ad4bbc..7e69603fb41c 100644 --- a/arch/arm64/boot/dts/rockchip/rk3399.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3399.dtsi @@ -1278,7 +1278,6 @@ compatible = "rockchip,rk3399-vdec"; reg = <0x0 0xff660000 0x0 0x400>; interrupts = ; - interrupt-names = "vdpu"; clocks = <&cru ACLK_VDU>, <&cru HCLK_VDU>, <&cru SCLK_VDU_CA>, <&cru SCLK_VDU_CORE>; clock-names = "axi", "ahb", "cabac", "core"; From 2a2e911469b5e0aca3423b3c551e9e4a3b8cec65 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Wed, 20 Jan 2021 13:49:41 +0100 Subject: [PATCH 022/103] platform/x86: hp-wmi: Disable tablet-mode reporting by default MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 67fbe02a5cebc3c653610f12e3c0424e58450153 ] Recently userspace has started making more use of SW_TABLET_MODE (when an input-dev reports this). Specifically recent GNOME3 versions will: 1. When SW_TABLET_MODE is reported and is reporting 0: 1.1 Disable accelerometer-based screen auto-rotation 1.2 Disable automatically showing the on-screen keyboard when a text-input field is focussed 2. When SW_TABLET_MODE is reported and is reporting 1: 2.1 Ignore input-events from the builtin keyboard and touchpad (this is for 360° hinges style 2-in-1s where the keyboard and touchpads are accessible on the back of the tablet when folded into tablet-mode) This means that claiming to support SW_TABLET_MODE when it does not actually work / reports correct values has bad side-effects. The check in the hp-wmi code which is used to decide if the input-dev should claim SW_TABLET_MODE support, only checks if the HPWMI_HARDWARE_QUERY is supported. It does *not* check if the hardware actually is capable of reporting SW_TABLET_MODE. This leads to the hp-wmi input-dev claiming SW_TABLET_MODE support, while in reality it will always report 0 as SW_TABLET_MODE value. This has been seen on a "HP ENVY x360 Convertible 15-cp0xxx" and this likely is the case on a whole lot of other HP models. This problem causes both auto-rotation and on-screen keyboard support to not work on affected x360 models. There is no easy fix for this, but since userspace expects SW_TABLET_MODE reporting to be reliable when advertised it is better to not claim/report SW_TABLET_MODE support at all, then to claim to support it while it does not work. To avoid the mentioned problems, add a new enable_tablet_mode_sw module-parameter which defaults to false. Note I've made this an int using the standard -1=auto, 0=off, 1=on triplett, with the hope that in the future we can come up with a better way to detect SW_TABLET_MODE support. ATM the default auto option just does the same as off. BugLink: https://bugzilla.redhat.com/show_bug.cgi?id=1918255 Cc: Stefan Brüns Signed-off-by: Hans de Goede Acked-by: Mark Gross Link: https://lore.kernel.org/r/20210120124941.73409-1-hdegoede@redhat.com Signed-off-by: Sasha Levin --- drivers/platform/x86/hp-wmi.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/drivers/platform/x86/hp-wmi.c b/drivers/platform/x86/hp-wmi.c index 18bf8aeb5f87..e94e59283ecb 100644 --- a/drivers/platform/x86/hp-wmi.c +++ b/drivers/platform/x86/hp-wmi.c @@ -32,6 +32,10 @@ MODULE_LICENSE("GPL"); MODULE_ALIAS("wmi:95F24279-4D7B-4334-9387-ACCDC67EF61C"); MODULE_ALIAS("wmi:5FB7F034-2C63-45e9-BE91-3D44E2C707E4"); +static int enable_tablet_mode_sw = -1; +module_param(enable_tablet_mode_sw, int, 0444); +MODULE_PARM_DESC(enable_tablet_mode_sw, "Enable SW_TABLET_MODE reporting (-1=auto, 0=no, 1=yes)"); + #define HPWMI_EVENT_GUID "95F24279-4D7B-4334-9387-ACCDC67EF61C" #define HPWMI_BIOS_GUID "5FB7F034-2C63-45e9-BE91-3D44E2C707E4" @@ -654,10 +658,12 @@ static int __init hp_wmi_input_setup(void) } /* Tablet mode */ - val = hp_wmi_hw_state(HPWMI_TABLET_MASK); - if (!(val < 0)) { - __set_bit(SW_TABLET_MODE, hp_wmi_input_dev->swbit); - input_report_switch(hp_wmi_input_dev, SW_TABLET_MODE, val); + if (enable_tablet_mode_sw > 0) { + val = hp_wmi_hw_state(HPWMI_TABLET_MASK); + if (val >= 0) { + __set_bit(SW_TABLET_MODE, hp_wmi_input_dev->swbit); + input_report_switch(hp_wmi_input_dev, SW_TABLET_MODE, val); + } } err = sparse_keymap_setup(hp_wmi_input_dev, hp_wmi_keymap, NULL); From f66fa5ec47e0d3151061922b99046e294297313f Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Wed, 20 Jan 2021 23:41:39 +0000 Subject: [PATCH 023/103] arm64: dts: rockchip: Disable display for NanoPi R2S [ Upstream commit 74532de460ec664e5a725507d1b59aa9e4d40776 ] NanoPi R2S is headless, so rightly does not enable any of the display interface hardware, which currently provokes an obnoxious error in the boot log from the fake DRM device failing to find anything to bind to. It probably isn't *too* hard to obviate the fake device shenanigans entirely with a bit of driver reshuffling, but for now let's just disable it here to shut up the spurious error. Signed-off-by: Robin Murphy Link: https://lore.kernel.org/r/c4553dfad1ad6792c4f22454c135ff55de77e2d6.1611186099.git.robin.murphy@arm.com Signed-off-by: Heiko Stuebner Signed-off-by: Sasha Levin --- arch/arm64/boot/dts/rockchip/rk3328-nanopi-r2s.dts | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/arm64/boot/dts/rockchip/rk3328-nanopi-r2s.dts b/arch/arm64/boot/dts/rockchip/rk3328-nanopi-r2s.dts index 2ee07d15a6e3..1eecad724f04 100644 --- a/arch/arm64/boot/dts/rockchip/rk3328-nanopi-r2s.dts +++ b/arch/arm64/boot/dts/rockchip/rk3328-nanopi-r2s.dts @@ -114,6 +114,10 @@ cpu-supply = <&vdd_arm>; }; +&display_subsystem { + status = "disabled"; +}; + &gmac2io { assigned-clocks = <&cru SCLK_MAC2IO>, <&cru SCLK_MAC2IO_EXT>; assigned-clock-parents = <&gmac_clk>, <&gmac_clk>; From cbb9404a50521cdee71346d162b72d3cde270b9e Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Thu, 28 Jan 2021 10:22:48 +0100 Subject: [PATCH 024/103] ovl: perform vfs_getxattr() with mounter creds [ Upstream commit 554677b97257b0b69378bd74e521edb7e94769ff ] The vfs_getxattr() in ovl_xattr_set() is used to check whether an xattr exist on a lower layer file that is to be removed. If the xattr does not exist, then no need to copy up the file. This call of vfs_getxattr() wasn't wrapped in credential override, and this is probably okay. But for consitency wrap this instance as well. Reported-by: "Eric W. Biederman" Signed-off-by: Miklos Szeredi Signed-off-by: Sasha Levin --- fs/overlayfs/inode.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/overlayfs/inode.c b/fs/overlayfs/inode.c index b584dca845ba..4fadafd8bdc1 100644 --- a/fs/overlayfs/inode.c +++ b/fs/overlayfs/inode.c @@ -346,7 +346,9 @@ int ovl_xattr_set(struct dentry *dentry, struct inode *inode, const char *name, goto out; if (!value && !upperdentry) { + old_cred = ovl_override_creds(dentry->d_sb); err = vfs_getxattr(realdentry, name, NULL, 0); + revert_creds(old_cred); if (err < 0) goto out_drop_write; } From 02dee03d48314b77a3c60091aa10fa721c3d1edd Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Thu, 28 Jan 2021 10:22:48 +0100 Subject: [PATCH 025/103] cap: fix conversions on getxattr [ Upstream commit f2b00be488730522d0fb7a8a5de663febdcefe0a ] If a capability is stored on disk in v2 format cap_inode_getsecurity() will currently return in v2 format unconditionally. This is wrong: v2 cap should be equivalent to a v3 cap with zero rootid, and so the same conversions performed on it. If the rootid cannot be mapped, v3 is returned unconverted. Fix this so that both v2 and v3 return -EOVERFLOW if the rootid (or the owner of the fs user namespace in case of v2) cannot be mapped into the current user namespace. Signed-off-by: Miklos Szeredi Acked-by: "Eric W. Biederman" Signed-off-by: Sasha Levin --- security/commoncap.c | 67 ++++++++++++++++++++++++++++---------------- 1 file changed, 43 insertions(+), 24 deletions(-) diff --git a/security/commoncap.c b/security/commoncap.c index 59bf3c1674c8..a6c9bb4441d5 100644 --- a/security/commoncap.c +++ b/security/commoncap.c @@ -371,10 +371,11 @@ int cap_inode_getsecurity(struct inode *inode, const char *name, void **buffer, { int size, ret; kuid_t kroot; + u32 nsmagic, magic; uid_t root, mappedroot; char *tmpbuf = NULL; struct vfs_cap_data *cap; - struct vfs_ns_cap_data *nscap; + struct vfs_ns_cap_data *nscap = NULL; struct dentry *dentry; struct user_namespace *fs_ns; @@ -396,46 +397,61 @@ int cap_inode_getsecurity(struct inode *inode, const char *name, void **buffer, fs_ns = inode->i_sb->s_user_ns; cap = (struct vfs_cap_data *) tmpbuf; if (is_v2header((size_t) ret, cap)) { - /* If this is sizeof(vfs_cap_data) then we're ok with the - * on-disk value, so return that. */ - if (alloc) - *buffer = tmpbuf; - else - kfree(tmpbuf); - return ret; - } else if (!is_v3header((size_t) ret, cap)) { - kfree(tmpbuf); - return -EINVAL; + root = 0; + } else if (is_v3header((size_t) ret, cap)) { + nscap = (struct vfs_ns_cap_data *) tmpbuf; + root = le32_to_cpu(nscap->rootid); + } else { + size = -EINVAL; + goto out_free; } - nscap = (struct vfs_ns_cap_data *) tmpbuf; - root = le32_to_cpu(nscap->rootid); kroot = make_kuid(fs_ns, root); /* If the root kuid maps to a valid uid in current ns, then return * this as a nscap. */ mappedroot = from_kuid(current_user_ns(), kroot); if (mappedroot != (uid_t)-1 && mappedroot != (uid_t)0) { + size = sizeof(struct vfs_ns_cap_data); if (alloc) { - *buffer = tmpbuf; + if (!nscap) { + /* v2 -> v3 conversion */ + nscap = kzalloc(size, GFP_ATOMIC); + if (!nscap) { + size = -ENOMEM; + goto out_free; + } + nsmagic = VFS_CAP_REVISION_3; + magic = le32_to_cpu(cap->magic_etc); + if (magic & VFS_CAP_FLAGS_EFFECTIVE) + nsmagic |= VFS_CAP_FLAGS_EFFECTIVE; + memcpy(&nscap->data, &cap->data, sizeof(__le32) * 2 * VFS_CAP_U32); + nscap->magic_etc = cpu_to_le32(nsmagic); + } else { + /* use allocated v3 buffer */ + tmpbuf = NULL; + } nscap->rootid = cpu_to_le32(mappedroot); - } else - kfree(tmpbuf); - return size; + *buffer = nscap; + } + goto out_free; } if (!rootid_owns_currentns(kroot)) { - kfree(tmpbuf); - return -EOPNOTSUPP; + size = -EOVERFLOW; + goto out_free; } /* This comes from a parent namespace. Return as a v2 capability */ size = sizeof(struct vfs_cap_data); if (alloc) { - *buffer = kmalloc(size, GFP_ATOMIC); - if (*buffer) { - struct vfs_cap_data *cap = *buffer; - __le32 nsmagic, magic; + if (nscap) { + /* v3 -> v2 conversion */ + cap = kzalloc(size, GFP_ATOMIC); + if (!cap) { + size = -ENOMEM; + goto out_free; + } magic = VFS_CAP_REVISION_2; nsmagic = le32_to_cpu(nscap->magic_etc); if (nsmagic & VFS_CAP_FLAGS_EFFECTIVE) @@ -443,9 +459,12 @@ int cap_inode_getsecurity(struct inode *inode, const char *name, void **buffer, memcpy(&cap->data, &nscap->data, sizeof(__le32) * 2 * VFS_CAP_U32); cap->magic_etc = cpu_to_le32(magic); } else { - size = -ENOMEM; + /* use unconverted v2 */ + tmpbuf = NULL; } + *buffer = cap; } +out_free: kfree(tmpbuf); return size; } From 116826d615c1936dff40a7766374733078e43803 Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Sat, 19 Dec 2020 12:16:08 +0200 Subject: [PATCH 026/103] ovl: skip getxattr of security labels [ Upstream commit 03fedf93593c82538b18476d8c4f0e8f8435ea70 ] When inode has no listxattr op of its own (e.g. squashfs) vfs_listxattr calls the LSM inode_listsecurity hooks to list the xattrs that LSMs will intercept in inode_getxattr hooks. When selinux LSM is installed but not initialized, it will list the security.selinux xattr in inode_listsecurity, but will not intercept it in inode_getxattr. This results in -ENODATA for a getxattr call for an xattr returned by listxattr. This situation was manifested as overlayfs failure to copy up lower files from squashfs when selinux is built-in but not initialized, because ovl_copy_xattr() iterates the lower inode xattrs by vfs_listxattr() and vfs_getxattr(). ovl_copy_xattr() skips copy up of security labels that are indentified by inode_copy_up_xattr LSM hooks, but it does that after vfs_getxattr(). Since we are not going to copy them, skip vfs_getxattr() of the security labels. Reported-by: Michael Labriola Tested-by: Michael Labriola Link: https://lore.kernel.org/linux-unionfs/2nv9d47zt7.fsf@aldarion.sourceruckus.org/ Signed-off-by: Amir Goldstein Signed-off-by: Miklos Szeredi Signed-off-by: Sasha Levin --- fs/overlayfs/copy_up.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/fs/overlayfs/copy_up.c b/fs/overlayfs/copy_up.c index 955ecd4030f0..89d5d59c7d7a 100644 --- a/fs/overlayfs/copy_up.c +++ b/fs/overlayfs/copy_up.c @@ -84,6 +84,14 @@ int ovl_copy_xattr(struct super_block *sb, struct dentry *old, if (ovl_is_private_xattr(sb, name)) continue; + + error = security_inode_copy_up_xattr(name); + if (error < 0 && error != -EOPNOTSUPP) + break; + if (error == 1) { + error = 0; + continue; /* Discard */ + } retry: size = vfs_getxattr(old, name, value, value_size); if (size == -ERANGE) @@ -107,13 +115,6 @@ retry: goto retry; } - error = security_inode_copy_up_xattr(name); - if (error < 0 && error != -EOPNOTSUPP) - break; - if (error == 1) { - error = 0; - continue; /* Discard */ - } error = vfs_setxattr(new, name, value, size, 0); if (error) { if (error != -EOPNOTSUPP || ovl_must_copy_xattr(name)) From 020680e36d9718acc8205cc72ed5e9ecb09386ab Mon Sep 17 00:00:00 2001 From: James Smart Date: Wed, 27 Jan 2021 14:16:01 -0800 Subject: [PATCH 027/103] scsi: lpfc: Fix EEH encountering oops with NVMe traffic [ Upstream commit 8c65830ae1629b03e5d65e9aafae7e2cf5f8b743 ] In testing, in a configuration with Redfish and native NVMe multipath when an EEH is injected, a kernel oops is being encountered: (unreliable) lpfc_nvme_ls_req+0x328/0x720 [lpfc] __nvme_fc_send_ls_req.constprop.13+0x1d8/0x3d0 [nvme_fc] nvme_fc_create_association+0x224/0xd10 [nvme_fc] nvme_fc_reset_ctrl_work+0x110/0x154 [nvme_fc] process_one_work+0x304/0x5d the NBMe transport is issuing a Disconnect LS request, which the driver receives and tries to post but the work queue used by the driver is already being torn down by the eeh. Fix by validating the validity of the work queue before proceeding with the LS transmit. Link: https://lore.kernel.org/r/20210127221601.84878-1-jsmart2021@gmail.com Reviewed-by: Ewan D. Milne Signed-off-by: James Smart Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin --- drivers/scsi/lpfc/lpfc_nvme.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/scsi/lpfc/lpfc_nvme.c b/drivers/scsi/lpfc/lpfc_nvme.c index 69f1a0457f51..03c81cec6bc9 100644 --- a/drivers/scsi/lpfc/lpfc_nvme.c +++ b/drivers/scsi/lpfc/lpfc_nvme.c @@ -714,6 +714,9 @@ __lpfc_nvme_ls_req(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp, return -ENODEV; } + if (!vport->phba->sli4_hba.nvmels_wq) + return -ENOMEM; + /* * there are two dma buf in the request, actually there is one and * the second one is just the start address + cmd size. From 3895bcd96a9e8e1d3103e77f293dd83c7b52ba8e Mon Sep 17 00:00:00 2001 From: Fenghua Yu Date: Mon, 1 Feb 2021 19:00:07 +0000 Subject: [PATCH 028/103] x86/split_lock: Enable the split lock feature on another Alder Lake CPU [ Upstream commit 8acf417805a5f5c69e9ff66f14cab022c2755161 ] Add Alder Lake mobile processor to CPU list to enumerate and enable the split lock feature. Signed-off-by: Fenghua Yu Signed-off-by: Borislav Petkov Reviewed-by: Tony Luck Link: https://lkml.kernel.org/r/20210201190007.4031869-1-fenghua.yu@intel.com Signed-off-by: Sasha Levin --- arch/x86/kernel/cpu/intel.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index 59a1e3ce3f14..816fdbec795a 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -1159,6 +1159,7 @@ static const struct x86_cpu_id split_lock_cpu_ids[] __initconst = { X86_MATCH_INTEL_FAM6_MODEL(TIGERLAKE, 1), X86_MATCH_INTEL_FAM6_MODEL(SAPPHIRERAPIDS_X, 1), X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE, 1), + X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE_L, 1), {} }; From 07c8c581d2be28ba988c6f772a26b6de129fb7a8 Mon Sep 17 00:00:00 2001 From: Claus Stovgaard Date: Mon, 1 Feb 2021 22:08:22 +0100 Subject: [PATCH 029/103] nvme-pci: ignore the subsysem NQN on Phison E16 [ Upstream commit c9e95c39280530200cdd0bbd2670e6334a81970b ] Tested both with Corsairs firmware 11.3 and 13.0 for the Corsairs MP600 and both have the issue as reported by the kernel. nvme nvme0: missing or invalid SUBNQN field. Signed-off-by: Claus Stovgaard Signed-off-by: Christoph Hellwig Signed-off-by: Sasha Levin --- drivers/nvme/host/pci.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index a32494cde61f..4a33287371bd 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -3247,6 +3247,8 @@ static const struct pci_device_id nvme_id_table[] = { { PCI_DEVICE(0x144d, 0xa822), /* Samsung PM1725a */ .driver_data = NVME_QUIRK_DELAY_BEFORE_CHK_RDY | NVME_QUIRK_IGNORE_DEV_SUBNQN, }, + { PCI_DEVICE(0x1987, 0x5016), /* Phison E16 */ + .driver_data = NVME_QUIRK_IGNORE_DEV_SUBNQN, }, { PCI_DEVICE(0x1d1d, 0x1f1f), /* LighNVM qemu device */ .driver_data = NVME_QUIRK_LIGHTNVM, }, { PCI_DEVICE(0x1d1d, 0x2807), /* CNEX WL */ From 7963f3ff8eddea42c3510a5cde0250e8cce017d6 Mon Sep 17 00:00:00 2001 From: George Shen Date: Tue, 22 Dec 2020 14:05:41 -0500 Subject: [PATCH 030/103] drm/amd/display: Fix DPCD translation for LTTPR AUX_RD_INTERVAL [ Upstream commit 2b6b7ab4b1cabfbee1af5d818efcab5d51d62c7e ] [Why] The translation between the DPCD value and the specified AUX_RD_INTERVAL in the DP spec do not match. [How] Update values to match the spec. Signed-off-by: George Shen Reviewed-by: Wenjing Liu Acked-by: Anson Jacob Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher Signed-off-by: Sasha Levin --- drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c b/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c index 17e6fd820139..32b73ea86673 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c @@ -877,14 +877,14 @@ static uint32_t translate_training_aux_read_interval(uint32_t dpcd_aux_read_inte switch (dpcd_aux_read_interval) { case 0x01: - aux_rd_interval_us = 400; - break; - case 0x02: aux_rd_interval_us = 4000; break; - case 0x03: + case 0x02: aux_rd_interval_us = 8000; break; + case 0x03: + aux_rd_interval_us = 12000; + break; case 0x04: aux_rd_interval_us = 16000; break; From 4d9a5224d507f609f940e24b499bc34801b4ae23 Mon Sep 17 00:00:00 2001 From: Sung Lee Date: Fri, 15 Jan 2021 13:53:15 -0500 Subject: [PATCH 031/103] drm/amd/display: Add more Clock Sources to DCN2.1 [ Upstream commit 1622711beebe887e4f0f8237fea1f09bb48e9a51 ] [WHY] When enabling HDMI on ComboPHY, there are not enough clock sources to complete display detection. [HOW] Initialize more clock sources. Signed-off-by: Sung Lee Reviewed-by: Tony Cheng Acked-by: Anson Jacob Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher Signed-off-by: Sasha Levin --- drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.c b/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.c index 20441127783b..c99385440412 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.c @@ -902,6 +902,8 @@ enum dcn20_clk_src_array_id { DCN20_CLK_SRC_PLL0, DCN20_CLK_SRC_PLL1, DCN20_CLK_SRC_PLL2, + DCN20_CLK_SRC_PLL3, + DCN20_CLK_SRC_PLL4, DCN20_CLK_SRC_TOTAL_DCN21 }; @@ -1880,6 +1882,14 @@ static bool dcn21_resource_construct( dcn21_clock_source_create(ctx, ctx->dc_bios, CLOCK_SOURCE_COMBO_PHY_PLL2, &clk_src_regs[2], false); + pool->base.clock_sources[DCN20_CLK_SRC_PLL3] = + dcn21_clock_source_create(ctx, ctx->dc_bios, + CLOCK_SOURCE_COMBO_PHY_PLL3, + &clk_src_regs[3], false); + pool->base.clock_sources[DCN20_CLK_SRC_PLL4] = + dcn21_clock_source_create(ctx, ctx->dc_bios, + CLOCK_SOURCE_COMBO_PHY_PLL4, + &clk_src_regs[4], false); pool->base.clk_src_count = DCN20_CLK_SRC_TOTAL_DCN21; From b8dc6255bb2d08ff5f57ddfb4ff0f85fd6433a9f Mon Sep 17 00:00:00 2001 From: Mikita Lipski Date: Thu, 14 Jan 2021 11:48:57 -0500 Subject: [PATCH 032/103] drm/amd/display: Release DSC before acquiring [ Upstream commit 58180a0cc0c57fe62a799a112f95b60f6935bd96 ] [why] Need to unassign DSC from pipes that are not using it so other pipes can acquire it. That is needed for asic's that have unmatching number of DSC engines from the number of pipes. [how] Before acquiring dsc to stream resources, first remove it. Signed-off-by: Mikita Lipski Reviewed-by: Eryk Brol Acked-by: Anson Jacob Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher Signed-off-by: Sasha Levin --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c index eee19edeeee5..1e448f1b39a1 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c @@ -828,6 +828,9 @@ bool compute_mst_dsc_configs_for_state(struct drm_atomic_state *state, if (computed_streams[i]) continue; + if (dcn20_remove_stream_from_ctx(stream->ctx->dc, dc_state, stream) != DC_OK) + return false; + mutex_lock(&aconnector->mst_mgr.lock); if (!compute_mst_dsc_configs_for_link(state, dc_state, stream->link)) { mutex_unlock(&aconnector->mst_mgr.lock); @@ -845,7 +848,8 @@ bool compute_mst_dsc_configs_for_state(struct drm_atomic_state *state, stream = dc_state->streams[i]; if (stream->timing.flags.DSC == 1) - dc_stream_add_dsc_to_resource(stream->ctx->dc, dc_state, stream); + if (dc_stream_add_dsc_to_resource(stream->ctx->dc, dc_state, stream) != DC_OK) + return false; } return true; From 1898affc717e2d5a0acf9f873136992e41e52e26 Mon Sep 17 00:00:00 2001 From: Victor Lu Date: Thu, 14 Jan 2021 22:24:14 -0500 Subject: [PATCH 033/103] drm/amd/display: Fix dc_sink kref count in emulated_link_detect [ Upstream commit 3ddc818d9bb877c64f5c649beab97af86c403702 ] [why] prev_sink is not used anywhere else in the function and the reference to it from dc_link is replaced with a new dc_sink. [how] Change dc_sink_retain(prev_sink) to dc_sink_release(prev_sink). Signed-off-by: Victor Lu Reviewed-by: Nicholas Kazlauskas Acked-by: Anson Jacob Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher Signed-off-by: Sasha Levin --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 580880212e55..e5e05a6cb62a 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -1792,8 +1792,8 @@ static void emulated_link_detect(struct dc_link *link) link->type = dc_connection_none; prev_sink = link->local_sink; - if (prev_sink != NULL) - dc_sink_retain(prev_sink); + if (prev_sink) + dc_sink_release(prev_sink); switch (link->connector_signal) { case SIGNAL_TYPE_HDMI_TYPE_A: { From 8466a0c6bc13db5d8dadb87521af965a0b0a7798 Mon Sep 17 00:00:00 2001 From: Victor Lu Date: Thu, 14 Jan 2021 16:27:07 -0500 Subject: [PATCH 034/103] drm/amd/display: Free atomic state after drm_atomic_commit [ Upstream commit 2abaa323d744011982b20b8f3886184d56d23946 ] [why] drm_atomic_commit was changed so that the caller must free their drm_atomic_state reference on successes. [how] Add drm_atomic_commit_put after drm_atomic_commit call in dm_force_atomic_commit. Signed-off-by: Victor Lu Reviewed-by: Roman Li Acked-by: Anson Jacob Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher Signed-off-by: Sasha Levin --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index e5e05a6cb62a..321df20fcdb9 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -7870,14 +7870,14 @@ static int dm_force_atomic_commit(struct drm_connector *connector) ret = PTR_ERR_OR_ZERO(conn_state); if (ret) - goto err; + goto out; /* Attach crtc to drm_atomic_state*/ crtc_state = drm_atomic_get_crtc_state(state, &disconnected_acrtc->base); ret = PTR_ERR_OR_ZERO(crtc_state); if (ret) - goto err; + goto out; /* force a restore */ crtc_state->mode_changed = true; @@ -7887,17 +7887,15 @@ static int dm_force_atomic_commit(struct drm_connector *connector) ret = PTR_ERR_OR_ZERO(plane_state); if (ret) - goto err; - + goto out; /* Call commit internally with the state we just constructed */ ret = drm_atomic_commit(state); - if (!ret) - return 0; -err: - DRM_ERROR("Restoring old state failed with %i\n", ret); +out: drm_atomic_state_put(state); + if (ret) + DRM_ERROR("Restoring old state failed with %i\n", ret); return ret; } From cd0604db18d22bc39dd8349d6b7ed2f8c79a627c Mon Sep 17 00:00:00 2001 From: Victor Lu Date: Fri, 15 Jan 2021 11:02:48 -0500 Subject: [PATCH 035/103] drm/amd/display: Decrement refcount of dc_sink before reassignment [ Upstream commit 8e92bb0fa75bca9a57e4aba2e36f67d8016a3053 ] [why] An old dc_sink state is causing a memory leak because it is missing a dc_sink_release before a new dc_sink is assigned back to aconnector->dc_sink. [how] Decrement the dc_sink refcount before reassigning it to a new dc_sink. Signed-off-by: Victor Lu Reviewed-by: Rodrigo Siqueira Acked-by: Anson Jacob Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher Signed-off-by: Sasha Levin --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 321df20fcdb9..fdca76fc598c 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -2261,8 +2261,10 @@ void amdgpu_dm_update_connector_after_detect( * TODO: check if we still need the S3 mode update workaround. * If yes, put it here. */ - if (aconnector->dc_sink) + if (aconnector->dc_sink) { amdgpu_dm_update_freesync_caps(connector, NULL); + dc_sink_release(aconnector->dc_sink); + } aconnector->dc_sink = sink; dc_sink_retain(aconnector->dc_sink); From 4fccb50ec95b834c02099ed8e18c7e0ca6f6f923 Mon Sep 17 00:00:00 2001 From: Alexandre Ghiti Date: Fri, 29 Jan 2021 12:31:05 -0500 Subject: [PATCH 036/103] riscv: virt_addr_valid must check the address belongs to linear mapping [ Upstream commit 2ab543823322b564f205cb15d0f0302803c87d11 ] virt_addr_valid macro checks that a virtual address is valid, ie that the address belongs to the linear mapping and that the corresponding physical page exists. Add the missing check that ensures the virtual address belongs to the linear mapping, otherwise __virt_to_phys, when compiled with CONFIG_DEBUG_VIRTUAL enabled, raises a WARN that is interpreted as a kernel bug by syzbot. Signed-off-by: Alexandre Ghiti Reviewed-by: Atish Patra Signed-off-by: Palmer Dabbelt Signed-off-by: Sasha Levin --- arch/riscv/include/asm/page.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/arch/riscv/include/asm/page.h b/arch/riscv/include/asm/page.h index 2d50f76efe48..64a675c5c30a 100644 --- a/arch/riscv/include/asm/page.h +++ b/arch/riscv/include/asm/page.h @@ -135,7 +135,10 @@ extern phys_addr_t __phys_addr_symbol(unsigned long x); #endif /* __ASSEMBLY__ */ -#define virt_addr_valid(vaddr) (pfn_valid(virt_to_pfn(vaddr))) +#define virt_addr_valid(vaddr) ({ \ + unsigned long _addr = (unsigned long)vaddr; \ + (unsigned long)(_addr) >= PAGE_OFFSET && pfn_valid(virt_to_pfn(_addr)); \ +}) #define VM_DATA_DEFAULT_FLAGS VM_DATA_FLAGS_NON_EXEC From d93178df8f754b8ae5b5c804edcd6d4b64aad5a7 Mon Sep 17 00:00:00 2001 From: Lin Feng Date: Tue, 2 Feb 2021 07:18:23 -0700 Subject: [PATCH 037/103] bfq-iosched: Revert "bfq: Fix computation of shallow depth" [ Upstream commit 388c705b95f23f317fa43e6abf9ff07b583b721a ] This reverts commit 6d4d273588378c65915acaf7b2ee74e9dd9c130a. bfq.limit_depth passes word_depths[] as shallow_depth down to sbitmap core sbitmap_get_shallow, which uses just the number to limit the scan depth of each bitmap word, formula: scan_percentage_for_each_word = shallow_depth / (1 << sbimap->shift) * 100% That means the comments's percentiles 50%, 75%, 18%, 37% of bfq are correct. But after commit patch 'bfq: Fix computation of shallow depth', we use sbitmap.depth instead, as a example in following case: sbitmap.depth = 256, map_nr = 4, shift = 6; sbitmap_word.depth = 64. The resulsts of computed bfqd->word_depths[] are {128, 192, 48, 96}, and three of the numbers exceed core dirver's 'sbitmap_word.depth=64' limit nothing. Signed-off-by: Lin Feng Reviewed-by: Jan Kara Signed-off-by: Jens Axboe Signed-off-by: Sasha Levin --- block/bfq-iosched.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c index 9e4eb0fc1c16..9e81d1052091 100644 --- a/block/bfq-iosched.c +++ b/block/bfq-iosched.c @@ -6332,13 +6332,13 @@ static unsigned int bfq_update_depths(struct bfq_data *bfqd, * limit 'something'. */ /* no more than 50% of tags for async I/O */ - bfqd->word_depths[0][0] = max(bt->sb.depth >> 1, 1U); + bfqd->word_depths[0][0] = max((1U << bt->sb.shift) >> 1, 1U); /* * no more than 75% of tags for sync writes (25% extra tags * w.r.t. async I/O, to prevent async I/O from starving sync * writes) */ - bfqd->word_depths[0][1] = max((bt->sb.depth * 3) >> 2, 1U); + bfqd->word_depths[0][1] = max(((1U << bt->sb.shift) * 3) >> 2, 1U); /* * In-word depths in case some bfq_queue is being weight- @@ -6348,9 +6348,9 @@ static unsigned int bfq_update_depths(struct bfq_data *bfqd, * shortage. */ /* no more than ~18% of tags for async I/O */ - bfqd->word_depths[1][0] = max((bt->sb.depth * 3) >> 4, 1U); + bfqd->word_depths[1][0] = max(((1U << bt->sb.shift) * 3) >> 4, 1U); /* no more than ~37% of tags for sync writes (~20% extra tags) */ - bfqd->word_depths[1][1] = max((bt->sb.depth * 6) >> 4, 1U); + bfqd->word_depths[1][1] = max(((1U << bt->sb.shift) * 6) >> 4, 1U); for (i = 0; i < 2; i++) for (j = 0; j < 2; j++) From 11648f26b074758736a8663bde5ff94f2866e671 Mon Sep 17 00:00:00 2001 From: Alexandre Belloni Date: Wed, 3 Feb 2021 10:03:20 +0100 Subject: [PATCH 038/103] ARM: dts: lpc32xx: Revert set default clock rate of HCLK PLL [ Upstream commit 5638159f6d93b99ec9743ac7f65563fca3cf413d ] This reverts commit c17e9377aa81664d94b4f2102559fcf2a01ec8e7. The lpc32xx clock driver is not able to actually change the PLL rate as this would require reparenting ARM_CLK, DDRAM_CLK, PERIPH_CLK to SYSCLK, then stop the PLL, update the register, restart the PLL and wait for the PLL to lock and finally reparent ARM_CLK, DDRAM_CLK, PERIPH_CLK to HCLK PLL. Currently, the HCLK driver simply updates the registers but this has no real effect and all the clock rate calculation end up being wrong. This is especially annoying for the peripheral (e.g. UARTs, I2C, SPI). Signed-off-by: Alexandre Belloni Tested-by: Gregory CLEMENT Link: https://lore.kernel.org/r/20210203090320.GA3760268@piout.net' Signed-off-by: Arnd Bergmann Signed-off-by: Sasha Levin --- arch/arm/boot/dts/lpc32xx.dtsi | 3 --- 1 file changed, 3 deletions(-) diff --git a/arch/arm/boot/dts/lpc32xx.dtsi b/arch/arm/boot/dts/lpc32xx.dtsi index 7b7ec7b1217b..824393e1bcfb 100644 --- a/arch/arm/boot/dts/lpc32xx.dtsi +++ b/arch/arm/boot/dts/lpc32xx.dtsi @@ -329,9 +329,6 @@ clocks = <&xtal_32k>, <&xtal>; clock-names = "xtal_32k", "xtal"; - - assigned-clocks = <&clk LPC32XX_CLK_HCLK_PLL>; - assigned-clock-rates = <208000000>; }; }; From 3dc2ba46500124ac350f9cd1a378e799f8f7fc88 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 4 Feb 2021 16:29:47 +0100 Subject: [PATCH 039/103] kallsyms: fix nonconverging kallsyms table with lld [ Upstream commit efe6e3068067212b85c2d0474b5ee3b2d0c7adab ] ARM randconfig builds with lld sometimes show a build failure from kallsyms: Inconsistent kallsyms data Try make KALLSYMS_EXTRA_PASS=1 as a workaround The problem is the veneers/thunks getting added by the linker extend the symbol table, which in turn leads to more veneers being needed, so it may take a few extra iterations to converge. This bug has been fixed multiple times before, but comes back every time a new symbol name is used. lld uses a different set of identifiers from ld.bfd, so the additional ones need to be added as well. I looked through the sources and found that arm64 and mips define similar prefixes, so I'm adding those as well, aside from the ones I observed. I'm not sure about powerpc64, which seems to already be handled through a section match, but if it comes back, the "__long_branch_" and "__plt_" prefixes would have to get added as well. Signed-off-by: Arnd Bergmann Signed-off-by: Masahiro Yamada Signed-off-by: Sasha Levin --- scripts/kallsyms.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/scripts/kallsyms.c b/scripts/kallsyms.c index 7ecd2ccba531..54ad86d13784 100644 --- a/scripts/kallsyms.c +++ b/scripts/kallsyms.c @@ -112,6 +112,12 @@ static bool is_ignored_symbol(const char *name, char type) "__crc_", /* modversions */ "__efistub_", /* arm64 EFI stub namespace */ "__kvm_nvhe_", /* arm64 non-VHE KVM namespace */ + "__AArch64ADRPThunk_", /* arm64 lld */ + "__ARMV5PILongThunk_", /* arm lld */ + "__ARMV7PILongThunk_", + "__ThumbV7PILongThunk_", + "__LA25Thunk_", /* mips lld */ + "__microLA25Thunk_", NULL }; From 7913ec05fc02ccd7df83280451504b0a3e543097 Mon Sep 17 00:00:00 2001 From: Russell King Date: Fri, 29 Jan 2021 10:19:07 +0000 Subject: [PATCH 040/103] ARM: ensure the signal page contains defined contents [ Upstream commit 9c698bff66ab4914bb3d71da7dc6112519bde23e ] Ensure that the signal page contains our poison instruction to increase the protection against ROP attacks and also contains well defined contents. Acked-by: Will Deacon Signed-off-by: Russell King Signed-off-by: Sasha Levin --- arch/arm/kernel/signal.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/arch/arm/kernel/signal.c b/arch/arm/kernel/signal.c index 585edbfccf6d..2f81d3af5f9a 100644 --- a/arch/arm/kernel/signal.c +++ b/arch/arm/kernel/signal.c @@ -693,18 +693,20 @@ struct page *get_signal_page(void) addr = page_address(page); + /* Poison the entire page */ + memset32(addr, __opcode_to_mem_arm(0xe7fddef1), + PAGE_SIZE / sizeof(u32)); + /* Give the signal return code some randomness */ offset = 0x200 + (get_random_int() & 0x7fc); signal_return_offset = offset; - /* - * Copy signal return handlers into the vector page, and - * set sigreturn to be a pointer to these. - */ + /* Copy signal return handlers into the page */ memcpy(addr + offset, sigreturn_codes, sizeof(sigreturn_codes)); - ptr = (unsigned long)addr + offset; - flush_icache_range(ptr, ptr + sizeof(sigreturn_codes)); + /* Flush out all instructions in this page */ + ptr = (unsigned long)addr; + flush_icache_range(ptr, ptr + PAGE_SIZE); return page; } From 249735b011234a65a720952d5451156be8baa50f Mon Sep 17 00:00:00 2001 From: Russell King Date: Mon, 1 Feb 2021 19:40:01 +0000 Subject: [PATCH 041/103] ARM: kexec: fix oops after TLB are invalidated [ Upstream commit 4d62e81b60d4025e2dfcd5ea531cc1394ce9226f ] Giancarlo Ferrari reports the following oops while trying to use kexec: Unable to handle kernel paging request at virtual address 80112f38 pgd = fd7ef03e [80112f38] *pgd=0001141e(bad) Internal error: Oops: 80d [#1] PREEMPT SMP ARM ... This is caused by machine_kexec() trying to set the kernel text to be read/write, so it can poke values into the relocation code before copying it - and an interrupt occuring which changes the page tables. The subsequent writes then hit read-only sections that trigger a data abort resulting in the above oops. Fix this by copying the relocation code, and then writing the variables into the destination, thereby avoiding the need to make the kernel text read/write. Reported-by: Giancarlo Ferrari Tested-by: Giancarlo Ferrari Signed-off-by: Russell King Signed-off-by: Sasha Levin --- arch/arm/include/asm/kexec-internal.h | 12 +++++++++ arch/arm/kernel/asm-offsets.c | 5 ++++ arch/arm/kernel/machine_kexec.c | 20 ++++++-------- arch/arm/kernel/relocate_kernel.S | 38 ++++++++------------------- 4 files changed, 36 insertions(+), 39 deletions(-) create mode 100644 arch/arm/include/asm/kexec-internal.h diff --git a/arch/arm/include/asm/kexec-internal.h b/arch/arm/include/asm/kexec-internal.h new file mode 100644 index 000000000000..ecc2322db7aa --- /dev/null +++ b/arch/arm/include/asm/kexec-internal.h @@ -0,0 +1,12 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ARM_KEXEC_INTERNAL_H +#define _ARM_KEXEC_INTERNAL_H + +struct kexec_relocate_data { + unsigned long kexec_start_address; + unsigned long kexec_indirection_page; + unsigned long kexec_mach_type; + unsigned long kexec_r2; +}; + +#endif diff --git a/arch/arm/kernel/asm-offsets.c b/arch/arm/kernel/asm-offsets.c index a1570c8bab25..be8050b0c3df 100644 --- a/arch/arm/kernel/asm-offsets.c +++ b/arch/arm/kernel/asm-offsets.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include #include @@ -170,5 +171,9 @@ int main(void) DEFINE(MPU_RGN_PRBAR, offsetof(struct mpu_rgn, prbar)); DEFINE(MPU_RGN_PRLAR, offsetof(struct mpu_rgn, prlar)); #endif + DEFINE(KEXEC_START_ADDR, offsetof(struct kexec_relocate_data, kexec_start_address)); + DEFINE(KEXEC_INDIR_PAGE, offsetof(struct kexec_relocate_data, kexec_indirection_page)); + DEFINE(KEXEC_MACH_TYPE, offsetof(struct kexec_relocate_data, kexec_mach_type)); + DEFINE(KEXEC_R2, offsetof(struct kexec_relocate_data, kexec_r2)); return 0; } diff --git a/arch/arm/kernel/machine_kexec.c b/arch/arm/kernel/machine_kexec.c index 5d84ad333f05..2b09dad7935e 100644 --- a/arch/arm/kernel/machine_kexec.c +++ b/arch/arm/kernel/machine_kexec.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include #include @@ -22,11 +23,6 @@ extern void relocate_new_kernel(void); extern const unsigned int relocate_new_kernel_size; -extern unsigned long kexec_start_address; -extern unsigned long kexec_indirection_page; -extern unsigned long kexec_mach_type; -extern unsigned long kexec_boot_atags; - static atomic_t waiting_for_crash_ipi; /* @@ -159,6 +155,7 @@ void (*kexec_reinit)(void); void machine_kexec(struct kimage *image) { unsigned long page_list, reboot_entry_phys; + struct kexec_relocate_data *data; void (*reboot_entry)(void); void *reboot_code_buffer; @@ -174,18 +171,17 @@ void machine_kexec(struct kimage *image) reboot_code_buffer = page_address(image->control_code_page); - /* Prepare parameters for reboot_code_buffer*/ - set_kernel_text_rw(); - kexec_start_address = image->start; - kexec_indirection_page = page_list; - kexec_mach_type = machine_arch_type; - kexec_boot_atags = image->arch.kernel_r2; - /* copy our kernel relocation code to the control code page */ reboot_entry = fncpy(reboot_code_buffer, &relocate_new_kernel, relocate_new_kernel_size); + data = reboot_code_buffer + relocate_new_kernel_size; + data->kexec_start_address = image->start; + data->kexec_indirection_page = page_list; + data->kexec_mach_type = machine_arch_type; + data->kexec_r2 = image->arch.kernel_r2; + /* get the identity mapping physical address for the reboot code */ reboot_entry_phys = virt_to_idmap(reboot_entry); diff --git a/arch/arm/kernel/relocate_kernel.S b/arch/arm/kernel/relocate_kernel.S index 72a08786e16e..218d524360fc 100644 --- a/arch/arm/kernel/relocate_kernel.S +++ b/arch/arm/kernel/relocate_kernel.S @@ -5,14 +5,16 @@ #include #include +#include #include .align 3 /* not needed for this code, but keeps fncpy() happy */ ENTRY(relocate_new_kernel) - ldr r0,kexec_indirection_page - ldr r1,kexec_start_address + adr r7, relocate_new_kernel_end + ldr r0, [r7, #KEXEC_INDIR_PAGE] + ldr r1, [r7, #KEXEC_START_ADDR] /* * If there is no indirection page (we are doing crashdumps) @@ -57,34 +59,16 @@ ENTRY(relocate_new_kernel) 2: /* Jump to relocated kernel */ - mov lr,r1 - mov r0,#0 - ldr r1,kexec_mach_type - ldr r2,kexec_boot_atags - ARM( ret lr ) - THUMB( bx lr ) - - .align - - .globl kexec_start_address -kexec_start_address: - .long 0x0 - - .globl kexec_indirection_page -kexec_indirection_page: - .long 0x0 - - .globl kexec_mach_type -kexec_mach_type: - .long 0x0 - - /* phy addr of the atags for the new kernel */ - .globl kexec_boot_atags -kexec_boot_atags: - .long 0x0 + mov lr, r1 + mov r0, #0 + ldr r1, [r7, #KEXEC_MACH_TYPE] + ldr r2, [r7, #KEXEC_R2] + ARM( ret lr ) + THUMB( bx lr ) ENDPROC(relocate_new_kernel) + .align 3 relocate_new_kernel_end: .globl relocate_new_kernel_size From cf1cab6edb06b223520bebfca4e351256000b816 Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Thu, 4 Feb 2021 18:32:57 -0800 Subject: [PATCH 042/103] ubsan: implement __ubsan_handle_alignment_assumption [ Upstream commit 28abcc963149e06d956d95a18a85f4ba26af746f ] When building ARCH=mips 32r2el_defconfig with CONFIG_UBSAN_ALIGNMENT: ld.lld: error: undefined symbol: __ubsan_handle_alignment_assumption referenced by slab.h:557 (include/linux/slab.h:557) main.o:(do_initcalls) in archive init/built-in.a referenced by slab.h:448 (include/linux/slab.h:448) do_mounts_rd.o:(rd_load_image) in archive init/built-in.a referenced by slab.h:448 (include/linux/slab.h:448) do_mounts_rd.o:(identify_ramdisk_image) in archive init/built-in.a referenced 1579 more times Implement this for the kernel based on LLVM's handleAlignmentAssumptionImpl because the kernel is not linked against the compiler runtime. Link: https://github.com/ClangBuiltLinux/linux/issues/1245 Link: https://github.com/llvm/llvm-project/blob/llvmorg-11.0.1/compiler-rt/lib/ubsan/ubsan_handlers.cpp#L151-L190 Link: https://lkml.kernel.org/r/20210127224451.2587372-1-nathan@kernel.org Signed-off-by: Nathan Chancellor Acked-by: Kees Cook Reviewed-by: Nick Desaulniers Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Sasha Levin --- lib/ubsan.c | 31 +++++++++++++++++++++++++++++++ lib/ubsan.h | 6 ++++++ 2 files changed, 37 insertions(+) diff --git a/lib/ubsan.c b/lib/ubsan.c index cb9af3f6b77e..adf8dcf3c84e 100644 --- a/lib/ubsan.c +++ b/lib/ubsan.c @@ -427,3 +427,34 @@ void __ubsan_handle_load_invalid_value(void *_data, void *val) ubsan_epilogue(); } EXPORT_SYMBOL(__ubsan_handle_load_invalid_value); + +void __ubsan_handle_alignment_assumption(void *_data, unsigned long ptr, + unsigned long align, + unsigned long offset); +void __ubsan_handle_alignment_assumption(void *_data, unsigned long ptr, + unsigned long align, + unsigned long offset) +{ + struct alignment_assumption_data *data = _data; + unsigned long real_ptr; + + if (suppress_report(&data->location)) + return; + + ubsan_prologue(&data->location, "alignment-assumption"); + + if (offset) + pr_err("assumption of %lu byte alignment (with offset of %lu byte) for pointer of type %s failed", + align, offset, data->type->type_name); + else + pr_err("assumption of %lu byte alignment for pointer of type %s failed", + align, data->type->type_name); + + real_ptr = ptr - offset; + pr_err("%saddress is %lu aligned, misalignment offset is %lu bytes", + offset ? "offset " : "", BIT(real_ptr ? __ffs(real_ptr) : 0), + real_ptr & (align - 1)); + + ubsan_epilogue(); +} +EXPORT_SYMBOL(__ubsan_handle_alignment_assumption); diff --git a/lib/ubsan.h b/lib/ubsan.h index 7b56c09473a9..9a0b71c5ff9f 100644 --- a/lib/ubsan.h +++ b/lib/ubsan.h @@ -78,6 +78,12 @@ struct invalid_value_data { struct type_descriptor *type; }; +struct alignment_assumption_data { + struct source_location location; + struct source_location assumption_location; + struct type_descriptor *type; +}; + #if defined(CONFIG_ARCH_SUPPORTS_INT128) typedef __int128 s_max; typedef unsigned __int128 u_max; From 486c1525eba3b0d2ec8b7f621ebf213d7f552f88 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 5 Feb 2021 23:28:29 +0100 Subject: [PATCH 043/103] Revert "lib: Restrict cpumask_local_spread to houskeeping CPUs" [ Upstream commit 2452483d9546de1c540f330469dc4042ff089731 ] This reverts commit 1abdfe706a579a702799fce465bceb9fb01d407c. This change is broken and not solving any problem it claims to solve. Robin reported that cpumask_local_spread() now returns any cpu out of cpu_possible_mask in case that NOHZ_FULL is disabled (runtime or compile time). It can also return any offline or not-present CPU in the housekeeping mask. Before that it was returning a CPU out of online_cpu_mask. While the function is racy against CPU hotplug if the caller does not protect against it, the actual use cases are not caring much about it as they use it mostly as hint for: - the user space affinity hint which is unused by the kernel - memory node selection which is just suboptimal - network queue affinity which might fail but is handled gracefully But the occasional fail vs. hotplug is very different from returning anything from possible_cpu_mask which can have a large amount of offline CPUs obviously. The changelog of the commit claims: "The current implementation of cpumask_local_spread() does not respect the isolated CPUs, i.e., even if a CPU has been isolated for Real-Time task, it will return it to the caller for pinning of its IRQ threads. Having these unwanted IRQ threads on an isolated CPU adds up to a latency overhead." The only correct part of this changelog is: "The current implementation of cpumask_local_spread() does not respect the isolated CPUs." Everything else is just disjunct from reality. Reported-by: Robin Murphy Signed-off-by: Thomas Gleixner Cc: Nitesh Narayan Lal Cc: Marcelo Tosatti Cc: abelits@marvell.com Cc: davem@davemloft.net Link: https://lore.kernel.org/r/87y2g26tnt.fsf@nanos.tec.linutronix.de Signed-off-by: Sasha Levin --- lib/cpumask.c | 16 +++++----------- 1 file changed, 5 insertions(+), 11 deletions(-) diff --git a/lib/cpumask.c b/lib/cpumask.c index 85da6ab4fbb5..fb22fb266f93 100644 --- a/lib/cpumask.c +++ b/lib/cpumask.c @@ -6,7 +6,6 @@ #include #include #include -#include /** * cpumask_next - get the next cpu in a cpumask @@ -206,27 +205,22 @@ void __init free_bootmem_cpumask_var(cpumask_var_t mask) */ unsigned int cpumask_local_spread(unsigned int i, int node) { - int cpu, hk_flags; - const struct cpumask *mask; + int cpu; - hk_flags = HK_FLAG_DOMAIN | HK_FLAG_MANAGED_IRQ; - mask = housekeeping_cpumask(hk_flags); /* Wrap: we always want a cpu. */ - i %= cpumask_weight(mask); + i %= num_online_cpus(); if (node == NUMA_NO_NODE) { - for_each_cpu(cpu, mask) { + for_each_cpu(cpu, cpu_online_mask) if (i-- == 0) return cpu; - } } else { /* NUMA first. */ - for_each_cpu_and(cpu, cpumask_of_node(node), mask) { + for_each_cpu_and(cpu, cpumask_of_node(node), cpu_online_mask) if (i-- == 0) return cpu; - } - for_each_cpu(cpu, mask) { + for_each_cpu(cpu, cpu_online_mask) { /* Skip NUMA nodes, done above. */ if (cpumask_test_cpu(cpu, cpumask_of_node(node))) continue; From d070ccc7ad17118a1e46988b0fa097e55e3fab01 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Fri, 5 Feb 2021 11:31:31 +0100 Subject: [PATCH 044/103] x86/efi: Remove EFI PGD build time checks [ Upstream commit 816ef8d7a2c4182e19bc06ab65751cb9e3951e94 ] With CONFIG_X86_5LEVEL, CONFIG_UBSAN and CONFIG_UBSAN_UNSIGNED_OVERFLOW enabled, clang fails the build with x86_64-linux-ld: arch/x86/platform/efi/efi_64.o: in function `efi_sync_low_kernel_mappings': efi_64.c:(.text+0x22c): undefined reference to `__compiletime_assert_354' which happens due to -fsanitize=unsigned-integer-overflow being enabled: -fsanitize=unsigned-integer-overflow: Unsigned integer overflow, where the result of an unsigned integer computation cannot be represented in its type. Unlike signed integer overflow, this is not undefined behavior, but it is often unintentional. This sanitizer does not check for lossy implicit conversions performed before such a computation (see -fsanitize=implicit-conversion). and that fires when the (intentional) EFI_VA_START/END defines overflow an unsigned long, leading to the assertion expressions not getting optimized away (on GCC they do)... However, those checks are superfluous: the runtime services mapping code already makes sure the ranges don't overshoot EFI_VA_END as the EFI mapping range is hardcoded. On each runtime services call, it is switched to the EFI-specific PGD and even if mappings manage to escape that last PGD, this won't remain unnoticed for long. So rip them out. See https://github.com/ClangBuiltLinux/linux/issues/256 for more info. Reported-by: Arnd Bergmann Signed-off-by: Borislav Petkov Reviewed-by: Nathan Chancellor Acked-by: Ard Biesheuvel Tested-by: Nick Desaulniers Tested-by: Nathan Chancellor Link: http://lkml.kernel.org/r/20210107223424.4135538-1-arnd@kernel.org Signed-off-by: Sasha Levin --- arch/x86/platform/efi/efi_64.c | 19 ------------------- 1 file changed, 19 deletions(-) diff --git a/arch/x86/platform/efi/efi_64.c b/arch/x86/platform/efi/efi_64.c index e1e8d4e3a213..8efd003540ca 100644 --- a/arch/x86/platform/efi/efi_64.c +++ b/arch/x86/platform/efi/efi_64.c @@ -115,31 +115,12 @@ void efi_sync_low_kernel_mappings(void) pud_t *pud_k, *pud_efi; pgd_t *efi_pgd = efi_mm.pgd; - /* - * We can share all PGD entries apart from the one entry that - * covers the EFI runtime mapping space. - * - * Make sure the EFI runtime region mappings are guaranteed to - * only span a single PGD entry and that the entry also maps - * other important kernel regions. - */ - MAYBE_BUILD_BUG_ON(pgd_index(EFI_VA_END) != pgd_index(MODULES_END)); - MAYBE_BUILD_BUG_ON((EFI_VA_START & PGDIR_MASK) != - (EFI_VA_END & PGDIR_MASK)); - pgd_efi = efi_pgd + pgd_index(PAGE_OFFSET); pgd_k = pgd_offset_k(PAGE_OFFSET); num_entries = pgd_index(EFI_VA_END) - pgd_index(PAGE_OFFSET); memcpy(pgd_efi, pgd_k, sizeof(pgd_t) * num_entries); - /* - * As with PGDs, we share all P4D entries apart from the one entry - * that covers the EFI runtime mapping space. - */ - BUILD_BUG_ON(p4d_index(EFI_VA_END) != p4d_index(MODULES_END)); - BUILD_BUG_ON((EFI_VA_START & P4D_MASK) != (EFI_VA_END & P4D_MASK)); - pgd_efi = efi_pgd + pgd_index(EFI_VA_END); pgd_k = pgd_offset_k(EFI_VA_END); p4d_efi = p4d_offset(pgd_efi, 0); From e624efe36eb6a111129f12c5c33d5eaa901a67ef Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Mon, 7 Dec 2020 17:05:33 +0000 Subject: [PATCH 045/103] lkdtm: don't move ctors to .rodata commit 3f618ab3323407ee4c6a6734a37eb6e9663ebfb9 upstream. When building with KASAN and LKDTM, clang may implictly generate an asan.module_ctor function in the LKDTM rodata object. The Makefile moves the lkdtm_rodata_do_nothing() function into .rodata by renaming the file's .text section to .rodata, and consequently also moves the ctor function into .rodata, leading to a boot time crash (splat below) when the ctor is invoked by do_ctors(). Let's prevent this by marking the function as noinstr rather than notrace, and renaming the file's .noinstr.text to .rodata. Marking the function as noinstr will prevent tracing and kprobes, and will inhibit any undesireable compiler instrumentation. The ctor function (if any) will be placed in .text and will work correctly. Example splat before this patch is applied: [ 0.916359] Unable to handle kernel execute from non-executable memory at virtual address ffffa0006b60f5ac [ 0.922088] Mem abort info: [ 0.922828] ESR = 0x8600000e [ 0.923635] EC = 0x21: IABT (current EL), IL = 32 bits [ 0.925036] SET = 0, FnV = 0 [ 0.925838] EA = 0, S1PTW = 0 [ 0.926714] swapper pgtable: 4k pages, 48-bit VAs, pgdp=00000000427b3000 [ 0.928489] [ffffa0006b60f5ac] pgd=000000023ffff003, p4d=000000023ffff003, pud=000000023fffe003, pmd=0068000042000f01 [ 0.931330] Internal error: Oops: 8600000e [#1] PREEMPT SMP [ 0.932806] Modules linked in: [ 0.933617] CPU: 0 PID: 1 Comm: swapper/0 Not tainted 5.10.0-rc7 #2 [ 0.935620] Hardware name: linux,dummy-virt (DT) [ 0.936924] pstate: 40400005 (nZcv daif +PAN -UAO -TCO BTYPE=--) [ 0.938609] pc : asan.module_ctor+0x0/0x14 [ 0.939759] lr : do_basic_setup+0x4c/0x70 [ 0.940889] sp : ffff27b600177e30 [ 0.941815] x29: ffff27b600177e30 x28: 0000000000000000 [ 0.943306] x27: 0000000000000000 x26: 0000000000000000 [ 0.944803] x25: 0000000000000000 x24: 0000000000000000 [ 0.946289] x23: 0000000000000001 x22: 0000000000000000 [ 0.947777] x21: ffffa0006bf4a890 x20: ffffa0006befb6c0 [ 0.949271] x19: ffffa0006bef9358 x18: 0000000000000068 [ 0.950756] x17: fffffffffffffff8 x16: 0000000000000000 [ 0.952246] x15: 0000000000000000 x14: 0000000000000000 [ 0.953734] x13: 00000000838a16d5 x12: 0000000000000001 [ 0.955223] x11: ffff94000da74041 x10: dfffa00000000000 [ 0.956715] x9 : 0000000000000000 x8 : ffffa0006b60f5ac [ 0.958199] x7 : f9f9f9f9f9f9f9f9 x6 : 000000000000003f [ 0.959683] x5 : 0000000000000040 x4 : 0000000000000000 [ 0.961178] x3 : ffffa0006bdc15a0 x2 : 0000000000000005 [ 0.962662] x1 : 00000000000000f9 x0 : ffffa0006bef9350 [ 0.964155] Call trace: [ 0.964844] asan.module_ctor+0x0/0x14 [ 0.965895] kernel_init_freeable+0x158/0x198 [ 0.967115] kernel_init+0x14/0x19c [ 0.968104] ret_from_fork+0x10/0x30 [ 0.969110] Code: 00000003 00000000 00000000 00000000 (00000000) [ 0.970815] ---[ end trace b5339784e20d015c ]--- Cc: Arnd Bergmann Cc: Greg Kroah-Hartman Cc: Kees Cook Acked-by: Kees Cook Signed-off-by: Mark Rutland Link: https://lore.kernel.org/r/20201207170533.10738-1-mark.rutland@arm.com Cc: Stephen Boyd Signed-off-by: Greg Kroah-Hartman --- drivers/misc/lkdtm/Makefile | 2 +- drivers/misc/lkdtm/rodata.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/misc/lkdtm/Makefile b/drivers/misc/lkdtm/Makefile index c70b3822013f..30c8ac24635d 100644 --- a/drivers/misc/lkdtm/Makefile +++ b/drivers/misc/lkdtm/Makefile @@ -16,7 +16,7 @@ KCOV_INSTRUMENT_rodata.o := n OBJCOPYFLAGS := OBJCOPYFLAGS_rodata_objcopy.o := \ - --rename-section .text=.rodata,alloc,readonly,load + --rename-section .noinstr.text=.rodata,alloc,readonly,load targets += rodata.o rodata_objcopy.o $(obj)/rodata_objcopy.o: $(obj)/rodata.o FORCE $(call if_changed,objcopy) diff --git a/drivers/misc/lkdtm/rodata.c b/drivers/misc/lkdtm/rodata.c index 58d180af72cf..baacb876d1d9 100644 --- a/drivers/misc/lkdtm/rodata.c +++ b/drivers/misc/lkdtm/rodata.c @@ -5,7 +5,7 @@ */ #include "lkdtm.h" -void notrace lkdtm_rodata_do_nothing(void) +void noinstr lkdtm_rodata_do_nothing(void) { /* Does nothing. We just want an architecture agnostic "return". */ } From 2aba53830f5d02dcd0bb74a00c8b8023df9d1398 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Fri, 13 Nov 2020 08:30:38 -0500 Subject: [PATCH 046/103] KVM: x86: cleanup CR3 reserved bits checks [ Upstream commit c1c35cf78bfab31b8cb455259524395c9e4c7cd6 ] If not in long mode, the low bits of CR3 are reserved but not enforced to be zero, so remove those checks. If in long mode, however, the MBZ bits extend down to the highest physical address bit of the guest, excluding the encryption bit. Make the checks consistent with the above, and match them between nested_vmcb_checks and KVM_SET_SREGS. Cc: stable@vger.kernel.org Fixes: 761e41693465 ("KVM: nSVM: Check that MBZ bits in CR3 and CR4 are not set on vmrun of nested guests") Fixes: a780a3ea6282 ("KVM: X86: Fix reserved bits check for MOV to CR3") Reviewed-by: Sean Christopherson Signed-off-by: Paolo Bonzini Signed-off-by: Sasha Levin --- arch/x86/kvm/svm/nested.c | 13 +++---------- arch/x86/kvm/svm/svm.h | 3 --- arch/x86/kvm/x86.c | 2 ++ 3 files changed, 5 insertions(+), 13 deletions(-) diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c index 65e40acde71a..4fbe190c7915 100644 --- a/arch/x86/kvm/svm/nested.c +++ b/arch/x86/kvm/svm/nested.c @@ -231,6 +231,7 @@ static bool nested_vmcb_check_controls(struct vmcb_control_area *control) static bool nested_vmcb_checks(struct vcpu_svm *svm, struct vmcb *vmcb12) { + struct kvm_vcpu *vcpu = &svm->vcpu; bool vmcb12_lma; if ((vmcb12->save.efer & EFER_SVME) == 0) @@ -244,18 +245,10 @@ static bool nested_vmcb_checks(struct vcpu_svm *svm, struct vmcb *vmcb12) vmcb12_lma = (vmcb12->save.efer & EFER_LME) && (vmcb12->save.cr0 & X86_CR0_PG); - if (!vmcb12_lma) { - if (vmcb12->save.cr4 & X86_CR4_PAE) { - if (vmcb12->save.cr3 & MSR_CR3_LEGACY_PAE_RESERVED_MASK) - return false; - } else { - if (vmcb12->save.cr3 & MSR_CR3_LEGACY_RESERVED_MASK) - return false; - } - } else { + if (vmcb12_lma) { if (!(vmcb12->save.cr4 & X86_CR4_PAE) || !(vmcb12->save.cr0 & X86_CR0_PE) || - (vmcb12->save.cr3 & MSR_CR3_LONG_MBZ_MASK)) + (vmcb12->save.cr3 & vcpu->arch.cr3_lm_rsvd_bits)) return false; } if (kvm_valid_cr4(&svm->vcpu, vmcb12->save.cr4)) diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h index 1d853fe4c778..be74e22b82ea 100644 --- a/arch/x86/kvm/svm/svm.h +++ b/arch/x86/kvm/svm/svm.h @@ -346,9 +346,6 @@ static inline bool gif_set(struct vcpu_svm *svm) } /* svm.c */ -#define MSR_CR3_LEGACY_RESERVED_MASK 0xfe7U -#define MSR_CR3_LEGACY_PAE_RESERVED_MASK 0x7U -#define MSR_CR3_LONG_MBZ_MASK 0xfff0000000000000U #define MSR_INVALID 0xffffffffU u32 svm_msrpm_offset(u32 msr); diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 18a315bbcb79..fa5f059c2b94 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -9558,6 +9558,8 @@ static int kvm_valid_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs) if (!(sregs->cr4 & X86_CR4_PAE) || !(sregs->efer & EFER_LMA)) return -EINVAL; + if (sregs->cr3 & vcpu->arch.cr3_lm_rsvd_bits) + return -EINVAL; } else { /* * Not in 64-bit mode: EFER.LMA is clear and the code From 3e53d64e9a4d88226fb1ab55900fe4e89e81df1e Mon Sep 17 00:00:00 2001 From: Chen Zhou Date: Fri, 15 Jan 2021 17:37:17 +0800 Subject: [PATCH 047/103] cgroup-v1: add disabled controller check in cgroup1_parse_param() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 61e960b07b637f0295308ad91268501d744c21b5 ] When mounting a cgroup hierarchy with disabled controller in cgroup v1, all available controllers will be attached. For example, boot with cgroup_no_v1=cpu or cgroup_disable=cpu, and then mount with "mount -t cgroup -ocpu cpu /sys/fs/cgroup/cpu", then all enabled controllers will be attached except cpu. Fix this by adding disabled controller check in cgroup1_parse_param(). If the specified controller is disabled, just return error with information "Disabled controller xx" rather than attaching all the other enabled controllers. Fixes: f5dfb5315d34 ("cgroup: take options parsing into ->parse_monolithic()") Signed-off-by: Chen Zhou Reviewed-by: Zefan Li Reviewed-by: Michal Koutný Signed-off-by: Tejun Heo Signed-off-by: Sasha Levin --- kernel/cgroup/cgroup-v1.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/kernel/cgroup/cgroup-v1.c b/kernel/cgroup/cgroup-v1.c index 32596fdbcd5b..a5751784ad74 100644 --- a/kernel/cgroup/cgroup-v1.c +++ b/kernel/cgroup/cgroup-v1.c @@ -917,6 +917,9 @@ int cgroup1_parse_param(struct fs_context *fc, struct fs_parameter *param) for_each_subsys(ss, i) { if (strcmp(param->key, ss->legacy_name)) continue; + if (!cgroup_ssid_enabled(i) || cgroup1_ssid_disabled(i)) + return invalfc(fc, "Disabled controller '%s'", + param->key); ctx->subsys_mask |= (1 << i); return 0; } From c39cdf559d264f2fb0a450cd5dc18e534732b251 Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Fri, 15 Jan 2021 14:52:33 -0700 Subject: [PATCH 048/103] dmaengine: idxd: fix misc interrupt completion [ Upstream commit f5cc9ace24fbdf41b4814effbb2f9bad7046e988 ] Nikhil reported the misc interrupt handler can sometimes miss handling the command interrupt when an error interrupt happens near the same time. Have the irq handling thread continue to process the misc interrupts until all interrupts are processed. This is a low usage interrupt and is not expected to handle high volume traffic. Therefore there is no concern of this thread running for a long time. Fixes: 0d5c10b4c84d ("dmaengine: idxd: add work queue drain support") Reported-by: Nikhil Rao Signed-off-by: Dave Jiang Link: https://lore.kernel.org/r/161074755329.2183844.13295528344116907983.stgit@djiang5-desk3.ch.intel.com Signed-off-by: Vinod Koul Signed-off-by: Sasha Levin --- drivers/dma/idxd/irq.c | 36 +++++++++++++++++++++++++++--------- 1 file changed, 27 insertions(+), 9 deletions(-) diff --git a/drivers/dma/idxd/irq.c b/drivers/dma/idxd/irq.c index 17a65a13fb64..552e2e270705 100644 --- a/drivers/dma/idxd/irq.c +++ b/drivers/dma/idxd/irq.c @@ -53,19 +53,14 @@ irqreturn_t idxd_irq_handler(int vec, void *data) return IRQ_WAKE_THREAD; } -irqreturn_t idxd_misc_thread(int vec, void *data) +static int process_misc_interrupts(struct idxd_device *idxd, u32 cause) { - struct idxd_irq_entry *irq_entry = data; - struct idxd_device *idxd = irq_entry->idxd; struct device *dev = &idxd->pdev->dev; union gensts_reg gensts; - u32 cause, val = 0; + u32 val = 0; int i; bool err = false; - cause = ioread32(idxd->reg_base + IDXD_INTCAUSE_OFFSET); - iowrite32(cause, idxd->reg_base + IDXD_INTCAUSE_OFFSET); - if (cause & IDXD_INTC_ERR) { spin_lock_bh(&idxd->dev_lock); for (i = 0; i < 4; i++) @@ -123,7 +118,7 @@ irqreturn_t idxd_misc_thread(int vec, void *data) val); if (!err) - goto out; + return 0; gensts.bits = ioread32(idxd->reg_base + IDXD_GENSTATS_OFFSET); if (gensts.state == IDXD_DEVICE_STATE_HALT) { @@ -144,10 +139,33 @@ irqreturn_t idxd_misc_thread(int vec, void *data) gensts.reset_type == IDXD_DEVICE_RESET_FLR ? "FLR" : "system reset"); spin_unlock_bh(&idxd->dev_lock); + return -ENXIO; } } - out: + return 0; +} + +irqreturn_t idxd_misc_thread(int vec, void *data) +{ + struct idxd_irq_entry *irq_entry = data; + struct idxd_device *idxd = irq_entry->idxd; + int rc; + u32 cause; + + cause = ioread32(idxd->reg_base + IDXD_INTCAUSE_OFFSET); + if (cause) + iowrite32(cause, idxd->reg_base + IDXD_INTCAUSE_OFFSET); + + while (cause) { + rc = process_misc_interrupts(idxd, cause); + if (rc < 0) + break; + cause = ioread32(idxd->reg_base + IDXD_INTCAUSE_OFFSET); + if (cause) + iowrite32(cause, idxd->reg_base + IDXD_INTCAUSE_OFFSET); + } + idxd_unmask_msix_vector(idxd, irq_entry->id); return IRQ_HANDLED; } From 3ade81db49bdb1e3b4a2495ba4ef1ccde7dde2c1 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 25 Jan 2021 12:36:42 +0100 Subject: [PATCH 049/103] ath9k: fix build error with LEDS_CLASS=m [ Upstream commit b64acb28da8394485f0762e657470c9fc33aca4d ] When CONFIG_ATH9K is built-in but LED support is in a loadable module, both ath9k drivers fails to link: x86_64-linux-ld: drivers/net/wireless/ath/ath9k/gpio.o: in function `ath_deinit_leds': gpio.c:(.text+0x36): undefined reference to `led_classdev_unregister' x86_64-linux-ld: drivers/net/wireless/ath/ath9k/gpio.o: in function `ath_init_leds': gpio.c:(.text+0x179): undefined reference to `led_classdev_register_ext' The problem is that the 'imply' keyword does not enforce any dependency but is only a weak hint to Kconfig to enable another symbol from a defconfig file. Change imply to a 'depends on LEDS_CLASS' that prevents the incorrect configuration but still allows building the driver without LED support. The 'select MAC80211_LEDS' is now ensures that the LED support is actually used if it is present, and the added Kconfig dependency on MAC80211_LEDS ensures that it cannot be enabled manually when it has no effect. Fixes: 197f466e93f5 ("ath9k_htc: Do not select MAC80211_LEDS by default") Signed-off-by: Arnd Bergmann Acked-by: Johannes Berg Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/20210125113654.2408057-1-arnd@kernel.org Signed-off-by: Sasha Levin --- drivers/net/wireless/ath/ath9k/Kconfig | 8 ++------ net/mac80211/Kconfig | 2 +- 2 files changed, 3 insertions(+), 7 deletions(-) diff --git a/drivers/net/wireless/ath/ath9k/Kconfig b/drivers/net/wireless/ath/ath9k/Kconfig index a84bb9b6573f..e150d82eddb6 100644 --- a/drivers/net/wireless/ath/ath9k/Kconfig +++ b/drivers/net/wireless/ath/ath9k/Kconfig @@ -21,11 +21,9 @@ config ATH9K_BTCOEX_SUPPORT config ATH9K tristate "Atheros 802.11n wireless cards support" depends on MAC80211 && HAS_DMA + select MAC80211_LEDS if LEDS_CLASS=y || LEDS_CLASS=MAC80211 select ATH9K_HW select ATH9K_COMMON - imply NEW_LEDS - imply LEDS_CLASS - imply MAC80211_LEDS help This module adds support for wireless adapters based on Atheros IEEE 802.11n AR5008, AR9001 and AR9002 family @@ -176,11 +174,9 @@ config ATH9K_PCI_NO_EEPROM config ATH9K_HTC tristate "Atheros HTC based wireless cards support" depends on USB && MAC80211 + select MAC80211_LEDS if LEDS_CLASS=y || LEDS_CLASS=MAC80211 select ATH9K_HW select ATH9K_COMMON - imply NEW_LEDS - imply LEDS_CLASS - imply MAC80211_LEDS help Support for Atheros HTC based cards. Chipsets supported: AR9271 diff --git a/net/mac80211/Kconfig b/net/mac80211/Kconfig index cd9a9bd242ba..51ec8256b7fa 100644 --- a/net/mac80211/Kconfig +++ b/net/mac80211/Kconfig @@ -69,7 +69,7 @@ config MAC80211_MESH config MAC80211_LEDS bool "Enable LED triggers" depends on MAC80211 - depends on LEDS_CLASS + depends on LEDS_CLASS=y || LEDS_CLASS=MAC80211 select LEDS_TRIGGERS help This option enables a few LED triggers for different From 1d7c1456692446e73b67532d0a4e7964129c742a Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Tue, 26 Jan 2021 12:02:13 +0100 Subject: [PATCH 050/103] mt76: dma: fix a possible memory leak in mt76_add_fragment() [ Upstream commit 93a1d4791c10d443bc67044def7efee2991d48b7 ] Fix a memory leak in mt76_add_fragment routine returning the buffer to the page_frag_cache when we receive a new fragment and the skb_shared_info frag array is full. Fixes: b102f0c522cf6 ("mt76: fix array overflow on receiving too many fragments for a packet") Signed-off-by: Lorenzo Bianconi Acked-by: Felix Fietkau Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/4f9dd73407da88b2a552517ce8db242d86bf4d5c.1611616130.git.lorenzo@kernel.org Signed-off-by: Sasha Levin --- drivers/net/wireless/mediatek/mt76/dma.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/net/wireless/mediatek/mt76/dma.c b/drivers/net/wireless/mediatek/mt76/dma.c index 145e839fea4e..917617aad8d3 100644 --- a/drivers/net/wireless/mediatek/mt76/dma.c +++ b/drivers/net/wireless/mediatek/mt76/dma.c @@ -519,15 +519,17 @@ static void mt76_add_fragment(struct mt76_dev *dev, struct mt76_queue *q, void *data, int len, bool more) { - struct page *page = virt_to_head_page(data); - int offset = data - page_address(page); struct sk_buff *skb = q->rx_head; struct skb_shared_info *shinfo = skb_shinfo(skb); if (shinfo->nr_frags < ARRAY_SIZE(shinfo->frags)) { - offset += q->buf_offset; + struct page *page = virt_to_head_page(data); + int offset = data - page_address(page) + q->buf_offset; + skb_add_rx_frag(skb, shinfo->nr_frags, page, offset, len, q->buf_size); + } else { + skb_free_frag(data); } if (more) From 143b87907788e18326de5f0afa694ec633b56d85 Mon Sep 17 00:00:00 2001 From: Maxime Ripard Date: Fri, 29 Jan 2021 17:06:47 +0100 Subject: [PATCH 051/103] drm/vc4: hvs: Fix buffer overflow with the dlist handling [ Upstream commit facd93f4285c405f9a91b05166147cb39e860666 ] Commit 0a038c1c29a7 ("drm/vc4: Move LBM creation out of vc4_plane_mode_set()") changed the LBM allocation logic from first allocating the LBM memory for the plane to running mode_set, adding a gap in the LBM, and then running the dlist allocation filling that gap. The gap was introduced by incrementing the dlist array index, but was never checking whether or not we were over the array length, leading eventually to memory corruptions if we ever crossed this limit. vc4_dlist_write had that logic though, and was reallocating a larger dlist array when reaching the end of the buffer. Let's share the logic between both functions. Cc: Boris Brezillon Cc: Eric Anholt Fixes: 0a038c1c29a7 ("drm/vc4: Move LBM creation out of vc4_plane_mode_set()") Signed-off-by: Maxime Ripard Acked-by: Thomas Zimmermann Reviewed-by: Dave Stevenson Link: https://patchwork.freedesktop.org/patch/msgid/20210129160647.128373-1-maxime@cerno.tech Signed-off-by: Sasha Levin --- drivers/gpu/drm/vc4/vc4_plane.c | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/vc4/vc4_plane.c b/drivers/gpu/drm/vc4/vc4_plane.c index 5612cab55227..af4b8944a603 100644 --- a/drivers/gpu/drm/vc4/vc4_plane.c +++ b/drivers/gpu/drm/vc4/vc4_plane.c @@ -220,7 +220,7 @@ static void vc4_plane_reset(struct drm_plane *plane) __drm_atomic_helper_plane_reset(plane, &vc4_state->base); } -static void vc4_dlist_write(struct vc4_plane_state *vc4_state, u32 val) +static void vc4_dlist_counter_increment(struct vc4_plane_state *vc4_state) { if (vc4_state->dlist_count == vc4_state->dlist_size) { u32 new_size = max(4u, vc4_state->dlist_count * 2); @@ -235,7 +235,15 @@ static void vc4_dlist_write(struct vc4_plane_state *vc4_state, u32 val) vc4_state->dlist_size = new_size; } - vc4_state->dlist[vc4_state->dlist_count++] = val; + vc4_state->dlist_count++; +} + +static void vc4_dlist_write(struct vc4_plane_state *vc4_state, u32 val) +{ + unsigned int idx = vc4_state->dlist_count; + + vc4_dlist_counter_increment(vc4_state); + vc4_state->dlist[idx] = val; } /* Returns the scl0/scl1 field based on whether the dimensions need to @@ -978,8 +986,10 @@ static int vc4_plane_mode_set(struct drm_plane *plane, * be set when calling vc4_plane_allocate_lbm(). */ if (vc4_state->y_scaling[0] != VC4_SCALING_NONE || - vc4_state->y_scaling[1] != VC4_SCALING_NONE) - vc4_state->lbm_offset = vc4_state->dlist_count++; + vc4_state->y_scaling[1] != VC4_SCALING_NONE) { + vc4_state->lbm_offset = vc4_state->dlist_count; + vc4_dlist_counter_increment(vc4_state); + } if (num_planes > 1) { /* Emit Cb/Cr as channel 0 and Y as channel From 40e3b5c128645d2ddad12310c7be98758cafb2b0 Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Mon, 1 Feb 2021 08:26:14 -0700 Subject: [PATCH 052/103] dmaengine: idxd: check device state before issue command [ Upstream commit 89e3becd8f821e507052e012d2559dcda59f538e ] Add device state check before executing command. Without the check the command can be issued while device is in halt state and causes the driver to block while waiting for the completion of the command. Reported-by: Sanjay Kumar Signed-off-by: Dave Jiang Tested-by: Sanjay Kumar Fixes: 0d5c10b4c84d ("dmaengine: idxd: add work queue drain support") Link: https://lore.kernel.org/r/161219313921.2976211.12222625226450097465.stgit@djiang5-desk3.ch.intel.com Signed-off-by: Vinod Koul Signed-off-by: Sasha Levin --- drivers/dma/idxd/device.c | 23 ++++++++++++++++++++++- drivers/dma/idxd/idxd.h | 2 +- drivers/dma/idxd/init.c | 5 ++++- 3 files changed, 27 insertions(+), 3 deletions(-) diff --git a/drivers/dma/idxd/device.c b/drivers/dma/idxd/device.c index 663344987e3f..a6704838ffcb 100644 --- a/drivers/dma/idxd/device.c +++ b/drivers/dma/idxd/device.c @@ -325,17 +325,31 @@ static inline bool idxd_is_enabled(struct idxd_device *idxd) return false; } +static inline bool idxd_device_is_halted(struct idxd_device *idxd) +{ + union gensts_reg gensts; + + gensts.bits = ioread32(idxd->reg_base + IDXD_GENSTATS_OFFSET); + + return (gensts.state == IDXD_DEVICE_STATE_HALT); +} + /* * This is function is only used for reset during probe and will * poll for completion. Once the device is setup with interrupts, * all commands will be done via interrupt completion. */ -void idxd_device_init_reset(struct idxd_device *idxd) +int idxd_device_init_reset(struct idxd_device *idxd) { struct device *dev = &idxd->pdev->dev; union idxd_command_reg cmd; unsigned long flags; + if (idxd_device_is_halted(idxd)) { + dev_warn(&idxd->pdev->dev, "Device is HALTED!\n"); + return -ENXIO; + } + memset(&cmd, 0, sizeof(cmd)); cmd.cmd = IDXD_CMD_RESET_DEVICE; dev_dbg(dev, "%s: sending reset for init.\n", __func__); @@ -346,6 +360,7 @@ void idxd_device_init_reset(struct idxd_device *idxd) IDXD_CMDSTS_ACTIVE) cpu_relax(); spin_unlock_irqrestore(&idxd->dev_lock, flags); + return 0; } static void idxd_cmd_exec(struct idxd_device *idxd, int cmd_code, u32 operand, @@ -355,6 +370,12 @@ static void idxd_cmd_exec(struct idxd_device *idxd, int cmd_code, u32 operand, DECLARE_COMPLETION_ONSTACK(done); unsigned long flags; + if (idxd_device_is_halted(idxd)) { + dev_warn(&idxd->pdev->dev, "Device is HALTED!\n"); + *status = IDXD_CMDSTS_HW_ERR; + return; + } + memset(&cmd, 0, sizeof(cmd)); cmd.cmd = cmd_code; cmd.operand = operand; diff --git a/drivers/dma/idxd/idxd.h b/drivers/dma/idxd/idxd.h index d48f193daacc..953ef6536aac 100644 --- a/drivers/dma/idxd/idxd.h +++ b/drivers/dma/idxd/idxd.h @@ -281,7 +281,7 @@ void idxd_mask_msix_vector(struct idxd_device *idxd, int vec_id); void idxd_unmask_msix_vector(struct idxd_device *idxd, int vec_id); /* device control */ -void idxd_device_init_reset(struct idxd_device *idxd); +int idxd_device_init_reset(struct idxd_device *idxd); int idxd_device_enable(struct idxd_device *idxd); int idxd_device_disable(struct idxd_device *idxd); void idxd_device_reset(struct idxd_device *idxd); diff --git a/drivers/dma/idxd/init.c b/drivers/dma/idxd/init.c index 0a4432b063b5..fa8c4228f358 100644 --- a/drivers/dma/idxd/init.c +++ b/drivers/dma/idxd/init.c @@ -289,7 +289,10 @@ static int idxd_probe(struct idxd_device *idxd) int rc; dev_dbg(dev, "%s entered and resetting device\n", __func__); - idxd_device_init_reset(idxd); + rc = idxd_device_init_reset(idxd); + if (rc < 0) + return rc; + dev_dbg(dev, "IDXD reset complete\n"); idxd_read_caps(idxd); From 52d29b4783268df881b85e76f90da74fbeaa59eb Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Tue, 2 Feb 2021 23:06:36 -0800 Subject: [PATCH 053/103] bpf: Unbreak BPF_PROG_TYPE_KPROBE when kprobe is called via do_int3 [ Upstream commit 548f1191d86ccb9bde2a5305988877b7584c01eb ] The commit 0d00449c7a28 ("x86: Replace ist_enter() with nmi_enter()") converted do_int3 handler to be "NMI-like". That made old if (in_nmi()) check abort execution of bpf programs attached to kprobe when kprobe is firing via int3 (For example when kprobe is placed in the middle of the function). Remove the check to restore user visible behavior. Fixes: 0d00449c7a28 ("x86: Replace ist_enter() with nmi_enter()") Reported-by: Nikolay Borisov Signed-off-by: Alexei Starovoitov Signed-off-by: Daniel Borkmann Tested-by: Nikolay Borisov Reviewed-by: Masami Hiramatsu Link: https://lore.kernel.org/bpf/20210203070636.70926-1-alexei.starovoitov@gmail.com Signed-off-by: Sasha Levin --- kernel/trace/bpf_trace.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index 0dde84b9d29f..fcbfc9564996 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -93,9 +93,6 @@ unsigned int trace_call_bpf(struct trace_event_call *call, void *ctx) { unsigned int ret; - if (in_nmi()) /* not supported yet */ - return 1; - cant_sleep(); if (unlikely(__this_cpu_inc_return(bpf_prog_active) != 1)) { From 8032bf2af9ce26b3a362b9711d15f626ab946a74 Mon Sep 17 00:00:00 2001 From: Bui Quang Minh Date: Wed, 27 Jan 2021 06:36:53 +0000 Subject: [PATCH 054/103] bpf: Check for integer overflow when using roundup_pow_of_two() [ Upstream commit 6183f4d3a0a2ad230511987c6c362ca43ec0055f ] On 32-bit architecture, roundup_pow_of_two() can return 0 when the argument has upper most bit set due to resulting 1UL << 32. Add a check for this case. Fixes: d5a3b1f69186 ("bpf: introduce BPF_MAP_TYPE_STACK_TRACE") Signed-off-by: Bui Quang Minh Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20210127063653.3576-1-minhquangbui99@gmail.com Signed-off-by: Sasha Levin --- kernel/bpf/stackmap.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/kernel/bpf/stackmap.c b/kernel/bpf/stackmap.c index 06065fa27124..6e83bf8c080d 100644 --- a/kernel/bpf/stackmap.c +++ b/kernel/bpf/stackmap.c @@ -116,6 +116,8 @@ static struct bpf_map *stack_map_alloc(union bpf_attr *attr) /* hash table size must be power of 2 */ n_buckets = roundup_pow_of_two(attr->max_entries); + if (!n_buckets) + return ERR_PTR(-E2BIG); cost = n_buckets * sizeof(struct stack_map_bucket *) + sizeof(*smap); cost += n_buckets * (value_size + sizeof(struct stack_map_bucket)); From f035e97f331e6b8f61d95fe1fbb33622405cc7c3 Mon Sep 17 00:00:00 2001 From: Jozsef Kadlecsik Date: Fri, 29 Jan 2021 20:57:43 +0100 Subject: [PATCH 055/103] netfilter: xt_recent: Fix attempt to update deleted entry [ Upstream commit b1bdde33b72366da20d10770ab7a49fe87b5e190 ] When both --reap and --update flag are specified, there's a code path at which the entry to be updated is reaped beforehand, which then leads to kernel crash. Reap only entries which won't be updated. Fixes kernel bugzilla #207773. Link: https://bugzilla.kernel.org/show_bug.cgi?id=207773 Reported-by: Reindl Harald Fixes: 0079c5aee348 ("netfilter: xt_recent: add an entry reaper") Signed-off-by: Jozsef Kadlecsik Signed-off-by: Pablo Neira Ayuso Signed-off-by: Sasha Levin --- net/netfilter/xt_recent.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/net/netfilter/xt_recent.c b/net/netfilter/xt_recent.c index 606411869698..0446307516cd 100644 --- a/net/netfilter/xt_recent.c +++ b/net/netfilter/xt_recent.c @@ -152,7 +152,8 @@ static void recent_entry_remove(struct recent_table *t, struct recent_entry *e) /* * Drop entries with timestamps older then 'time'. */ -static void recent_entry_reap(struct recent_table *t, unsigned long time) +static void recent_entry_reap(struct recent_table *t, unsigned long time, + struct recent_entry *working, bool update) { struct recent_entry *e; @@ -161,6 +162,12 @@ static void recent_entry_reap(struct recent_table *t, unsigned long time) */ e = list_entry(t->lru_list.next, struct recent_entry, lru_list); + /* + * Do not reap the entry which are going to be updated. + */ + if (e == working && update) + return; + /* * The last time stamp is the most recent. */ @@ -303,7 +310,8 @@ recent_mt(const struct sk_buff *skb, struct xt_action_param *par) /* info->seconds must be non-zero */ if (info->check_set & XT_RECENT_REAP) - recent_entry_reap(t, time); + recent_entry_reap(t, time, e, + info->check_set & XT_RECENT_UPDATE && ret); } if (info->check_set & XT_RECENT_SET || From ef8f281a653006e14b8d2999fe934e890e51e71b Mon Sep 17 00:00:00 2001 From: Fabian Frederick Date: Sat, 30 Jan 2021 11:14:25 +0100 Subject: [PATCH 056/103] selftests: netfilter: fix current year [ Upstream commit a3005b0f83f217c888393c6bf9cd36e3d1616bca ] use date %Y instead of %G to read current year Problem appeared when running lkp-tests on 01/01/2021 Fixes: 48d072c4e8cd ("selftests: netfilter: add time counter check") Reported-by: kernel test robot Signed-off-by: Fabian Frederick Signed-off-by: Pablo Neira Ayuso Signed-off-by: Sasha Levin --- tools/testing/selftests/netfilter/nft_meta.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/netfilter/nft_meta.sh b/tools/testing/selftests/netfilter/nft_meta.sh index 087f0e6e71ce..f33154c04d34 100755 --- a/tools/testing/selftests/netfilter/nft_meta.sh +++ b/tools/testing/selftests/netfilter/nft_meta.sh @@ -23,7 +23,7 @@ ip -net "$ns0" addr add 127.0.0.1 dev lo trap cleanup EXIT -currentyear=$(date +%G) +currentyear=$(date +%Y) lastyear=$((currentyear-1)) ip netns exec "$ns0" nft -f /dev/stdin < Date: Tue, 2 Feb 2021 16:07:37 +0100 Subject: [PATCH 057/103] netfilter: nftables: fix possible UAF over chains from packet path in netns [ Upstream commit 767d1216bff82507c945e92fe719dff2083bb2f4 ] Although hooks are released via call_rcu(), chain and rule objects are immediately released while packets are still walking over these bits. This patch adds the .pre_exit callback which is invoked before synchronize_rcu() in the netns framework to stay safe. Remove a comment which is not valid anymore since the core does not use synchronize_net() anymore since 8c873e219970 ("netfilter: core: free hooks with call_rcu"). Suggested-by: Florian Westphal Fixes: df05ef874b28 ("netfilter: nf_tables: release objects on netns destruction") Signed-off-by: Pablo Neira Ayuso Signed-off-by: Sasha Levin --- net/netfilter/nf_tables_api.c | 25 +++++++++++++++++++------ 1 file changed, 19 insertions(+), 6 deletions(-) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 9a080767667b..8739ef135156 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -8775,6 +8775,17 @@ int __nft_release_basechain(struct nft_ctx *ctx) } EXPORT_SYMBOL_GPL(__nft_release_basechain); +static void __nft_release_hooks(struct net *net) +{ + struct nft_table *table; + struct nft_chain *chain; + + list_for_each_entry(table, &net->nft.tables, list) { + list_for_each_entry(chain, &table->chains, list) + nf_tables_unregister_hook(net, table, chain); + } +} + static void __nft_release_tables(struct net *net) { struct nft_flowtable *flowtable, *nf; @@ -8790,10 +8801,6 @@ static void __nft_release_tables(struct net *net) list_for_each_entry_safe(table, nt, &net->nft.tables, list) { ctx.family = table->family; - - list_for_each_entry(chain, &table->chains, list) - nf_tables_unregister_hook(net, table, chain); - /* No packets are walking on these chains anymore. */ ctx.table = table; list_for_each_entry(chain, &table->chains, list) { ctx.chain = chain; @@ -8842,6 +8849,11 @@ static int __net_init nf_tables_init_net(struct net *net) return 0; } +static void __net_exit nf_tables_pre_exit_net(struct net *net) +{ + __nft_release_hooks(net); +} + static void __net_exit nf_tables_exit_net(struct net *net) { mutex_lock(&net->nft.commit_mutex); @@ -8855,8 +8867,9 @@ static void __net_exit nf_tables_exit_net(struct net *net) } static struct pernet_operations nf_tables_net_ops = { - .init = nf_tables_init_net, - .exit = nf_tables_exit_net, + .init = nf_tables_init_net, + .pre_exit = nf_tables_pre_exit_net, + .exit = nf_tables_exit_net, }; static int __init nf_tables_module_init(void) From bbb8c391486b717ac880145a0e848b88af89416e Mon Sep 17 00:00:00 2001 From: Sven Auhagen Date: Tue, 2 Feb 2021 18:01:16 +0100 Subject: [PATCH 058/103] netfilter: flowtable: fix tcp and udp header checksum update [ Upstream commit 8d6bca156e47d68551750a384b3ff49384c67be3 ] When updating the tcp or udp header checksum on port nat the function inet_proto_csum_replace2 with the last parameter pseudohdr as true. This leads to an error in the case that GRO is used and packets are split up in GSO. The tcp or udp checksum of all packets is incorrect. The error is probably masked due to the fact the most network driver implement tcp/udp checksum offloading. It also only happens when GRO is applied and not on single packets. The error is most visible when using a pppoe connection which is not triggering the tcp/udp checksum offload. Fixes: ac2a66665e23 ("netfilter: add generic flow table infrastructure") Signed-off-by: Sven Auhagen Signed-off-by: Pablo Neira Ayuso Signed-off-by: Sasha Levin --- net/netfilter/nf_flow_table_core.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/netfilter/nf_flow_table_core.c b/net/netfilter/nf_flow_table_core.c index 513f78db3cb2..4a4acbba78ff 100644 --- a/net/netfilter/nf_flow_table_core.c +++ b/net/netfilter/nf_flow_table_core.c @@ -399,7 +399,7 @@ static int nf_flow_nat_port_tcp(struct sk_buff *skb, unsigned int thoff, return -1; tcph = (void *)(skb_network_header(skb) + thoff); - inet_proto_csum_replace2(&tcph->check, skb, port, new_port, true); + inet_proto_csum_replace2(&tcph->check, skb, port, new_port, false); return 0; } @@ -415,7 +415,7 @@ static int nf_flow_nat_port_udp(struct sk_buff *skb, unsigned int thoff, udph = (void *)(skb_network_header(skb) + thoff); if (udph->check || skb->ip_summed == CHECKSUM_PARTIAL) { inet_proto_csum_replace2(&udph->check, skb, port, - new_port, true); + new_port, false); if (!udph->check) udph->check = CSUM_MANGLED_0; } From 50ad6fc36aed44d2b54c9530264e74caf051e5d2 Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Tue, 2 Feb 2021 08:09:38 +0100 Subject: [PATCH 059/103] xen/netback: avoid race in xenvif_rx_ring_slots_available() [ Upstream commit ec7d8e7dd3a59528e305a18e93f1cb98f7faf83b ] Since commit 23025393dbeb3b8b3 ("xen/netback: use lateeoi irq binding") xenvif_rx_ring_slots_available() is no longer called only from the rx queue kernel thread, so it needs to access the rx queue with the associated queue held. Reported-by: Igor Druzhinin Fixes: 23025393dbeb3b8b3 ("xen/netback: use lateeoi irq binding") Signed-off-by: Juergen Gross Acked-by: Wei Liu Link: https://lore.kernel.org/r/20210202070938.7863-1-jgross@suse.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/xen-netback/rx.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/net/xen-netback/rx.c b/drivers/net/xen-netback/rx.c index b8febe1d1bfd..accc991d153f 100644 --- a/drivers/net/xen-netback/rx.c +++ b/drivers/net/xen-netback/rx.c @@ -38,10 +38,15 @@ static bool xenvif_rx_ring_slots_available(struct xenvif_queue *queue) RING_IDX prod, cons; struct sk_buff *skb; int needed; + unsigned long flags; + + spin_lock_irqsave(&queue->rx_queue.lock, flags); skb = skb_peek(&queue->rx_queue); - if (!skb) + if (!skb) { + spin_unlock_irqrestore(&queue->rx_queue.lock, flags); return false; + } needed = DIV_ROUND_UP(skb->len, XEN_PAGE_SIZE); if (skb_is_gso(skb)) @@ -49,6 +54,8 @@ static bool xenvif_rx_ring_slots_available(struct xenvif_queue *queue) if (skb->sw_hash) needed++; + spin_unlock_irqrestore(&queue->rx_queue.lock, flags); + do { prod = queue->rx.sring->req_prod; cons = queue->rx.req_cons; From a4595395a91571157e9253581a2d70a9171b7df3 Mon Sep 17 00:00:00 2001 From: Xie He Date: Tue, 2 Feb 2021 23:15:41 -0800 Subject: [PATCH 060/103] net: hdlc_x25: Return meaningful error code in x25_open [ Upstream commit 81b8be68ef8e8915d0cc6cedd2ac425c74a24813 ] It's not meaningful to pass on LAPB error codes to HDLC code or other parts of the system, because they will not understand the error codes. Instead, use system-wide recognizable error codes. Fixes: f362e5fe0f1f ("wan/hdlc_x25: make lapb params configurable") Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Xie He Acked-by: Martin Schiller Link: https://lore.kernel.org/r/20210203071541.86138-1-xie.he.0141@gmail.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/wan/hdlc_x25.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/wan/hdlc_x25.c b/drivers/net/wan/hdlc_x25.c index f52b9fed0593..34bc53facd11 100644 --- a/drivers/net/wan/hdlc_x25.c +++ b/drivers/net/wan/hdlc_x25.c @@ -171,11 +171,11 @@ static int x25_open(struct net_device *dev) result = lapb_register(dev, &cb); if (result != LAPB_OK) - return result; + return -ENOMEM; result = lapb_getparms(dev, ¶ms); if (result != LAPB_OK) - return result; + return -EINVAL; if (state(hdlc)->settings.dce) params.mode = params.mode | LAPB_DCE; @@ -190,7 +190,7 @@ static int x25_open(struct net_device *dev) result = lapb_setparms(dev, ¶ms); if (result != LAPB_OK) - return result; + return -EINVAL; return 0; } From 23b2eac8b76a13acec6d6e38d9f87bcaeda55a78 Mon Sep 17 00:00:00 2001 From: Alex Elder Date: Wed, 3 Feb 2021 19:06:55 -0600 Subject: [PATCH 061/103] net: ipa: set error code in gsi_channel_setup() [ Upstream commit 1d23a56b0296d29e7047b41fe0a42a001036160d ] In gsi_channel_setup(), we check to see if the configuration data contains any information about channels that are not supported by the hardware. If one is found, we abort the setup process, but the error code (ret) is not set in this case. Fix this bug. Fixes: 650d1603825d8 ("soc: qcom: ipa: the generic software interface") Reported-by: Dan Carpenter Signed-off-by: Alex Elder Link: https://lore.kernel.org/r/20210204010655.15619-1-elder@linaro.org Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/ipa/gsi.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ipa/gsi.c b/drivers/net/ipa/gsi.c index 4a68da7115d1..2a65efd3e8da 100644 --- a/drivers/net/ipa/gsi.c +++ b/drivers/net/ipa/gsi.c @@ -1573,6 +1573,7 @@ static int gsi_channel_setup(struct gsi *gsi, bool legacy) if (!channel->gsi) continue; /* Ignore uninitialized channels */ + ret = -EINVAL; dev_err(gsi->dev, "channel %u not supported by hardware\n", channel_id - 1); channel_id = gsi->channel_count; From 8eb3e56fd180ca12a09586ca31a913646942eb0d Mon Sep 17 00:00:00 2001 From: "Andrea Parri (Microsoft)" Date: Wed, 3 Feb 2021 12:36:02 +0100 Subject: [PATCH 062/103] hv_netvsc: Reset the RSC count if NVSP_STAT_FAIL in netvsc_receive() [ Upstream commit 12bc8dfb83b5292fe387b795210018b7632ee08b ] Commit 44144185951a0f ("hv_netvsc: Add validation for untrusted Hyper-V values") added validation to rndis_filter_receive_data() (and rndis_filter_receive()) which introduced NVSP_STAT_FAIL-scenarios where the count is not updated/reset. Fix this omission, and prevent similar scenarios from occurring in the future. Reported-by: Juan Vazquez Signed-off-by: Andrea Parri (Microsoft) Fixes: 44144185951a0f ("hv_netvsc: Add validation for untrusted Hyper-V values") Reviewed-by: Jesse Brandeburg Link: https://lore.kernel.org/r/20210203113602.558916-1-parri.andrea@gmail.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/hyperv/netvsc.c | 5 ++++- drivers/net/hyperv/rndis_filter.c | 2 -- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c index 0c3de94b5178..6a7ab930ef70 100644 --- a/drivers/net/hyperv/netvsc.c +++ b/drivers/net/hyperv/netvsc.c @@ -1253,8 +1253,11 @@ static int netvsc_receive(struct net_device *ndev, ret = rndis_filter_receive(ndev, net_device, nvchan, data, buflen); - if (unlikely(ret != NVSP_STAT_SUCCESS)) + if (unlikely(ret != NVSP_STAT_SUCCESS)) { + /* Drop incomplete packet */ + nvchan->rsc.cnt = 0; status = NVSP_STAT_FAIL; + } } enq_receive_complete(ndev, net_device, q_idx, diff --git a/drivers/net/hyperv/rndis_filter.c b/drivers/net/hyperv/rndis_filter.c index b22e47bcfeca..90bc0008fa2f 100644 --- a/drivers/net/hyperv/rndis_filter.c +++ b/drivers/net/hyperv/rndis_filter.c @@ -508,8 +508,6 @@ static int rndis_filter_receive_data(struct net_device *ndev, return ret; drop: - /* Drop incomplete packet */ - nvchan->rsc.cnt = 0; return NVSP_STAT_FAIL; } From 5ed60a17d4668002c21f207d4a91900607c9cbfa Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Thu, 4 Feb 2021 15:45:11 +0200 Subject: [PATCH 063/103] net: enetc: initialize the RFS and RSS memories [ Upstream commit 07bf34a50e327975b21a9dee64d220c3dcb72ee9 ] Michael tried to enable Advanced Error Reporting through the ENETC's Root Complex Event Collector, and the system started spitting out single bit correctable ECC errors coming from the ENETC interfaces: pcieport 0000:00:1f.0: AER: Multiple Corrected error received: 0000:00:00.0 fsl_enetc 0000:00:00.0: PCIe Bus Error: severity=Corrected, type=Transaction Layer, (Receiver ID) fsl_enetc 0000:00:00.0: device [1957:e100] error status/mask=00004000/00000000 fsl_enetc 0000:00:00.0: [14] CorrIntErr fsl_enetc 0000:00:00.1: PCIe Bus Error: severity=Corrected, type=Transaction Layer, (Receiver ID) fsl_enetc 0000:00:00.1: device [1957:e100] error status/mask=00004000/00000000 fsl_enetc 0000:00:00.1: [14] CorrIntErr Further investigating the port correctable memory error detect register (PCMEDR) shows that these AER errors have an associated SOURCE_ID of 6 (RFS/RSS): $ devmem 0x1f8010e10 32 0xC0000006 $ devmem 0x1f8050e10 32 0xC0000006 Discussion with the hardware design engineers reveals that on LS1028A, the hardware does not do initialization of that RFS/RSS memory, and that software should clear/initialize the entire table before starting to operate. That comes as a bit of a surprise, since the driver does not do initialization of the RFS memory. Also, the initialization of the Receive Side Scaling is done only partially. Even though the entire ENETC IP has a single shared flow steering memory, the flow steering service should returns matches only for TCAM entries that are within the range of the Station Interface that is doing the search. Therefore, it should be sufficient for a Station Interface to initialize all of its own entries in order to avoid any ECC errors, and only the Station Interfaces in use should need initialization. There are Physical Station Interfaces associated with PCIe PFs and Virtual Station Interfaces associated with PCIe VFs. We let the PF driver initialize the entire port's memory, which includes the RFS entries which are going to be used by the VF. Reported-by: Michael Walle Fixes: d4fd0404c1c9 ("enetc: Introduce basic PF and VF ENETC ethernet drivers") Signed-off-by: Vladimir Oltean Tested-by: Michael Walle Reviewed-by: Jesse Brandeburg Link: https://lore.kernel.org/r/20210204134511.2640309-1-vladimir.oltean@nxp.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- .../net/ethernet/freescale/enetc/enetc_hw.h | 2 + .../net/ethernet/freescale/enetc/enetc_pf.c | 59 +++++++++++++++++++ 2 files changed, 61 insertions(+) diff --git a/drivers/net/ethernet/freescale/enetc/enetc_hw.h b/drivers/net/ethernet/freescale/enetc/enetc_hw.h index 4cbf1667d7ff..014ca6ae121f 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc_hw.h +++ b/drivers/net/ethernet/freescale/enetc/enetc_hw.h @@ -196,6 +196,8 @@ enum enetc_bdr_type {TX, RX}; #define ENETC_CBS_BW_MASK GENMASK(6, 0) #define ENETC_PTCCBSR1(n) (0x1114 + (n) * 8) /* n = 0 to 7*/ #define ENETC_RSSHASH_KEY_SIZE 40 +#define ENETC_PRSSCAPR 0x1404 +#define ENETC_PRSSCAPR_GET_NUM_RSS(val) (BIT((val) & 0xf) * 32) #define ENETC_PRSSK(n) (0x1410 + (n) * 4) /* n = [0..9] */ #define ENETC_PSIVLANFMR 0x1700 #define ENETC_PSIVLANFMR_VS BIT(0) diff --git a/drivers/net/ethernet/freescale/enetc/enetc_pf.c b/drivers/net/ethernet/freescale/enetc/enetc_pf.c index 419306342ac5..06514af0df10 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc_pf.c +++ b/drivers/net/ethernet/freescale/enetc/enetc_pf.c @@ -1004,6 +1004,51 @@ static void enetc_phylink_destroy(struct enetc_ndev_priv *priv) phylink_destroy(priv->phylink); } +/* Initialize the entire shared memory for the flow steering entries + * of this port (PF + VFs) + */ +static int enetc_init_port_rfs_memory(struct enetc_si *si) +{ + struct enetc_cmd_rfse rfse = {0}; + struct enetc_hw *hw = &si->hw; + int num_rfs, i, err = 0; + u32 val; + + val = enetc_port_rd(hw, ENETC_PRFSCAPR); + num_rfs = ENETC_PRFSCAPR_GET_NUM_RFS(val); + + for (i = 0; i < num_rfs; i++) { + err = enetc_set_fs_entry(si, &rfse, i); + if (err) + break; + } + + return err; +} + +static int enetc_init_port_rss_memory(struct enetc_si *si) +{ + struct enetc_hw *hw = &si->hw; + int num_rss, err; + int *rss_table; + u32 val; + + val = enetc_port_rd(hw, ENETC_PRSSCAPR); + num_rss = ENETC_PRSSCAPR_GET_NUM_RSS(val); + if (!num_rss) + return 0; + + rss_table = kcalloc(num_rss, sizeof(*rss_table), GFP_KERNEL); + if (!rss_table) + return -ENOMEM; + + err = enetc_set_rss_table(si, rss_table, num_rss); + + kfree(rss_table); + + return err; +} + static int enetc_pf_probe(struct pci_dev *pdev, const struct pci_device_id *ent) { @@ -1058,6 +1103,18 @@ static int enetc_pf_probe(struct pci_dev *pdev, goto err_alloc_si_res; } + err = enetc_init_port_rfs_memory(si); + if (err) { + dev_err(&pdev->dev, "Failed to initialize RFS memory\n"); + goto err_init_port_rfs; + } + + err = enetc_init_port_rss_memory(si); + if (err) { + dev_err(&pdev->dev, "Failed to initialize RSS memory\n"); + goto err_init_port_rss; + } + err = enetc_alloc_msix(priv); if (err) { dev_err(&pdev->dev, "MSIX alloc failed\n"); @@ -1086,6 +1143,8 @@ err_phylink_create: enetc_mdiobus_destroy(pf); err_mdiobus_create: enetc_free_msix(priv); +err_init_port_rss: +err_init_port_rfs: err_alloc_msix: enetc_free_si_resources(priv); err_alloc_si_res: From 25c312214d9f7b0876892b5997a167c0a9a21c7d Mon Sep 17 00:00:00 2001 From: Vadim Fedorenko Date: Thu, 4 Feb 2021 20:50:34 +0300 Subject: [PATCH 064/103] selftests: txtimestamp: fix compilation issue [ Upstream commit 647b8dd5184665432cc8a2b5bca46a201f690c37 ] PACKET_TX_TIMESTAMP is defined in if_packet.h but it is not included in test. Include it instead of otherwise the error of redefinition arrives. Also fix the compiler warning about ambiguous control flow by adding explicit braces. Fixes: 8fe2f761cae9 ("net-timestamp: expand documentation") Suggested-by: Willem de Bruijn Signed-off-by: Vadim Fedorenko Acked-by: Willem de Bruijn Link: https://lore.kernel.org/r/1612461034-24524-1-git-send-email-vfedorenko@novek.ru Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- tools/testing/selftests/net/txtimestamp.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tools/testing/selftests/net/txtimestamp.c b/tools/testing/selftests/net/txtimestamp.c index 490a8cca708a..fabb1d555ee5 100644 --- a/tools/testing/selftests/net/txtimestamp.c +++ b/tools/testing/selftests/net/txtimestamp.c @@ -26,6 +26,7 @@ #include #include #include +#include #include #include #include @@ -34,7 +35,6 @@ #include #include #include -#include #include #include #include @@ -495,12 +495,12 @@ static void do_test(int family, unsigned int report_opt) total_len = cfg_payload_len; if (cfg_use_pf_packet || cfg_proto == SOCK_RAW) { total_len += sizeof(struct udphdr); - if (cfg_use_pf_packet || cfg_ipproto == IPPROTO_RAW) + if (cfg_use_pf_packet || cfg_ipproto == IPPROTO_RAW) { if (family == PF_INET) total_len += sizeof(struct iphdr); else total_len += sizeof(struct ipv6hdr); - + } /* special case, only rawv6_sendmsg: * pass proto in sin6_port if not connected * also see ANK comment in net/ipv4/raw.c From 284abe15c17e5f440b01a46f61948d3fa6b767a1 Mon Sep 17 00:00:00 2001 From: Mohammad Athari Bin Ismail Date: Thu, 4 Feb 2021 22:03:16 +0800 Subject: [PATCH 065/103] net: stmmac: set TxQ mode back to DCB after disabling CBS [ Upstream commit f317e2ea8c88737aa36228167b2292baef3f0430 ] When disable CBS, mode_to_use parameter is not updated even the operation mode of Tx Queue is changed to Data Centre Bridging (DCB). Therefore, when tc_setup_cbs() function is called to re-enable CBS, the operation mode of Tx Queue remains at DCB, which causing CBS fails to work. This patch updates the value of mode_to_use parameter to MTL_QUEUE_DCB after operation mode of Tx Queue is changed to DCB in stmmac_dma_qmode() callback function. Fixes: 1f705bc61aee ("net: stmmac: Add support for CBS QDISC") Suggested-by: Vinicius Costa Gomes Signed-off-by: Mohammad Athari Bin Ismail Signed-off-by: Song, Yoong Siang Reviewed-by: Jesse Brandeburg Acked-by: Vinicius Costa Gomes Link: https://lore.kernel.org/r/1612447396-20351-1-git-send-email-yoong.siang.song@intel.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c index 06553d028d74..6088071cb192 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c @@ -330,7 +330,12 @@ static int tc_setup_cbs(struct stmmac_priv *priv, priv->plat->tx_queues_cfg[queue].mode_to_use = MTL_QUEUE_AVB; } else if (!qopt->enable) { - return stmmac_dma_qmode(priv, priv->ioaddr, queue, MTL_QUEUE_DCB); + ret = stmmac_dma_qmode(priv, priv->ioaddr, queue, + MTL_QUEUE_DCB); + if (ret) + return ret; + + priv->plat->tx_queues_cfg[queue].mode_to_use = MTL_QUEUE_DCB; } /* Port Transmit Rate and Speed Divider */ From 4067ace5f119624aeb91c51e8cbb29ac9fa2150d Mon Sep 17 00:00:00 2001 From: Sukadev Bhattiprolu Date: Tue, 2 Feb 2021 21:08:02 -0800 Subject: [PATCH 066/103] ibmvnic: Clear failover_pending if unable to schedule [ Upstream commit ef66a1eace968ff22a35f45e6e8ec36b668b6116 ] Normally we clear the failover_pending flag when processing the reset. But if we are unable to schedule a failover reset we must clear the flag ourselves. We could fail to schedule the reset if we are in PROBING state (eg: when booting via kexec) or because we could not allocate memory. Thanks to Cris Forno for helping isolate the problem and for testing. Fixes: 1d8504937478 ("powerpc/vnic: Extend "failover pending" window") Signed-off-by: Sukadev Bhattiprolu Tested-by: Cristobal Forno Link: https://lore.kernel.org/r/20210203050802.680772-1-sukadev@linux.ibm.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/ethernet/ibm/ibmvnic.c | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c index 2f281d0f9807..ee16e0e4fa5f 100644 --- a/drivers/net/ethernet/ibm/ibmvnic.c +++ b/drivers/net/ethernet/ibm/ibmvnic.c @@ -4813,7 +4813,22 @@ static void ibmvnic_handle_crq(union ibmvnic_crq *crq, complete(&adapter->init_done); adapter->init_done_rc = -EIO; } - ibmvnic_reset(adapter, VNIC_RESET_FAILOVER); + rc = ibmvnic_reset(adapter, VNIC_RESET_FAILOVER); + if (rc && rc != -EBUSY) { + /* We were unable to schedule the failover + * reset either because the adapter was still + * probing (eg: during kexec) or we could not + * allocate memory. Clear the failover_pending + * flag since no one else will. We ignore + * EBUSY because it means either FAILOVER reset + * is already scheduled or the adapter is + * being removed. + */ + netdev_err(netdev, + "Error %ld scheduling failover reset\n", + rc); + adapter->failover_pending = false; + } break; case IBMVNIC_CRQ_INIT_COMPLETE: dev_info(dev, "Partner initialization complete\n"); From d519197b48c13d7a50fd27966b235f16574aed47 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Fri, 5 Feb 2021 12:56:43 +0100 Subject: [PATCH 067/103] netfilter: conntrack: skip identical origin tuple in same zone only [ Upstream commit 07998281c268592963e1cd623fe6ab0270b65ae4 ] The origin skip check needs to re-test the zone. Else, we might skip a colliding tuple in the reply direction. This only occurs when using 'directional zones' where origin tuples reside in different zones but the reply tuples share the same zone. This causes the new conntrack entry to be dropped at confirmation time because NAT clash resolution was elided. Fixes: 4e35c1cb9460240 ("netfilter: nf_nat: skip nat clash resolution for same-origin entries") Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso Signed-off-by: Sasha Levin --- net/netfilter/nf_conntrack_core.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 234b7cab37c3..ff0168736f6e 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -1229,7 +1229,8 @@ nf_conntrack_tuple_taken(const struct nf_conntrack_tuple *tuple, * Let nf_ct_resolve_clash() deal with this later. */ if (nf_ct_tuple_equal(&ignored_conntrack->tuplehash[IP_CT_DIR_ORIGINAL].tuple, - &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple)) + &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple) && + nf_ct_zone_equal(ct, zone, IP_CT_DIR_ORIGINAL)) continue; NF_CT_STAT_INC_ATOMIC(net, found); From 880b1c2164d1ac7b6fb4d77ad882d53ae8e1d1ab Mon Sep 17 00:00:00 2001 From: Maurizio Lombardi Date: Mon, 8 Feb 2021 12:17:34 +0100 Subject: [PATCH 068/103] scsi: scsi_debug: Fix a memory leak [ Upstream commit f852c596f2ee6f0eb364ea8f28f89da6da0ae7b5 ] The sdebug_q_arr pointer must be freed when the module is unloaded. $ cat /sys/kernel/debug/kmemleak unreferenced object 0xffff888e1cfb0000 (size 4096): comm "modprobe", pid 165555, jiffies 4325987516 (age 685.194s) hex dump (first 32 bytes): 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ backtrace: [<00000000458f4f5d>] 0xffffffffc06702d9 [<000000003edc4b1f>] do_one_initcall+0xe9/0x57d [<00000000da7d518c>] do_init_module+0x1d1/0x6f0 [<000000009a6a9248>] load_module+0x36bd/0x4f50 [<00000000ddb0c3ce>] __do_sys_init_module+0x1db/0x260 [<000000009532db57>] do_syscall_64+0xa5/0x420 [<000000002916b13d>] entry_SYSCALL_64_after_hwframe+0x6a/0xdf Fixes: 87c715dcde63 ("scsi: scsi_debug: Add per_host_store option") Link: https://lore.kernel.org/r/20210208111734.34034-1-mlombard@redhat.com Acked-by: Douglas Gilbert Signed-off-by: Maurizio Lombardi Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin --- drivers/scsi/scsi_debug.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/scsi/scsi_debug.c b/drivers/scsi/scsi_debug.c index 4a08c450b756..b6540b92f566 100644 --- a/drivers/scsi/scsi_debug.c +++ b/drivers/scsi/scsi_debug.c @@ -6881,6 +6881,7 @@ static void __exit scsi_debug_exit(void) sdebug_erase_all_stores(false); xa_destroy(per_store_ap); + kfree(sdebug_q_arr); } device_initcall(scsi_debug_init); From 03bf0f163df2b777c73739f15d723c90b76df5e8 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Mon, 8 Feb 2021 16:43:30 +0100 Subject: [PATCH 069/103] x86/build: Disable CET instrumentation in the kernel for 32-bit too [ Upstream commit 256b92af784d5043eeb7d559b6d5963dcc2ecb10 ] Commit 20bf2b378729 ("x86/build: Disable CET instrumentation in the kernel") disabled CET instrumentation which gets added by default by the Ubuntu gcc9 and 10 by default, but did that only for 64-bit builds. It would still fail when building a 32-bit target. So disable CET for all x86 builds. Fixes: 20bf2b378729 ("x86/build: Disable CET instrumentation in the kernel") Reported-by: AC Signed-off-by: Borislav Petkov Acked-by: Josh Poimboeuf Tested-by: AC Link: https://lkml.kernel.org/r/YCCIgMHkzh/xT4ex@arch-chirva.localdomain Signed-off-by: Sasha Levin --- arch/x86/Makefile | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/x86/Makefile b/arch/x86/Makefile index 6a7efa78eba2..0a6d497221e4 100644 --- a/arch/x86/Makefile +++ b/arch/x86/Makefile @@ -57,6 +57,9 @@ export BITS KBUILD_CFLAGS += -mno-sse -mno-mmx -mno-sse2 -mno-3dnow KBUILD_CFLAGS += $(call cc-option,-mno-avx,) +# Intel CET isn't enabled in the kernel +KBUILD_CFLAGS += $(call cc-option,-fcf-protection=none) + ifeq ($(CONFIG_X86_32),y) BITS := 32 UTS_MACHINE := i386 @@ -127,9 +130,6 @@ else KBUILD_CFLAGS += -mno-red-zone KBUILD_CFLAGS += -mcmodel=kernel - - # Intel CET isn't enabled in the kernel - KBUILD_CFLAGS += $(call cc-option,-fcf-protection=none) endif ifdef CONFIG_X86_X32 From 6bbc1d5ee0eb52587d794587af7a7aae126a01eb Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Mon, 8 Feb 2021 19:36:27 +0200 Subject: [PATCH 070/103] net: dsa: felix: implement port flushing on .phylink_mac_link_down [ Upstream commit eb4733d7cffc547e08fe5a216e4f03663bb71108 ] There are several issues which may be seen when the link goes down while forwarding traffic, all of which can be attributed to the fact that the port flushing procedure from the reference manual was not closely followed. With flow control enabled on both the ingress port and the egress port, it may happen when a link goes down that Ethernet packets are in flight. In flow control mode, frames are held back and not dropped. When there is enough traffic in flight (example: iperf3 TCP), then the ingress port might enter congestion and never exit that state. This is a problem, because it is the egress port's link that went down, and that has caused the inability of the ingress port to send packets to any other port. This is solved by flushing the egress port's queues when it goes down. There is also a problem when performing stream splitting for IEEE 802.1CB traffic (not yet upstream, but a sort of multicast, basically). There, if one port from the destination ports mask goes down, splitting the stream towards the other destinations will no longer be performed. This can be traced down to this line: ocelot_port_writel(ocelot_port, 0, DEV_MAC_ENA_CFG); which should have been instead, as per the reference manual: ocelot_port_rmwl(ocelot_port, 0, DEV_MAC_ENA_CFG_RX_ENA, DEV_MAC_ENA_CFG); Basically only DEV_MAC_ENA_CFG_RX_ENA should be disabled, but not DEV_MAC_ENA_CFG_TX_ENA - I don't have further insight into why that is the case, but apparently multicasting to several ports will cause issues if at least one of them doesn't have DEV_MAC_ENA_CFG_TX_ENA set. I am not sure what the state of the Ocelot VSC7514 driver is, but probably not as bad as Felix/Seville, since VSC7514 uses phylib and has the following in ocelot_adjust_link: if (!phydev->link) return; therefore the port is not really put down when the link is lost, unlike the DSA drivers which use .phylink_mac_link_down for that. Nonetheless, I put ocelot_port_flush() in the common ocelot.c because it needs to access some registers from drivers/net/ethernet/mscc/ocelot_rew.h which are not exported in include/soc/mscc/ and a bugfix patch should probably not move headers around. Fixes: bdeced75b13f ("net: dsa: felix: Add PCS operations for PHYLINK") Signed-off-by: Vladimir Oltean Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/dsa/ocelot/felix.c | 17 ++++++++- drivers/net/ethernet/mscc/ocelot.c | 54 +++++++++++++++++++++++++++ drivers/net/ethernet/mscc/ocelot_io.c | 8 ++++ include/soc/mscc/ocelot.h | 2 + 4 files changed, 80 insertions(+), 1 deletion(-) diff --git a/drivers/net/dsa/ocelot/felix.c b/drivers/net/dsa/ocelot/felix.c index c444ef3da3e2..89d7c9b23186 100644 --- a/drivers/net/dsa/ocelot/felix.c +++ b/drivers/net/dsa/ocelot/felix.c @@ -214,9 +214,24 @@ static void felix_phylink_mac_link_down(struct dsa_switch *ds, int port, { struct ocelot *ocelot = ds->priv; struct ocelot_port *ocelot_port = ocelot->ports[port]; + int err; + + ocelot_port_rmwl(ocelot_port, 0, DEV_MAC_ENA_CFG_RX_ENA, + DEV_MAC_ENA_CFG); - ocelot_port_writel(ocelot_port, 0, DEV_MAC_ENA_CFG); ocelot_fields_write(ocelot, port, QSYS_SWITCH_PORT_MODE_PORT_ENA, 0); + + err = ocelot_port_flush(ocelot, port); + if (err) + dev_err(ocelot->dev, "failed to flush port %d: %d\n", + port, err); + + /* Put the port in reset. */ + ocelot_port_writel(ocelot_port, + DEV_CLOCK_CFG_MAC_TX_RST | + DEV_CLOCK_CFG_MAC_RX_RST | + DEV_CLOCK_CFG_LINK_SPEED(OCELOT_SPEED_1000), + DEV_CLOCK_CFG); } static void felix_phylink_mac_link_up(struct dsa_switch *ds, int port, diff --git a/drivers/net/ethernet/mscc/ocelot.c b/drivers/net/ethernet/mscc/ocelot.c index d4768dcb6c69..aa400b925b08 100644 --- a/drivers/net/ethernet/mscc/ocelot.c +++ b/drivers/net/ethernet/mscc/ocelot.c @@ -348,6 +348,60 @@ static void ocelot_vlan_init(struct ocelot *ocelot) } } +static u32 ocelot_read_eq_avail(struct ocelot *ocelot, int port) +{ + return ocelot_read_rix(ocelot, QSYS_SW_STATUS, port); +} + +int ocelot_port_flush(struct ocelot *ocelot, int port) +{ + int err, val; + + /* Disable dequeuing from the egress queues */ + ocelot_rmw_rix(ocelot, QSYS_PORT_MODE_DEQUEUE_DIS, + QSYS_PORT_MODE_DEQUEUE_DIS, + QSYS_PORT_MODE, port); + + /* Disable flow control */ + ocelot_fields_write(ocelot, port, SYS_PAUSE_CFG_PAUSE_ENA, 0); + + /* Disable priority flow control */ + ocelot_fields_write(ocelot, port, + QSYS_SWITCH_PORT_MODE_TX_PFC_ENA, 0); + + /* Wait at least the time it takes to receive a frame of maximum length + * at the port. + * Worst-case delays for 10 kilobyte jumbo frames are: + * 8 ms on a 10M port + * 800 μs on a 100M port + * 80 μs on a 1G port + * 32 μs on a 2.5G port + */ + usleep_range(8000, 10000); + + /* Disable half duplex backpressure. */ + ocelot_rmw_rix(ocelot, 0, SYS_FRONT_PORT_MODE_HDX_MODE, + SYS_FRONT_PORT_MODE, port); + + /* Flush the queues associated with the port. */ + ocelot_rmw_gix(ocelot, REW_PORT_CFG_FLUSH_ENA, REW_PORT_CFG_FLUSH_ENA, + REW_PORT_CFG, port); + + /* Enable dequeuing from the egress queues. */ + ocelot_rmw_rix(ocelot, 0, QSYS_PORT_MODE_DEQUEUE_DIS, QSYS_PORT_MODE, + port); + + /* Wait until flushing is complete. */ + err = read_poll_timeout(ocelot_read_eq_avail, val, !val, + 100, 2000000, false, ocelot, port); + + /* Clear flushing again. */ + ocelot_rmw_gix(ocelot, 0, REW_PORT_CFG_FLUSH_ENA, REW_PORT_CFG, port); + + return err; +} +EXPORT_SYMBOL(ocelot_port_flush); + void ocelot_adjust_link(struct ocelot *ocelot, int port, struct phy_device *phydev) { diff --git a/drivers/net/ethernet/mscc/ocelot_io.c b/drivers/net/ethernet/mscc/ocelot_io.c index 0acb45948418..ea4e83410fe4 100644 --- a/drivers/net/ethernet/mscc/ocelot_io.c +++ b/drivers/net/ethernet/mscc/ocelot_io.c @@ -71,6 +71,14 @@ void ocelot_port_writel(struct ocelot_port *port, u32 val, u32 reg) } EXPORT_SYMBOL(ocelot_port_writel); +void ocelot_port_rmwl(struct ocelot_port *port, u32 val, u32 mask, u32 reg) +{ + u32 cur = ocelot_port_readl(port, reg); + + ocelot_port_writel(port, (cur & (~mask)) | val, reg); +} +EXPORT_SYMBOL(ocelot_port_rmwl); + u32 __ocelot_target_read_ix(struct ocelot *ocelot, enum ocelot_target target, u32 reg, u32 offset) { diff --git a/include/soc/mscc/ocelot.h b/include/soc/mscc/ocelot.h index 49b46df476f2..4971b45860a4 100644 --- a/include/soc/mscc/ocelot.h +++ b/include/soc/mscc/ocelot.h @@ -703,6 +703,7 @@ struct ocelot_policer { /* I/O */ u32 ocelot_port_readl(struct ocelot_port *port, u32 reg); void ocelot_port_writel(struct ocelot_port *port, u32 val, u32 reg); +void ocelot_port_rmwl(struct ocelot_port *port, u32 val, u32 mask, u32 reg); u32 __ocelot_read_ix(struct ocelot *ocelot, u32 reg, u32 offset); void __ocelot_write_ix(struct ocelot *ocelot, u32 val, u32 reg, u32 offset); void __ocelot_rmw_ix(struct ocelot *ocelot, u32 val, u32 mask, u32 reg, @@ -731,6 +732,7 @@ int ocelot_get_sset_count(struct ocelot *ocelot, int port, int sset); int ocelot_get_ts_info(struct ocelot *ocelot, int port, struct ethtool_ts_info *info); void ocelot_set_ageing_time(struct ocelot *ocelot, unsigned int msecs); +int ocelot_port_flush(struct ocelot *ocelot, int port); void ocelot_adjust_link(struct ocelot *ocelot, int port, struct phy_device *phydev); int ocelot_port_vlan_filtering(struct ocelot *ocelot, int port, bool enabled, From 40389ba14d0ee1f7f838e7ba9ba1434eae2aa5df Mon Sep 17 00:00:00 2001 From: Yufeng Mo Date: Tue, 9 Feb 2021 17:03:05 +0800 Subject: [PATCH 071/103] net: hns3: add a check for queue_id in hclge_reset_vf_queue() [ Upstream commit 67a69f84cab60484f02eb8cbc7a76edffbb28a25 ] The queue_id is received from vf, if use it directly, an out-of-bound issue may be caused, so add a check for this queue_id before using it in hclge_reset_vf_queue(). Fixes: 1a426f8b40fc ("net: hns3: fix the VF queue reset flow error") Signed-off-by: Yufeng Mo Signed-off-by: Huazhong Tan Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c index 4321132a4f63..c40820baf48a 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c @@ -9404,12 +9404,19 @@ int hclge_reset_tqp(struct hnae3_handle *handle, u16 queue_id) void hclge_reset_vf_queue(struct hclge_vport *vport, u16 queue_id) { + struct hnae3_handle *handle = &vport->nic; struct hclge_dev *hdev = vport->back; int reset_try_times = 0; int reset_status; u16 queue_gid; int ret; + if (queue_id >= handle->kinfo.num_tqps) { + dev_warn(&hdev->pdev->dev, "Invalid vf queue id(%u)\n", + queue_id); + return; + } + queue_gid = hclge_covert_handle_qid_global(&vport->nic, queue_id); ret = hclge_send_reset_tqp_cmd(hdev, queue_gid, true); From 2c0e46258eab60dfa447d0ab7e316478f0f852a3 Mon Sep 17 00:00:00 2001 From: Yufeng Mo Date: Tue, 9 Feb 2021 17:03:06 +0800 Subject: [PATCH 072/103] net: hns3: add a check for tqp_index in hclge_get_ring_chain_from_mbx() [ Upstream commit 326334aad024a60f46dc5e7dbe1efe32da3ca66f ] The tqp_index is received from vf, if use it directly, an out-of-bound issue may be caused, so add a check for this tqp_index before using it in hclge_get_ring_chain_from_mbx(). Fixes: 84e095d64ed9 ("net: hns3: Change PF to add ring-vect binding & resetQ to mailbox") Signed-off-by: Yufeng Mo Signed-off-by: Huazhong Tan Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- .../ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c index 3ab6db2588d3..c997c9037155 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c @@ -158,21 +158,31 @@ static int hclge_get_ring_chain_from_mbx( struct hclge_vport *vport) { struct hnae3_ring_chain_node *cur_chain, *new_chain; + struct hclge_dev *hdev = vport->back; int ring_num; - int i = 0; + int i; ring_num = req->msg.ring_num; if (ring_num > HCLGE_MBX_MAX_RING_CHAIN_PARAM_NUM) return -ENOMEM; + for (i = 0; i < ring_num; i++) { + if (req->msg.param[i].tqp_index >= vport->nic.kinfo.rss_size) { + dev_err(&hdev->pdev->dev, "tqp index(%u) is out of range(0-%u)\n", + req->msg.param[i].tqp_index, + vport->nic.kinfo.rss_size - 1); + return -EINVAL; + } + } + hnae3_set_bit(ring_chain->flag, HNAE3_RING_TYPE_B, - req->msg.param[i].ring_type); + req->msg.param[0].ring_type); ring_chain->tqp_index = hclge_get_queue_id(vport->nic.kinfo.tqp - [req->msg.param[i].tqp_index]); + [req->msg.param[0].tqp_index]); hnae3_set_field(ring_chain->int_gl_idx, HNAE3_RING_GL_IDX_M, - HNAE3_RING_GL_IDX_S, req->msg.param[i].int_gl_index); + HNAE3_RING_GL_IDX_S, req->msg.param[0].int_gl_index); cur_chain = ring_chain; From 5ff69431b7633b1cfe30ceff58c6843aa7cb4d46 Mon Sep 17 00:00:00 2001 From: Yufeng Mo Date: Tue, 9 Feb 2021 17:03:07 +0800 Subject: [PATCH 073/103] net: hns3: add a check for index in hclge_get_rss_key() [ Upstream commit 532cfc0df1e4d68e74522ef4a0dcbf6ebbe68287 ] The index is received from vf, if use it directly, an out-of-bound issue may be caused, so add a check for this index before using it in hclge_get_rss_key(). Fixes: a638b1d8cc87 ("net: hns3: fix get VF RSS issue") Signed-off-by: Yufeng Mo Signed-off-by: Huazhong Tan Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- .../net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c index c997c9037155..9c8004fc9dc4 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c @@ -591,6 +591,17 @@ static void hclge_get_rss_key(struct hclge_vport *vport, index = mbx_req->msg.data[0]; + /* Check the query index of rss_hash_key from VF, make sure no + * more than the size of rss_hash_key. + */ + if (((index + 1) * HCLGE_RSS_MBX_RESP_LEN) > + sizeof(vport[0].rss_hash_key)) { + dev_warn(&hdev->pdev->dev, + "failed to get the rss hash key, the index(%u) invalid !\n", + index); + return; + } + memcpy(resp_msg->data, &hdev->vport[0].rss_hash_key[index * HCLGE_RSS_MBX_RESP_LEN], HCLGE_RSS_MBX_RESP_LEN); From 39e855fa959e17a9818e471987ac600f25b85662 Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Tue, 9 Feb 2021 13:42:07 -0800 Subject: [PATCH 074/103] firmware_loader: align .builtin_fw to 8 [ Upstream commit 793f49a87aae24e5bcf92ad98d764153fc936570 ] arm64 references the start address of .builtin_fw (__start_builtin_fw) with a pair of R_AARCH64_ADR_PREL_PG_HI21/R_AARCH64_LDST64_ABS_LO12_NC relocations. The compiler is allowed to emit the R_AARCH64_LDST64_ABS_LO12_NC relocation because struct builtin_fw in include/linux/firmware.h is 8-byte aligned. The R_AARCH64_LDST64_ABS_LO12_NC relocation requires the address to be a multiple of 8, which may not be the case if .builtin_fw is empty. Unconditionally align .builtin_fw to fix the linker error. 32-bit architectures could use ALIGN(4) but that would add unnecessary complexity, so just use ALIGN(8). Link: https://lkml.kernel.org/r/20201208054646.2913063-1-maskray@google.com Link: https://github.com/ClangBuiltLinux/linux/issues/1204 Fixes: 5658c76 ("firmware: allow firmware files to be built into kernel image") Signed-off-by: Fangrui Song Reported-by: kernel test robot Acked-by: Arnd Bergmann Reviewed-by: Nick Desaulniers Tested-by: Nick Desaulniers Tested-by: Douglas Anderson Acked-by: Nathan Chancellor Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Sasha Levin --- include/asm-generic/vmlinux.lds.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index b2b3d81b1535..b97c628ad91f 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -459,7 +459,7 @@ } \ \ /* Built-in firmware blobs */ \ - .builtin_fw : AT(ADDR(.builtin_fw) - LOAD_OFFSET) { \ + .builtin_fw : AT(ADDR(.builtin_fw) - LOAD_OFFSET) ALIGN(8) { \ __start_builtin_fw = .; \ KEEP(*(.builtin_fw)) \ __end_builtin_fw = .; \ From 7596c85a8962b92bbfff544af970e8180a5efd4d Mon Sep 17 00:00:00 2001 From: Jernej Skrabec Date: Tue, 9 Feb 2021 18:58:57 +0100 Subject: [PATCH 075/103] drm/sun4i: tcon: set sync polarity for tcon1 channel [ Upstream commit 50791f5d7b6a14b388f46c8885f71d1b98216d1d ] Channel 1 has polarity bits for vsync and hsync signals but driver never sets them. It turns out that with pre-HDMI2 controllers seemingly there is no issue if polarity is not set. However, with HDMI2 controllers (H6) there often comes to de-synchronization due to phase shift. This causes flickering screen. It's safe to assume that similar issues might happen also with pre-HDMI2 controllers. Solve issue with setting vsync and hsync polarity. Note that display stacks with tcon top have polarity bits actually in tcon0 polarity register. Fixes: 9026e0d122ac ("drm: Add Allwinner A10 Display Engine support") Reviewed-by: Chen-Yu Tsai Tested-by: Andre Heider Signed-off-by: Jernej Skrabec Signed-off-by: Maxime Ripard Link: https://patchwork.freedesktop.org/patch/msgid/20210209175900.7092-3-jernej.skrabec@siol.net Signed-off-by: Sasha Levin --- drivers/gpu/drm/sun4i/sun4i_tcon.c | 25 +++++++++++++++++++++++++ drivers/gpu/drm/sun4i/sun4i_tcon.h | 6 ++++++ 2 files changed, 31 insertions(+) diff --git a/drivers/gpu/drm/sun4i/sun4i_tcon.c b/drivers/gpu/drm/sun4i/sun4i_tcon.c index eaaf5d70e352..1e643bc7e786 100644 --- a/drivers/gpu/drm/sun4i/sun4i_tcon.c +++ b/drivers/gpu/drm/sun4i/sun4i_tcon.c @@ -689,6 +689,30 @@ static void sun4i_tcon1_mode_set(struct sun4i_tcon *tcon, SUN4I_TCON1_BASIC5_V_SYNC(vsync) | SUN4I_TCON1_BASIC5_H_SYNC(hsync)); + /* Setup the polarity of multiple signals */ + if (tcon->quirks->polarity_in_ch0) { + val = 0; + + if (mode->flags & DRM_MODE_FLAG_PHSYNC) + val |= SUN4I_TCON0_IO_POL_HSYNC_POSITIVE; + + if (mode->flags & DRM_MODE_FLAG_PVSYNC) + val |= SUN4I_TCON0_IO_POL_VSYNC_POSITIVE; + + regmap_write(tcon->regs, SUN4I_TCON0_IO_POL_REG, val); + } else { + /* according to vendor driver, this bit must be always set */ + val = SUN4I_TCON1_IO_POL_UNKNOWN; + + if (mode->flags & DRM_MODE_FLAG_PHSYNC) + val |= SUN4I_TCON1_IO_POL_HSYNC_POSITIVE; + + if (mode->flags & DRM_MODE_FLAG_PVSYNC) + val |= SUN4I_TCON1_IO_POL_VSYNC_POSITIVE; + + regmap_write(tcon->regs, SUN4I_TCON1_IO_POL_REG, val); + } + /* Map output pins to channel 1 */ regmap_update_bits(tcon->regs, SUN4I_TCON_GCTL_REG, SUN4I_TCON_GCTL_IOMAP_MASK, @@ -1517,6 +1541,7 @@ static const struct sun4i_tcon_quirks sun8i_a83t_tv_quirks = { static const struct sun4i_tcon_quirks sun8i_r40_tv_quirks = { .has_channel_1 = true, + .polarity_in_ch0 = true, .set_mux = sun8i_r40_tcon_tv_set_mux, }; diff --git a/drivers/gpu/drm/sun4i/sun4i_tcon.h b/drivers/gpu/drm/sun4i/sun4i_tcon.h index cfbf4e6c1679..ee555318e3c2 100644 --- a/drivers/gpu/drm/sun4i/sun4i_tcon.h +++ b/drivers/gpu/drm/sun4i/sun4i_tcon.h @@ -153,6 +153,11 @@ #define SUN4I_TCON1_BASIC5_V_SYNC(height) (((height) - 1) & 0x3ff) #define SUN4I_TCON1_IO_POL_REG 0xf0 +/* there is no documentation about this bit */ +#define SUN4I_TCON1_IO_POL_UNKNOWN BIT(26) +#define SUN4I_TCON1_IO_POL_HSYNC_POSITIVE BIT(25) +#define SUN4I_TCON1_IO_POL_VSYNC_POSITIVE BIT(24) + #define SUN4I_TCON1_IO_TRI_REG 0xf4 #define SUN4I_TCON_ECC_FIFO_REG 0xf8 @@ -235,6 +240,7 @@ struct sun4i_tcon_quirks { bool needs_de_be_mux; /* sun6i needs mux to select backend */ bool needs_edp_reset; /* a80 edp reset needed for tcon0 access */ bool supports_lvds; /* Does the TCON support an LVDS output? */ + bool polarity_in_ch0; /* some tcon1 channels have polarity bits in tcon0 pol register */ u8 dclk_min_div; /* minimum divider for TCON0 DCLK */ /* callback to handle tcon muxing options */ From e639cb199b8e3d9331d3d93248d7608527880c0b Mon Sep 17 00:00:00 2001 From: Jernej Skrabec Date: Tue, 9 Feb 2021 18:58:58 +0100 Subject: [PATCH 076/103] drm/sun4i: dw-hdmi: always set clock rate [ Upstream commit 36b53581fe0dc2e25b67de4e58920307f22d195a ] As expected, HDMI controller clock should always match pixel clock. In the past, changing HDMI controller rate would seemingly worsen situation. However, that was the result of other bugs which are now fixed. Fix that by removing set_rate quirk and always set clock rate. Fixes: 40bb9d3147b2 ("drm/sun4i: Add support for H6 DW HDMI controller") Reviewed-by: Chen-Yu Tsai Tested-by: Andre Heider Signed-off-by: Jernej Skrabec Signed-off-by: Maxime Ripard Link: https://patchwork.freedesktop.org/patch/msgid/20210209175900.7092-4-jernej.skrabec@siol.net Signed-off-by: Sasha Levin --- drivers/gpu/drm/sun4i/sun8i_dw_hdmi.c | 4 +--- drivers/gpu/drm/sun4i/sun8i_dw_hdmi.h | 1 - 2 files changed, 1 insertion(+), 4 deletions(-) diff --git a/drivers/gpu/drm/sun4i/sun8i_dw_hdmi.c b/drivers/gpu/drm/sun4i/sun8i_dw_hdmi.c index 92add2cef2e7..23773a5e0650 100644 --- a/drivers/gpu/drm/sun4i/sun8i_dw_hdmi.c +++ b/drivers/gpu/drm/sun4i/sun8i_dw_hdmi.c @@ -21,8 +21,7 @@ static void sun8i_dw_hdmi_encoder_mode_set(struct drm_encoder *encoder, { struct sun8i_dw_hdmi *hdmi = encoder_to_sun8i_dw_hdmi(encoder); - if (hdmi->quirks->set_rate) - clk_set_rate(hdmi->clk_tmds, mode->crtc_clock * 1000); + clk_set_rate(hdmi->clk_tmds, mode->crtc_clock * 1000); } static const struct drm_encoder_helper_funcs @@ -295,7 +294,6 @@ static int sun8i_dw_hdmi_remove(struct platform_device *pdev) static const struct sun8i_dw_hdmi_quirks sun8i_a83t_quirks = { .mode_valid = sun8i_dw_hdmi_mode_valid_a83t, - .set_rate = true, }; static const struct sun8i_dw_hdmi_quirks sun50i_h6_quirks = { diff --git a/drivers/gpu/drm/sun4i/sun8i_dw_hdmi.h b/drivers/gpu/drm/sun4i/sun8i_dw_hdmi.h index d983746fa194..d4b55af0592f 100644 --- a/drivers/gpu/drm/sun4i/sun8i_dw_hdmi.h +++ b/drivers/gpu/drm/sun4i/sun8i_dw_hdmi.h @@ -179,7 +179,6 @@ struct sun8i_dw_hdmi_quirks { enum drm_mode_status (*mode_valid)(struct dw_hdmi *hdmi, void *data, const struct drm_display_info *info, const struct drm_display_mode *mode); - unsigned int set_rate : 1; unsigned int use_drm_infoframe : 1; }; From 962168c2bc15642969fd1fa0b82e1538dcbd5691 Mon Sep 17 00:00:00 2001 From: Jernej Skrabec Date: Tue, 9 Feb 2021 18:58:59 +0100 Subject: [PATCH 077/103] drm/sun4i: Fix H6 HDMI PHY configuration [ Upstream commit 6a155216c48f2f65c8dcb02c4c27549c170d24a9 ] As it turns out, vendor HDMI PHY driver for H6 has a pretty big table of predefined values for various pixel clocks. However, most of them are not useful/tested because they come from reference driver code. Vendor PHY driver is concerned with only few of those, namely 27 MHz, 74.25 MHz, 148.5 MHz, 297 MHz and 594 MHz. These are all frequencies for standard CEA modes. Fix sun50i_h6_cur_ctr and sun50i_h6_phy_config with the values only for aforementioned frequencies. Table sun50i_h6_mpll_cfg doesn't need to be changed because values are actually frequency dependent and not so much SoC dependent. See i.MX6 documentation for explanation of those values for similar PHY. Fixes: c71c9b2fee17 ("drm/sun4i: Add support for Synopsys HDMI PHY") Tested-by: Andre Heider Signed-off-by: Jernej Skrabec Signed-off-by: Maxime Ripard Link: https://patchwork.freedesktop.org/patch/msgid/20210209175900.7092-5-jernej.skrabec@siol.net Signed-off-by: Sasha Levin --- drivers/gpu/drm/sun4i/sun8i_hdmi_phy.c | 26 +++++++++----------------- 1 file changed, 9 insertions(+), 17 deletions(-) diff --git a/drivers/gpu/drm/sun4i/sun8i_hdmi_phy.c b/drivers/gpu/drm/sun4i/sun8i_hdmi_phy.c index 35c2133724e2..9994edf67509 100644 --- a/drivers/gpu/drm/sun4i/sun8i_hdmi_phy.c +++ b/drivers/gpu/drm/sun4i/sun8i_hdmi_phy.c @@ -104,29 +104,21 @@ static const struct dw_hdmi_mpll_config sun50i_h6_mpll_cfg[] = { static const struct dw_hdmi_curr_ctrl sun50i_h6_cur_ctr[] = { /* pixelclk bpp8 bpp10 bpp12 */ - { 25175000, { 0x0000, 0x0000, 0x0000 }, }, { 27000000, { 0x0012, 0x0000, 0x0000 }, }, - { 59400000, { 0x0008, 0x0008, 0x0008 }, }, - { 72000000, { 0x0008, 0x0008, 0x001b }, }, - { 74250000, { 0x0013, 0x0013, 0x0013 }, }, - { 90000000, { 0x0008, 0x001a, 0x001b }, }, - { 118800000, { 0x001b, 0x001a, 0x001b }, }, - { 144000000, { 0x001b, 0x001a, 0x0034 }, }, - { 180000000, { 0x001b, 0x0033, 0x0034 }, }, - { 216000000, { 0x0036, 0x0033, 0x0034 }, }, - { 237600000, { 0x0036, 0x0033, 0x001b }, }, - { 288000000, { 0x0036, 0x001b, 0x001b }, }, - { 297000000, { 0x0019, 0x001b, 0x0019 }, }, - { 330000000, { 0x0036, 0x001b, 0x001b }, }, - { 594000000, { 0x003f, 0x001b, 0x001b }, }, + { 74250000, { 0x0013, 0x001a, 0x001b }, }, + { 148500000, { 0x0019, 0x0033, 0x0034 }, }, + { 297000000, { 0x0019, 0x001b, 0x001b }, }, + { 594000000, { 0x0010, 0x001b, 0x001b }, }, { ~0UL, { 0x0000, 0x0000, 0x0000 }, } }; static const struct dw_hdmi_phy_config sun50i_h6_phy_config[] = { /*pixelclk symbol term vlev*/ - { 74250000, 0x8009, 0x0004, 0x0232}, - { 148500000, 0x8029, 0x0004, 0x0273}, - { 594000000, 0x8039, 0x0004, 0x014a}, + { 27000000, 0x8009, 0x0007, 0x02b0 }, + { 74250000, 0x8009, 0x0006, 0x022d }, + { 148500000, 0x8029, 0x0006, 0x0270 }, + { 297000000, 0x8039, 0x0005, 0x01ab }, + { 594000000, 0x8029, 0x0000, 0x008a }, { ~0UL, 0x0000, 0x0000, 0x0000} }; From 664b66e3552c219b7591e772d608160911401288 Mon Sep 17 00:00:00 2001 From: Jernej Skrabec Date: Tue, 9 Feb 2021 18:59:00 +0100 Subject: [PATCH 078/103] drm/sun4i: dw-hdmi: Fix max. frequency for H6 [ Upstream commit 1926a0508d8947cf081280d85ff035300dc71da7 ] It turns out that reasoning for lowering max. supported frequency is wrong. Scrambling works just fine. Several now fixed bugs prevented proper functioning, even with rates lower than 340 MHz. Issues were just more pronounced with higher frequencies. Fix that by allowing max. supported frequency in HW and fix the comment. Fixes: cd9063757a22 ("drm/sun4i: DW HDMI: Lower max. supported rate for H6") Reviewed-by: Chen-Yu Tsai Tested-by: Andre Heider Signed-off-by: Jernej Skrabec Signed-off-by: Maxime Ripard Link: https://patchwork.freedesktop.org/patch/msgid/20210209175900.7092-6-jernej.skrabec@siol.net Signed-off-by: Sasha Levin --- drivers/gpu/drm/sun4i/sun8i_dw_hdmi.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/sun4i/sun8i_dw_hdmi.c b/drivers/gpu/drm/sun4i/sun8i_dw_hdmi.c index 23773a5e0650..bbdfd5e26ec8 100644 --- a/drivers/gpu/drm/sun4i/sun8i_dw_hdmi.c +++ b/drivers/gpu/drm/sun4i/sun8i_dw_hdmi.c @@ -47,11 +47,9 @@ sun8i_dw_hdmi_mode_valid_h6(struct dw_hdmi *hdmi, void *data, { /* * Controller support maximum of 594 MHz, which correlates to - * 4K@60Hz 4:4:4 or RGB. However, for frequencies greater than - * 340 MHz scrambling has to be enabled. Because scrambling is - * not yet implemented, just limit to 340 MHz for now. + * 4K@60Hz 4:4:4 or RGB. */ - if (mode->clock > 340000) + if (mode->clock > 594000) return MODE_CLOCK_HIGH; return MODE_OK; From 0c0ddf0dd69200285652a26a6978a8e4d889b488 Mon Sep 17 00:00:00 2001 From: Jernej Skrabec Date: Tue, 9 Feb 2021 18:58:56 +0100 Subject: [PATCH 079/103] clk: sunxi-ng: mp: fix parent rate change flag check [ Upstream commit 245090ab2636c0869527ce563afbfb8aff29e825 ] CLK_SET_RATE_PARENT flag is checked on parent clock instead of current one. Fix that. Fixes: 3f790433c3cb ("clk: sunxi-ng: Adjust MP clock parent rate when allowed") Reviewed-by: Chen-Yu Tsai Tested-by: Andre Heider Signed-off-by: Jernej Skrabec Link: https://lore.kernel.org/r/20210209175900.7092-2-jernej.skrabec@siol.net Acked-by: Maxime Ripard Signed-off-by: Stephen Boyd Signed-off-by: Sasha Levin --- drivers/clk/sunxi-ng/ccu_mp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/clk/sunxi-ng/ccu_mp.c b/drivers/clk/sunxi-ng/ccu_mp.c index fa4ecb915590..9d3a76604d94 100644 --- a/drivers/clk/sunxi-ng/ccu_mp.c +++ b/drivers/clk/sunxi-ng/ccu_mp.c @@ -108,7 +108,7 @@ static unsigned long ccu_mp_round_rate(struct ccu_mux_internal *mux, max_m = cmp->m.max ?: 1 << cmp->m.width; max_p = cmp->p.max ?: 1 << ((1 << cmp->p.width) - 1); - if (!(clk_hw_get_flags(hw) & CLK_SET_RATE_PARENT)) { + if (!clk_hw_can_set_rate_parent(&cmp->common.hw)) { ccu_mp_find_best(*parent_rate, rate, max_m, max_p, &m, &p); rate = *parent_rate / p / m; } else { From f6b5e671c6a7ee18fb170584e424d9d5a803e1b4 Mon Sep 17 00:00:00 2001 From: Alain Volmat Date: Fri, 5 Feb 2021 09:51:40 +0100 Subject: [PATCH 080/103] i2c: stm32f7: fix configuration of the digital filter [ Upstream commit 3d6a3d3a2a7a3a60a824e7c04e95fd50dec57812 ] The digital filter related computation are present in the driver however the programming of the filter within the IP is missing. The maximum value for the DNF is wrong and should be 15 instead of 16. Fixes: aeb068c57214 ("i2c: i2c-stm32f7: add driver") Signed-off-by: Alain Volmat Signed-off-by: Pierre-Yves MORDRET Signed-off-by: Wolfram Sang Signed-off-by: Sasha Levin --- drivers/i2c/busses/i2c-stm32f7.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/drivers/i2c/busses/i2c-stm32f7.c b/drivers/i2c/busses/i2c-stm32f7.c index f41f51a176a1..674735334547 100644 --- a/drivers/i2c/busses/i2c-stm32f7.c +++ b/drivers/i2c/busses/i2c-stm32f7.c @@ -57,6 +57,8 @@ #define STM32F7_I2C_CR1_RXDMAEN BIT(15) #define STM32F7_I2C_CR1_TXDMAEN BIT(14) #define STM32F7_I2C_CR1_ANFOFF BIT(12) +#define STM32F7_I2C_CR1_DNF_MASK GENMASK(11, 8) +#define STM32F7_I2C_CR1_DNF(n) (((n) & 0xf) << 8) #define STM32F7_I2C_CR1_ERRIE BIT(7) #define STM32F7_I2C_CR1_TCIE BIT(6) #define STM32F7_I2C_CR1_STOPIE BIT(5) @@ -160,7 +162,7 @@ enum { }; #define STM32F7_I2C_DNF_DEFAULT 0 -#define STM32F7_I2C_DNF_MAX 16 +#define STM32F7_I2C_DNF_MAX 15 #define STM32F7_I2C_ANALOG_FILTER_ENABLE 1 #define STM32F7_I2C_ANALOG_FILTER_DELAY_MIN 50 /* ns */ @@ -725,6 +727,13 @@ static void stm32f7_i2c_hw_config(struct stm32f7_i2c_dev *i2c_dev) else stm32f7_i2c_set_bits(i2c_dev->base + STM32F7_I2C_CR1, STM32F7_I2C_CR1_ANFOFF); + + /* Program the Digital Filter */ + stm32f7_i2c_clr_bits(i2c_dev->base + STM32F7_I2C_CR1, + STM32F7_I2C_CR1_DNF_MASK); + stm32f7_i2c_set_bits(i2c_dev->base + STM32F7_I2C_CR1, + STM32F7_I2C_CR1_DNF(i2c_dev->setup.dnf)); + stm32f7_i2c_set_bits(i2c_dev->base + STM32F7_I2C_CR1, STM32F7_I2C_CR1_PE); } From 4761b1aabcfd36d5a4d0528168c474926bd7b2f9 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Fri, 12 Feb 2021 20:52:54 -0800 Subject: [PATCH 081/103] h8300: fix PREEMPTION build, TI_PRE_COUNT undefined [ Upstream commit ade9679c159d5bbe14fb7e59e97daf6062872e2b ] Fix a build error for undefined 'TI_PRE_COUNT' by adding it to asm-offsets.c. h8300-linux-ld: arch/h8300/kernel/entry.o: in function `resume_kernel': (.text+0x29a): undefined reference to `TI_PRE_COUNT' Link: https://lkml.kernel.org/r/20210212021650.22740-1-rdunlap@infradead.org Fixes: df2078b8daa7 ("h8300: Low level entry") Signed-off-by: Randy Dunlap Reported-by: kernel test robot Cc: Yoshinori Sato Cc: Thomas Gleixner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Sasha Levin --- arch/h8300/kernel/asm-offsets.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/h8300/kernel/asm-offsets.c b/arch/h8300/kernel/asm-offsets.c index 85e60509f0a8..d4b53af657c8 100644 --- a/arch/h8300/kernel/asm-offsets.c +++ b/arch/h8300/kernel/asm-offsets.c @@ -63,6 +63,9 @@ int main(void) OFFSET(TI_FLAGS, thread_info, flags); OFFSET(TI_CPU, thread_info, cpu); OFFSET(TI_PRE, thread_info, preempt_count); +#ifdef CONFIG_PREEMPTION + DEFINE(TI_PRE_COUNT, offsetof(struct thread_info, preempt_count)); +#endif return 0; } From 902c6dcbb146eb119ae539a231103619855df6a0 Mon Sep 17 00:00:00 2001 From: Rolf Eike Beer Date: Fri, 12 Feb 2021 08:22:27 +0100 Subject: [PATCH 082/103] scripts: set proper OpenSSL include dir also for sign-file commit fe968c41ac4f4ec9ffe3c4cf16b72285f5e9674f upstream. Fixes: 2cea4a7a1885 ("scripts: use pkg-config to locate libcrypto") Signed-off-by: Rolf Eike Beer Cc: stable@vger.kernel.org # 5.6.x Signed-off-by: Masahiro Yamada Signed-off-by: Greg Kroah-Hartman --- scripts/Makefile | 1 + 1 file changed, 1 insertion(+) diff --git a/scripts/Makefile b/scripts/Makefile index 9de3c03b94aa..c36106bce80e 100644 --- a/scripts/Makefile +++ b/scripts/Makefile @@ -17,6 +17,7 @@ hostprogs-always-$(CONFIG_SYSTEM_EXTRA_CERTIFICATE) += insert-sys-cert HOSTCFLAGS_sorttable.o = -I$(srctree)/tools/include HOSTCFLAGS_asn1_compiler.o = -I$(srctree)/include +HOSTCFLAGS_sign-file.o = $(CRYPTO_CFLAGS) HOSTLDLIBS_sign-file = $(CRYPTO_LIBS) HOSTCFLAGS_extract-cert.o = $(CRYPTO_CFLAGS) HOSTLDLIBS_extract-cert = $(CRYPTO_LIBS) From 3ed6cc9c2dd08a2ed29f43b5916cb16c3318e98a Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 10 Feb 2021 16:27:41 +0100 Subject: [PATCH 083/103] x86/pci: Create PCI/MSI irqdomain after x86_init.pci.arch_init() commit 70245f86c109e0eafb92ea9653184c0e44b4b35c upstream. Invoking x86_init.irqs.create_pci_msi_domain() before x86_init.pci.arch_init() breaks XEN PV. The XEN_PV specific pci.arch_init() function overrides the default create_pci_msi_domain() which is obviously too late. As a consequence the XEN PV PCI/MSI allocation goes through the native path which runs out of vectors and causes malfunction. Invoke it after x86_init.pci.arch_init(). Fixes: 6b15ffa07dc3 ("x86/irq: Initialize PCI/MSI domain at PCI init time") Reported-by: Juergen Gross Signed-off-by: Thomas Gleixner Tested-by: Juergen Gross Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/87pn18djte.fsf@nanos.tec.linutronix.de Signed-off-by: Greg Kroah-Hartman --- arch/x86/pci/init.c | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/arch/x86/pci/init.c b/arch/x86/pci/init.c index 00bfa1ebad6c..0bb3b8b44e4e 100644 --- a/arch/x86/pci/init.c +++ b/arch/x86/pci/init.c @@ -9,16 +9,23 @@ in the right sequence from here. */ static __init int pci_arch_init(void) { - int type; - - x86_create_pci_msi_domain(); + int type, pcbios = 1; type = pci_direct_probe(); if (!(pci_probe & PCI_PROBE_NOEARLY)) pci_mmcfg_early_init(); - if (x86_init.pci.arch_init && !x86_init.pci.arch_init()) + if (x86_init.pci.arch_init) + pcbios = x86_init.pci.arch_init(); + + /* + * Must happen after x86_init.pci.arch_init(). Xen sets up the + * x86_init.irqs.create_pci_msi_domain there. + */ + x86_create_pci_msi_domain(); + + if (!pcbios) return 0; pci_pcbios_init(); From 975a2396e37cecc42ba006d69d943be149efbeb0 Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Wed, 10 Feb 2021 18:03:16 +0000 Subject: [PATCH 084/103] arm64: mte: Allow PTRACE_PEEKMTETAGS access to the zero page commit 68d54ceeec0e5fee4fb8048e6a04c193f32525ca upstream. The ptrace(PTRACE_PEEKMTETAGS) implementation checks whether the user page has valid tags (mapped with PROT_MTE) by testing the PG_mte_tagged page flag. If this bit is cleared, ptrace(PTRACE_PEEKMTETAGS) returns -EIO. A newly created (PROT_MTE) mapping points to the zero page which had its tags zeroed during cpu_enable_mte(). If there were no prior writes to this mapping, ptrace(PTRACE_PEEKMTETAGS) fails with -EIO since the zero page does not have the PG_mte_tagged flag set. Set PG_mte_tagged on the zero page when its tags are cleared during boot. In addition, to avoid ptrace(PTRACE_PEEKMTETAGS) succeeding on !PROT_MTE mappings pointing to the zero page, change the __access_remote_tags() check to (vm_flags & VM_MTE) instead of PG_mte_tagged. Signed-off-by: Catalin Marinas Fixes: 34bfeea4a9e9 ("arm64: mte: Clear the tags when a page is mapped in user-space with PROT_MTE") Cc: # 5.10.x Cc: Will Deacon Reported-by: Luis Machado Tested-by: Luis Machado Reviewed-by: Vincenzo Frascino Link: https://lore.kernel.org/r/20210210180316.23654-1-catalin.marinas@arm.com Signed-off-by: Greg Kroah-Hartman --- arch/arm64/kernel/cpufeature.c | 6 +----- arch/arm64/kernel/mte.c | 3 ++- 2 files changed, 3 insertions(+), 6 deletions(-) diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index 0a52e076153b..65a522fbd874 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -1696,16 +1696,12 @@ static void bti_enable(const struct arm64_cpu_capabilities *__unused) #ifdef CONFIG_ARM64_MTE static void cpu_enable_mte(struct arm64_cpu_capabilities const *cap) { - static bool cleared_zero_page = false; - /* * Clear the tags in the zero page. This needs to be done via the * linear map which has the Tagged attribute. */ - if (!cleared_zero_page) { - cleared_zero_page = true; + if (!test_and_set_bit(PG_mte_tagged, &ZERO_PAGE(0)->flags)) mte_clear_page_tags(lm_alias(empty_zero_page)); - } } #endif /* CONFIG_ARM64_MTE */ diff --git a/arch/arm64/kernel/mte.c b/arch/arm64/kernel/mte.c index ef15c8a2a49d..7a66a7d9c1ff 100644 --- a/arch/arm64/kernel/mte.c +++ b/arch/arm64/kernel/mte.c @@ -239,11 +239,12 @@ static int __access_remote_tags(struct mm_struct *mm, unsigned long addr, * would cause the existing tags to be cleared if the page * was never mapped with PROT_MTE. */ - if (!test_bit(PG_mte_tagged, &page->flags)) { + if (!(vma->vm_flags & VM_MTE)) { ret = -EOPNOTSUPP; put_page(page); break; } + WARN_ON_ONCE(!test_bit(PG_mte_tagged, &page->flags)); /* limit access to the end of the page */ offset = offset_in_page(addr); From 24f5544f76d38cf55903a62cc9b1dbf264eb5807 Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 3 Feb 2021 08:47:56 +0000 Subject: [PATCH 085/103] rxrpc: Fix clearance of Tx/Rx ring when releasing a call commit 7b5eab57cac45e270a0ad624ba157c5b30b3d44d upstream. At the end of rxrpc_release_call(), rxrpc_cleanup_ring() is called to clear the Rx/Tx skbuff ring, but this doesn't lock the ring whilst it's accessing it. Unfortunately, rxrpc_resend() might be trying to retransmit a packet concurrently with this - and whilst it does lock the ring, this isn't protection against rxrpc_cleanup_call(). Fix this by removing the call to rxrpc_cleanup_ring() from rxrpc_release_call(). rxrpc_cleanup_ring() will be called again anyway from rxrpc_cleanup_call(). The earlier call is just an optimisation to recycle skbuffs more quickly. Alternative solutions include rxrpc_release_call() could try to cancel the work item or wait for it to complete or rxrpc_cleanup_ring() could lock when accessing the ring (which would require a bh lock). This can produce a report like the following: BUG: KASAN: use-after-free in rxrpc_send_data_packet+0x19b4/0x1e70 net/rxrpc/output.c:372 Read of size 4 at addr ffff888011606e04 by task kworker/0:0/5 ... Workqueue: krxrpcd rxrpc_process_call Call Trace: ... kasan_report.cold+0x79/0xd5 mm/kasan/report.c:413 rxrpc_send_data_packet+0x19b4/0x1e70 net/rxrpc/output.c:372 rxrpc_resend net/rxrpc/call_event.c:266 [inline] rxrpc_process_call+0x1634/0x1f60 net/rxrpc/call_event.c:412 process_one_work+0x98d/0x15f0 kernel/workqueue.c:2275 ... Allocated by task 2318: ... sock_alloc_send_pskb+0x793/0x920 net/core/sock.c:2348 rxrpc_send_data+0xb51/0x2bf0 net/rxrpc/sendmsg.c:358 rxrpc_do_sendmsg+0xc03/0x1350 net/rxrpc/sendmsg.c:744 rxrpc_sendmsg+0x420/0x630 net/rxrpc/af_rxrpc.c:560 ... Freed by task 2318: ... kfree_skb+0x140/0x3f0 net/core/skbuff.c:704 rxrpc_free_skb+0x11d/0x150 net/rxrpc/skbuff.c:78 rxrpc_cleanup_ring net/rxrpc/call_object.c:485 [inline] rxrpc_release_call+0x5dd/0x860 net/rxrpc/call_object.c:552 rxrpc_release_calls_on_socket+0x21c/0x300 net/rxrpc/call_object.c:579 rxrpc_release_sock net/rxrpc/af_rxrpc.c:885 [inline] rxrpc_release+0x263/0x5a0 net/rxrpc/af_rxrpc.c:916 __sock_release+0xcd/0x280 net/socket.c:597 ... The buggy address belongs to the object at ffff888011606dc0 which belongs to the cache skbuff_head_cache of size 232 Fixes: 248f219cb8bc ("rxrpc: Rewrite the data and ack handling code") Reported-by: syzbot+174de899852504e4a74a@syzkaller.appspotmail.com Reported-by: syzbot+3d1c772efafd3c38d007@syzkaller.appspotmail.com Signed-off-by: David Howells cc: Hillf Danton Link: https://lore.kernel.org/r/161234207610.653119.5287360098400436976.stgit@warthog.procyon.org.uk Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- net/rxrpc/call_object.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/net/rxrpc/call_object.c b/net/rxrpc/call_object.c index c845594b663f..4eb91d958a48 100644 --- a/net/rxrpc/call_object.c +++ b/net/rxrpc/call_object.c @@ -548,8 +548,6 @@ void rxrpc_release_call(struct rxrpc_sock *rx, struct rxrpc_call *call) rxrpc_disconnect_call(call); if (call->security) call->security->free_call_crypto(call); - - rxrpc_cleanup_ring(call); _leave(""); } From 46a831d1cc25f559a5ef46aff35e39795b61a94c Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Wed, 3 Feb 2021 14:29:52 -0500 Subject: [PATCH 086/103] udp: fix skb_copy_and_csum_datagram with odd segment sizes commit 52cbd23a119c6ebf40a527e53f3402d2ea38eccb upstream. When iteratively computing a checksum with csum_block_add, track the offset "pos" to correctly rotate in csum_block_add when offset is odd. The open coded implementation of skb_copy_and_csum_datagram did this. With the switch to __skb_datagram_iter calling csum_and_copy_to_iter, pos was reinitialized to 0 on each call. Bring back the pos by passing it along with the csum to the callback. Changes v1->v2 - pass csum value, instead of csump pointer (Alexander Duyck) Link: https://lore.kernel.org/netdev/20210128152353.GB27281@optiplex/ Fixes: 950fcaecd5cc ("datagram: consolidate datagram copy to iter helpers") Reported-by: Oliver Graute Signed-off-by: Willem de Bruijn Reviewed-by: Alexander Duyck Reviewed-by: Eric Dumazet Link: https://lore.kernel.org/r/20210203192952.1849843-1-willemdebruijn.kernel@gmail.com Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- include/linux/uio.h | 8 +++++++- lib/iov_iter.c | 24 ++++++++++++++---------- net/core/datagram.c | 12 ++++++++++-- 3 files changed, 31 insertions(+), 13 deletions(-) diff --git a/include/linux/uio.h b/include/linux/uio.h index 72d88566694e..27ff8eb786dc 100644 --- a/include/linux/uio.h +++ b/include/linux/uio.h @@ -260,7 +260,13 @@ static inline void iov_iter_reexpand(struct iov_iter *i, size_t count) { i->count = count; } -size_t csum_and_copy_to_iter(const void *addr, size_t bytes, void *csump, struct iov_iter *i); + +struct csum_state { + __wsum csum; + size_t off; +}; + +size_t csum_and_copy_to_iter(const void *addr, size_t bytes, void *csstate, struct iov_iter *i); size_t csum_and_copy_from_iter(void *addr, size_t bytes, __wsum *csum, struct iov_iter *i); bool csum_and_copy_from_iter_full(void *addr, size_t bytes, __wsum *csum, struct iov_iter *i); size_t hash_and_copy_to_iter(const void *addr, size_t bytes, void *hashp, diff --git a/lib/iov_iter.c b/lib/iov_iter.c index a21e6a5792c5..f0b2ccb1bb01 100644 --- a/lib/iov_iter.c +++ b/lib/iov_iter.c @@ -592,14 +592,15 @@ static __wsum csum_and_memcpy(void *to, const void *from, size_t len, } static size_t csum_and_copy_to_pipe_iter(const void *addr, size_t bytes, - __wsum *csum, struct iov_iter *i) + struct csum_state *csstate, + struct iov_iter *i) { struct pipe_inode_info *pipe = i->pipe; unsigned int p_mask = pipe->ring_size - 1; + __wsum sum = csstate->csum; + size_t off = csstate->off; unsigned int i_head; size_t n, r; - size_t off = 0; - __wsum sum = *csum; if (!sanity(i)) return 0; @@ -621,7 +622,8 @@ static size_t csum_and_copy_to_pipe_iter(const void *addr, size_t bytes, i_head++; } while (n); i->count -= bytes; - *csum = sum; + csstate->csum = sum; + csstate->off = off; return bytes; } @@ -1522,18 +1524,19 @@ bool csum_and_copy_from_iter_full(void *addr, size_t bytes, __wsum *csum, } EXPORT_SYMBOL(csum_and_copy_from_iter_full); -size_t csum_and_copy_to_iter(const void *addr, size_t bytes, void *csump, +size_t csum_and_copy_to_iter(const void *addr, size_t bytes, void *_csstate, struct iov_iter *i) { + struct csum_state *csstate = _csstate; const char *from = addr; - __wsum *csum = csump; __wsum sum, next; - size_t off = 0; + size_t off; if (unlikely(iov_iter_is_pipe(i))) - return csum_and_copy_to_pipe_iter(addr, bytes, csum, i); + return csum_and_copy_to_pipe_iter(addr, bytes, _csstate, i); - sum = *csum; + sum = csstate->csum; + off = csstate->off; if (unlikely(iov_iter_is_discard(i))) { WARN_ON(1); /* for now */ return 0; @@ -1561,7 +1564,8 @@ size_t csum_and_copy_to_iter(const void *addr, size_t bytes, void *csump, off += v.iov_len; }) ) - *csum = sum; + csstate->csum = sum; + csstate->off = off; return bytes; } EXPORT_SYMBOL(csum_and_copy_to_iter); diff --git a/net/core/datagram.c b/net/core/datagram.c index 9fcaa544f11a..bc92683fdcdb 100644 --- a/net/core/datagram.c +++ b/net/core/datagram.c @@ -721,8 +721,16 @@ static int skb_copy_and_csum_datagram(const struct sk_buff *skb, int offset, struct iov_iter *to, int len, __wsum *csump) { - return __skb_datagram_iter(skb, offset, to, len, true, - csum_and_copy_to_iter, csump); + struct csum_state csdata = { .csum = *csump }; + int ret; + + ret = __skb_datagram_iter(skb, offset, to, len, true, + csum_and_copy_to_iter, &csdata); + if (ret) + return ret; + + *csump = csdata.csum; + return 0; } /** From c930943a368336be15b909cc1d6c344eceb3bd5e Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Thu, 4 Feb 2021 18:33:51 +0200 Subject: [PATCH 087/103] net: dsa: call teardown method on probe failure commit 8fd54a73b7cda11548154451bdb4bde6d8ff74c7 upstream. Since teardown is supposed to undo the effects of the setup method, it should be called in the error path for dsa_switch_setup, not just in dsa_switch_teardown. Fixes: 5e3f847a02aa ("net: dsa: Add teardown callback for drivers") Signed-off-by: Vladimir Oltean Reviewed-by: Andrew Lunn Reviewed-by: Florian Fainelli Link: https://lore.kernel.org/r/20210204163351.2929670-1-vladimir.oltean@nxp.com Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- net/dsa/dsa2.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/net/dsa/dsa2.c b/net/dsa/dsa2.c index a47e0f9b20d0..a04fd637b4cd 100644 --- a/net/dsa/dsa2.c +++ b/net/dsa/dsa2.c @@ -462,20 +462,23 @@ static int dsa_switch_setup(struct dsa_switch *ds) ds->slave_mii_bus = devm_mdiobus_alloc(ds->dev); if (!ds->slave_mii_bus) { err = -ENOMEM; - goto unregister_notifier; + goto teardown; } dsa_slave_mii_bus_init(ds); err = mdiobus_register(ds->slave_mii_bus); if (err < 0) - goto unregister_notifier; + goto teardown; } ds->setup = true; return 0; +teardown: + if (ds->ops->teardown) + ds->ops->teardown(ds); unregister_notifier: dsa_switch_unregister_notifier(ds); unregister_devlink_ports: From 8a3fc32b322cc3081dd3569047c9834f496b4ab0 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Thu, 4 Feb 2021 18:25:37 +0100 Subject: [PATCH 088/103] cpufreq: ACPI: Extend frequency tables to cover boost frequencies commit 3c55e94c0adea4a5389c4b80f6ae9927dd6a4501 upstream. A severe performance regression on AMD EPYC processors when using the schedutil scaling governor was discovered by Phoronix.com and attributed to the following commits: 41ea667227ba ("x86, sched: Calculate frequency invariance for AMD systems") 976df7e5730e ("x86, sched: Use midpoint of max_boost and max_P for frequency invariance on AMD EPYC") The source of the problem is that the maximum performance level taken for computing the arch_max_freq_ratio value used in the x86 scale- invariance code is higher than the one corresponding to the cpuinfo.max_freq value coming from the acpi_cpufreq driver. This effectively causes the scale-invariant utilization to fall below 100% even if the CPU runs at cpuinfo.max_freq or slightly faster, so the schedutil governor selects a frequency below cpuinfo.max_freq then. That frequency corresponds to a frequency table entry below the maximum performance level necessary to get to the "boost" range of CPU frequencies. However, if the cpuinfo.max_freq value coming from acpi_cpufreq was higher, the schedutil governor would select higher frequencies which in turn would allow acpi_cpufreq to set more adequate performance levels and to get to the "boost" range of CPU frequencies more often. This issue affects any systems where acpi_cpufreq is used and the "boost" (or "turbo") frequencies are enabled, not just AMD EPYC. Moreover, commit db865272d9c4 ("cpufreq: Avoid configuring old governors as default with intel_pstate") from the 5.10 development cycle made it extremely easy to default to schedutil even if the preferred driver is acpi_cpufreq as long as intel_pstate is built too, because the mere presence of the latter effectively removes the ondemand governor from the defaults. Distro kernels are likely to include both intel_pstate and acpi_cpufreq on x86, so their users who cannot use intel_pstate or choose to use acpi_cpufreq may easily be affectecd by this issue. To address this issue, extend the frequency table constructed by acpi_cpufreq for each CPU to cover the entire range of available frequencies (including the "boost" ones) if CPPC is available and indicates that "boost" (or "turbo") frequencies are enabled. That causes cpuinfo.max_freq to become the maximum "boost" frequency of the given CPU (instead of the maximum frequency returned by the ACPI _PSS object that corresponds to the "nominal" performance level). Fixes: 41ea667227ba ("x86, sched: Calculate frequency invariance for AMD systems") Fixes: 976df7e5730e ("x86, sched: Use midpoint of max_boost and max_P for frequency invariance on AMD EPYC") Fixes: db865272d9c4 ("cpufreq: Avoid configuring old governors as default with intel_pstate") Link: https://www.phoronix.com/scan.php?page=article&item=linux511-amd-schedutil&num=1 Link: https://lore.kernel.org/linux-pm/20210203135321.12253-2-ggherdovich@suse.cz/ Reported-by: Michael Larabel Diagnosed-by: Giovanni Gherdovich Signed-off-by: Rafael J. Wysocki Tested-by: Giovanni Gherdovich Reviewed-by: Giovanni Gherdovich Tested-by: Michael Larabel Signed-off-by: Greg Kroah-Hartman --- drivers/cpufreq/acpi-cpufreq.c | 109 +++++++++++++++++++++++++++++---- 1 file changed, 96 insertions(+), 13 deletions(-) diff --git a/drivers/cpufreq/acpi-cpufreq.c b/drivers/cpufreq/acpi-cpufreq.c index 1e4fbb002a31..4614f1c6f50a 100644 --- a/drivers/cpufreq/acpi-cpufreq.c +++ b/drivers/cpufreq/acpi-cpufreq.c @@ -26,6 +26,7 @@ #include #include +#include #include #include @@ -53,6 +54,7 @@ struct acpi_cpufreq_data { unsigned int resume; unsigned int cpu_feature; unsigned int acpi_perf_cpu; + unsigned int first_perf_state; cpumask_var_t freqdomain_cpus; void (*cpu_freq_write)(struct acpi_pct_register *reg, u32 val); u32 (*cpu_freq_read)(struct acpi_pct_register *reg); @@ -221,10 +223,10 @@ static unsigned extract_msr(struct cpufreq_policy *policy, u32 msr) perf = to_perf_data(data); - cpufreq_for_each_entry(pos, policy->freq_table) + cpufreq_for_each_entry(pos, policy->freq_table + data->first_perf_state) if (msr == perf->states[pos->driver_data].status) return pos->frequency; - return policy->freq_table[0].frequency; + return policy->freq_table[data->first_perf_state].frequency; } static unsigned extract_freq(struct cpufreq_policy *policy, u32 val) @@ -363,6 +365,7 @@ static unsigned int get_cur_freq_on_cpu(unsigned int cpu) struct cpufreq_policy *policy; unsigned int freq; unsigned int cached_freq; + unsigned int state; pr_debug("%s (%d)\n", __func__, cpu); @@ -374,7 +377,11 @@ static unsigned int get_cur_freq_on_cpu(unsigned int cpu) if (unlikely(!data || !policy->freq_table)) return 0; - cached_freq = policy->freq_table[to_perf_data(data)->state].frequency; + state = to_perf_data(data)->state; + if (state < data->first_perf_state) + state = data->first_perf_state; + + cached_freq = policy->freq_table[state].frequency; freq = extract_freq(policy, get_cur_val(cpumask_of(cpu), data)); if (freq != cached_freq) { /* @@ -628,16 +635,54 @@ static int acpi_cpufreq_blacklist(struct cpuinfo_x86 *c) } #endif +#ifdef CONFIG_ACPI_CPPC_LIB +static u64 get_max_boost_ratio(unsigned int cpu) +{ + struct cppc_perf_caps perf_caps; + u64 highest_perf, nominal_perf; + int ret; + + if (acpi_pstate_strict) + return 0; + + ret = cppc_get_perf_caps(cpu, &perf_caps); + if (ret) { + pr_debug("CPU%d: Unable to get performance capabilities (%d)\n", + cpu, ret); + return 0; + } + + highest_perf = perf_caps.highest_perf; + nominal_perf = perf_caps.nominal_perf; + + if (!highest_perf || !nominal_perf) { + pr_debug("CPU%d: highest or nominal performance missing\n", cpu); + return 0; + } + + if (highest_perf < nominal_perf) { + pr_debug("CPU%d: nominal performance above highest\n", cpu); + return 0; + } + + return div_u64(highest_perf << SCHED_CAPACITY_SHIFT, nominal_perf); +} +#else +static inline u64 get_max_boost_ratio(unsigned int cpu) { return 0; } +#endif + static int acpi_cpufreq_cpu_init(struct cpufreq_policy *policy) { - unsigned int i; - unsigned int valid_states = 0; - unsigned int cpu = policy->cpu; - struct acpi_cpufreq_data *data; - unsigned int result = 0; - struct cpuinfo_x86 *c = &cpu_data(policy->cpu); - struct acpi_processor_performance *perf; struct cpufreq_frequency_table *freq_table; + struct acpi_processor_performance *perf; + struct acpi_cpufreq_data *data; + unsigned int cpu = policy->cpu; + struct cpuinfo_x86 *c = &cpu_data(cpu); + unsigned int valid_states = 0; + unsigned int result = 0; + unsigned int state_count; + u64 max_boost_ratio; + unsigned int i; #ifdef CONFIG_SMP static int blacklisted; #endif @@ -750,8 +795,20 @@ static int acpi_cpufreq_cpu_init(struct cpufreq_policy *policy) goto err_unreg; } - freq_table = kcalloc(perf->state_count + 1, sizeof(*freq_table), - GFP_KERNEL); + state_count = perf->state_count + 1; + + max_boost_ratio = get_max_boost_ratio(cpu); + if (max_boost_ratio) { + /* + * Make a room for one more entry to represent the highest + * available "boost" frequency. + */ + state_count++; + valid_states++; + data->first_perf_state = valid_states; + } + + freq_table = kcalloc(state_count, sizeof(*freq_table), GFP_KERNEL); if (!freq_table) { result = -ENOMEM; goto err_unreg; @@ -785,6 +842,30 @@ static int acpi_cpufreq_cpu_init(struct cpufreq_policy *policy) valid_states++; } freq_table[valid_states].frequency = CPUFREQ_TABLE_END; + + if (max_boost_ratio) { + unsigned int state = data->first_perf_state; + unsigned int freq = freq_table[state].frequency; + + /* + * Because the loop above sorts the freq_table entries in the + * descending order, freq is the maximum frequency in the table. + * Assume that it corresponds to the CPPC nominal frequency and + * use it to populate the frequency field of the extra "boost" + * frequency entry. + */ + freq_table[0].frequency = freq * max_boost_ratio >> SCHED_CAPACITY_SHIFT; + /* + * The purpose of the extra "boost" frequency entry is to make + * the rest of cpufreq aware of the real maximum frequency, but + * the way to request it is the same as for the first_perf_state + * entry that is expected to cover the entire range of "boost" + * frequencies of the CPU, so copy the driver_data value from + * that entry. + */ + freq_table[0].driver_data = freq_table[state].driver_data; + } + policy->freq_table = freq_table; perf->state = 0; @@ -858,8 +939,10 @@ static void acpi_cpufreq_cpu_ready(struct cpufreq_policy *policy) { struct acpi_processor_performance *perf = per_cpu_ptr(acpi_perf_data, policy->cpu); + struct acpi_cpufreq_data *data = policy->driver_data; + unsigned int freq = policy->freq_table[data->first_perf_state].frequency; - if (perf->states[0].core_frequency * 1000 != policy->cpuinfo.max_freq) + if (perf->states[0].core_frequency * 1000 != freq) pr_warn(FW_WARN "P-state 0 is not max freq\n"); } From 18193e09834cbb6e645861bc26c5ae184441e0d2 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Thu, 4 Feb 2021 18:34:32 +0100 Subject: [PATCH 089/103] cpufreq: ACPI: Update arch scale-invariance max perf ratio if CPPC is not there commit d11a1d08a082a7dc0ada423d2b2e26e9b6f2525c upstream. If the maximum performance level taken for computing the arch_max_freq_ratio value used in the x86 scale-invariance code is higher than the one corresponding to the cpuinfo.max_freq value coming from the acpi_cpufreq driver, the scale-invariant utilization falls below 100% even if the CPU runs at cpuinfo.max_freq or slightly faster, which causes the schedutil governor to select a frequency below cpuinfo.max_freq. That frequency corresponds to a frequency table entry below the maximum performance level necessary to get to the "boost" range of CPU frequencies which prevents "boost" frequencies from being used in some workloads. While this issue is related to scale-invariance, it may be amplified by commit db865272d9c4 ("cpufreq: Avoid configuring old governors as default with intel_pstate") from the 5.10 development cycle which made it extremely easy to default to schedutil even if the preferred driver is acpi_cpufreq as long as intel_pstate is built too, because the mere presence of the latter effectively removes the ondemand governor from the defaults. Distro kernels are likely to include both intel_pstate and acpi_cpufreq on x86, so their users who cannot use intel_pstate or choose to use acpi_cpufreq may easily be affectecd by this issue. If CPPC is available, it can be used to address this issue by extending the frequency tables created by acpi_cpufreq to cover the entire available frequency range (including "boost" frequencies) for each CPU, but if CPPC is not there, acpi_cpufreq has no idea what the maximum "boost" frequency is and the frequency tables created by it cannot be extended in a meaningful way, so in that case make it ask the arch scale-invariance code to to use the "nominal" performance level for CPU utilization scaling in order to avoid the issue at hand. Fixes: db865272d9c4 ("cpufreq: Avoid configuring old governors as default with intel_pstate") Signed-off-by: Rafael J. Wysocki Reviewed-by: Giovanni Gherdovich Acked-by: Peter Zijlstra (Intel) Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/smpboot.c | 1 + drivers/cpufreq/acpi-cpufreq.c | 8 ++++++++ 2 files changed, 9 insertions(+) diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index de776b2e6046..c65642c10aae 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -1829,6 +1829,7 @@ void arch_set_max_freq_ratio(bool turbo_disabled) arch_max_freq_ratio = turbo_disabled ? SCHED_CAPACITY_SCALE : arch_turbo_freq_ratio; } +EXPORT_SYMBOL_GPL(arch_set_max_freq_ratio); static bool turbo_disabled(void) { diff --git a/drivers/cpufreq/acpi-cpufreq.c b/drivers/cpufreq/acpi-cpufreq.c index 4614f1c6f50a..d3e5a6fceb61 100644 --- a/drivers/cpufreq/acpi-cpufreq.c +++ b/drivers/cpufreq/acpi-cpufreq.c @@ -806,6 +806,14 @@ static int acpi_cpufreq_cpu_init(struct cpufreq_policy *policy) state_count++; valid_states++; data->first_perf_state = valid_states; + } else { + /* + * If the maximum "boost" frequency is unknown, ask the arch + * scale-invariance code to use the "nominal" performance for + * CPU utilization scaling so as to prevent the schedutil + * governor from selecting inadequate CPU frequencies. + */ + arch_set_max_freq_ratio(true); } freq_table = kcalloc(state_count, sizeof(*freq_table), GFP_KERNEL); From 9e6ce473e96ba0ad63820325892373d24c1aa8aa Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 4 Feb 2021 13:31:46 -0800 Subject: [PATCH 090/103] net: gro: do not keep too many GRO packets in napi->rx_list commit 8dc1c444df193701910f5e80b5d4caaf705a8fb0 upstream. Commit c80794323e82 ("net: Fix packet reordering caused by GRO and listified RX cooperation") had the unfortunate effect of adding latencies in common workloads. Before the patch, GRO packets were immediately passed to upper stacks. After the patch, we can accumulate quite a lot of GRO packets (depdending on NAPI budget). My fix is counting in napi->rx_count number of segments instead of number of logical packets. Fixes: c80794323e82 ("net: Fix packet reordering caused by GRO and listified RX cooperation") Signed-off-by: Eric Dumazet Bisected-by: John Sperbeck Tested-by: Jian Yang Cc: Maxim Mikityanskiy Reviewed-by: Saeed Mahameed Reviewed-by: Edward Cree Reviewed-by: Alexander Lobakin Link: https://lore.kernel.org/r/20210204213146.4192368-1-eric.dumazet@gmail.com Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- net/core/dev.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/net/core/dev.c b/net/core/dev.c index 81e5d482c238..da85cb939869 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -5723,10 +5723,11 @@ static void gro_normal_list(struct napi_struct *napi) /* Queue one GRO_NORMAL SKB up for list processing. If batch size exceeded, * pass the whole batch up to the stack. */ -static void gro_normal_one(struct napi_struct *napi, struct sk_buff *skb) +static void gro_normal_one(struct napi_struct *napi, struct sk_buff *skb, int segs) { list_add_tail(&skb->list, &napi->rx_list); - if (++napi->rx_count >= gro_normal_batch) + napi->rx_count += segs; + if (napi->rx_count >= gro_normal_batch) gro_normal_list(napi); } @@ -5765,7 +5766,7 @@ static int napi_gro_complete(struct napi_struct *napi, struct sk_buff *skb) } out: - gro_normal_one(napi, skb); + gro_normal_one(napi, skb, NAPI_GRO_CB(skb)->count); return NET_RX_SUCCESS; } @@ -6055,7 +6056,7 @@ static gro_result_t napi_skb_finish(struct napi_struct *napi, { switch (ret) { case GRO_NORMAL: - gro_normal_one(napi, skb); + gro_normal_one(napi, skb, 1); break; case GRO_DROP: @@ -6143,7 +6144,7 @@ static gro_result_t napi_frags_finish(struct napi_struct *napi, __skb_push(skb, ETH_HLEN); skb->protocol = eth_type_trans(skb, skb->dev); if (ret == GRO_NORMAL) - gro_normal_one(napi, skb); + gro_normal_one(napi, skb, 1); break; case GRO_DROP: From c9013813413e12c8968e22964e314841b93af3c5 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Fri, 5 Feb 2021 11:36:30 +1100 Subject: [PATCH 091/103] net: fix iteration for sctp transport seq_files commit af8085f3a4712c57d0dd415ad543bac85780375c upstream. The sctp transport seq_file iterators take a reference to the transport in the ->start and ->next functions and releases the reference in the ->show function. The preferred handling for such resources is to release them in the subsequent ->next or ->stop function call. Since Commit 1f4aace60b0e ("fs/seq_file.c: simplify seq_file iteration code and interface") there is no guarantee that ->show will be called after ->next, so this function can now leak references. So move the sctp_transport_put() call to ->next and ->stop. Fixes: 1f4aace60b0e ("fs/seq_file.c: simplify seq_file iteration code and interface") Reported-by: Xin Long Signed-off-by: NeilBrown Acked-by: Marcelo Ricardo Leitner Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- net/sctp/proc.c | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/net/sctp/proc.c b/net/sctp/proc.c index f7da88ae20a5..982a87b3e11f 100644 --- a/net/sctp/proc.c +++ b/net/sctp/proc.c @@ -215,6 +215,12 @@ static void sctp_transport_seq_stop(struct seq_file *seq, void *v) { struct sctp_ht_iter *iter = seq->private; + if (v && v != SEQ_START_TOKEN) { + struct sctp_transport *transport = v; + + sctp_transport_put(transport); + } + sctp_transport_walk_stop(&iter->hti); } @@ -222,6 +228,12 @@ static void *sctp_transport_seq_next(struct seq_file *seq, void *v, loff_t *pos) { struct sctp_ht_iter *iter = seq->private; + if (v && v != SEQ_START_TOKEN) { + struct sctp_transport *transport = v; + + sctp_transport_put(transport); + } + ++*pos; return sctp_transport_get_next(seq_file_net(seq), &iter->hti); @@ -277,8 +289,6 @@ static int sctp_assocs_seq_show(struct seq_file *seq, void *v) sk->sk_rcvbuf); seq_printf(seq, "\n"); - sctp_transport_put(transport); - return 0; } @@ -354,8 +364,6 @@ static int sctp_remaddr_seq_show(struct seq_file *seq, void *v) seq_printf(seq, "\n"); } - sctp_transport_put(transport); - return 0; } From fcee53dc03c5aa5bca6631fa0aa900efc3079236 Mon Sep 17 00:00:00 2001 From: Norbert Slusarek Date: Fri, 5 Feb 2021 13:12:06 +0100 Subject: [PATCH 092/103] net/vmw_vsock: fix NULL pointer dereference commit 5d1cbcc990f18edaddddef26677073c4e6fad7b7 upstream. In vsock_stream_connect(), a thread will enter schedule_timeout(). While being scheduled out, another thread can enter vsock_stream_connect() as well and set vsk->transport to NULL. In case a signal was sent, the first thread can leave schedule_timeout() and vsock_transport_cancel_pkt() will be called right after. Inside vsock_transport_cancel_pkt(), a null dereference will happen on transport->cancel_pkt. Fixes: c0cfa2d8a788 ("vsock: add multi-transports support") Signed-off-by: Norbert Slusarek Reviewed-by: Stefano Garzarella Link: https://lore.kernel.org/r/trinity-c2d6cede-bfb1-44e2-85af-1fbc7f541715-1612535117028@3c-app-gmx-bap12 Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- net/vmw_vsock/af_vsock.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c index f64e681493a5..020d87886254 100644 --- a/net/vmw_vsock/af_vsock.c +++ b/net/vmw_vsock/af_vsock.c @@ -1216,7 +1216,7 @@ static int vsock_transport_cancel_pkt(struct vsock_sock *vsk) { const struct vsock_transport *transport = vsk->transport; - if (!transport->cancel_pkt) + if (!transport || !transport->cancel_pkt) return -EOPNOTSUPP; return transport->cancel_pkt(vsk); From bc21a88465c2778f1c877859475adacb1be1821d Mon Sep 17 00:00:00 2001 From: Norbert Slusarek Date: Fri, 5 Feb 2021 13:14:05 +0100 Subject: [PATCH 093/103] net/vmw_vsock: improve locking in vsock_connect_timeout() commit 3d0bc44d39bca615b72637e340317b7899b7f911 upstream. A possible locking issue in vsock_connect_timeout() was recognized by Eric Dumazet which might cause a null pointer dereference in vsock_transport_cancel_pkt(). This patch assures that vsock_transport_cancel_pkt() will be called within the lock, so a race condition won't occur which could result in vsk->transport to be set to NULL. Fixes: 380feae0def7 ("vsock: cancel packets when failing to connect") Reported-by: Eric Dumazet Signed-off-by: Norbert Slusarek Reviewed-by: Stefano Garzarella Link: https://lore.kernel.org/r/trinity-f8e0937a-cf0e-4d80-a76e-d9a958ba3ef1-1612535522360@3c-app-gmx-bap12 Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- net/vmw_vsock/af_vsock.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c index 020d87886254..66c9bc46891e 100644 --- a/net/vmw_vsock/af_vsock.c +++ b/net/vmw_vsock/af_vsock.c @@ -1226,7 +1226,6 @@ static void vsock_connect_timeout(struct work_struct *work) { struct sock *sk; struct vsock_sock *vsk; - int cancel = 0; vsk = container_of(work, struct vsock_sock, connect_work.work); sk = sk_vsock(vsk); @@ -1237,11 +1236,9 @@ static void vsock_connect_timeout(struct work_struct *work) sk->sk_state = TCP_CLOSE; sk->sk_err = ETIMEDOUT; sk->sk_error_report(sk); - cancel = 1; + vsock_transport_cancel_pkt(vsk); } release_sock(sk); - if (cancel) - vsock_transport_cancel_pkt(vsk); sock_put(sk); } From e22b963d3ec9a510c44d02ff7262fcec49098533 Mon Sep 17 00:00:00 2001 From: Edwin Peer Date: Fri, 5 Feb 2021 17:37:32 -0800 Subject: [PATCH 094/103] net: watchdog: hold device global xmit lock during tx disable commit 3aa6bce9af0e25b735c9c1263739a5639a336ae8 upstream. Prevent netif_tx_disable() running concurrently with dev_watchdog() by taking the device global xmit lock. Otherwise, the recommended: netif_carrier_off(dev); netif_tx_disable(dev); driver shutdown sequence can happen after the watchdog has already checked carrier, resulting in possible false alarms. This is because netif_tx_lock() only sets the frozen bit without maintaining the locks on the individual queues. Fixes: c3f26a269c24 ("netdev: Fix lockdep warnings in multiqueue configurations.") Signed-off-by: Edwin Peer Reviewed-by: Jakub Kicinski Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- include/linux/netdevice.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 7c3da0e1ea9d..9de5312edeb8 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -4313,6 +4313,7 @@ static inline void netif_tx_disable(struct net_device *dev) local_bh_disable(); cpu = smp_processor_id(); + spin_lock(&dev->tx_global_lock); for (i = 0; i < dev->num_tx_queues; i++) { struct netdev_queue *txq = netdev_get_tx_queue(dev, i); @@ -4320,6 +4321,7 @@ static inline void netif_tx_disable(struct net_device *dev) netif_tx_stop_queue(txq); __netif_tx_unlock(txq); } + spin_unlock(&dev->tx_global_lock); local_bh_enable(); } From 55ad30cb7f73b575570f2a39772b88ff25308b27 Mon Sep 17 00:00:00 2001 From: Horatiu Vultur Date: Sat, 6 Feb 2021 22:47:33 +0100 Subject: [PATCH 095/103] bridge: mrp: Fix the usage of br_mrp_port_switchdev_set_state commit b2bdba1cbc84cadb14393d0101a5bfd38d342e0a upstream. The function br_mrp_port_switchdev_set_state was called both with MRP port state and STP port state, which is an issue because they don't match exactly. Therefore, update the function to be used only with STP port state and use the id SWITCHDEV_ATTR_ID_PORT_STP_STATE. The choice of using STP over MRP is that the drivers already implement SWITCHDEV_ATTR_ID_PORT_STP_STATE and already in SW we update the port STP state. Fixes: 9a9f26e8f7ea30 ("bridge: mrp: Connect MRP API with the switchdev API") Fixes: fadd409136f0f2 ("bridge: switchdev: mrp: Implement MRP API for switchdev") Fixes: 2f1a11ae11d222 ("bridge: mrp: Add MRP interface.") Reported-by: Rasmus Villemoes Signed-off-by: Horatiu Vultur Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/bridge/br_mrp.c | 9 ++++++--- net/bridge/br_mrp_switchdev.c | 7 +++---- net/bridge/br_private_mrp.h | 3 +-- 3 files changed, 10 insertions(+), 9 deletions(-) diff --git a/net/bridge/br_mrp.c b/net/bridge/br_mrp.c index b36689e6e7cb..d1336a7ad7ff 100644 --- a/net/bridge/br_mrp.c +++ b/net/bridge/br_mrp.c @@ -544,19 +544,22 @@ int br_mrp_del(struct net_bridge *br, struct br_mrp_instance *instance) int br_mrp_set_port_state(struct net_bridge_port *p, enum br_mrp_port_state_type state) { + u32 port_state; + if (!p || !(p->flags & BR_MRP_AWARE)) return -EINVAL; spin_lock_bh(&p->br->lock); if (state == BR_MRP_PORT_STATE_FORWARDING) - p->state = BR_STATE_FORWARDING; + port_state = BR_STATE_FORWARDING; else - p->state = BR_STATE_BLOCKING; + port_state = BR_STATE_BLOCKING; + p->state = port_state; spin_unlock_bh(&p->br->lock); - br_mrp_port_switchdev_set_state(p, state); + br_mrp_port_switchdev_set_state(p, port_state); return 0; } diff --git a/net/bridge/br_mrp_switchdev.c b/net/bridge/br_mrp_switchdev.c index ed547e03ace1..75a7e8d0a268 100644 --- a/net/bridge/br_mrp_switchdev.c +++ b/net/bridge/br_mrp_switchdev.c @@ -169,13 +169,12 @@ int br_mrp_switchdev_send_in_test(struct net_bridge *br, struct br_mrp *mrp, return err; } -int br_mrp_port_switchdev_set_state(struct net_bridge_port *p, - enum br_mrp_port_state_type state) +int br_mrp_port_switchdev_set_state(struct net_bridge_port *p, u32 state) { struct switchdev_attr attr = { .orig_dev = p->dev, - .id = SWITCHDEV_ATTR_ID_MRP_PORT_STATE, - .u.mrp_port_state = state, + .id = SWITCHDEV_ATTR_ID_PORT_STP_STATE, + .u.stp_state = state, }; int err; diff --git a/net/bridge/br_private_mrp.h b/net/bridge/br_private_mrp.h index af0e9eff6549..6657705b94b1 100644 --- a/net/bridge/br_private_mrp.h +++ b/net/bridge/br_private_mrp.h @@ -72,8 +72,7 @@ int br_mrp_switchdev_set_ring_state(struct net_bridge *br, struct br_mrp *mrp, int br_mrp_switchdev_send_ring_test(struct net_bridge *br, struct br_mrp *mrp, u32 interval, u8 max_miss, u32 period, bool monitor); -int br_mrp_port_switchdev_set_state(struct net_bridge_port *p, - enum br_mrp_port_state_type state); +int br_mrp_port_switchdev_set_state(struct net_bridge_port *p, u32 state); int br_mrp_port_switchdev_set_role(struct net_bridge_port *p, enum br_mrp_port_role_type role); int br_mrp_switchdev_set_in_role(struct net_bridge *br, struct br_mrp *mrp, From ba3bcb35d783805f2b3ebf2ebf61702cd68f4959 Mon Sep 17 00:00:00 2001 From: Horatiu Vultur Date: Sat, 6 Feb 2021 22:47:34 +0100 Subject: [PATCH 096/103] switchdev: mrp: Remove SWITCHDEV_ATTR_ID_MRP_PORT_STAT commit 059d2a1004981dce19f0127dabc1b4ec927d202a upstream. Now that MRP started to use also SWITCHDEV_ATTR_ID_PORT_STP_STATE to notify HW, then SWITCHDEV_ATTR_ID_MRP_PORT_STAT is not used anywhere else, therefore we can remove it. Fixes: c284b545900830 ("switchdev: mrp: Extend switchdev API to offload MRP") Signed-off-by: Horatiu Vultur Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- include/net/switchdev.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/include/net/switchdev.h b/include/net/switchdev.h index 53e8b4994296..8528015590e4 100644 --- a/include/net/switchdev.h +++ b/include/net/switchdev.h @@ -41,7 +41,6 @@ enum switchdev_attr_id { SWITCHDEV_ATTR_ID_BRIDGE_MC_DISABLED, SWITCHDEV_ATTR_ID_BRIDGE_MROUTER, #if IS_ENABLED(CONFIG_BRIDGE_MRP) - SWITCHDEV_ATTR_ID_MRP_PORT_STATE, SWITCHDEV_ATTR_ID_MRP_PORT_ROLE, #endif }; @@ -60,7 +59,6 @@ struct switchdev_attr { bool vlan_filtering; /* BRIDGE_VLAN_FILTERING */ bool mc_disabled; /* MC_DISABLED */ #if IS_ENABLED(CONFIG_BRIDGE_MRP) - u8 mrp_port_state; /* MRP_PORT_STATE */ u8 mrp_port_role; /* MRP_PORT_ROLE */ #endif } u; From afe31701609b6ccf5cc637fb229383c26d72d478 Mon Sep 17 00:00:00 2001 From: Stefano Garzarella Date: Mon, 8 Feb 2021 15:44:54 +0100 Subject: [PATCH 097/103] vsock/virtio: update credit only if socket is not closed commit ce7536bc7398e2ae552d2fabb7e0e371a9f1fe46 upstream. If the socket is closed or is being released, some resources used by virtio_transport_space_update() such as 'vsk->trans' may be released. To avoid a use after free bug we should only update the available credit when we are sure the socket is still open and we have the lock held. Fixes: 06a8fc78367d ("VSOCK: Introduce virtio_vsock_common.ko") Signed-off-by: Stefano Garzarella Acked-by: Michael S. Tsirkin Link: https://lore.kernel.org/r/20210208144454.84438-1-sgarzare@redhat.com Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- net/vmw_vsock/virtio_transport_common.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/vmw_vsock/virtio_transport_common.c b/net/vmw_vsock/virtio_transport_common.c index 5956939eebb7..e4370b1b7494 100644 --- a/net/vmw_vsock/virtio_transport_common.c +++ b/net/vmw_vsock/virtio_transport_common.c @@ -1130,8 +1130,6 @@ void virtio_transport_recv_pkt(struct virtio_transport *t, vsk = vsock_sk(sk); - space_available = virtio_transport_space_update(sk, pkt); - lock_sock(sk); /* Check if sk has been closed before lock_sock */ @@ -1142,6 +1140,8 @@ void virtio_transport_recv_pkt(struct virtio_transport *t, goto free_pkt; } + space_available = virtio_transport_space_update(sk, pkt); + /* Update CID in case it has changed after a transport reset event */ vsk->local_addr.svm_cid = dst.svm_cid; From 69e9fd9de17e839920149e0906f4758667d3cee7 Mon Sep 17 00:00:00 2001 From: Stefano Garzarella Date: Tue, 9 Feb 2021 09:52:19 +0100 Subject: [PATCH 098/103] vsock: fix locking in vsock_shutdown() commit 1c5fae9c9a092574398a17facc31c533791ef232 upstream. In vsock_shutdown() we touched some socket fields without holding the socket lock, such as 'state' and 'sk_flags'. Also, after the introduction of multi-transport, we are accessing 'vsk->transport' in vsock_send_shutdown() without holding the lock and this call can be made while the connection is in progress, so the transport can change in the meantime. To avoid issues, we hold the socket lock when we enter in vsock_shutdown() and release it when we leave. Among the transports that implement the 'shutdown' callback, only hyperv_transport acquired the lock. Since the caller now holds it, we no longer take it. Fixes: d021c344051a ("VSOCK: Introduce VM Sockets") Signed-off-by: Stefano Garzarella Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/vmw_vsock/af_vsock.c | 8 +++++--- net/vmw_vsock/hyperv_transport.c | 4 ---- 2 files changed, 5 insertions(+), 7 deletions(-) diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c index 66c9bc46891e..791955f5e7ec 100644 --- a/net/vmw_vsock/af_vsock.c +++ b/net/vmw_vsock/af_vsock.c @@ -926,10 +926,12 @@ static int vsock_shutdown(struct socket *sock, int mode) */ sk = sock->sk; + + lock_sock(sk); if (sock->state == SS_UNCONNECTED) { err = -ENOTCONN; if (sk->sk_type == SOCK_STREAM) - return err; + goto out; } else { sock->state = SS_DISCONNECTING; err = 0; @@ -938,10 +940,8 @@ static int vsock_shutdown(struct socket *sock, int mode) /* Receive and send shutdowns are treated alike. */ mode = mode & (RCV_SHUTDOWN | SEND_SHUTDOWN); if (mode) { - lock_sock(sk); sk->sk_shutdown |= mode; sk->sk_state_change(sk); - release_sock(sk); if (sk->sk_type == SOCK_STREAM) { sock_reset_flag(sk, SOCK_DONE); @@ -949,6 +949,8 @@ static int vsock_shutdown(struct socket *sock, int mode) } } +out: + release_sock(sk); return err; } diff --git a/net/vmw_vsock/hyperv_transport.c b/net/vmw_vsock/hyperv_transport.c index 630b851f8150..cc3bae2659e7 100644 --- a/net/vmw_vsock/hyperv_transport.c +++ b/net/vmw_vsock/hyperv_transport.c @@ -474,14 +474,10 @@ static void hvs_shutdown_lock_held(struct hvsock *hvs, int mode) static int hvs_shutdown(struct vsock_sock *vsk, int mode) { - struct sock *sk = sk_vsock(vsk); - if (!(mode & SEND_SHUTDOWN)) return 0; - lock_sock(sk); hvs_shutdown_lock_held(vsk->trans, mode); - release_sock(sk); return 0; } From 862d1c0edd2126037816a8e0d8d1adacba84c74f Mon Sep 17 00:00:00 2001 From: Sabyrzhan Tasbolatov Date: Tue, 2 Feb 2021 02:32:33 +0600 Subject: [PATCH 099/103] net/rds: restrict iovecs length for RDS_CMSG_RDMA_ARGS commit a11148e6fcce2ae53f47f0a442d098d860b4f7db upstream. syzbot found WARNING in rds_rdma_extra_size [1] when RDS_CMSG_RDMA_ARGS control message is passed with user-controlled 0x40001 bytes of args->nr_local, causing order >= MAX_ORDER condition. The exact value 0x40001 can be checked with UIO_MAXIOV which is 0x400. So for kcalloc() 0x400 iovecs with sizeof(struct rds_iovec) = 0x10 is the closest limit, with 0x10 leftover. Same condition is currently done in rds_cmsg_rdma_args(). [1] WARNING: mm/page_alloc.c:5011 [..] Call Trace: alloc_pages_current+0x18c/0x2a0 mm/mempolicy.c:2267 alloc_pages include/linux/gfp.h:547 [inline] kmalloc_order+0x2e/0xb0 mm/slab_common.c:837 kmalloc_order_trace+0x14/0x120 mm/slab_common.c:853 kmalloc_array include/linux/slab.h:592 [inline] kcalloc include/linux/slab.h:621 [inline] rds_rdma_extra_size+0xb2/0x3b0 net/rds/rdma.c:568 rds_rm_size net/rds/send.c:928 [inline] Reported-by: syzbot+1bd2b07f93745fa38425@syzkaller.appspotmail.com Signed-off-by: Sabyrzhan Tasbolatov Acked-by: Santosh Shilimkar Link: https://lore.kernel.org/r/20210201203233.1324704-1-snovitoll@gmail.com Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- net/rds/rdma.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/rds/rdma.c b/net/rds/rdma.c index 1d0afb1dd77b..6f1a50d50d06 100644 --- a/net/rds/rdma.c +++ b/net/rds/rdma.c @@ -565,6 +565,9 @@ int rds_rdma_extra_size(struct rds_rdma_args *args, if (args->nr_local == 0) return -EINVAL; + if (args->nr_local > UIO_MAXIOV) + return -EMSGSIZE; + iov->iov = kcalloc(args->nr_local, sizeof(struct rds_iovec), GFP_KERNEL); From 57068800258f472d09d7fea549fe587f653e0ede Mon Sep 17 00:00:00 2001 From: Sabyrzhan Tasbolatov Date: Tue, 2 Feb 2021 15:20:59 +0600 Subject: [PATCH 100/103] net/qrtr: restrict user-controlled length in qrtr_tun_write_iter() commit 2a80c15812372e554474b1dba0b1d8e467af295d upstream. syzbot found WARNING in qrtr_tun_write_iter [1] when write_iter length exceeds KMALLOC_MAX_SIZE causing order >= MAX_ORDER condition. Additionally, there is no check for 0 length write. [1] WARNING: mm/page_alloc.c:5011 [..] Call Trace: alloc_pages_current+0x18c/0x2a0 mm/mempolicy.c:2267 alloc_pages include/linux/gfp.h:547 [inline] kmalloc_order+0x2e/0xb0 mm/slab_common.c:837 kmalloc_order_trace+0x14/0x120 mm/slab_common.c:853 kmalloc include/linux/slab.h:557 [inline] kzalloc include/linux/slab.h:682 [inline] qrtr_tun_write_iter+0x8a/0x180 net/qrtr/tun.c:83 call_write_iter include/linux/fs.h:1901 [inline] Reported-by: syzbot+c2a7e5c5211605a90865@syzkaller.appspotmail.com Signed-off-by: Sabyrzhan Tasbolatov Link: https://lore.kernel.org/r/20210202092059.1361381-1-snovitoll@gmail.com Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- net/qrtr/tun.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/net/qrtr/tun.c b/net/qrtr/tun.c index 15ce9b642b25..b238c40a9984 100644 --- a/net/qrtr/tun.c +++ b/net/qrtr/tun.c @@ -80,6 +80,12 @@ static ssize_t qrtr_tun_write_iter(struct kiocb *iocb, struct iov_iter *from) ssize_t ret; void *kbuf; + if (!len) + return -EINVAL; + + if (len > KMALLOC_MAX_SIZE) + return -ENOMEM; + kbuf = kzalloc(len, GFP_KERNEL); if (!kbuf) return -ENOMEM; From e5c376c41a5798451355fb2e0732847be4a64c58 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Thu, 12 Nov 2020 11:31:55 +0100 Subject: [PATCH 101/103] ovl: expand warning in ovl_d_real() commit cef4cbff06fbc3be54d6d79ee139edecc2ee8598 upstream. There was a syzbot report with this warning but insufficient information... Signed-off-by: Miklos Szeredi Signed-off-by: Greg Kroah-Hartman --- fs/overlayfs/super.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c index d23177a53c95..50529a4e7bf3 100644 --- a/fs/overlayfs/super.c +++ b/fs/overlayfs/super.c @@ -79,7 +79,7 @@ static void ovl_dentry_release(struct dentry *dentry) static struct dentry *ovl_d_real(struct dentry *dentry, const struct inode *inode) { - struct dentry *real; + struct dentry *real = NULL, *lower; /* It's an overlay file */ if (inode && d_inode(dentry) == inode) @@ -98,9 +98,10 @@ static struct dentry *ovl_d_real(struct dentry *dentry, if (real && !inode && ovl_has_upperdata(d_inode(dentry))) return real; - real = ovl_dentry_lowerdata(dentry); - if (!real) + lower = ovl_dentry_lowerdata(dentry); + if (!lower) goto bug; + real = lower; /* Handle recursion */ real = d_real(real, inode); @@ -108,8 +109,10 @@ static struct dentry *ovl_d_real(struct dentry *dentry, if (!inode || inode == d_inode(real)) return real; bug: - WARN(1, "ovl_d_real(%pd4, %s:%lu): real dentry not found\n", dentry, - inode ? inode->i_sb->s_id : "NULL", inode ? inode->i_ino : 0); + WARN(1, "%s(%pd4, %s:%lu): real dentry (%p/%lu) not found\n", + __func__, dentry, inode ? inode->i_sb->s_id : "NULL", + inode ? inode->i_ino : 0, real, + real && d_inode(real) ? d_inode(real)->i_ino : 0); return dentry; } From 90ac1981acfaf57f8976e4af8b12516532992f04 Mon Sep 17 00:00:00 2001 From: Andrey Konovalov Date: Fri, 16 Oct 2020 15:57:45 +0200 Subject: [PATCH 102/103] kcov, usb: only collect coverage from __usb_hcd_giveback_urb in softirq commit aee9ddb1d3718d3ba05b50c51622d7792ae749c9 upstream. Currently there's a KCOV remote coverage collection section in __usb_hcd_giveback_urb(). Initially that section was added based on the assumption that usb_hcd_giveback_urb() can only be called in interrupt context as indicated by a comment before it. This is what happens when syzkaller is fuzzing the USB stack via the dummy_hcd driver. As it turns out, it's actually valid to call usb_hcd_giveback_urb() in task context, provided that the caller turned off the interrupts; USB/IP does exactly that. This can lead to a nested KCOV remote coverage collection sections both trying to collect coverage in task context. This isn't supported by KCOV, and leads to a WARNING. Change __usb_hcd_giveback_urb() to only call kcov_remote_*() callbacks when it's being executed in a softirq. As the result, the coverage from USB/IP related usb_hcd_giveback_urb() calls won't be collected, but the WARNING is fixed. A potential future improvement would be to support nested remote coverage collection sections, but this patch doesn't address that. Reviewed-by: Dmitry Vyukov Acked-by: Marco Elver Signed-off-by: Andrey Konovalov Link: https://lore.kernel.org/r/f3a7a153f0719cb53ec385b16e912798bd3e4cf9.1602856358.git.andreyknvl@google.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/hcd.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/drivers/usb/core/hcd.c b/drivers/usb/core/hcd.c index 2c6b9578a7d3..99908d8d2dd3 100644 --- a/drivers/usb/core/hcd.c +++ b/drivers/usb/core/hcd.c @@ -1646,9 +1646,16 @@ static void __usb_hcd_giveback_urb(struct urb *urb) /* pass ownership to the completion handler */ urb->status = status; - kcov_remote_start_usb((u64)urb->dev->bus->busnum); + /* + * This function can be called in task context inside another remote + * coverage collection section, but KCOV doesn't support that kind of + * recursion yet. Only collect coverage in softirq context for now. + */ + if (in_serving_softirq()) + kcov_remote_start_usb((u64)urb->dev->bus->busnum); urb->complete(urb); - kcov_remote_stop(); + if (in_serving_softirq()) + kcov_remote_stop(); usb_anchor_resume_wakeups(anchor); atomic_dec(&urb->use_count); From 13b6016e96f628ac1cfb3c0b342911fd91c9c005 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Wed, 17 Feb 2021 11:02:30 +0100 Subject: [PATCH 103/103] Linux 5.10.17 Tested-by: Pavel Machek (CIP) Tested-by: Jason Self Tested-by: Linux Kernel Functional Testing Tested-by: Guenter Roeck Tested-by: Shuah Khan Tested-by: Ross Schmidt Link: https://lore.kernel.org/r/20210215152719.459796636@linuxfoundation.org Signed-off-by: Greg Kroah-Hartman --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 9a1f26680d83..b740f9c933cb 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 VERSION = 5 PATCHLEVEL = 10 -SUBLEVEL = 16 +SUBLEVEL = 17 EXTRAVERSION = NAME = Kleptomaniac Octopus