From 8e635da0e0d3cb45e32fa79b36218fb98281bc10 Mon Sep 17 00:00:00 2001 From: Claudio Imbrenda Date: Wed, 5 Jul 2023 13:19:37 +0200 Subject: [PATCH 001/203] KVM: s390: pv: fix index value of replaced ASCE MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit c2fceb59bbda16468bda82b002383bff59de89ab ] The index field of the struct page corresponding to a guest ASCE should be 0. When replacing the ASCE in s390_replace_asce(), the index of the new ASCE should also be set to 0. Having the wrong index might lead to the wrong addresses being passed around when notifying pte invalidations, and eventually to validity intercepts (VM crash) if the prefix gets unmapped and the notifier gets called with the wrong address. Reviewed-by: Philippe Mathieu-Daudé Fixes: faa2f72cb356 ("KVM: s390: pv: leak the topmost page table when destroy fails") Reviewed-by: Janosch Frank Signed-off-by: Claudio Imbrenda Message-ID: <20230705111937.33472-3-imbrenda@linux.ibm.com> Signed-off-by: Sasha Levin --- arch/s390/mm/gmap.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/s390/mm/gmap.c b/arch/s390/mm/gmap.c index 03e561608eed..b5a60fbb9664 100644 --- a/arch/s390/mm/gmap.c +++ b/arch/s390/mm/gmap.c @@ -2786,6 +2786,7 @@ int s390_replace_asce(struct gmap *gmap) page = alloc_pages(GFP_KERNEL_ACCOUNT, CRST_ALLOC_ORDER); if (!page) return -ENOMEM; + page->index = 0; table = page_to_virt(page); memcpy(table, gmap->table, 1UL << (CRST_ALLOC_ORDER + PAGE_SHIFT)); From 22786d53817d1452f30373807f8202bb9bf43732 Mon Sep 17 00:00:00 2001 From: Ondrej Mosnacek Date: Tue, 18 Jul 2023 13:56:07 +0200 Subject: [PATCH 002/203] io_uring: don't audit the capability check in io_uring_create() [ Upstream commit 6adc2272aaaf84f34b652cf77f770c6fcc4b8336 ] The check being unconditional may lead to unwanted denials reported by LSMs when a process has the capability granted by DAC, but denied by an LSM. In the case of SELinux such denials are a problem, since they can't be effectively filtered out via the policy and when not silenced, they produce noise that may hide a true problem or an attack. Since not having the capability merely means that the created io_uring context will be accounted against the current user's RLIMIT_MEMLOCK limit, we can disable auditing of denials for this check by using ns_capable_noaudit() instead of capable(). Fixes: 2b188cc1bb85 ("Add io_uring IO interface") Link: https://bugzilla.redhat.com/show_bug.cgi?id=2193317 Signed-off-by: Ondrej Mosnacek Reviewed-by: Jeff Moyer Link: https://lore.kernel.org/r/20230718115607.65652-1-omosnace@redhat.com Signed-off-by: Jens Axboe Signed-off-by: Sasha Levin --- io_uring/io_uring.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c index 51e6ebe72caf..f84584b762d0 100644 --- a/io_uring/io_uring.c +++ b/io_uring/io_uring.c @@ -10431,7 +10431,7 @@ static int io_uring_create(unsigned entries, struct io_uring_params *p, if (!ctx) return -ENOMEM; ctx->compat = in_compat_syscall(); - if (!capable(CAP_IPC_LOCK)) + if (!ns_capable_noaudit(&init_user_ns, CAP_IPC_LOCK)) ctx->user = get_uid(current_user()); /* From 3069fc0326b823c5595cb066d2cc8b3b00c09f15 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Mon, 10 Jul 2023 14:34:25 +0200 Subject: [PATCH 003/203] gpio: tps68470: Make tps68470_gpio_output() always set the initial value [ Upstream commit 5a7adc6c1069ce31ef4f606ae9c05592c80a6ab5 ] Make tps68470_gpio_output() call tps68470_gpio_set() for output-only pins too, so that the initial value passed to gpiod_direction_output() is honored for these pins too. Fixes: 275b13a65547 ("gpio: Add support for TPS68470 GPIOs") Reviewed-by: Andy Shevchenko Reviewed-by: Daniel Scally Tested-by: Daniel Scally Reviewed-by: Sakari Ailus Signed-off-by: Hans de Goede Signed-off-by: Bartosz Golaszewski Signed-off-by: Sasha Levin --- drivers/gpio/gpio-tps68470.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpio/gpio-tps68470.c b/drivers/gpio/gpio-tps68470.c index f7f5f770e0fb..e19eb7c982a1 100644 --- a/drivers/gpio/gpio-tps68470.c +++ b/drivers/gpio/gpio-tps68470.c @@ -91,13 +91,13 @@ static int tps68470_gpio_output(struct gpio_chip *gc, unsigned int offset, struct tps68470_gpio_data *tps68470_gpio = gpiochip_get_data(gc); struct regmap *regmap = tps68470_gpio->tps68470_regmap; + /* Set the initial value */ + tps68470_gpio_set(gc, offset, value); + /* rest are always outputs */ if (offset >= TPS68470_N_REGULAR_GPIO) return 0; - /* Set the initial value */ - tps68470_gpio_set(gc, offset, value); - return regmap_update_bits(regmap, TPS68470_GPIO_CTL_REG_A(offset), TPS68470_GPIO_MODE_MASK, TPS68470_GPIO_MODE_OUT_CMOS); From 52403c3dad2030d669471d3d779be7c5b91f7ab0 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Mon, 19 Jun 2023 17:21:50 +0100 Subject: [PATCH 004/203] btrfs: fix race between quota disable and relocation [ Upstream commit 8a4a0b2a3eaf75ca8854f856ef29690c12b2f531 ] If we disable quotas while we have a relocation of a metadata block group that has extents belonging to the quota root, we can cause the relocation to fail with -ENOENT. This is because relocation builds backref nodes for extents of the quota root and later needs to walk the backrefs and access the quota root - however if in between a task disables quotas, it results in deleting the quota root from the root tree (with btrfs_del_root(), called from btrfs_quota_disable(). This can be sporadically triggered by test case btrfs/255 from fstests: $ ./check btrfs/255 FSTYP -- btrfs PLATFORM -- Linux/x86_64 debian0 6.4.0-rc6-btrfs-next-134+ #1 SMP PREEMPT_DYNAMIC Thu Jun 15 11:59:28 WEST 2023 MKFS_OPTIONS -- /dev/sdc MOUNT_OPTIONS -- /dev/sdc /home/fdmanana/btrfs-tests/scratch_1 btrfs/255 6s ... _check_dmesg: something found in dmesg (see /home/fdmanana/git/hub/xfstests/results//btrfs/255.dmesg) - output mismatch (see /home/fdmanana/git/hub/xfstests/results//btrfs/255.out.bad) # --- tests/btrfs/255.out 2023-03-02 21:47:53.876609426 +0000 # +++ /home/fdmanana/git/hub/xfstests/results//btrfs/255.out.bad 2023-06-16 10:20:39.267563212 +0100 # @@ -1,2 +1,4 @@ # QA output created by 255 # +ERROR: error during balancing '/home/fdmanana/btrfs-tests/scratch_1': No such file or directory # +There may be more info in syslog - try dmesg | tail # Silence is golden # ... (Run 'diff -u /home/fdmanana/git/hub/xfstests/tests/btrfs/255.out /home/fdmanana/git/hub/xfstests/results//btrfs/255.out.bad' to see the entire diff) Ran: btrfs/255 Failures: btrfs/255 Failed 1 of 1 tests To fix this make the quota disable operation take the cleaner mutex, as relocation of a block group also takes this mutex. This is also what we do when deleting a subvolume/snapshot, we take the cleaner mutex in the cleaner kthread (at cleaner_kthread()) and then we call btrfs_del_root() at btrfs_drop_snapshot() while under the protection of the cleaner mutex. Fixes: bed92eae26cc ("Btrfs: qgroup implementation and prototypes") CC: stable@vger.kernel.org # 5.4+ Signed-off-by: Filipe Manana Signed-off-by: David Sterba Signed-off-by: Sasha Levin --- fs/btrfs/qgroup.c | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c index 828a7ff4aebe..a67323c2d41f 100644 --- a/fs/btrfs/qgroup.c +++ b/fs/btrfs/qgroup.c @@ -1202,12 +1202,23 @@ int btrfs_quota_disable(struct btrfs_fs_info *fs_info) int ret = 0; /* - * We need to have subvol_sem write locked, to prevent races between - * concurrent tasks trying to disable quotas, because we will unlock - * and relock qgroup_ioctl_lock across BTRFS_FS_QUOTA_ENABLED changes. + * We need to have subvol_sem write locked to prevent races with + * snapshot creation. */ lockdep_assert_held_write(&fs_info->subvol_sem); + /* + * Lock the cleaner mutex to prevent races with concurrent relocation, + * because relocation may be building backrefs for blocks of the quota + * root while we are deleting the root. This is like dropping fs roots + * of deleted snapshots/subvolumes, we need the same protection. + * + * This also prevents races between concurrent tasks trying to disable + * quotas, because we will unlock and relock qgroup_ioctl_lock across + * BTRFS_FS_QUOTA_ENABLED changes. + */ + mutex_lock(&fs_info->cleaner_mutex); + mutex_lock(&fs_info->qgroup_ioctl_lock); if (!fs_info->quota_root) goto out; @@ -1287,6 +1298,7 @@ out: btrfs_end_transaction(trans); else if (trans) ret = btrfs_end_transaction(trans); + mutex_unlock(&fs_info->cleaner_mutex); return ret; } From 52df40a5c71ed4bd1c6c83398c1f7d8841a9e159 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Thu, 8 Jun 2023 11:27:38 +0100 Subject: [PATCH 005/203] btrfs: fix extent buffer leak after tree mod log failure at split_node() [ Upstream commit ede600e497b1461d06d22a7d17703d9096868bc3 ] At split_node(), if we fail to log the tree mod log copy operation, we return without unlocking the split extent buffer we just allocated and without decrementing the reference we own on it. Fix this by unlocking it and decrementing the ref count before returning. Fixes: 5de865eebb83 ("Btrfs: fix tree mod logging") CC: stable@vger.kernel.org # 5.4+ Reviewed-by: Qu Wenruo Signed-off-by: Filipe Manana Reviewed-by: David Sterba Signed-off-by: David Sterba Signed-off-by: Sasha Levin --- fs/btrfs/ctree.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 41a7ace9998e..814f2f07e74c 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -3589,6 +3589,8 @@ static noinline int split_node(struct btrfs_trans_handle *trans, ret = tree_mod_log_eb_copy(split, c, 0, mid, c_nritems - mid); if (ret) { + btrfs_tree_unlock(split); + free_extent_buffer(split); btrfs_abort_transaction(trans, ret); return ret; } From 41e90f0e50f55390c505d8a5dff64f4a1a46d76a Mon Sep 17 00:00:00 2001 From: Markus Elfring Date: Fri, 2 Feb 2018 17:24:57 +0100 Subject: [PATCH 006/203] i2c: Delete error messages for failed memory allocations [ Upstream commit 6b3b21a8542fd2fb6ffc61bc13b9419f0c58ebad ] These issues were detected by using the Coccinelle software. Signed-off-by: Markus Elfring Signed-off-by: Wolfram Sang Stable-dep-of: 05f933d5f731 ("i2c: nomadik: Remove a useless call in the remove function") Signed-off-by: Sasha Levin --- drivers/i2c/busses/i2c-ibm_iic.c | 4 +--- drivers/i2c/busses/i2c-nomadik.c | 1 - drivers/i2c/busses/i2c-sh7760.c | 1 - drivers/i2c/busses/i2c-tiny-usb.c | 4 +--- 4 files changed, 2 insertions(+), 8 deletions(-) diff --git a/drivers/i2c/busses/i2c-ibm_iic.c b/drivers/i2c/busses/i2c-ibm_iic.c index 9f71daf6db64..c073f5b8833a 100644 --- a/drivers/i2c/busses/i2c-ibm_iic.c +++ b/drivers/i2c/busses/i2c-ibm_iic.c @@ -694,10 +694,8 @@ static int iic_probe(struct platform_device *ofdev) int ret; dev = kzalloc(sizeof(*dev), GFP_KERNEL); - if (!dev) { - dev_err(&ofdev->dev, "failed to allocate device data\n"); + if (!dev) return -ENOMEM; - } platform_set_drvdata(ofdev, dev); diff --git a/drivers/i2c/busses/i2c-nomadik.c b/drivers/i2c/busses/i2c-nomadik.c index a3363b20f168..b456e4ae8830 100644 --- a/drivers/i2c/busses/i2c-nomadik.c +++ b/drivers/i2c/busses/i2c-nomadik.c @@ -972,7 +972,6 @@ static int nmk_i2c_probe(struct amba_device *adev, const struct amba_id *id) dev = devm_kzalloc(&adev->dev, sizeof(struct nmk_i2c_dev), GFP_KERNEL); if (!dev) { - dev_err(&adev->dev, "cannot allocate memory\n"); ret = -ENOMEM; goto err_no_mem; } diff --git a/drivers/i2c/busses/i2c-sh7760.c b/drivers/i2c/busses/i2c-sh7760.c index 319d1fa617c8..a0ccc5d00987 100644 --- a/drivers/i2c/busses/i2c-sh7760.c +++ b/drivers/i2c/busses/i2c-sh7760.c @@ -445,7 +445,6 @@ static int sh7760_i2c_probe(struct platform_device *pdev) id = kzalloc(sizeof(struct cami2c), GFP_KERNEL); if (!id) { - dev_err(&pdev->dev, "no mem for private data\n"); ret = -ENOMEM; goto out0; } diff --git a/drivers/i2c/busses/i2c-tiny-usb.c b/drivers/i2c/busses/i2c-tiny-usb.c index 7279ca0eaa2d..d1fa9ff5aeab 100644 --- a/drivers/i2c/busses/i2c-tiny-usb.c +++ b/drivers/i2c/busses/i2c-tiny-usb.c @@ -226,10 +226,8 @@ static int i2c_tiny_usb_probe(struct usb_interface *interface, /* allocate memory for our device state and initialize it */ dev = kzalloc(sizeof(*dev), GFP_KERNEL); - if (dev == NULL) { - dev_err(&interface->dev, "Out of memory\n"); + if (!dev) goto error; - } dev->usb_dev = usb_get_dev(interface_to_usbdev(interface)); dev->interface = interface; From 04b114067849af42233c9d67c1bf28ab74c40e51 Mon Sep 17 00:00:00 2001 From: Markus Elfring Date: Fri, 2 Feb 2018 14:50:09 +0100 Subject: [PATCH 007/203] i2c: Improve size determinations [ Upstream commit 06e989578232da33a7fe96b04191b862af8b2cec ] Replace the specification of a data structure by a pointer dereference as the parameter for the operator "sizeof" to make the corresponding size determination a bit safer according to the Linux coding style convention. This issue was detected by using the Coccinelle software. Signed-off-by: Markus Elfring Signed-off-by: Wolfram Sang Stable-dep-of: 05f933d5f731 ("i2c: nomadik: Remove a useless call in the remove function") Signed-off-by: Sasha Levin --- drivers/i2c/busses/i2c-nomadik.c | 2 +- drivers/i2c/busses/i2c-sh7760.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/i2c/busses/i2c-nomadik.c b/drivers/i2c/busses/i2c-nomadik.c index b456e4ae8830..4eb087575d96 100644 --- a/drivers/i2c/busses/i2c-nomadik.c +++ b/drivers/i2c/busses/i2c-nomadik.c @@ -970,7 +970,7 @@ static int nmk_i2c_probe(struct amba_device *adev, const struct amba_id *id) struct i2c_vendor_data *vendor = id->data; u32 max_fifo_threshold = (vendor->fifodepth / 2) - 1; - dev = devm_kzalloc(&adev->dev, sizeof(struct nmk_i2c_dev), GFP_KERNEL); + dev = devm_kzalloc(&adev->dev, sizeof(*dev), GFP_KERNEL); if (!dev) { ret = -ENOMEM; goto err_no_mem; diff --git a/drivers/i2c/busses/i2c-sh7760.c b/drivers/i2c/busses/i2c-sh7760.c index a0ccc5d00987..051b904cb35f 100644 --- a/drivers/i2c/busses/i2c-sh7760.c +++ b/drivers/i2c/busses/i2c-sh7760.c @@ -443,7 +443,7 @@ static int sh7760_i2c_probe(struct platform_device *pdev) goto out0; } - id = kzalloc(sizeof(struct cami2c), GFP_KERNEL); + id = kzalloc(sizeof(*id), GFP_KERNEL); if (!id) { ret = -ENOMEM; goto out0; From ec954a4ab0c8d8cb39a4b3bfcd1be475a1605de4 Mon Sep 17 00:00:00 2001 From: Andi Shyti Date: Sun, 11 Jun 2023 03:36:59 +0200 Subject: [PATCH 008/203] i2c: nomadik: Remove unnecessary goto label [ Upstream commit 1c5d33fff0d375e4ab7c4261dc62a286babbb4c6 ] The err_no_mem goto label doesn't do anything. Remove it. Signed-off-by: Andi Shyti Reviewed-by: Linus Walleij Signed-off-by: Wolfram Sang Stable-dep-of: 05f933d5f731 ("i2c: nomadik: Remove a useless call in the remove function") Signed-off-by: Sasha Levin --- drivers/i2c/busses/i2c-nomadik.c | 21 ++++++++------------- 1 file changed, 8 insertions(+), 13 deletions(-) diff --git a/drivers/i2c/busses/i2c-nomadik.c b/drivers/i2c/busses/i2c-nomadik.c index 4eb087575d96..03a08fa679bb 100644 --- a/drivers/i2c/busses/i2c-nomadik.c +++ b/drivers/i2c/busses/i2c-nomadik.c @@ -971,10 +971,9 @@ static int nmk_i2c_probe(struct amba_device *adev, const struct amba_id *id) u32 max_fifo_threshold = (vendor->fifodepth / 2) - 1; dev = devm_kzalloc(&adev->dev, sizeof(*dev), GFP_KERNEL); - if (!dev) { - ret = -ENOMEM; - goto err_no_mem; - } + if (!dev) + return -ENOMEM; + dev->vendor = vendor; dev->adev = adev; nmk_i2c_of_probe(np, dev); @@ -995,30 +994,27 @@ static int nmk_i2c_probe(struct amba_device *adev, const struct amba_id *id) dev->virtbase = devm_ioremap(&adev->dev, adev->res.start, resource_size(&adev->res)); - if (!dev->virtbase) { - ret = -ENOMEM; - goto err_no_mem; - } + if (!dev->virtbase) + return -ENOMEM; dev->irq = adev->irq[0]; ret = devm_request_irq(&adev->dev, dev->irq, i2c_irq_handler, 0, DRIVER_NAME, dev); if (ret) { dev_err(&adev->dev, "cannot claim the irq %d\n", dev->irq); - goto err_no_mem; + return ret; } dev->clk = devm_clk_get(&adev->dev, NULL); if (IS_ERR(dev->clk)) { dev_err(&adev->dev, "could not get i2c clock\n"); - ret = PTR_ERR(dev->clk); - goto err_no_mem; + return PTR_ERR(dev->clk); } ret = clk_prepare_enable(dev->clk); if (ret) { dev_err(&adev->dev, "can't prepare_enable clock\n"); - goto err_no_mem; + return ret; } init_hw(dev); @@ -1049,7 +1045,6 @@ static int nmk_i2c_probe(struct amba_device *adev, const struct amba_id *id) err_no_adap: clk_disable_unprepare(dev->clk); - err_no_mem: return ret; } From a7ab5d7943b55aa4adc7eb1bed71dde7accb7b18 Mon Sep 17 00:00:00 2001 From: Andi Shyti Date: Sun, 11 Jun 2023 03:37:00 +0200 Subject: [PATCH 009/203] i2c: nomadik: Use devm_clk_get_enabled() [ Upstream commit 9c7174db4cdd111e10d19eed5c36fd978a14c8a2 ] Replace the pair of functions, devm_clk_get() and clk_prepare_enable(), with a single function devm_clk_get_enabled(). Signed-off-by: Andi Shyti Reviewed-by: Linus Walleij Signed-off-by: Wolfram Sang Stable-dep-of: 05f933d5f731 ("i2c: nomadik: Remove a useless call in the remove function") Signed-off-by: Sasha Levin --- drivers/i2c/busses/i2c-nomadik.c | 18 +++--------------- 1 file changed, 3 insertions(+), 15 deletions(-) diff --git a/drivers/i2c/busses/i2c-nomadik.c b/drivers/i2c/busses/i2c-nomadik.c index 03a08fa679bb..344d1daa4ea0 100644 --- a/drivers/i2c/busses/i2c-nomadik.c +++ b/drivers/i2c/busses/i2c-nomadik.c @@ -1005,18 +1005,12 @@ static int nmk_i2c_probe(struct amba_device *adev, const struct amba_id *id) return ret; } - dev->clk = devm_clk_get(&adev->dev, NULL); + dev->clk = devm_clk_get_enabled(&adev->dev, NULL); if (IS_ERR(dev->clk)) { - dev_err(&adev->dev, "could not get i2c clock\n"); + dev_err(&adev->dev, "could enable i2c clock\n"); return PTR_ERR(dev->clk); } - ret = clk_prepare_enable(dev->clk); - if (ret) { - dev_err(&adev->dev, "can't prepare_enable clock\n"); - return ret; - } - init_hw(dev); adap = &dev->adap; @@ -1037,16 +1031,11 @@ static int nmk_i2c_probe(struct amba_device *adev, const struct amba_id *id) ret = i2c_add_adapter(adap); if (ret) - goto err_no_adap; + return ret; pm_runtime_put(&adev->dev); return 0; - - err_no_adap: - clk_disable_unprepare(dev->clk); - - return ret; } static void nmk_i2c_remove(struct amba_device *adev) @@ -1060,7 +1049,6 @@ static void nmk_i2c_remove(struct amba_device *adev) clear_all_interrupts(dev); /* disable the controller */ i2c_clr_bit(dev->virtbase + I2C_CR, I2C_CR_PE); - clk_disable_unprepare(dev->clk); release_mem_region(res->start, resource_size(res)); } From 9fd349c8858e151405749c44a714ce8726cc666a Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Tue, 4 Jul 2023 21:50:28 +0200 Subject: [PATCH 010/203] i2c: nomadik: Remove a useless call in the remove function [ Upstream commit 05f933d5f7318b03ff2028c1704dc867ac16f2c7 ] Since commit 235602146ec9 ("i2c-nomadik: turn the platform driver to an amba driver"), there is no more request_mem_region() call in this driver. So remove the release_mem_region() call from the remove function which is likely a left over. Fixes: 235602146ec9 ("i2c-nomadik: turn the platform driver to an amba driver") Cc: # v3.6+ Acked-by: Linus Walleij Reviewed-by: Andi Shyti Signed-off-by: Christophe JAILLET Signed-off-by: Wolfram Sang Signed-off-by: Sasha Levin --- drivers/i2c/busses/i2c-nomadik.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/i2c/busses/i2c-nomadik.c b/drivers/i2c/busses/i2c-nomadik.c index 344d1daa4ea0..a06c4b76894a 100644 --- a/drivers/i2c/busses/i2c-nomadik.c +++ b/drivers/i2c/busses/i2c-nomadik.c @@ -1040,7 +1040,6 @@ static int nmk_i2c_probe(struct amba_device *adev, const struct amba_id *id) static void nmk_i2c_remove(struct amba_device *adev) { - struct resource *res = &adev->res; struct nmk_i2c_dev *dev = amba_get_drvdata(adev); i2c_del_adapter(&dev->adap); @@ -1049,7 +1048,6 @@ static void nmk_i2c_remove(struct amba_device *adev) clear_all_interrupts(dev); /* disable the controller */ i2c_clr_bit(dev->virtbase + I2C_CR, I2C_CR_PE); - release_mem_region(res->start, resource_size(res)); } static struct i2c_vendor_data vendor_stn8815 = { From ffc0ee491f048043d60c1cef0001a02cdebb805d Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Tue, 20 Jun 2023 14:44:55 -0500 Subject: [PATCH 011/203] PCI/ASPM: Return 0 or -ETIMEDOUT from pcie_retrain_link() [ Upstream commit f5297a01ee805d7fa569d288ed65fc0f9ac9b03d ] "pcie_retrain_link" is not a question with a true/false answer, so "bool" isn't quite the right return type. Return 0 for success or -ETIMEDOUT if the retrain failed. No functional change intended. [bhelgaas: based on Ilpo's patch below] Link: https://lore.kernel.org/r/20230502083923.34562-1-ilpo.jarvinen@linux.intel.com Signed-off-by: Bjorn Helgaas Stable-dep-of: e7e39756363a ("PCI/ASPM: Avoid link retraining race") Signed-off-by: Sasha Levin --- drivers/pci/pcie/aspm.c | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/drivers/pci/pcie/aspm.c b/drivers/pci/pcie/aspm.c index 51da8ba67d21..166cb0077023 100644 --- a/drivers/pci/pcie/aspm.c +++ b/drivers/pci/pcie/aspm.c @@ -192,7 +192,7 @@ static void pcie_clkpm_cap_init(struct pcie_link_state *link, int blacklist) link->clkpm_disable = blacklist ? 1 : 0; } -static bool pcie_retrain_link(struct pcie_link_state *link) +static int pcie_retrain_link(struct pcie_link_state *link) { struct pci_dev *parent = link->pdev; unsigned long end_jiffies; @@ -219,7 +219,9 @@ static bool pcie_retrain_link(struct pcie_link_state *link) break; msleep(1); } while (time_before(jiffies, end_jiffies)); - return !(reg16 & PCI_EXP_LNKSTA_LT); + if (reg16 & PCI_EXP_LNKSTA_LT) + return -ETIMEDOUT; + return 0; } /* @@ -288,15 +290,15 @@ static void pcie_aspm_configure_common_clock(struct pcie_link_state *link) reg16 &= ~PCI_EXP_LNKCTL_CCC; pcie_capability_write_word(parent, PCI_EXP_LNKCTL, reg16); - if (pcie_retrain_link(link)) - return; + if (pcie_retrain_link(link)) { - /* Training failed. Restore common clock configurations */ - pci_err(parent, "ASPM: Could not configure common clock\n"); - list_for_each_entry(child, &linkbus->devices, bus_list) - pcie_capability_write_word(child, PCI_EXP_LNKCTL, + /* Training failed. Restore common clock configurations */ + pci_err(parent, "ASPM: Could not configure common clock\n"); + list_for_each_entry(child, &linkbus->devices, bus_list) + pcie_capability_write_word(child, PCI_EXP_LNKCTL, child_reg[PCI_FUNC(child->devfn)]); - pcie_capability_write_word(parent, PCI_EXP_LNKCTL, parent_reg); + pcie_capability_write_word(parent, PCI_EXP_LNKCTL, parent_reg); + } } /* Convert L0s latency encoding to ns */ From 9f283ca643ddce6d7580780d7421ec80db59f9c2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= Date: Tue, 20 Jun 2023 14:49:33 -0500 Subject: [PATCH 012/203] PCI/ASPM: Factor out pcie_wait_for_retrain() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 9c7f136433d26592cb4d9cd00b4e15c33d9797c6 ] Factor pcie_wait_for_retrain() out from pcie_retrain_link(). No functional change intended. [bhelgaas: split out from https: //lore.kernel.org/r/20230502083923.34562-1-ilpo.jarvinen@linux.intel.com] Signed-off-by: Ilpo Järvinen Signed-off-by: Bjorn Helgaas Stable-dep-of: e7e39756363a ("PCI/ASPM: Avoid link retraining race") Signed-off-by: Sasha Levin --- drivers/pci/pcie/aspm.c | 30 ++++++++++++++++++------------ 1 file changed, 18 insertions(+), 12 deletions(-) diff --git a/drivers/pci/pcie/aspm.c b/drivers/pci/pcie/aspm.c index 166cb0077023..927f0a02489c 100644 --- a/drivers/pci/pcie/aspm.c +++ b/drivers/pci/pcie/aspm.c @@ -192,10 +192,26 @@ static void pcie_clkpm_cap_init(struct pcie_link_state *link, int blacklist) link->clkpm_disable = blacklist ? 1 : 0; } +static int pcie_wait_for_retrain(struct pci_dev *pdev) +{ + unsigned long end_jiffies; + u16 reg16; + + /* Wait for Link Training to be cleared by hardware */ + end_jiffies = jiffies + LINK_RETRAIN_TIMEOUT; + do { + pcie_capability_read_word(pdev, PCI_EXP_LNKSTA, ®16); + if (!(reg16 & PCI_EXP_LNKSTA_LT)) + return 0; + msleep(1); + } while (time_before(jiffies, end_jiffies)); + + return -ETIMEDOUT; +} + static int pcie_retrain_link(struct pcie_link_state *link) { struct pci_dev *parent = link->pdev; - unsigned long end_jiffies; u16 reg16; pcie_capability_read_word(parent, PCI_EXP_LNKCTL, ®16); @@ -211,17 +227,7 @@ static int pcie_retrain_link(struct pcie_link_state *link) pcie_capability_write_word(parent, PCI_EXP_LNKCTL, reg16); } - /* Wait for link training end. Break out after waiting for timeout */ - end_jiffies = jiffies + LINK_RETRAIN_TIMEOUT; - do { - pcie_capability_read_word(parent, PCI_EXP_LNKSTA, ®16); - if (!(reg16 & PCI_EXP_LNKSTA_LT)) - break; - msleep(1); - } while (time_before(jiffies, end_jiffies)); - if (reg16 & PCI_EXP_LNKSTA_LT) - return -ETIMEDOUT; - return 0; + return pcie_wait_for_retrain(parent); } /* From 838d6e86ec74e1017dd5f9f4ebf97a8ada8f110d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= Date: Tue, 2 May 2023 11:39:23 +0300 Subject: [PATCH 013/203] PCI/ASPM: Avoid link retraining race MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit e7e39756363ad5bd83ddeae1063193d0f13870fd ] PCIe r6.0.1, sec 7.5.3.7, recommends setting the link control parameters, then waiting for the Link Training bit to be clear before setting the Retrain Link bit. This avoids a race where the LTSSM may not use the updated parameters if it is already in the midst of link training because of other normal link activity. Wait for the Link Training bit to be clear before toggling the Retrain Link bit to ensure that the LTSSM uses the updated link control parameters. [bhelgaas: commit log, return 0 (success)/-ETIMEDOUT instead of bool for both pcie_wait_for_retrain() and the existing pcie_retrain_link()] Suggested-by: Lukas Wunner Fixes: 7d715a6c1ae5 ("PCI: add PCI Express ASPM support") Link: https://lore.kernel.org/r/20230502083923.34562-1-ilpo.jarvinen@linux.intel.com Signed-off-by: Ilpo Järvinen Signed-off-by: Bjorn Helgaas Reviewed-by: Lukas Wunner Cc: stable@vger.kernel.org Signed-off-by: Sasha Levin --- drivers/pci/pcie/aspm.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/drivers/pci/pcie/aspm.c b/drivers/pci/pcie/aspm.c index 927f0a02489c..7a3cf8aaec25 100644 --- a/drivers/pci/pcie/aspm.c +++ b/drivers/pci/pcie/aspm.c @@ -212,8 +212,19 @@ static int pcie_wait_for_retrain(struct pci_dev *pdev) static int pcie_retrain_link(struct pcie_link_state *link) { struct pci_dev *parent = link->pdev; + int rc; u16 reg16; + /* + * Ensure the updated LNKCTL parameters are used during link + * training by checking that there is no ongoing link training to + * avoid LTSSM race as recommended in Implementation Note at the + * end of PCIe r6.0.1 sec 7.5.3.7. + */ + rc = pcie_wait_for_retrain(parent); + if (rc) + return rc; + pcie_capability_read_word(parent, PCI_EXP_LNKCTL, ®16); reg16 |= PCI_EXP_LNKCTL_RL; pcie_capability_write_word(parent, PCI_EXP_LNKCTL, reg16); From 793123331007ac0e96135b81e87c477375376dd1 Mon Sep 17 00:00:00 2001 From: Alexander Aring Date: Mon, 4 Apr 2022 16:06:32 -0400 Subject: [PATCH 014/203] dlm: cleanup plock_op vs plock_xop [ Upstream commit bcbb4ba6c9ba81e6975b642a2cade68044cd8a66 ] Lately the different casting between plock_op and plock_xop and list holders which was involved showed some issues which were hard to see. This patch removes the "plock_xop" structure and introduces a "struct plock_async_data". This structure will be set in "struct plock_op" in case of asynchronous lock handling as the original "plock_xop" was made for. There is no need anymore to cast pointers around for additional fields in case of asynchronous lock handling. As disadvantage another allocation was introduces but only needed in the asynchronous case which is currently only used in combination with nfs lockd. Signed-off-by: Alexander Aring Signed-off-by: David Teigland Stable-dep-of: 59e45c758ca1 ("fs: dlm: interrupt posix locks only when process is killed") Signed-off-by: Sasha Levin --- fs/dlm/plock.c | 77 ++++++++++++++++++++++++++++++-------------------- 1 file changed, 46 insertions(+), 31 deletions(-) diff --git a/fs/dlm/plock.c b/fs/dlm/plock.c index edce0b25cd90..e70e23eca03e 100644 --- a/fs/dlm/plock.c +++ b/fs/dlm/plock.c @@ -19,21 +19,21 @@ static struct list_head recv_list; static wait_queue_head_t send_wq; static wait_queue_head_t recv_wq; +struct plock_async_data { + void *fl; + void *file; + struct file_lock flc; + int (*callback)(struct file_lock *fl, int result); +}; + struct plock_op { struct list_head list; int done; struct dlm_plock_info info; - int (*callback)(struct file_lock *fl, int result); + /* if set indicates async handling */ + struct plock_async_data *data; }; -struct plock_xop { - struct plock_op xop; - void *fl; - void *file; - struct file_lock flc; -}; - - static inline void set_version(struct dlm_plock_info *info) { info->version[0] = DLM_PLOCK_VERSION_MAJOR; @@ -58,6 +58,12 @@ static int check_version(struct dlm_plock_info *info) return 0; } +static void dlm_release_plock_op(struct plock_op *op) +{ + kfree(op->data); + kfree(op); +} + static void send_op(struct plock_op *op) { set_version(&op->info); @@ -101,22 +107,21 @@ static void do_unlock_close(struct dlm_ls *ls, u64 number, int dlm_posix_lock(dlm_lockspace_t *lockspace, u64 number, struct file *file, int cmd, struct file_lock *fl) { + struct plock_async_data *op_data; struct dlm_ls *ls; struct plock_op *op; - struct plock_xop *xop; int rv; ls = dlm_find_lockspace_local(lockspace); if (!ls) return -EINVAL; - xop = kzalloc(sizeof(*xop), GFP_NOFS); - if (!xop) { + op = kzalloc(sizeof(*op), GFP_NOFS); + if (!op) { rv = -ENOMEM; goto out; } - op = &xop->xop; op->info.optype = DLM_PLOCK_OP_LOCK; op->info.pid = fl->fl_pid; op->info.ex = (fl->fl_type == F_WRLCK); @@ -125,22 +130,32 @@ int dlm_posix_lock(dlm_lockspace_t *lockspace, u64 number, struct file *file, op->info.number = number; op->info.start = fl->fl_start; op->info.end = fl->fl_end; + /* async handling */ if (fl->fl_lmops && fl->fl_lmops->lm_grant) { + op_data = kzalloc(sizeof(*op_data), GFP_NOFS); + if (!op_data) { + dlm_release_plock_op(op); + rv = -ENOMEM; + goto out; + } + /* fl_owner is lockd which doesn't distinguish processes on the nfs client */ op->info.owner = (__u64) fl->fl_pid; - op->callback = fl->fl_lmops->lm_grant; - locks_init_lock(&xop->flc); - locks_copy_lock(&xop->flc, fl); - xop->fl = fl; - xop->file = file; + op_data->callback = fl->fl_lmops->lm_grant; + locks_init_lock(&op_data->flc); + locks_copy_lock(&op_data->flc, fl); + op_data->fl = fl; + op_data->file = file; + + op->data = op_data; } else { op->info.owner = (__u64)(long) fl->fl_owner; } send_op(op); - if (!op->callback) { + if (!op->data) { rv = wait_event_interruptible(recv_wq, (op->done != 0)); if (rv == -ERESTARTSYS) { log_debug(ls, "dlm_posix_lock: wait killed %llx", @@ -148,7 +163,7 @@ int dlm_posix_lock(dlm_lockspace_t *lockspace, u64 number, struct file *file, spin_lock(&ops_lock); list_del(&op->list); spin_unlock(&ops_lock); - kfree(xop); + dlm_release_plock_op(op); do_unlock_close(ls, number, file, fl); goto out; } @@ -173,7 +188,7 @@ int dlm_posix_lock(dlm_lockspace_t *lockspace, u64 number, struct file *file, (unsigned long long)number); } - kfree(xop); + dlm_release_plock_op(op); out: dlm_put_lockspace(ls); return rv; @@ -183,11 +198,11 @@ EXPORT_SYMBOL_GPL(dlm_posix_lock); /* Returns failure iff a successful lock operation should be canceled */ static int dlm_plock_callback(struct plock_op *op) { + struct plock_async_data *op_data = op->data; struct file *file; struct file_lock *fl; struct file_lock *flc; int (*notify)(struct file_lock *fl, int result) = NULL; - struct plock_xop *xop = (struct plock_xop *)op; int rv = 0; spin_lock(&ops_lock); @@ -199,10 +214,10 @@ static int dlm_plock_callback(struct plock_op *op) spin_unlock(&ops_lock); /* check if the following 2 are still valid or make a copy */ - file = xop->file; - flc = &xop->flc; - fl = xop->fl; - notify = op->callback; + file = op_data->file; + flc = &op_data->flc; + fl = op_data->fl; + notify = op_data->callback; if (op->info.rv) { notify(fl, op->info.rv); @@ -233,7 +248,7 @@ static int dlm_plock_callback(struct plock_op *op) } out: - kfree(xop); + dlm_release_plock_op(op); return rv; } @@ -303,7 +318,7 @@ int dlm_posix_unlock(dlm_lockspace_t *lockspace, u64 number, struct file *file, rv = 0; out_free: - kfree(op); + dlm_release_plock_op(op); out: dlm_put_lockspace(ls); fl->fl_flags = fl_flags; @@ -371,7 +386,7 @@ int dlm_posix_get(dlm_lockspace_t *lockspace, u64 number, struct file *file, rv = 0; } - kfree(op); + dlm_release_plock_op(op); out: dlm_put_lockspace(ls); return rv; @@ -407,7 +422,7 @@ static ssize_t dev_read(struct file *file, char __user *u, size_t count, (the process did not make an unlock call). */ if (op->info.flags & DLM_PLOCK_FL_CLOSE) - kfree(op); + dlm_release_plock_op(op); if (copy_to_user(u, &info, sizeof(info))) return -EFAULT; @@ -439,7 +454,7 @@ static ssize_t dev_write(struct file *file, const char __user *u, size_t count, op->info.owner == info.owner) { list_del_init(&op->list); memcpy(&op->info, &info, sizeof(info)); - if (op->callback) + if (op->data) do_callback = 1; else op->done = 1; From 0f6741acfd5bd221bc5ca2abcfbbd225b103afdb Mon Sep 17 00:00:00 2001 From: Alexander Aring Date: Mon, 4 Apr 2022 16:06:33 -0400 Subject: [PATCH 015/203] dlm: rearrange async condition return [ Upstream commit a800ba77fd285c6391a82819867ac64e9ab3af46 ] This patch moves the return of FILE_LOCK_DEFERRED a little bit earlier than checking afterwards again if the request was an asynchronous request. Signed-off-by: Alexander Aring Signed-off-by: David Teigland Stable-dep-of: 59e45c758ca1 ("fs: dlm: interrupt posix locks only when process is killed") Signed-off-by: Sasha Levin --- fs/dlm/plock.c | 27 +++++++++++++-------------- 1 file changed, 13 insertions(+), 14 deletions(-) diff --git a/fs/dlm/plock.c b/fs/dlm/plock.c index e70e23eca03e..01fb7d8c0bca 100644 --- a/fs/dlm/plock.c +++ b/fs/dlm/plock.c @@ -149,26 +149,25 @@ int dlm_posix_lock(dlm_lockspace_t *lockspace, u64 number, struct file *file, op_data->file = file; op->data = op_data; + + send_op(op); + rv = FILE_LOCK_DEFERRED; + goto out; } else { op->info.owner = (__u64)(long) fl->fl_owner; } send_op(op); - if (!op->data) { - rv = wait_event_interruptible(recv_wq, (op->done != 0)); - if (rv == -ERESTARTSYS) { - log_debug(ls, "dlm_posix_lock: wait killed %llx", - (unsigned long long)number); - spin_lock(&ops_lock); - list_del(&op->list); - spin_unlock(&ops_lock); - dlm_release_plock_op(op); - do_unlock_close(ls, number, file, fl); - goto out; - } - } else { - rv = FILE_LOCK_DEFERRED; + rv = wait_event_interruptible(recv_wq, (op->done != 0)); + if (rv == -ERESTARTSYS) { + log_debug(ls, "%s: wait killed %llx", __func__, + (unsigned long long)number); + spin_lock(&ops_lock); + list_del(&op->list); + spin_unlock(&ops_lock); + dlm_release_plock_op(op); + do_unlock_close(ls, number, file, fl); goto out; } From 1815d9bf02b7f00a1490f24e6a3d5000e4fb240b Mon Sep 17 00:00:00 2001 From: Alexander Aring Date: Fri, 19 May 2023 11:21:26 -0400 Subject: [PATCH 016/203] fs: dlm: interrupt posix locks only when process is killed [ Upstream commit 59e45c758ca1b9893ac923dd63536da946ac333b ] If a posix lock request is waiting for a result from user space (dlm_controld), do not let it be interrupted unless the process is killed. This reverts commit a6b1533e9a57 ("dlm: make posix locks interruptible"). The problem with the interruptible change is that all locks were cleared on any signal interrupt. If a signal was received that did not terminate the process, the process could continue running after all its dlm posix locks had been cleared. A future patch will add cancelation to allow proper interruption. Cc: stable@vger.kernel.org Fixes: a6b1533e9a57 ("dlm: make posix locks interruptible") Signed-off-by: Alexander Aring Signed-off-by: David Teigland Signed-off-by: Sasha Levin --- fs/dlm/plock.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/dlm/plock.c b/fs/dlm/plock.c index 01fb7d8c0bca..f3482e936cc2 100644 --- a/fs/dlm/plock.c +++ b/fs/dlm/plock.c @@ -159,7 +159,7 @@ int dlm_posix_lock(dlm_lockspace_t *lockspace, u64 number, struct file *file, send_op(op); - rv = wait_event_interruptible(recv_wq, (op->done != 0)); + rv = wait_event_killable(recv_wq, (op->done != 0)); if (rv == -ERESTARTSYS) { log_debug(ls, "%s: wait killed %llx", __func__, (unsigned long long)number); From 5f6d5b58c59e6ded30f60e899b8c8bae80b1c5a8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Mon, 21 Sep 2020 13:05:54 +0200 Subject: [PATCH 017/203] drm/ttm: add ttm_bo_pin()/ttm_bo_unpin() v2 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit deb0814b43f370a448a498409d949e38c9d8f02e ] As an alternative to the placement flag add a pin count to the ttm buffer object. v2: add dma_resv_assert_help() calls Signed-off-by: Christian König Reviewed-by: Dave Airlie Reviewed-by: Huang Rui Link: https://patchwork.freedesktop.org/patch/391596/?series=81973&rev=1 Stable-dep-of: a2848d08742c ("drm/ttm: never consider pinned BOs for eviction&swap") Signed-off-by: Sasha Levin --- drivers/gpu/drm/ttm/ttm_bo.c | 9 ++++++--- drivers/gpu/drm/ttm/ttm_bo_util.c | 2 +- include/drm/ttm/ttm_bo_api.h | 26 ++++++++++++++++++++++++++ 3 files changed, 33 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c index f673292eec9d..9a05caec3c99 100644 --- a/drivers/gpu/drm/ttm/ttm_bo.c +++ b/drivers/gpu/drm/ttm/ttm_bo.c @@ -115,7 +115,7 @@ static void ttm_bo_add_mem_to_lru(struct ttm_buffer_object *bo, struct ttm_bo_device *bdev = bo->bdev; struct ttm_resource_manager *man; - if (!list_empty(&bo->lru)) + if (!list_empty(&bo->lru) || bo->pin_count) return; if (mem->placement & TTM_PL_FLAG_NO_EVICT) @@ -165,7 +165,8 @@ void ttm_bo_move_to_lru_tail(struct ttm_buffer_object *bo, ttm_bo_del_from_lru(bo); ttm_bo_add_mem_to_lru(bo, &bo->mem); - if (bulk && !(bo->mem.placement & TTM_PL_FLAG_NO_EVICT)) { + if (bulk && !(bo->mem.placement & TTM_PL_FLAG_NO_EVICT) && + !bo->pin_count) { switch (bo->mem.mem_type) { case TTM_PL_TT: ttm_bo_bulk_move_set_pos(&bulk->tt[bo->priority], bo); @@ -544,8 +545,9 @@ static void ttm_bo_release(struct kref *kref) * shrinkers, now that they are queued for * destruction. */ - if (bo->mem.placement & TTM_PL_FLAG_NO_EVICT) { + if (bo->mem.placement & TTM_PL_FLAG_NO_EVICT || bo->pin_count) { bo->mem.placement &= ~TTM_PL_FLAG_NO_EVICT; + bo->pin_count = 0; ttm_bo_del_from_lru(bo); ttm_bo_add_mem_to_lru(bo, &bo->mem); } @@ -1174,6 +1176,7 @@ int ttm_bo_init_reserved(struct ttm_bo_device *bdev, bo->moving = NULL; bo->mem.placement = TTM_PL_FLAG_CACHED; bo->acc_size = acc_size; + bo->pin_count = 0; bo->sg = sg; if (resv) { bo->base.resv = resv; diff --git a/drivers/gpu/drm/ttm/ttm_bo_util.c b/drivers/gpu/drm/ttm/ttm_bo_util.c index fb2a25f8408f..1968df9743fc 100644 --- a/drivers/gpu/drm/ttm/ttm_bo_util.c +++ b/drivers/gpu/drm/ttm/ttm_bo_util.c @@ -352,7 +352,6 @@ static int ttm_buffer_object_transfer(struct ttm_buffer_object *bo, return -ENOMEM; fbo->base = *bo; - fbo->base.mem.placement |= TTM_PL_FLAG_NO_EVICT; ttm_bo_get(bo); fbo->bo = bo; @@ -372,6 +371,7 @@ static int ttm_buffer_object_transfer(struct ttm_buffer_object *bo, kref_init(&fbo->base.kref); fbo->base.destroy = &ttm_transfered_destroy; fbo->base.acc_size = 0; + fbo->base.pin_count = 1; if (bo->type != ttm_bo_type_sg) fbo->base.base.resv = &fbo->base.base._resv; diff --git a/include/drm/ttm/ttm_bo_api.h b/include/drm/ttm/ttm_bo_api.h index 0f7cd21d6d74..33aca60870e2 100644 --- a/include/drm/ttm/ttm_bo_api.h +++ b/include/drm/ttm/ttm_bo_api.h @@ -157,6 +157,7 @@ struct ttm_buffer_object { struct dma_fence *moving; unsigned priority; + unsigned pin_count; /** * Special members that are protected by the reserve lock @@ -606,6 +607,31 @@ static inline bool ttm_bo_uses_embedded_gem_object(struct ttm_buffer_object *bo) return bo->base.dev != NULL; } +/** + * ttm_bo_pin - Pin the buffer object. + * @bo: The buffer object to pin + * + * Make sure the buffer is not evicted any more during memory pressure. + */ +static inline void ttm_bo_pin(struct ttm_buffer_object *bo) +{ + dma_resv_assert_held(bo->base.resv); + ++bo->pin_count; +} + +/** + * ttm_bo_unpin - Unpin the buffer object. + * @bo: The buffer object to unpin + * + * Allows the buffer object to be evicted again during memory pressure. + */ +static inline void ttm_bo_unpin(struct ttm_buffer_object *bo) +{ + dma_resv_assert_held(bo->base.resv); + WARN_ON_ONCE(!bo->pin_count); + --bo->pin_count; +} + int ttm_mem_evict_first(struct ttm_bo_device *bdev, struct ttm_resource_manager *man, const struct ttm_place *place, From 8996b13051f0c211aaaf243dfd270003f1b67431 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Fri, 7 Jul 2023 11:25:00 +0200 Subject: [PATCH 018/203] drm/ttm: never consider pinned BOs for eviction&swap MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit a2848d08742c8e8494675892c02c0d22acbe3cf8 ] There is a small window where we have already incremented the pin count but not yet moved the bo from the lru to the pinned list. Signed-off-by: Christian König Reported-by: Pelloux-Prayer, Pierre-Eric Tested-by: Pelloux-Prayer, Pierre-Eric Acked-by: Alex Deucher Cc: stable@vger.kernel.org Link: https://patchwork.freedesktop.org/patch/msgid/20230707120826.3701-1-christian.koenig@amd.com Signed-off-by: Sasha Levin --- drivers/gpu/drm/ttm/ttm_bo.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c index 9a05caec3c99..dca4dfdd332d 100644 --- a/drivers/gpu/drm/ttm/ttm_bo.c +++ b/drivers/gpu/drm/ttm/ttm_bo.c @@ -672,6 +672,12 @@ static bool ttm_bo_evict_swapout_allowable(struct ttm_buffer_object *bo, { bool ret = false; + if (bo->pin_count) { + *locked = false; + *busy = false; + return false; + } + if (bo->base.resv == ctx->resv) { dma_resv_assert_held(bo->base.resv); if (ctx->flags & TTM_OPT_FLAG_ALLOW_RES_EVICT) From 840ce9cfc86f89c335625ec297acc0375f82e19b Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Thu, 15 Oct 2020 23:55:07 +0900 Subject: [PATCH 019/203] tracing: Show real address for trace event arguments [ Upstream commit efbbdaa22bb78761bff8dfdde027ad04bedd47ce ] To help debugging kernel, show real address for trace event arguments in tracefs/trace{,pipe} instead of hashed pointer value. Since ftrace human-readable format uses vsprintf(), all %p are translated to hash values instead of pointer address. However, when debugging the kernel, raw address value gives a hint when comparing with the memory mapping in the kernel. (Those are sometimes used with crash log, which is not hashed too) So converting %p with %px when calling trace_seq_printf(). Moreover, this is not improving the security because the tracefs can be used only by root user and the raw address values are readable from tracefs/percpu/cpu*/trace_pipe_raw file. Link: https://lkml.kernel.org/r/160277370703.29307.5134475491761971203.stgit@devnote2 Signed-off-by: Masami Hiramatsu Signed-off-by: Steven Rostedt (VMware) Stable-dep-of: d5a821896360 ("tracing: Fix memory leak of iter->temp when reading trace_pipe") Signed-off-by: Sasha Levin --- include/linux/trace_events.h | 4 ++ include/trace/trace_events.h | 2 +- kernel/trace/trace.c | 71 +++++++++++++++++++++++++++++++++++- kernel/trace/trace.h | 2 + kernel/trace/trace_output.c | 12 +++++- 5 files changed, 88 insertions(+), 3 deletions(-) diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h index c57b79301a75..e418065c2c90 100644 --- a/include/linux/trace_events.h +++ b/include/linux/trace_events.h @@ -55,6 +55,8 @@ struct trace_event; int trace_raw_output_prep(struct trace_iterator *iter, struct trace_event *event); +extern __printf(2, 3) +void trace_event_printf(struct trace_iterator *iter, const char *fmt, ...); /* * The trace entry - the most basic unit of tracing. This is what @@ -87,6 +89,8 @@ struct trace_iterator { unsigned long iter_flags; void *temp; /* temp holder */ unsigned int temp_size; + char *fmt; /* modified format holder */ + unsigned int fmt_size; /* trace_seq for __print_flags() and __print_symbolic() etc. */ struct trace_seq tmp_seq; diff --git a/include/trace/trace_events.h b/include/trace/trace_events.h index 717d388ecbd6..29917bce6dbc 100644 --- a/include/trace/trace_events.h +++ b/include/trace/trace_events.h @@ -364,7 +364,7 @@ trace_raw_output_##call(struct trace_iterator *iter, int flags, \ if (ret != TRACE_TYPE_HANDLED) \ return ret; \ \ - trace_seq_printf(s, print); \ + trace_event_printf(iter, print); \ \ return trace_handle_return(s); \ } \ diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 7867fc39c4fc..bed792f06575 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -3582,6 +3582,62 @@ __find_next_entry(struct trace_iterator *iter, int *ent_cpu, return next; } +#define STATIC_FMT_BUF_SIZE 128 +static char static_fmt_buf[STATIC_FMT_BUF_SIZE]; + +static char *trace_iter_expand_format(struct trace_iterator *iter) +{ + char *tmp; + + if (iter->fmt == static_fmt_buf) + return NULL; + + tmp = krealloc(iter->fmt, iter->fmt_size + STATIC_FMT_BUF_SIZE, + GFP_KERNEL); + if (tmp) { + iter->fmt_size += STATIC_FMT_BUF_SIZE; + iter->fmt = tmp; + } + + return tmp; +} + +const char *trace_event_format(struct trace_iterator *iter, const char *fmt) +{ + const char *p, *new_fmt; + char *q; + + if (WARN_ON_ONCE(!fmt)) + return fmt; + + p = fmt; + new_fmt = q = iter->fmt; + while (*p) { + if (unlikely(q - new_fmt + 3 > iter->fmt_size)) { + if (!trace_iter_expand_format(iter)) + return fmt; + + q += iter->fmt - new_fmt; + new_fmt = iter->fmt; + } + + *q++ = *p++; + + /* Replace %p with %px */ + if (p[-1] == '%') { + if (p[0] == '%') { + *q++ = *p++; + } else if (p[0] == 'p' && !isalnum(p[1])) { + *q++ = *p++; + *q++ = 'x'; + } + } + } + *q = '\0'; + + return new_fmt; +} + #define STATIC_TEMP_BUF_SIZE 128 static char static_temp_buf[STATIC_TEMP_BUF_SIZE] __aligned(4); @@ -4368,6 +4424,16 @@ __tracing_open(struct inode *inode, struct file *file, bool snapshot) if (iter->temp) iter->temp_size = 128; + /* + * trace_event_printf() may need to modify given format + * string to replace %p with %px so that it shows real address + * instead of hash value. However, that is only for the event + * tracing, other tracer may not need. Defer the allocation + * until it is needed. + */ + iter->fmt = NULL; + iter->fmt_size = 0; + /* * We make a copy of the current tracer to avoid concurrent * changes on it while we are reading. @@ -4519,6 +4585,7 @@ static int tracing_release(struct inode *inode, struct file *file) mutex_destroy(&iter->mutex); free_cpumask_var(iter->started); + kfree(iter->fmt); kfree(iter->temp); kfree(iter->trace); kfree(iter->buffer_iter); @@ -9382,9 +9449,11 @@ void ftrace_dump(enum ftrace_dump_mode oops_dump_mode) /* Simulate the iterator */ trace_init_global_iter(&iter); - /* Can not use kmalloc for iter.temp */ + /* Can not use kmalloc for iter.temp and iter.fmt */ iter.temp = static_temp_buf; iter.temp_size = STATIC_TEMP_BUF_SIZE; + iter.fmt = static_fmt_buf; + iter.fmt_size = STATIC_FMT_BUF_SIZE; for_each_tracing_cpu(cpu) { atomic_inc(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled); diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index e5b505b5b7d0..892b3d2f33b7 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -758,6 +758,8 @@ struct trace_entry *trace_find_next_entry(struct trace_iterator *iter, void trace_buffer_unlock_commit_nostack(struct trace_buffer *buffer, struct ring_buffer_event *event); +const char *trace_event_format(struct trace_iterator *iter, const char *fmt); + int trace_empty(struct trace_iterator *iter); void *trace_find_next_entry_inc(struct trace_iterator *iter); diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c index b3ee8d9b6b62..94b0991717b6 100644 --- a/kernel/trace/trace_output.c +++ b/kernel/trace/trace_output.c @@ -312,13 +312,23 @@ int trace_raw_output_prep(struct trace_iterator *iter, } EXPORT_SYMBOL(trace_raw_output_prep); +void trace_event_printf(struct trace_iterator *iter, const char *fmt, ...) +{ + va_list ap; + + va_start(ap, fmt); + trace_seq_vprintf(&iter->seq, trace_event_format(iter, fmt), ap); + va_end(ap); +} +EXPORT_SYMBOL(trace_event_printf); + static int trace_output_raw(struct trace_iterator *iter, char *name, char *fmt, va_list ap) { struct trace_seq *s = &iter->seq; trace_seq_printf(s, "%s: ", name); - trace_seq_vprintf(s, fmt, ap); + trace_seq_vprintf(s, trace_event_format(iter, fmt), ap); return trace_handle_return(s); } From b306d09967caa0f2ec1f63280b175b7be49260f3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Mon, 8 Nov 2021 14:46:26 +0100 Subject: [PATCH 020/203] pwm: meson: Simplify duplicated per-channel tracking MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 5f97f18feac9bd5a8163b108aee52d783114b36f ] The driver tracks per-channel data via struct pwm_device::chip_data and struct meson_pwm::channels[]. The latter holds the actual data, the former is only a pointer to the latter. So simplify by using struct meson_pwm::channels[] consistently. Signed-off-by: Uwe Kleine-König Reviewed-by: Martin Blumenstingl Signed-off-by: Thierry Reding Stable-dep-of: 87a2cbf02d77 ("pwm: meson: fix handling of period/duty if greater than UINT_MAX") Signed-off-by: Sasha Levin --- drivers/pwm/pwm-meson.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/drivers/pwm/pwm-meson.c b/drivers/pwm/pwm-meson.c index 0283163ddbe8..d3d9767aeb45 100644 --- a/drivers/pwm/pwm-meson.c +++ b/drivers/pwm/pwm-meson.c @@ -147,12 +147,13 @@ static int meson_pwm_request(struct pwm_chip *chip, struct pwm_device *pwm) return err; } - return pwm_set_chip_data(pwm, channel); + return 0; } static void meson_pwm_free(struct pwm_chip *chip, struct pwm_device *pwm) { - struct meson_pwm_channel *channel = pwm_get_chip_data(pwm); + struct meson_pwm *meson = to_meson_pwm(chip); + struct meson_pwm_channel *channel = &meson->channels[pwm->hwpwm]; if (channel) clk_disable_unprepare(channel->clk); @@ -161,7 +162,7 @@ static void meson_pwm_free(struct pwm_chip *chip, struct pwm_device *pwm) static int meson_pwm_calc(struct meson_pwm *meson, struct pwm_device *pwm, const struct pwm_state *state) { - struct meson_pwm_channel *channel = pwm_get_chip_data(pwm); + struct meson_pwm_channel *channel = &meson->channels[pwm->hwpwm]; unsigned int duty, period, pre_div, cnt, duty_cnt; unsigned long fin_freq; @@ -230,7 +231,7 @@ static int meson_pwm_calc(struct meson_pwm *meson, struct pwm_device *pwm, static void meson_pwm_enable(struct meson_pwm *meson, struct pwm_device *pwm) { - struct meson_pwm_channel *channel = pwm_get_chip_data(pwm); + struct meson_pwm_channel *channel = &meson->channels[pwm->hwpwm]; struct meson_pwm_channel_data *channel_data; unsigned long flags; u32 value; @@ -273,8 +274,8 @@ static void meson_pwm_disable(struct meson_pwm *meson, struct pwm_device *pwm) static int meson_pwm_apply(struct pwm_chip *chip, struct pwm_device *pwm, const struct pwm_state *state) { - struct meson_pwm_channel *channel = pwm_get_chip_data(pwm); struct meson_pwm *meson = to_meson_pwm(chip); + struct meson_pwm_channel *channel = &meson->channels[pwm->hwpwm]; int err = 0; if (!state) From e3454b438c67d0ec582dd3fbda17fa9ec520ddb7 Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Wed, 24 May 2023 21:48:36 +0200 Subject: [PATCH 021/203] pwm: meson: fix handling of period/duty if greater than UINT_MAX MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 87a2cbf02d7701255f9fcca7e5bd864a7bb397cf ] state->period/duty are of type u64, and if their value is greater than UINT_MAX, then the cast to uint will cause problems. Fix this by changing the type of the respective local variables to u64. Fixes: b79c3670e120 ("pwm: meson: Don't duplicate the polarity internally") Cc: stable@vger.kernel.org Suggested-by: Uwe Kleine-König Reviewed-by: Martin Blumenstingl Signed-off-by: Heiner Kallweit Signed-off-by: Thierry Reding Signed-off-by: Sasha Levin --- drivers/pwm/pwm-meson.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/pwm/pwm-meson.c b/drivers/pwm/pwm-meson.c index d3d9767aeb45..5b5fd1671350 100644 --- a/drivers/pwm/pwm-meson.c +++ b/drivers/pwm/pwm-meson.c @@ -163,8 +163,9 @@ static int meson_pwm_calc(struct meson_pwm *meson, struct pwm_device *pwm, const struct pwm_state *state) { struct meson_pwm_channel *channel = &meson->channels[pwm->hwpwm]; - unsigned int duty, period, pre_div, cnt, duty_cnt; + unsigned int pre_div, cnt, duty_cnt; unsigned long fin_freq; + u64 duty, period; duty = state->duty_cycle; period = state->period; @@ -186,19 +187,19 @@ static int meson_pwm_calc(struct meson_pwm *meson, struct pwm_device *pwm, dev_dbg(meson->chip.dev, "fin_freq: %lu Hz\n", fin_freq); - pre_div = div64_u64(fin_freq * (u64)period, NSEC_PER_SEC * 0xffffLL); + pre_div = div64_u64(fin_freq * period, NSEC_PER_SEC * 0xffffLL); if (pre_div > MISC_CLK_DIV_MASK) { dev_err(meson->chip.dev, "unable to get period pre_div\n"); return -EINVAL; } - cnt = div64_u64(fin_freq * (u64)period, NSEC_PER_SEC * (pre_div + 1)); + cnt = div64_u64(fin_freq * period, NSEC_PER_SEC * (pre_div + 1)); if (cnt > 0xffff) { dev_err(meson->chip.dev, "unable to get period cnt\n"); return -EINVAL; } - dev_dbg(meson->chip.dev, "period=%u pre_div=%u cnt=%u\n", period, + dev_dbg(meson->chip.dev, "period=%llu pre_div=%u cnt=%u\n", period, pre_div, cnt); if (duty == period) { @@ -211,14 +212,13 @@ static int meson_pwm_calc(struct meson_pwm *meson, struct pwm_device *pwm, channel->lo = cnt; } else { /* Then check is we can have the duty with the same pre_div */ - duty_cnt = div64_u64(fin_freq * (u64)duty, - NSEC_PER_SEC * (pre_div + 1)); + duty_cnt = div64_u64(fin_freq * duty, NSEC_PER_SEC * (pre_div + 1)); if (duty_cnt > 0xffff) { dev_err(meson->chip.dev, "unable to get duty cycle\n"); return -EINVAL; } - dev_dbg(meson->chip.dev, "duty=%u pre_div=%u duty_cnt=%u\n", + dev_dbg(meson->chip.dev, "duty=%llu pre_div=%u duty_cnt=%u\n", duty, pre_div, duty_cnt); channel->pre_div = pre_div; From ed3d841f2fc2c39048b6aeb5c58e59e227b304e9 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Tue, 6 Jun 2023 15:32:03 +0800 Subject: [PATCH 022/203] ext4: fix to check return value of freeze_bdev() in ext4_shutdown() [ Upstream commit c4d13222afd8a64bf11bc7ec68645496ee8b54b9 ] freeze_bdev() can fail due to a lot of reasons, it needs to check its reason before later process. Fixes: 783d94854499 ("ext4: add EXT4_IOC_GOINGDOWN ioctl") Cc: stable@kernel.org Signed-off-by: Chao Yu Link: https://lore.kernel.org/r/20230606073203.1310389-1-chao@kernel.org Signed-off-by: Theodore Ts'o Signed-off-by: Sasha Levin --- fs/ext4/ioctl.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c index 1171618f6549..56829507e68c 100644 --- a/fs/ext4/ioctl.c +++ b/fs/ext4/ioctl.c @@ -612,6 +612,7 @@ static int ext4_shutdown(struct super_block *sb, unsigned long arg) { struct ext4_sb_info *sbi = EXT4_SB(sb); __u32 flags; + struct super_block *ret; if (!capable(CAP_SYS_ADMIN)) return -EPERM; @@ -630,7 +631,9 @@ static int ext4_shutdown(struct super_block *sb, unsigned long arg) switch (flags) { case EXT4_GOING_FLAGS_DEFAULT: - freeze_bdev(sb->s_bdev); + ret = freeze_bdev(sb->s_bdev); + if (IS_ERR(ret)) + return PTR_ERR(ret); set_bit(EXT4_FLAGS_SHUTDOWN, &sbi->s_ext4_flags); thaw_bdev(sb->s_bdev, sb); break; From ceba255a791b3158938ecab0abcdd12bf83002d6 Mon Sep 17 00:00:00 2001 From: Yuan Can Date: Thu, 22 Sep 2022 11:12:28 +0000 Subject: [PATCH 023/203] phy: qcom-snps: Use dev_err_probe() to simplify code [ Upstream commit 668dc8afce43d4bc01feb3e929d6d5ffcb14f899 ] In the probe path, dev_err() can be replaced with dev_err_probe() which will check if error code is -EPROBE_DEFER and prints the error name. It also sets the defer probe reason which can be checked later through debugfs. Signed-off-by: Yuan Can Reviewed-by: Dmitry Baryshkov Reviewed-by: Andrew Halaney Link: https://lore.kernel.org/r/20220922111228.36355-8-yuancan@huawei.com Signed-off-by: Vinod Koul Stable-dep-of: 8a0eb8f9b9a0 ("phy: qcom-snps-femto-v2: properly enable ref clock") Signed-off-by: Sasha Levin --- drivers/phy/qualcomm/phy-qcom-snps-femto-v2.c | 18 ++++++------------ 1 file changed, 6 insertions(+), 12 deletions(-) diff --git a/drivers/phy/qualcomm/phy-qcom-snps-femto-v2.c b/drivers/phy/qualcomm/phy-qcom-snps-femto-v2.c index 7e61202aa234..54846259405a 100644 --- a/drivers/phy/qualcomm/phy-qcom-snps-femto-v2.c +++ b/drivers/phy/qualcomm/phy-qcom-snps-femto-v2.c @@ -304,12 +304,9 @@ static int qcom_snps_hsphy_probe(struct platform_device *pdev) return PTR_ERR(hsphy->base); hsphy->ref_clk = devm_clk_get(dev, "ref"); - if (IS_ERR(hsphy->ref_clk)) { - ret = PTR_ERR(hsphy->ref_clk); - if (ret != -EPROBE_DEFER) - dev_err(dev, "failed to get ref clk, %d\n", ret); - return ret; - } + if (IS_ERR(hsphy->ref_clk)) + return dev_err_probe(dev, PTR_ERR(hsphy->ref_clk), + "failed to get ref clk\n"); hsphy->phy_reset = devm_reset_control_get_exclusive(&pdev->dev, NULL); if (IS_ERR(hsphy->phy_reset)) { @@ -322,12 +319,9 @@ static int qcom_snps_hsphy_probe(struct platform_device *pdev) hsphy->vregs[i].supply = qcom_snps_hsphy_vreg_names[i]; ret = devm_regulator_bulk_get(dev, num, hsphy->vregs); - if (ret) { - if (ret != -EPROBE_DEFER) - dev_err(dev, "failed to get regulator supplies: %d\n", - ret); - return ret; - } + if (ret) + return dev_err_probe(dev, ret, + "failed to get regulator supplies\n"); pm_runtime_set_active(dev); pm_runtime_enable(dev); From f34090579a8b5d15510dc7098c39d310b74cacdd Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Sun, 7 May 2023 16:48:18 +0200 Subject: [PATCH 024/203] phy: qcom-snps: correct struct qcom_snps_hsphy kerneldoc [ Upstream commit 2a881183dc5ab2474ef602e48fe7af34db460d95 ] Update kerneldoc of struct qcom_snps_hsphy to fix: drivers/phy/qualcomm/phy-qcom-snps-femto-v2.c:135: warning: Function parameter or member 'update_seq_cfg' not described in 'qcom_snps_hsphy' Signed-off-by: Krzysztof Kozlowski Reviewed-by: Konrad Dybcio Link: https://lore.kernel.org/r/20230507144818.193039-1-krzysztof.kozlowski@linaro.org Signed-off-by: Vinod Koul Stable-dep-of: 8a0eb8f9b9a0 ("phy: qcom-snps-femto-v2: properly enable ref clock") Signed-off-by: Sasha Levin --- drivers/phy/qualcomm/phy-qcom-snps-femto-v2.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/phy/qualcomm/phy-qcom-snps-femto-v2.c b/drivers/phy/qualcomm/phy-qcom-snps-femto-v2.c index 54846259405a..136b45903c79 100644 --- a/drivers/phy/qualcomm/phy-qcom-snps-femto-v2.c +++ b/drivers/phy/qualcomm/phy-qcom-snps-femto-v2.c @@ -73,11 +73,11 @@ static const char * const qcom_snps_hsphy_vreg_names[] = { * * @cfg_ahb_clk: AHB2PHY interface clock * @ref_clk: phy reference clock - * @iface_clk: phy interface clock * @phy_reset: phy reset control * @vregs: regulator supplies bulk data * @phy_initialized: if PHY has been initialized correctly * @mode: contains the current mode the PHY is in + * @update_seq_cfg: tuning parameters for phy init */ struct qcom_snps_hsphy { struct phy *phy; From ffebc22bdd08dc85a21e1a1c091646987f9840c1 Mon Sep 17 00:00:00 2001 From: Adrien Thierry Date: Thu, 29 Jun 2023 10:45:38 -0400 Subject: [PATCH 025/203] phy: qcom-snps-femto-v2: keep cfg_ahb_clk enabled during runtime suspend [ Upstream commit 45d89a344eb46db9dce851c28e14f5e3c635c251 ] In the dwc3 core, both system and runtime suspend end up calling dwc3_suspend_common(). From there, what happens for the PHYs depends on the USB mode and whether the controller is entering system or runtime suspend. HOST mode: (1) system suspend on a non-wakeup-capable controller The [1] if branch is taken. dwc3_core_exit() is called, which ends up calling phy_power_off() and phy_exit(). Those two functions decrease the PM runtime count at some point, so they will trigger the PHY runtime sleep (assuming the count is right). (2) runtime suspend / system suspend on a wakeup-capable controller The [1] branch is not taken. dwc3_suspend_common() calls phy_pm_runtime_put_sync(). Assuming the ref count is right, the PHY runtime suspend op is called. DEVICE mode: dwc3_core_exit() is called on both runtime and system sleep unless the controller is already runtime suspended. OTG mode: (1) system suspend : dwc3_core_exit() is called (2) runtime suspend : do nothing In host mode, the code seems to make a distinction between 1) runtime sleep / system sleep for wakeup-capable controller, and 2) system sleep for non-wakeup-capable controller, where phy_power_off() and phy_exit() are only called for the latter. This suggests the PHY is not supposed to be in a fully powered-off state for runtime sleep and system sleep for wakeup-capable controller. Moreover, downstream, cfg_ahb_clk only gets disabled for system suspend. The clocks are disabled by phy->set_suspend() [2] which is only called in the system sleep path through dwc3_core_exit() [3]. With that in mind, don't disable the clocks during the femto PHY runtime suspend callback. The clocks will only be disabled during system suspend for non-wakeup-capable controllers, through dwc3_core_exit(). [1] https://elixir.bootlin.com/linux/v6.4/source/drivers/usb/dwc3/core.c#L1988 [2] https://git.codelinaro.org/clo/la/kernel/msm-5.4/-/blob/LV.AU.1.2.1.r2-05300-gen3meta.0/drivers/usb/phy/phy-msm-snps-hs.c#L524 [3] https://git.codelinaro.org/clo/la/kernel/msm-5.4/-/blob/LV.AU.1.2.1.r2-05300-gen3meta.0/drivers/usb/dwc3/core.c#L1915 Signed-off-by: Adrien Thierry Link: https://lore.kernel.org/r/20230629144542.14906-2-athierry@redhat.com Signed-off-by: Vinod Koul Stable-dep-of: 8a0eb8f9b9a0 ("phy: qcom-snps-femto-v2: properly enable ref clock") Signed-off-by: Sasha Levin --- drivers/phy/qualcomm/phy-qcom-snps-femto-v2.c | 9 --------- 1 file changed, 9 deletions(-) diff --git a/drivers/phy/qualcomm/phy-qcom-snps-femto-v2.c b/drivers/phy/qualcomm/phy-qcom-snps-femto-v2.c index 136b45903c79..dfe5f0944910 100644 --- a/drivers/phy/qualcomm/phy-qcom-snps-femto-v2.c +++ b/drivers/phy/qualcomm/phy-qcom-snps-femto-v2.c @@ -122,22 +122,13 @@ static int qcom_snps_hsphy_suspend(struct qcom_snps_hsphy *hsphy) 0, USB2_AUTO_RESUME); } - clk_disable_unprepare(hsphy->cfg_ahb_clk); return 0; } static int qcom_snps_hsphy_resume(struct qcom_snps_hsphy *hsphy) { - int ret; - dev_dbg(&hsphy->phy->dev, "Resume QCOM SNPS PHY, mode\n"); - ret = clk_prepare_enable(hsphy->cfg_ahb_clk); - if (ret) { - dev_err(&hsphy->phy->dev, "failed to enable cfg ahb clock\n"); - return ret; - } - return 0; } From fb1db979043a9479d0ca65b66e0674fa77bd4339 Mon Sep 17 00:00:00 2001 From: Adrien Thierry Date: Thu, 29 Jun 2023 10:45:39 -0400 Subject: [PATCH 026/203] phy: qcom-snps-femto-v2: properly enable ref clock [ Upstream commit 8a0eb8f9b9a002291a3934acfd913660b905249e ] The driver is not enabling the ref clock, which thus gets disabled by the clk_disable_unused() initcall. This leads to the dwc3 controller failing to initialize if probed after clk_disable_unused() is called, for instance when the driver is built as a module. To fix this, switch to the clk_bulk API to handle both cfg_ahb and ref clocks at the proper places. Note that the cfg_ahb clock is currently not used by any device tree instantiation of the PHY. Work needs to be done separately to fix this. Link: https://lore.kernel.org/linux-arm-msm/ZEqvy+khHeTkC2hf@fedora/ Fixes: 51e8114f80d0 ("phy: qcom-snps: Add SNPS USB PHY driver for QCOM based SOCs") Signed-off-by: Adrien Thierry Link: https://lore.kernel.org/r/20230629144542.14906-3-athierry@redhat.com Signed-off-by: Vinod Koul Signed-off-by: Sasha Levin --- drivers/phy/qualcomm/phy-qcom-snps-femto-v2.c | 63 ++++++++++++++----- 1 file changed, 48 insertions(+), 15 deletions(-) diff --git a/drivers/phy/qualcomm/phy-qcom-snps-femto-v2.c b/drivers/phy/qualcomm/phy-qcom-snps-femto-v2.c index dfe5f0944910..abb926456933 100644 --- a/drivers/phy/qualcomm/phy-qcom-snps-femto-v2.c +++ b/drivers/phy/qualcomm/phy-qcom-snps-femto-v2.c @@ -68,11 +68,13 @@ static const char * const qcom_snps_hsphy_vreg_names[] = { /** * struct qcom_snps_hsphy - snps hs phy attributes * + * @dev: device structure + * * @phy: generic phy * @base: iomapped memory space for snps hs phy * - * @cfg_ahb_clk: AHB2PHY interface clock - * @ref_clk: phy reference clock + * @num_clks: number of clocks + * @clks: array of clocks * @phy_reset: phy reset control * @vregs: regulator supplies bulk data * @phy_initialized: if PHY has been initialized correctly @@ -80,11 +82,13 @@ static const char * const qcom_snps_hsphy_vreg_names[] = { * @update_seq_cfg: tuning parameters for phy init */ struct qcom_snps_hsphy { + struct device *dev; + struct phy *phy; void __iomem *base; - struct clk *cfg_ahb_clk; - struct clk *ref_clk; + int num_clks; + struct clk_bulk_data *clks; struct reset_control *phy_reset; struct regulator_bulk_data vregs[SNPS_HS_NUM_VREGS]; @@ -92,6 +96,34 @@ struct qcom_snps_hsphy { enum phy_mode mode; }; +static int qcom_snps_hsphy_clk_init(struct qcom_snps_hsphy *hsphy) +{ + struct device *dev = hsphy->dev; + + hsphy->num_clks = 2; + hsphy->clks = devm_kcalloc(dev, hsphy->num_clks, sizeof(*hsphy->clks), GFP_KERNEL); + if (!hsphy->clks) + return -ENOMEM; + + /* + * TODO: Currently no device tree instantiation of the PHY is using the clock. + * This needs to be fixed in order for this code to be able to use devm_clk_bulk_get(). + */ + hsphy->clks[0].id = "cfg_ahb"; + hsphy->clks[0].clk = devm_clk_get_optional(dev, "cfg_ahb"); + if (IS_ERR(hsphy->clks[0].clk)) + return dev_err_probe(dev, PTR_ERR(hsphy->clks[0].clk), + "failed to get cfg_ahb clk\n"); + + hsphy->clks[1].id = "ref"; + hsphy->clks[1].clk = devm_clk_get(dev, "ref"); + if (IS_ERR(hsphy->clks[1].clk)) + return dev_err_probe(dev, PTR_ERR(hsphy->clks[1].clk), + "failed to get ref clk\n"); + + return 0; +} + static inline void qcom_snps_hsphy_write_mask(void __iomem *base, u32 offset, u32 mask, u32 val) { @@ -174,16 +206,16 @@ static int qcom_snps_hsphy_init(struct phy *phy) if (ret) return ret; - ret = clk_prepare_enable(hsphy->cfg_ahb_clk); + ret = clk_bulk_prepare_enable(hsphy->num_clks, hsphy->clks); if (ret) { - dev_err(&phy->dev, "failed to enable cfg ahb clock, %d\n", ret); + dev_err(&phy->dev, "failed to enable clocks, %d\n", ret); goto poweroff_phy; } ret = reset_control_assert(hsphy->phy_reset); if (ret) { dev_err(&phy->dev, "failed to assert phy_reset, %d\n", ret); - goto disable_ahb_clk; + goto disable_clks; } usleep_range(100, 150); @@ -191,7 +223,7 @@ static int qcom_snps_hsphy_init(struct phy *phy) ret = reset_control_deassert(hsphy->phy_reset); if (ret) { dev_err(&phy->dev, "failed to de-assert phy_reset, %d\n", ret); - goto disable_ahb_clk; + goto disable_clks; } qcom_snps_hsphy_write_mask(hsphy->base, USB2_PHY_USB_PHY_CFG0, @@ -237,8 +269,8 @@ static int qcom_snps_hsphy_init(struct phy *phy) return 0; -disable_ahb_clk: - clk_disable_unprepare(hsphy->cfg_ahb_clk); +disable_clks: + clk_bulk_disable_unprepare(hsphy->num_clks, hsphy->clks); poweroff_phy: regulator_bulk_disable(ARRAY_SIZE(hsphy->vregs), hsphy->vregs); @@ -250,7 +282,7 @@ static int qcom_snps_hsphy_exit(struct phy *phy) struct qcom_snps_hsphy *hsphy = phy_get_drvdata(phy); reset_control_assert(hsphy->phy_reset); - clk_disable_unprepare(hsphy->cfg_ahb_clk); + clk_bulk_disable_unprepare(hsphy->num_clks, hsphy->clks); regulator_bulk_disable(ARRAY_SIZE(hsphy->vregs), hsphy->vregs); hsphy->phy_initialized = false; @@ -290,14 +322,15 @@ static int qcom_snps_hsphy_probe(struct platform_device *pdev) if (!hsphy) return -ENOMEM; + hsphy->dev = dev; + hsphy->base = devm_platform_ioremap_resource(pdev, 0); if (IS_ERR(hsphy->base)) return PTR_ERR(hsphy->base); - hsphy->ref_clk = devm_clk_get(dev, "ref"); - if (IS_ERR(hsphy->ref_clk)) - return dev_err_probe(dev, PTR_ERR(hsphy->ref_clk), - "failed to get ref clk\n"); + ret = qcom_snps_hsphy_clk_init(hsphy); + if (ret) + return dev_err_probe(dev, ret, "failed to initialize clocks\n"); hsphy->phy_reset = devm_reset_control_get_exclusive(&pdev->dev, NULL); if (IS_ERR(hsphy->phy_reset)) { From 39c789c9570d1ce74e1bee9eea5cc17866f1c061 Mon Sep 17 00:00:00 2001 From: Sakari Ailus Date: Tue, 13 Jun 2023 18:47:53 +0200 Subject: [PATCH 027/203] media: staging: atomisp: select V4L2_FWNODE [ Upstream commit bf4c985707d3168ebb7d87d15830de66949d979c ] Select V4L2_FWNODE as the driver depends on it. Reported-by: Andy Shevchenko Fixes: aa31f6514047 ("media: atomisp: allow building the driver again") Signed-off-by: Sakari Ailus Tested-by: Andy Shevchenko Reviewed-by: Hans de Goede Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Sasha Levin --- drivers/staging/media/atomisp/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/staging/media/atomisp/Kconfig b/drivers/staging/media/atomisp/Kconfig index 37577bb72998..1a0b958f1aa0 100644 --- a/drivers/staging/media/atomisp/Kconfig +++ b/drivers/staging/media/atomisp/Kconfig @@ -13,6 +13,7 @@ config VIDEO_ATOMISP tristate "Intel Atom Image Signal Processor Driver" depends on VIDEO_V4L2 && INTEL_ATOMISP depends on PMIC_OPREGION + select V4L2_FWNODE select IOSF_MBI select VIDEOBUF_VMALLOC help From 8a6cc852f6589d7d6651fb9f7b708c3b3d84e49d Mon Sep 17 00:00:00 2001 From: Wang Ming Date: Thu, 13 Jul 2023 09:42:39 +0800 Subject: [PATCH 028/203] i40e: Fix an NULL vs IS_ERR() bug for debugfs_create_dir() [ Upstream commit 043b1f185fb0f3939b7427f634787706f45411c4 ] The debugfs_create_dir() function returns error pointers. It never returns NULL. Most incorrect error checks were fixed, but the one in i40e_dbg_init() was forgotten. Fix the remaining error check. Fixes: 02e9c290814c ("i40e: debugfs interface") Signed-off-by: Wang Ming Tested-by: Pucha Himasekhar Reddy (A Contingent worker at Intel) Signed-off-by: Tony Nguyen Signed-off-by: Sasha Levin --- drivers/net/ethernet/intel/i40e/i40e_debugfs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_debugfs.c b/drivers/net/ethernet/intel/i40e/i40e_debugfs.c index 989d5c7263d7..8bcf5902babf 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_debugfs.c +++ b/drivers/net/ethernet/intel/i40e/i40e_debugfs.c @@ -1839,7 +1839,7 @@ void i40e_dbg_pf_exit(struct i40e_pf *pf) void i40e_dbg_init(void) { i40e_dbg_root = debugfs_create_dir(i40e_driver_name, NULL); - if (!i40e_dbg_root) + if (IS_ERR(i40e_dbg_root)) pr_info("init of debugfs failed\n"); } From 111b699300a766b365b0380d8ca4611634ef5043 Mon Sep 17 00:00:00 2001 From: Jiawen Wu Date: Wed, 19 Jul 2023 17:22:33 +0800 Subject: [PATCH 029/203] net: phy: marvell10g: fix 88x3310 power up [ Upstream commit c7b75bea853daeb64fc831dbf39a6bbabcc402ac ] Clear MV_V2_PORT_CTRL_PWRDOWN bit to set power up for 88x3310 PHY, it sometimes does not take effect immediately. And a read of this register causes the bit not to clear. This will cause mv3310_reset() to time out, which will fail the config initialization. So add a delay before the next access. Fixes: c9cc1c815d36 ("net: phy: marvell10g: place in powersave mode at probe") Signed-off-by: Jiawen Wu Reviewed-by: Russell King (Oracle) Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/phy/marvell10g.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/net/phy/marvell10g.c b/drivers/net/phy/marvell10g.c index 2b64318efdba..42b48d0d0c4e 100644 --- a/drivers/net/phy/marvell10g.c +++ b/drivers/net/phy/marvell10g.c @@ -263,6 +263,13 @@ static int mv3310_power_up(struct phy_device *phydev) ret = phy_clear_bits_mmd(phydev, MDIO_MMD_VEND2, MV_V2_PORT_CTRL, MV_V2_PORT_CTRL_PWRDOWN); + /* Sometimes, the power down bit doesn't clear immediately, and + * a read of this register causes the bit not to clear. Delay + * 100us to allow the PHY to come out of power down mode before + * the next access. + */ + udelay(100); + if (phydev->drv->phy_id != MARVELL_PHY_ID_88X3310 || priv->firmware_ver < 0x00030000) return ret; From 344b7c00039849351929d24c40c4540b54079f8f Mon Sep 17 00:00:00 2001 From: Guangbin Huang Date: Mon, 30 Aug 2021 14:06:38 +0800 Subject: [PATCH 030/203] net: hns3: reconstruct function hclge_ets_validate() [ Upstream commit 161ad669e6c23529415bffed5cb3bfa012e46cb4 ] This patch reconstructs function hclge_ets_validate() to reduce the code cycle complexity and make code more concise. Signed-off-by: Guangbin Huang Signed-off-by: David S. Miller Stable-dep-of: 882481b1c55f ("net: hns3: fix wrong bw weight of disabled tc issue") Signed-off-by: Sasha Levin --- .../hisilicon/hns3/hns3pf/hclge_dcb.c | 47 ++++++++++++++----- 1 file changed, 35 insertions(+), 12 deletions(-) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_dcb.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_dcb.c index 5bab885744fc..c75794552a1a 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_dcb.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_dcb.c @@ -105,26 +105,30 @@ static int hclge_dcb_common_validate(struct hclge_dev *hdev, u8 num_tc, return 0; } -static int hclge_ets_validate(struct hclge_dev *hdev, struct ieee_ets *ets, - u8 *tc, bool *changed) +static u8 hclge_ets_tc_changed(struct hclge_dev *hdev, struct ieee_ets *ets, + bool *changed) { - bool has_ets_tc = false; - u32 total_ets_bw = 0; - u8 max_tc = 0; - int ret; + u8 max_tc_id = 0; u8 i; for (i = 0; i < HNAE3_MAX_USER_PRIO; i++) { if (ets->prio_tc[i] != hdev->tm_info.prio_tc[i]) *changed = true; - if (ets->prio_tc[i] > max_tc) - max_tc = ets->prio_tc[i]; + if (ets->prio_tc[i] > max_tc_id) + max_tc_id = ets->prio_tc[i]; } - ret = hclge_dcb_common_validate(hdev, max_tc + 1, ets->prio_tc); - if (ret) - return ret; + /* return max tc number, max tc id need to plus 1 */ + return max_tc_id + 1; +} + +static int hclge_ets_sch_mode_validate(struct hclge_dev *hdev, + struct ieee_ets *ets, bool *changed) +{ + bool has_ets_tc = false; + u32 total_ets_bw = 0; + u8 i; for (i = 0; i < HNAE3_MAX_TC; i++) { switch (ets->tc_tsa[i]) { @@ -158,7 +162,26 @@ static int hclge_ets_validate(struct hclge_dev *hdev, struct ieee_ets *ets, if (has_ets_tc && total_ets_bw != BW_PERCENT) return -EINVAL; - *tc = max_tc + 1; + return 0; +} + +static int hclge_ets_validate(struct hclge_dev *hdev, struct ieee_ets *ets, + u8 *tc, bool *changed) +{ + u8 tc_num; + int ret; + + tc_num = hclge_ets_tc_changed(hdev, ets, changed); + + ret = hclge_dcb_common_validate(hdev, tc_num, ets->prio_tc); + if (ret) + return ret; + + ret = hclge_ets_sch_mode_validate(hdev, ets, changed); + if (ret) + return ret; + + *tc = tc_num; if (*tc != hdev->tm_info.num_tc) *changed = true; From 076f786f04149e9a074f920fb606d22ff251eb34 Mon Sep 17 00:00:00 2001 From: Jijie Shao Date: Thu, 20 Jul 2023 10:05:10 +0800 Subject: [PATCH 031/203] net: hns3: fix wrong bw weight of disabled tc issue [ Upstream commit 882481b1c55fc44861d7e2d54b4e0936b1b39f2c ] In dwrr mode, the default bandwidth weight of disabled tc is set to 0. If the bandwidth weight is 0, the mode will change to sp. Therefore, disabled tc default bandwidth weight need changed to 1, and 0 is returned when query the bandwidth weight of disabled tc. In addition, driver need stop configure bandwidth weight if tc is disabled. Fixes: 848440544b41 ("net: hns3: Add support of TX Scheduler & Shaper to HNS3 driver") Signed-off-by: Jie Wang Signed-off-by: Jijie Shao Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- .../ethernet/hisilicon/hns3/hns3pf/hclge_dcb.c | 17 ++++++++++++++--- .../ethernet/hisilicon/hns3/hns3pf/hclge_tm.c | 3 ++- 2 files changed, 16 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_dcb.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_dcb.c index c75794552a1a..d60b8dfe3872 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_dcb.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_dcb.c @@ -53,7 +53,10 @@ static void hclge_tm_info_to_ieee_ets(struct hclge_dev *hdev, for (i = 0; i < HNAE3_MAX_TC; i++) { ets->prio_tc[i] = hdev->tm_info.prio_tc[i]; - ets->tc_tx_bw[i] = hdev->tm_info.pg_info[0].tc_dwrr[i]; + if (i < hdev->tm_info.num_tc) + ets->tc_tx_bw[i] = hdev->tm_info.pg_info[0].tc_dwrr[i]; + else + ets->tc_tx_bw[i] = 0; if (hdev->tm_info.tc_info[i].tc_sch_mode == HCLGE_SCH_MODE_SP) @@ -124,7 +127,8 @@ static u8 hclge_ets_tc_changed(struct hclge_dev *hdev, struct ieee_ets *ets, } static int hclge_ets_sch_mode_validate(struct hclge_dev *hdev, - struct ieee_ets *ets, bool *changed) + struct ieee_ets *ets, bool *changed, + u8 tc_num) { bool has_ets_tc = false; u32 total_ets_bw = 0; @@ -138,6 +142,13 @@ static int hclge_ets_sch_mode_validate(struct hclge_dev *hdev, *changed = true; break; case IEEE_8021QAZ_TSA_ETS: + if (i >= tc_num) { + dev_err(&hdev->pdev->dev, + "tc%u is disabled, cannot set ets bw\n", + i); + return -EINVAL; + } + /* The hardware will switch to sp mode if bandwidth is * 0, so limit ets bandwidth must be greater than 0. */ @@ -177,7 +188,7 @@ static int hclge_ets_validate(struct hclge_dev *hdev, struct ieee_ets *ets, if (ret) return ret; - ret = hclge_ets_sch_mode_validate(hdev, ets, changed); + ret = hclge_ets_sch_mode_validate(hdev, ets, changed, tc_num); if (ret) return ret; diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c index b3ceaaaeacae..8c5c5562c0a7 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c @@ -651,6 +651,7 @@ static void hclge_tm_tc_info_init(struct hclge_dev *hdev) static void hclge_tm_pg_info_init(struct hclge_dev *hdev) { #define BW_PERCENT 100 +#define DEFAULT_BW_WEIGHT 1 u8 i; @@ -672,7 +673,7 @@ static void hclge_tm_pg_info_init(struct hclge_dev *hdev) for (k = 0; k < hdev->tm_info.num_tc; k++) hdev->tm_info.pg_info[i].tc_dwrr[k] = BW_PERCENT; for (; k < HNAE3_MAX_TC; k++) - hdev->tm_info.pg_info[i].tc_dwrr[k] = 0; + hdev->tm_info.pg_info[i].tc_dwrr[k] = DEFAULT_BW_WEIGHT; } } From d2741769d512ea1bf5544bf85da7602e64f82e45 Mon Sep 17 00:00:00 2001 From: Roopa Prabhu Date: Tue, 1 Mar 2022 05:04:28 +0000 Subject: [PATCH 032/203] vxlan: move to its own directory [ Upstream commit 6765393614ea8e2c0a7b953063513823f87c9115 ] vxlan.c has grown too long. This patch moves it to its own directory. subsequent patches add new functionality in new files. Signed-off-by: Roopa Prabhu Signed-off-by: David S. Miller Stable-dep-of: 94d166c5318c ("vxlan: calculate correct header length for GPE") Signed-off-by: Sasha Levin --- drivers/net/Makefile | 2 +- drivers/net/vxlan/Makefile | 7 +++++++ drivers/net/{vxlan.c => vxlan/vxlan_core.c} | 0 3 files changed, 8 insertions(+), 1 deletion(-) create mode 100644 drivers/net/vxlan/Makefile rename drivers/net/{vxlan.c => vxlan/vxlan_core.c} (100%) diff --git a/drivers/net/Makefile b/drivers/net/Makefile index 72e18d505d1a..64430440c580 100644 --- a/drivers/net/Makefile +++ b/drivers/net/Makefile @@ -29,7 +29,7 @@ obj-$(CONFIG_TUN) += tun.o obj-$(CONFIG_TAP) += tap.o obj-$(CONFIG_VETH) += veth.o obj-$(CONFIG_VIRTIO_NET) += virtio_net.o -obj-$(CONFIG_VXLAN) += vxlan.o +obj-$(CONFIG_VXLAN) += vxlan/ obj-$(CONFIG_GENEVE) += geneve.o obj-$(CONFIG_BAREUDP) += bareudp.o obj-$(CONFIG_GTP) += gtp.o diff --git a/drivers/net/vxlan/Makefile b/drivers/net/vxlan/Makefile new file mode 100644 index 000000000000..567266133593 --- /dev/null +++ b/drivers/net/vxlan/Makefile @@ -0,0 +1,7 @@ +# +# Makefile for the vxlan driver +# + +obj-$(CONFIG_VXLAN) += vxlan.o + +vxlan-objs := vxlan_core.o diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan/vxlan_core.c similarity index 100% rename from drivers/net/vxlan.c rename to drivers/net/vxlan/vxlan_core.c From 6f26f145737727028974025f908549b534940a16 Mon Sep 17 00:00:00 2001 From: Jiri Benc Date: Thu, 20 Jul 2023 11:05:56 +0200 Subject: [PATCH 033/203] vxlan: calculate correct header length for GPE [ Upstream commit 94d166c5318c6edd1e079df8552233443e909c33 ] VXLAN-GPE does not add an extra inner Ethernet header. Take that into account when calculating header length. This causes problems in skb_tunnel_check_pmtu, where incorrect PMTU is cached. In the collect_md mode (which is the only mode that VXLAN-GPE supports), there's no magic auto-setting of the tunnel interface MTU. It can't be, since the destination and thus the underlying interface may be different for each packet. So, the administrator is responsible for setting the correct tunnel interface MTU. Apparently, the administrators are capable enough to calculate that the maximum MTU for VXLAN-GPE is (their_lower_MTU - 36). They set the tunnel interface MTU to 1464. If you run a TCP stream over such interface, it's then segmented according to the MTU 1464, i.e. producing 1514 bytes frames. Which is okay, this still fits the lower MTU. However, skb_tunnel_check_pmtu (called from vxlan_xmit_one) uses 50 as the header size and thus incorrectly calculates the frame size to be 1528. This leads to ICMP too big message being generated (locally), PMTU of 1450 to be cached and the TCP stream to be resegmented. The fix is to use the correct actual header size, especially for skb_tunnel_check_pmtu calculation. Fixes: e1e5314de08ba ("vxlan: implement GPE") Signed-off-by: Jiri Benc Reviewed-by: Simon Horman Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 2 +- drivers/net/vxlan/vxlan_core.c | 23 ++++++++----------- include/net/vxlan.h | 13 +++++++---- 3 files changed, 20 insertions(+), 18 deletions(-) diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index 5c542f5d2b20..2b100b7b325a 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -8409,7 +8409,7 @@ static void ixgbe_atr(struct ixgbe_ring *ring, struct ixgbe_adapter *adapter = q_vector->adapter; if (unlikely(skb_tail_pointer(skb) < hdr.network + - VXLAN_HEADROOM)) + vxlan_headroom(0))) return; /* verify the port is recognized as VXLAN */ diff --git a/drivers/net/vxlan/vxlan_core.c b/drivers/net/vxlan/vxlan_core.c index 72d670667f64..1ac9de69bde6 100644 --- a/drivers/net/vxlan/vxlan_core.c +++ b/drivers/net/vxlan/vxlan_core.c @@ -2720,7 +2720,7 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev, } ndst = &rt->dst; - err = skb_tunnel_check_pmtu(skb, ndst, VXLAN_HEADROOM, + err = skb_tunnel_check_pmtu(skb, ndst, vxlan_headroom(flags & VXLAN_F_GPE), netif_is_any_bridge_port(dev)); if (err < 0) { goto tx_error; @@ -2781,7 +2781,8 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev, goto out_unlock; } - err = skb_tunnel_check_pmtu(skb, ndst, VXLAN6_HEADROOM, + err = skb_tunnel_check_pmtu(skb, ndst, + vxlan_headroom((flags & VXLAN_F_GPE) | VXLAN_F_IPV6), netif_is_any_bridge_port(dev)); if (err < 0) { goto tx_error; @@ -3158,14 +3159,12 @@ static int vxlan_change_mtu(struct net_device *dev, int new_mtu) struct vxlan_rdst *dst = &vxlan->default_dst; struct net_device *lowerdev = __dev_get_by_index(vxlan->net, dst->remote_ifindex); - bool use_ipv6 = !!(vxlan->cfg.flags & VXLAN_F_IPV6); /* This check is different than dev->max_mtu, because it looks at * the lowerdev->mtu, rather than the static dev->max_mtu */ if (lowerdev) { - int max_mtu = lowerdev->mtu - - (use_ipv6 ? VXLAN6_HEADROOM : VXLAN_HEADROOM); + int max_mtu = lowerdev->mtu - vxlan_headroom(vxlan->cfg.flags); if (new_mtu > max_mtu) return -EINVAL; } @@ -3784,11 +3783,11 @@ static void vxlan_config_apply(struct net_device *dev, struct vxlan_dev *vxlan = netdev_priv(dev); struct vxlan_rdst *dst = &vxlan->default_dst; unsigned short needed_headroom = ETH_HLEN; - bool use_ipv6 = !!(conf->flags & VXLAN_F_IPV6); int max_mtu = ETH_MAX_MTU; + u32 flags = conf->flags; if (!changelink) { - if (conf->flags & VXLAN_F_GPE) + if (flags & VXLAN_F_GPE) vxlan_raw_setup(dev); else vxlan_ether_setup(dev); @@ -3814,8 +3813,7 @@ static void vxlan_config_apply(struct net_device *dev, dev->needed_tailroom = lowerdev->needed_tailroom; - max_mtu = lowerdev->mtu - (use_ipv6 ? VXLAN6_HEADROOM : - VXLAN_HEADROOM); + max_mtu = lowerdev->mtu - vxlan_headroom(flags); if (max_mtu < ETH_MIN_MTU) max_mtu = ETH_MIN_MTU; @@ -3826,10 +3824,9 @@ static void vxlan_config_apply(struct net_device *dev, if (dev->mtu > max_mtu) dev->mtu = max_mtu; - if (use_ipv6 || conf->flags & VXLAN_F_COLLECT_METADATA) - needed_headroom += VXLAN6_HEADROOM; - else - needed_headroom += VXLAN_HEADROOM; + if (flags & VXLAN_F_COLLECT_METADATA) + flags |= VXLAN_F_IPV6; + needed_headroom += vxlan_headroom(flags); dev->needed_headroom = needed_headroom; memcpy(&vxlan->cfg, conf, sizeof(*conf)); diff --git a/include/net/vxlan.h b/include/net/vxlan.h index 08537aa14f7c..cf1d870f7b9a 100644 --- a/include/net/vxlan.h +++ b/include/net/vxlan.h @@ -327,10 +327,15 @@ static inline netdev_features_t vxlan_features_check(struct sk_buff *skb, return features; } -/* IP header + UDP + VXLAN + Ethernet header */ -#define VXLAN_HEADROOM (20 + 8 + 8 + 14) -/* IPv6 header + UDP + VXLAN + Ethernet header */ -#define VXLAN6_HEADROOM (40 + 8 + 8 + 14) +static inline int vxlan_headroom(u32 flags) +{ + /* VXLAN: IP4/6 header + UDP + VXLAN + Ethernet header */ + /* VXLAN-GPE: IP4/6 header + UDP + VXLAN */ + return (flags & VXLAN_F_IPV6 ? sizeof(struct ipv6hdr) : + sizeof(struct iphdr)) + + sizeof(struct udphdr) + sizeof(struct vxlanhdr) + + (flags & VXLAN_F_GPE ? 0 : ETH_HLEN); +} static inline struct vxlanhdr *vxlan_hdr(struct sk_buff *skb) { From ad249aa3c38f329f91fba8b4b3cd087e79fb0ce8 Mon Sep 17 00:00:00 2001 From: Harshit Mogalapalli Date: Fri, 21 Jul 2023 02:05:55 -0700 Subject: [PATCH 034/203] phy: hisilicon: Fix an out of bounds check in hisi_inno_phy_probe() [ Upstream commit 13c088cf3657d70893d75cf116be937f1509cc0f ] The size of array 'priv->ports[]' is INNO_PHY_PORT_NUM. In the for loop, 'i' is used as the index for array 'priv->ports[]' with a check (i > INNO_PHY_PORT_NUM) which indicates that INNO_PHY_PORT_NUM is allowed value for 'i' in the same loop. This > comparison needs to be changed to >=, otherwise it potentially leads to an out of bounds write on the next iteration through the loop Fixes: ba8b0ee81fbb ("phy: add inno-usb2-phy driver for hi3798cv200 SoC") Reported-by: Dan Carpenter Signed-off-by: Harshit Mogalapalli Link: https://lore.kernel.org/r/20230721090558.3588613-1-harshit.m.mogalapalli@oracle.com Signed-off-by: Vinod Koul Signed-off-by: Sasha Levin --- drivers/phy/hisilicon/phy-hisi-inno-usb2.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/phy/hisilicon/phy-hisi-inno-usb2.c b/drivers/phy/hisilicon/phy-hisi-inno-usb2.c index 34a6a9a1ceb2..897c6bb4cbb8 100644 --- a/drivers/phy/hisilicon/phy-hisi-inno-usb2.c +++ b/drivers/phy/hisilicon/phy-hisi-inno-usb2.c @@ -153,7 +153,7 @@ static int hisi_inno_phy_probe(struct platform_device *pdev) phy_set_drvdata(phy, &priv->ports[i]); i++; - if (i > INNO_PHY_PORT_NUM) { + if (i >= INNO_PHY_PORT_NUM) { dev_warn(dev, "Support %d ports in maximum\n", i); break; } From 5a3c929682234d2da860bbe1e948abff528a271f Mon Sep 17 00:00:00 2001 From: Yuanjun Gong Date: Thu, 20 Jul 2023 22:42:19 +0800 Subject: [PATCH 035/203] ethernet: atheros: fix return value check in atl1e_tso_csum() [ Upstream commit 69a184f7a372aac588babfb0bd681aaed9779f5b ] in atl1e_tso_csum, it should check the return value of pskb_trim(), and return an error code if an unexpected value is returned by pskb_trim(). Fixes: a6a5325239c2 ("atl1e: Atheros L1E Gigabit Ethernet driver") Signed-off-by: Yuanjun Gong Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/20230720144219.39285-1-ruc_gongyuanjun@163.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/ethernet/atheros/atl1e/atl1e_main.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/atheros/atl1e/atl1e_main.c b/drivers/net/ethernet/atheros/atl1e/atl1e_main.c index ff9f96de74b8..696ce3c5a8ba 100644 --- a/drivers/net/ethernet/atheros/atl1e/atl1e_main.c +++ b/drivers/net/ethernet/atheros/atl1e/atl1e_main.c @@ -1642,8 +1642,11 @@ static int atl1e_tso_csum(struct atl1e_adapter *adapter, real_len = (((unsigned char *)ip_hdr(skb) - skb->data) + ntohs(ip_hdr(skb)->tot_len)); - if (real_len < skb->len) - pskb_trim(skb, real_len); + if (real_len < skb->len) { + err = pskb_trim(skb, real_len); + if (err) + return err; + } hdr_len = (skb_transport_offset(skb) + tcp_hdrlen(skb)); if (unlikely(skb->len == hdr_len)) { From b55a2b34b1b66368977f528954b306ee9156d784 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Maciej=20=C5=BBenczykowski?= Date: Thu, 20 Jul 2023 09:00:22 -0700 Subject: [PATCH 036/203] ipv6 addrconf: fix bug where deleting a mngtmpaddr can create a new temporary address MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 69172f0bcb6a09110c5d2a6d792627f5095a9018 ] currently on 6.4 net/main: # ip link add dummy1 type dummy # echo 1 > /proc/sys/net/ipv6/conf/dummy1/use_tempaddr # ip link set dummy1 up # ip -6 addr add 2000::1/64 mngtmpaddr dev dummy1 # ip -6 addr show dev dummy1 11: dummy1: mtu 1500 qdisc noqueue state UNKNOWN group default qlen 1000 inet6 2000::44f3:581c:8ca:3983/64 scope global temporary dynamic valid_lft 604800sec preferred_lft 86172sec inet6 2000::1/64 scope global mngtmpaddr valid_lft forever preferred_lft forever inet6 fe80::e8a8:a6ff:fed5:56d4/64 scope link valid_lft forever preferred_lft forever # ip -6 addr del 2000::44f3:581c:8ca:3983/64 dev dummy1 (can wait a few seconds if you want to, the above delete isn't [directly] the problem) # ip -6 addr show dev dummy1 11: dummy1: mtu 1500 qdisc noqueue state UNKNOWN group default qlen 1000 inet6 2000::1/64 scope global mngtmpaddr valid_lft forever preferred_lft forever inet6 fe80::e8a8:a6ff:fed5:56d4/64 scope link valid_lft forever preferred_lft forever # ip -6 addr del 2000::1/64 mngtmpaddr dev dummy1 # ip -6 addr show dev dummy1 11: dummy1: mtu 1500 qdisc noqueue state UNKNOWN group default qlen 1000 inet6 2000::81c9:56b7:f51a:b98f/64 scope global temporary dynamic valid_lft 604797sec preferred_lft 86169sec inet6 fe80::e8a8:a6ff:fed5:56d4/64 scope link valid_lft forever preferred_lft forever This patch prevents this new 'global temporary dynamic' address from being created by the deletion of the related (same subnet prefix) 'mngtmpaddr' (which is triggered by there already being no temporary addresses). Cc: Jiri Pirko Fixes: 53bd67491537 ("ipv6 addrconf: introduce IFA_F_MANAGETEMPADDR to tell kernel to manage temporary addresses") Reported-by: Xiao Ma Signed-off-by: Maciej Żenczykowski Reviewed-by: David Ahern Link: https://lore.kernel.org/r/20230720160022.1887942-1-maze@google.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- net/ipv6/addrconf.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index d5d10496b4ae..9b414681500a 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -2555,12 +2555,18 @@ static void manage_tempaddrs(struct inet6_dev *idev, ipv6_ifa_notify(0, ift); } - if ((create || list_empty(&idev->tempaddr_list)) && - idev->cnf.use_tempaddr > 0) { + /* Also create a temporary address if it's enabled but no temporary + * address currently exists. + * However, we get called with valid_lft == 0, prefered_lft == 0, create == false + * as part of cleanup (ie. deleting the mngtmpaddr). + * We don't want that to result in creating a new temporary ip address. + */ + if (list_empty(&idev->tempaddr_list) && (valid_lft || prefered_lft)) + create = true; + + if (create && idev->cnf.use_tempaddr > 0) { /* When a new public address is created as described * in [ADDRCONF], also create a new temporary address. - * Also create a temporary address if it's enabled but - * no temporary address currently exists. */ read_unlock_bh(&idev->lock); ipv6_create_tempaddr(ifp, false); From 0cd74fbd3b8327e60525e1ec4a6c28895693909f Mon Sep 17 00:00:00 2001 From: Stewart Smith Date: Fri, 21 Jul 2023 15:24:10 -0700 Subject: [PATCH 037/203] tcp: Reduce chance of collisions in inet6_hashfn(). [ Upstream commit d11b0df7ddf1831f3e170972f43186dad520bfcc ] For both IPv4 and IPv6 incoming TCP connections are tracked in a hash table with a hash over the source & destination addresses and ports. However, the IPv6 hash is insufficient and can lead to a high rate of collisions. The IPv6 hash used an XOR to fit everything into the 96 bits for the fast jenkins hash, meaning it is possible for an external entity to ensure the hash collides, thus falling back to a linear search in the bucket, which is slow. We take the approach of hash the full length of IPv6 address in __ipv6_addr_jhash() so that all users can benefit from a more secure version. While this may look like it adds overhead, the reality of modern CPUs means that this is unmeasurable in real world scenarios. In simulating with llvm-mca, the increase in cycles for the hashing code was ~16 cycles on Skylake (from a base of ~155), and an extra ~9 on Nehalem (base of ~173). In commit dd6d2910c5e0 ("netfilter: conntrack: switch to siphash") netfilter switched from a jenkins hash to a siphash, but even the faster hsiphash is a more significant overhead (~20-30%) in some preliminary testing. So, in this patch, we keep to the more conservative approach to ensure we don't add much overhead per SYN. In testing, this results in a consistently even spread across the connection buckets. In both testing and real-world scenarios, we have not found any measurable performance impact. Fixes: 08dcdbf6a7b9 ("ipv6: use a stronger hash for tcp") Signed-off-by: Stewart Smith Signed-off-by: Samuel Mendoza-Jonas Suggested-by: Eric Dumazet Signed-off-by: Kuniyuki Iwashima Reviewed-by: Eric Dumazet Link: https://lore.kernel.org/r/20230721222410.17914-1-kuniyu@amazon.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- include/net/ipv6.h | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/include/net/ipv6.h b/include/net/ipv6.h index 8879c0ab0b89..4c8f97a6da5a 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -663,12 +663,8 @@ static inline u32 ipv6_addr_hash(const struct in6_addr *a) /* more secured version of ipv6_addr_hash() */ static inline u32 __ipv6_addr_jhash(const struct in6_addr *a, const u32 initval) { - u32 v = (__force u32)a->s6_addr32[0] ^ (__force u32)a->s6_addr32[1]; - - return jhash_3words(v, - (__force u32)a->s6_addr32[2], - (__force u32)a->s6_addr32[3], - initval); + return jhash2((__force const u32 *)a->s6_addr32, + ARRAY_SIZE(a->s6_addr32), initval); } static inline bool ipv6_addr_loopback(const struct in6_addr *a) From b2712c4bfc3bfb0d583bc07e8cd110f05a163ebd Mon Sep 17 00:00:00 2001 From: Jedrzej Jagielski Date: Fri, 21 Jul 2023 08:58:54 -0700 Subject: [PATCH 038/203] ice: Fix memory management in ice_ethtool_fdir.c [ Upstream commit a3336056504d780590ac6d6ac94fbba829994594 ] Fix ethtool FDIR logic to not use memory after its release. In the ice_ethtool_fdir.c file there are 2 spots where code can refer to pointers which may be missing. In the ice_cfg_fdir_xtrct_seq() function seg may be freed but even then may be still used by memcpy(&tun_seg[1], seg, sizeof(*seg)). In the ice_add_fdir_ethtool() function struct ice_fdir_fltr *input may first fail to be added via ice_fdir_update_list_entry() but then may be deleted by ice_fdir_update_list_entry. Terminate in both cases when the returned value of the previous operation is other than 0, free memory and don't use it anymore. Reported-by: Michal Schmidt Link: https://bugzilla.redhat.com/show_bug.cgi?id=2208423 Fixes: cac2a27cd9ab ("ice: Support IPv4 Flow Director filters") Reviewed-by: Przemek Kitszel Signed-off-by: Jedrzej Jagielski Reviewed-by: Leon Romanovsky Tested-by: Pucha Himasekhar Reddy (A Contingent worker at Intel) Signed-off-by: Tony Nguyen Link: https://lore.kernel.org/r/20230721155854.1292805-1-anthony.l.nguyen@intel.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- .../net/ethernet/intel/ice/ice_ethtool_fdir.c | 26 ++++++++++--------- 1 file changed, 14 insertions(+), 12 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice_ethtool_fdir.c b/drivers/net/ethernet/intel/ice/ice_ethtool_fdir.c index 192729546bbf..a122a267ede5 100644 --- a/drivers/net/ethernet/intel/ice/ice_ethtool_fdir.c +++ b/drivers/net/ethernet/intel/ice/ice_ethtool_fdir.c @@ -1135,16 +1135,21 @@ ice_cfg_fdir_xtrct_seq(struct ice_pf *pf, struct ethtool_rx_flow_spec *fsp, ICE_FLOW_FLD_OFF_INVAL); } - /* add filter for outer headers */ fltr_idx = ice_ethtool_flow_to_fltr(fsp->flow_type & ~FLOW_EXT); + + assign_bit(fltr_idx, hw->fdir_perfect_fltr, perfect_filter); + + /* add filter for outer headers */ ret = ice_fdir_set_hw_fltr_rule(pf, seg, fltr_idx, ICE_FD_HW_SEG_NON_TUN); - if (ret == -EEXIST) - /* Rule already exists, free memory and continue */ - devm_kfree(dev, seg); - else if (ret) + if (ret == -EEXIST) { + /* Rule already exists, free memory and count as success */ + ret = 0; + goto err_exit; + } else if (ret) { /* could not write filter, free memory */ goto err_exit; + } /* make tunneled filter HW entries if possible */ memcpy(&tun_seg[1], seg, sizeof(*seg)); @@ -1159,18 +1164,13 @@ ice_cfg_fdir_xtrct_seq(struct ice_pf *pf, struct ethtool_rx_flow_spec *fsp, devm_kfree(dev, tun_seg); } - if (perfect_filter) - set_bit(fltr_idx, hw->fdir_perfect_fltr); - else - clear_bit(fltr_idx, hw->fdir_perfect_fltr); - return ret; err_exit: devm_kfree(dev, tun_seg); devm_kfree(dev, seg); - return -EOPNOTSUPP; + return ret; } /** @@ -1680,7 +1680,9 @@ int ice_add_fdir_ethtool(struct ice_vsi *vsi, struct ethtool_rxnfc *cmd) } /* input struct is added to the HW filter list */ - ice_fdir_update_list_entry(pf, input, fsp->location); + ret = ice_fdir_update_list_entry(pf, input, fsp->location); + if (ret) + goto release_lock; ret = ice_fdir_write_all_fltr(pf, input, true); if (ret) From de982f46be83d92dec4486e724a738189b800d40 Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Fri, 21 Jul 2023 12:03:55 +0800 Subject: [PATCH 039/203] bonding: reset bond's flags when down link is P2P device [ Upstream commit da19a2b967cf1e2c426f50d28550d1915214a81d ] When adding a point to point downlink to the bond, we neglected to reset the bond's flags, which were still using flags like BROADCAST and MULTICAST. Consequently, this would initiate ARP/DAD for P2P downlink interfaces, such as when adding a GRE device to the bonding. To address this issue, let's reset the bond's flags for P2P interfaces. Before fix: 7: gre0@NONE: mtu 1500 qdisc noqueue master bond0 state UNKNOWN group default qlen 1000 link/gre6 2006:70:10::1 peer 2006:70:10::2 permaddr 167f:18:f188:: 8: bond0: mtu 1500 qdisc noqueue state UP group default qlen 1000 link/gre6 2006:70:10::1 brd 2006:70:10::2 inet6 fe80::200:ff:fe00:0/64 scope link valid_lft forever preferred_lft forever After fix: 7: gre0@NONE: mtu 1500 qdisc noqueue master bond2 state UNKNOWN group default qlen 1000 link/gre6 2006:70:10::1 peer 2006:70:10::2 permaddr c29e:557a:e9d9:: 8: bond0: mtu 1500 qdisc noqueue state UP group default qlen 1000 link/gre6 2006:70:10::1 peer 2006:70:10::2 inet6 fe80::1/64 scope link valid_lft forever preferred_lft forever Reported-by: Liang Li Closes: https://bugzilla.redhat.com/show_bug.cgi?id=2221438 Fixes: 872254dd6b1f ("net/bonding: Enable bonding to enslave non ARPHRD_ETHER") Signed-off-by: Hangbin Liu Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- drivers/net/bonding/bond_main.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index 381e6cdd603a..a260740269e9 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -1442,6 +1442,11 @@ static void bond_setup_by_slave(struct net_device *bond_dev, memcpy(bond_dev->broadcast, slave_dev->broadcast, slave_dev->addr_len); + + if (slave_dev->flags & IFF_POINTOPOINT) { + bond_dev->flags &= ~(IFF_BROADCAST | IFF_MULTICAST); + bond_dev->flags |= (IFF_POINTOPOINT | IFF_NOARP); + } } /* On bonding slaves other than the currently active slave, suppress From 75f57acda32fa57b442bdcdf0bafa2d5ef646666 Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Fri, 21 Jul 2023 12:03:56 +0800 Subject: [PATCH 040/203] team: reset team's flags when down link is P2P device [ Upstream commit fa532bee17d15acf8bba4bc8e2062b7a093ba801 ] When adding a point to point downlink to team device, we neglected to reset the team's flags, which were still using flags like BROADCAST and MULTICAST. Consequently, this would initiate ARP/DAD for P2P downlink interfaces, such as when adding a GRE device to team device. Fix this by remove multicast/broadcast flags and add p2p and noarp flags. After removing the none ethernet interface and adding an ethernet interface to team, we need to reset team interface flags. Unlike bonding interface, team do not need restore IFF_MASTER, IFF_SLAVE flags. Reported-by: Liang Li Closes: https://bugzilla.redhat.com/show_bug.cgi?id=2221438 Fixes: 1d76efe1577b ("team: add support for non-ethernet devices") Signed-off-by: Hangbin Liu Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- drivers/net/team/team.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/net/team/team.c b/drivers/net/team/team.c index 8a1619695421..36c7eae776d4 100644 --- a/drivers/net/team/team.c +++ b/drivers/net/team/team.c @@ -2130,6 +2130,15 @@ static void team_setup_by_port(struct net_device *dev, dev->mtu = port_dev->mtu; memcpy(dev->broadcast, port_dev->broadcast, port_dev->addr_len); eth_hw_addr_inherit(dev, port_dev); + + if (port_dev->flags & IFF_POINTOPOINT) { + dev->flags &= ~(IFF_BROADCAST | IFF_MULTICAST); + dev->flags |= (IFF_POINTOPOINT | IFF_NOARP); + } else if ((port_dev->flags & (IFF_BROADCAST | IFF_MULTICAST)) == + (IFF_BROADCAST | IFF_MULTICAST)) { + dev->flags |= (IFF_BROADCAST | IFF_MULTICAST); + dev->flags &= ~(IFF_POINTOPOINT | IFF_NOARP); + } } static int team_dev_type_check_change(struct net_device *dev, From 89060b831c415e8452d0fca43f0ef0891db1a590 Mon Sep 17 00:00:00 2001 From: Maxim Mikityanskiy Date: Fri, 21 Jul 2023 17:54:23 +0300 Subject: [PATCH 041/203] platform/x86: msi-laptop: Fix rfkill out-of-sync on MSI Wind U100 [ Upstream commit ad084a6d99bc182bf109c190c808e2ea073ec57b ] Only the HW rfkill state is toggled on laptops with quirks->ec_read_only (so far only MSI Wind U90/U100). There are, however, a few issues with the implementation: 1. The initial HW state is always unblocked, regardless of the actual state on boot, because msi_init_rfkill only sets the SW state, regardless of ec_read_only. 2. The initial SW state corresponds to the actual state on boot, but it can't be changed afterwards, because set_device_state returns -EOPNOTSUPP. It confuses the userspace, making Wi-Fi and/or Bluetooth unusable if it was blocked on boot, and breaking the airplane mode if the rfkill was unblocked on boot. Address the above issues by properly initializing the HW state on ec_read_only laptops and by allowing the userspace to toggle the SW state. Don't set the SW state ourselves and let the userspace fully control it. Toggling the SW state is a no-op, however, it allows the userspace to properly toggle the airplane mode. The actual SW radio disablement is handled by the corresponding rtl818x_pci and btusb drivers that have their own rfkills. Tested on MSI Wind U100 Plus, BIOS ver 1.0G, EC ver 130. Fixes: 0816392b97d4 ("msi-laptop: merge quirk tables to one") Fixes: 0de6575ad0a8 ("msi-laptop: Add MSI Wind U90/U100 support") Signed-off-by: Maxim Mikityanskiy Link: https://lore.kernel.org/r/20230721145423.161057-1-maxtram95@gmail.com Reviewed-by: Hans de Goede Signed-off-by: Hans de Goede Signed-off-by: Sasha Levin --- drivers/platform/x86/msi-laptop.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/platform/x86/msi-laptop.c b/drivers/platform/x86/msi-laptop.c index 0e804b6c2d24..dfb4af759aa7 100644 --- a/drivers/platform/x86/msi-laptop.c +++ b/drivers/platform/x86/msi-laptop.c @@ -210,7 +210,7 @@ static ssize_t set_device_state(const char *buf, size_t count, u8 mask) return -EINVAL; if (quirks->ec_read_only) - return -EOPNOTSUPP; + return 0; /* read current device state */ result = ec_read(MSI_STANDARD_EC_COMMAND_ADDRESS, &rdata); @@ -841,15 +841,15 @@ static bool msi_laptop_i8042_filter(unsigned char data, unsigned char str, static void msi_init_rfkill(struct work_struct *ignored) { if (rfk_wlan) { - rfkill_set_sw_state(rfk_wlan, !wlan_s); + msi_rfkill_set_state(rfk_wlan, !wlan_s); rfkill_wlan_set(NULL, !wlan_s); } if (rfk_bluetooth) { - rfkill_set_sw_state(rfk_bluetooth, !bluetooth_s); + msi_rfkill_set_state(rfk_bluetooth, !bluetooth_s); rfkill_bluetooth_set(NULL, !bluetooth_s); } if (rfk_threeg) { - rfkill_set_sw_state(rfk_threeg, !threeg_s); + msi_rfkill_set_state(rfk_threeg, !threeg_s); rfkill_threeg_set(NULL, !threeg_s); } } From 893cb3c3513cf661a0ff45fe0cfa83fe27131f76 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Thu, 20 Jul 2023 21:30:05 +0200 Subject: [PATCH 042/203] netfilter: nft_set_rbtree: fix overlap expiration walk [ Upstream commit f718863aca469a109895cb855e6b81fff4827d71 ] The lazy gc on insert that should remove timed-out entries fails to release the other half of the interval, if any. Can be reproduced with tests/shell/testcases/sets/0044interval_overlap_0 in nftables.git and kmemleak enabled kernel. Second bug is the use of rbe_prev vs. prev pointer. If rbe_prev() returns NULL after at least one iteration, rbe_prev points to element that is not an end interval, hence it should not be removed. Lastly, check the genmask of the end interval if this is active in the current generation. Fixes: c9e6978e2725 ("netfilter: nft_set_rbtree: Switch to node list walk for overlap detection") Signed-off-by: Florian Westphal Signed-off-by: Sasha Levin --- net/netfilter/nft_set_rbtree.c | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/net/netfilter/nft_set_rbtree.c b/net/netfilter/nft_set_rbtree.c index 172b994790a0..eae760adae4d 100644 --- a/net/netfilter/nft_set_rbtree.c +++ b/net/netfilter/nft_set_rbtree.c @@ -216,29 +216,37 @@ static void *nft_rbtree_get(const struct net *net, const struct nft_set *set, static int nft_rbtree_gc_elem(const struct nft_set *__set, struct nft_rbtree *priv, - struct nft_rbtree_elem *rbe) + struct nft_rbtree_elem *rbe, + u8 genmask) { struct nft_set *set = (struct nft_set *)__set; struct rb_node *prev = rb_prev(&rbe->node); - struct nft_rbtree_elem *rbe_prev = NULL; + struct nft_rbtree_elem *rbe_prev; struct nft_set_gc_batch *gcb; gcb = nft_set_gc_batch_check(set, NULL, GFP_ATOMIC); if (!gcb) return -ENOMEM; - /* search for expired end interval coming before this element. */ + /* search for end interval coming before this element. + * end intervals don't carry a timeout extension, they + * are coupled with the interval start element. + */ while (prev) { rbe_prev = rb_entry(prev, struct nft_rbtree_elem, node); - if (nft_rbtree_interval_end(rbe_prev)) + if (nft_rbtree_interval_end(rbe_prev) && + nft_set_elem_active(&rbe_prev->ext, genmask)) break; prev = rb_prev(prev); } - if (rbe_prev) { + if (prev) { + rbe_prev = rb_entry(prev, struct nft_rbtree_elem, node); + rb_erase(&rbe_prev->node, &priv->root); atomic_dec(&set->nelems); + nft_set_gc_batch_add(gcb, rbe_prev); } rb_erase(&rbe->node, &priv->root); @@ -320,7 +328,7 @@ static int __nft_rbtree_insert(const struct net *net, const struct nft_set *set, /* perform garbage collection to avoid bogus overlap reports. */ if (nft_set_elem_expired(&rbe->ext)) { - err = nft_rbtree_gc_elem(set, priv, rbe); + err = nft_rbtree_gc_elem(set, priv, rbe, genmask); if (err < 0) return err; From 7782ce022fea917ca3fe4fba826c5f4b4c4bc0ec Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Tue, 27 Apr 2021 18:05:48 +0200 Subject: [PATCH 043/203] netfilter: nftables: add helper function to validate set element data [ Upstream commit 97c976d662fb9080a6a5d1e1a108c7a1f5c9484d ] When binding sets to rule, validate set element data according to set definition. This patch adds a helper function to be reused by the catch-all set element support. Signed-off-by: Pablo Neira Ayuso Stable-dep-of: 0a771f7b266b ("netfilter: nf_tables: skip immediate deactivate in _PREPARE_ERROR") Signed-off-by: Sasha Levin --- net/netfilter/nf_tables_api.c | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 356416564d9f..5ef9acba7c17 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -4576,10 +4576,9 @@ static int nft_validate_register_store(const struct nft_ctx *ctx, enum nft_data_types type, unsigned int len); -static int nf_tables_bind_check_setelem(const struct nft_ctx *ctx, - struct nft_set *set, - const struct nft_set_iter *iter, - struct nft_set_elem *elem) +static int nft_setelem_data_validate(const struct nft_ctx *ctx, + struct nft_set *set, + struct nft_set_elem *elem) { const struct nft_set_ext *ext = nft_set_elem_ext(set, elem->priv); enum nft_registers dreg; @@ -4591,6 +4590,14 @@ static int nf_tables_bind_check_setelem(const struct nft_ctx *ctx, set->dlen); } +static int nf_tables_bind_check_setelem(const struct nft_ctx *ctx, + struct nft_set *set, + const struct nft_set_iter *iter, + struct nft_set_elem *elem) +{ + return nft_setelem_data_validate(ctx, set, elem); +} + int nf_tables_bind_set(const struct nft_ctx *ctx, struct nft_set *set, struct nft_set_binding *binding) { From ab5a97a94b57324df76d659686ac2d30494170e6 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Sun, 23 Jul 2023 16:24:46 +0200 Subject: [PATCH 044/203] netfilter: nf_tables: skip immediate deactivate in _PREPARE_ERROR [ Upstream commit 0a771f7b266b02d262900c75f1e175c7fe76fec2 ] On error when building the rule, the immediate expression unbinds the chain, hence objects can be deactivated by the transaction records. Otherwise, it is possible to trigger the following warning: WARNING: CPU: 3 PID: 915 at net/netfilter/nf_tables_api.c:2013 nf_tables_chain_destroy+0x1f7/0x210 [nf_tables] CPU: 3 PID: 915 Comm: chain-bind-err- Not tainted 6.1.39 #1 RIP: 0010:nf_tables_chain_destroy+0x1f7/0x210 [nf_tables] Fixes: 4bedf9eee016 ("netfilter: nf_tables: fix chain binding transaction logic") Reported-by: Kevin Rich Signed-off-by: Pablo Neira Ayuso Signed-off-by: Florian Westphal Signed-off-by: Sasha Levin --- net/netfilter/nft_immediate.c | 27 ++++++++++++++++++--------- 1 file changed, 18 insertions(+), 9 deletions(-) diff --git a/net/netfilter/nft_immediate.c b/net/netfilter/nft_immediate.c index 6b0efab4fad0..6bf1c852e8ea 100644 --- a/net/netfilter/nft_immediate.c +++ b/net/netfilter/nft_immediate.c @@ -125,15 +125,27 @@ static void nft_immediate_activate(const struct nft_ctx *ctx, return nft_data_hold(&priv->data, nft_dreg_to_type(priv->dreg)); } +static void nft_immediate_chain_deactivate(const struct nft_ctx *ctx, + struct nft_chain *chain, + enum nft_trans_phase phase) +{ + struct nft_ctx chain_ctx; + struct nft_rule *rule; + + chain_ctx = *ctx; + chain_ctx.chain = chain; + + list_for_each_entry(rule, &chain->rules, list) + nft_rule_expr_deactivate(&chain_ctx, rule, phase); +} + static void nft_immediate_deactivate(const struct nft_ctx *ctx, const struct nft_expr *expr, enum nft_trans_phase phase) { const struct nft_immediate_expr *priv = nft_expr_priv(expr); const struct nft_data *data = &priv->data; - struct nft_ctx chain_ctx; struct nft_chain *chain; - struct nft_rule *rule; if (priv->dreg == NFT_REG_VERDICT) { switch (data->verdict.code) { @@ -143,20 +155,17 @@ static void nft_immediate_deactivate(const struct nft_ctx *ctx, if (!nft_chain_binding(chain)) break; - chain_ctx = *ctx; - chain_ctx.chain = chain; - - list_for_each_entry(rule, &chain->rules, list) - nft_rule_expr_deactivate(&chain_ctx, rule, phase); - switch (phase) { case NFT_TRANS_PREPARE_ERROR: nf_tables_unbind_chain(ctx, chain); - fallthrough; + nft_deactivate_next(ctx->net, chain); + break; case NFT_TRANS_PREPARE: + nft_immediate_chain_deactivate(ctx, chain, phase); nft_deactivate_next(ctx->net, chain); break; default: + nft_immediate_chain_deactivate(ctx, chain, phase); nft_chain_del(chain); chain->bound = false; chain->table->use--; From 308a43f1521d5b7220693d0865b23e8dad3ed137 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Sun, 23 Jul 2023 16:41:48 +0200 Subject: [PATCH 045/203] netfilter: nf_tables: disallow rule addition to bound chain via NFTA_RULE_CHAIN_ID [ Upstream commit 0ebc1064e4874d5987722a2ddbc18f94aa53b211 ] Bail out with EOPNOTSUPP when adding rule to bound chain via NFTA_RULE_CHAIN_ID. The following warning splat is shown when adding a rule to a deleted bound chain: WARNING: CPU: 2 PID: 13692 at net/netfilter/nf_tables_api.c:2013 nf_tables_chain_destroy+0x1f7/0x210 [nf_tables] CPU: 2 PID: 13692 Comm: chain-bound-rul Not tainted 6.1.39 #1 RIP: 0010:nf_tables_chain_destroy+0x1f7/0x210 [nf_tables] Fixes: d0e2c7de92c7 ("netfilter: nf_tables: add NFT_CHAIN_BINDING") Reported-by: Kevin Rich Signed-off-by: Pablo Neira Ayuso Signed-off-by: Florian Westphal Signed-off-by: Sasha Levin --- net/netfilter/nf_tables_api.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 5ef9acba7c17..19653b8784bb 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -3350,8 +3350,6 @@ static int nf_tables_newrule(struct net *net, struct sock *nlsk, NL_SET_BAD_ATTR(extack, nla[NFTA_RULE_CHAIN]); return PTR_ERR(chain); } - if (nft_chain_is_bound(chain)) - return -EOPNOTSUPP; } else if (nla[NFTA_RULE_CHAIN_ID]) { chain = nft_chain_lookup_byid(net, table, nla[NFTA_RULE_CHAIN_ID], @@ -3364,6 +3362,9 @@ static int nf_tables_newrule(struct net *net, struct sock *nlsk, return -EINVAL; } + if (nft_chain_is_bound(chain)) + return -EOPNOTSUPP; + if (nla[NFTA_RULE_HANDLE]) { handle = be64_to_cpu(nla_get_be64(nla[NFTA_RULE_HANDLE])); rule = __nft_rule_lookup(chain, handle); From 6227b461542f19a21d78605e687f43a5f2477851 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Sat, 4 Feb 2023 15:52:55 +0200 Subject: [PATCH 046/203] net/sched: mqprio: refactor nlattr parsing to a separate function [ Upstream commit feb2cf3dcfb930aec2ca65c66d1365543d5ba943 ] mqprio_init() is quite large and unwieldy to add more code to. Split the netlink attribute parsing to a dedicated function. Signed-off-by: Vladimir Oltean Reviewed-by: Jacob Keller Reviewed-by: Simon Horman Signed-off-by: David S. Miller Stable-dep-of: 6c58c8816abb ("net/sched: mqprio: Add length check for TCA_MQPRIO_{MAX/MIN}_RATE64") Signed-off-by: Sasha Levin --- net/sched/sch_mqprio.c | 114 +++++++++++++++++++++++------------------ 1 file changed, 63 insertions(+), 51 deletions(-) diff --git a/net/sched/sch_mqprio.c b/net/sched/sch_mqprio.c index 50e15add6068..a5df5604e015 100644 --- a/net/sched/sch_mqprio.c +++ b/net/sched/sch_mqprio.c @@ -130,6 +130,67 @@ static int parse_attr(struct nlattr *tb[], int maxtype, struct nlattr *nla, return 0; } +static int mqprio_parse_nlattr(struct Qdisc *sch, struct tc_mqprio_qopt *qopt, + struct nlattr *opt) +{ + struct mqprio_sched *priv = qdisc_priv(sch); + struct nlattr *tb[TCA_MQPRIO_MAX + 1]; + struct nlattr *attr; + int i, rem, err; + + err = parse_attr(tb, TCA_MQPRIO_MAX, opt, mqprio_policy, + sizeof(*qopt)); + if (err < 0) + return err; + + if (!qopt->hw) + return -EINVAL; + + if (tb[TCA_MQPRIO_MODE]) { + priv->flags |= TC_MQPRIO_F_MODE; + priv->mode = *(u16 *)nla_data(tb[TCA_MQPRIO_MODE]); + } + + if (tb[TCA_MQPRIO_SHAPER]) { + priv->flags |= TC_MQPRIO_F_SHAPER; + priv->shaper = *(u16 *)nla_data(tb[TCA_MQPRIO_SHAPER]); + } + + if (tb[TCA_MQPRIO_MIN_RATE64]) { + if (priv->shaper != TC_MQPRIO_SHAPER_BW_RATE) + return -EINVAL; + i = 0; + nla_for_each_nested(attr, tb[TCA_MQPRIO_MIN_RATE64], + rem) { + if (nla_type(attr) != TCA_MQPRIO_MIN_RATE64) + return -EINVAL; + if (i >= qopt->num_tc) + break; + priv->min_rate[i] = *(u64 *)nla_data(attr); + i++; + } + priv->flags |= TC_MQPRIO_F_MIN_RATE; + } + + if (tb[TCA_MQPRIO_MAX_RATE64]) { + if (priv->shaper != TC_MQPRIO_SHAPER_BW_RATE) + return -EINVAL; + i = 0; + nla_for_each_nested(attr, tb[TCA_MQPRIO_MAX_RATE64], + rem) { + if (nla_type(attr) != TCA_MQPRIO_MAX_RATE64) + return -EINVAL; + if (i >= qopt->num_tc) + break; + priv->max_rate[i] = *(u64 *)nla_data(attr); + i++; + } + priv->flags |= TC_MQPRIO_F_MAX_RATE; + } + + return 0; +} + static int mqprio_init(struct Qdisc *sch, struct nlattr *opt, struct netlink_ext_ack *extack) { @@ -139,9 +200,6 @@ static int mqprio_init(struct Qdisc *sch, struct nlattr *opt, struct Qdisc *qdisc; int i, err = -EOPNOTSUPP; struct tc_mqprio_qopt *qopt = NULL; - struct nlattr *tb[TCA_MQPRIO_MAX + 1]; - struct nlattr *attr; - int rem; int len; BUILD_BUG_ON(TC_MAX_QUEUE != TC_QOPT_MAX_QUEUE); @@ -166,55 +224,9 @@ static int mqprio_init(struct Qdisc *sch, struct nlattr *opt, len = nla_len(opt) - NLA_ALIGN(sizeof(*qopt)); if (len > 0) { - err = parse_attr(tb, TCA_MQPRIO_MAX, opt, mqprio_policy, - sizeof(*qopt)); - if (err < 0) + err = mqprio_parse_nlattr(sch, qopt, opt); + if (err) return err; - - if (!qopt->hw) - return -EINVAL; - - if (tb[TCA_MQPRIO_MODE]) { - priv->flags |= TC_MQPRIO_F_MODE; - priv->mode = *(u16 *)nla_data(tb[TCA_MQPRIO_MODE]); - } - - if (tb[TCA_MQPRIO_SHAPER]) { - priv->flags |= TC_MQPRIO_F_SHAPER; - priv->shaper = *(u16 *)nla_data(tb[TCA_MQPRIO_SHAPER]); - } - - if (tb[TCA_MQPRIO_MIN_RATE64]) { - if (priv->shaper != TC_MQPRIO_SHAPER_BW_RATE) - return -EINVAL; - i = 0; - nla_for_each_nested(attr, tb[TCA_MQPRIO_MIN_RATE64], - rem) { - if (nla_type(attr) != TCA_MQPRIO_MIN_RATE64) - return -EINVAL; - if (i >= qopt->num_tc) - break; - priv->min_rate[i] = *(u64 *)nla_data(attr); - i++; - } - priv->flags |= TC_MQPRIO_F_MIN_RATE; - } - - if (tb[TCA_MQPRIO_MAX_RATE64]) { - if (priv->shaper != TC_MQPRIO_SHAPER_BW_RATE) - return -EINVAL; - i = 0; - nla_for_each_nested(attr, tb[TCA_MQPRIO_MAX_RATE64], - rem) { - if (nla_type(attr) != TCA_MQPRIO_MAX_RATE64) - return -EINVAL; - if (i >= qopt->num_tc) - break; - priv->max_rate[i] = *(u64 *)nla_data(attr); - i++; - } - priv->flags |= TC_MQPRIO_F_MAX_RATE; - } } /* pre-allocate qdisc, attachment can't fail */ From 3ed3729a6a8e38581da29f7fc3e661ae963472f7 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Tue, 11 Apr 2023 21:01:51 +0300 Subject: [PATCH 047/203] net/sched: mqprio: add extack to mqprio_parse_nlattr() [ Upstream commit 57f21bf85400abadac0cb2a4db5de1d663f8863f ] Netlink attribute parsing in mqprio is a minesweeper game, with many options having the possibility of being passed incorrectly and the user being none the wiser. Try to make errors less sour by giving user space some information regarding what went wrong. Signed-off-by: Vladimir Oltean Reviewed-by: Ferenc Fejes Reviewed-by: Simon Horman Acked-by: Jamal Hadi Salim Signed-off-by: Jakub Kicinski Stable-dep-of: 6c58c8816abb ("net/sched: mqprio: Add length check for TCA_MQPRIO_{MAX/MIN}_RATE64") Signed-off-by: Sasha Levin --- net/sched/sch_mqprio.c | 30 +++++++++++++++++++++++------- 1 file changed, 23 insertions(+), 7 deletions(-) diff --git a/net/sched/sch_mqprio.c b/net/sched/sch_mqprio.c index a5df5604e015..4ec222a5530d 100644 --- a/net/sched/sch_mqprio.c +++ b/net/sched/sch_mqprio.c @@ -131,7 +131,8 @@ static int parse_attr(struct nlattr *tb[], int maxtype, struct nlattr *nla, } static int mqprio_parse_nlattr(struct Qdisc *sch, struct tc_mqprio_qopt *qopt, - struct nlattr *opt) + struct nlattr *opt, + struct netlink_ext_ack *extack) { struct mqprio_sched *priv = qdisc_priv(sch); struct nlattr *tb[TCA_MQPRIO_MAX + 1]; @@ -143,8 +144,11 @@ static int mqprio_parse_nlattr(struct Qdisc *sch, struct tc_mqprio_qopt *qopt, if (err < 0) return err; - if (!qopt->hw) + if (!qopt->hw) { + NL_SET_ERR_MSG(extack, + "mqprio TCA_OPTIONS can only contain netlink attributes in hardware mode"); return -EINVAL; + } if (tb[TCA_MQPRIO_MODE]) { priv->flags |= TC_MQPRIO_F_MODE; @@ -157,13 +161,19 @@ static int mqprio_parse_nlattr(struct Qdisc *sch, struct tc_mqprio_qopt *qopt, } if (tb[TCA_MQPRIO_MIN_RATE64]) { - if (priv->shaper != TC_MQPRIO_SHAPER_BW_RATE) + if (priv->shaper != TC_MQPRIO_SHAPER_BW_RATE) { + NL_SET_ERR_MSG_ATTR(extack, tb[TCA_MQPRIO_MIN_RATE64], + "min_rate accepted only when shaper is in bw_rlimit mode"); return -EINVAL; + } i = 0; nla_for_each_nested(attr, tb[TCA_MQPRIO_MIN_RATE64], rem) { - if (nla_type(attr) != TCA_MQPRIO_MIN_RATE64) + if (nla_type(attr) != TCA_MQPRIO_MIN_RATE64) { + NL_SET_ERR_MSG_ATTR(extack, attr, + "Attribute type expected to be TCA_MQPRIO_MIN_RATE64"); return -EINVAL; + } if (i >= qopt->num_tc) break; priv->min_rate[i] = *(u64 *)nla_data(attr); @@ -173,13 +183,19 @@ static int mqprio_parse_nlattr(struct Qdisc *sch, struct tc_mqprio_qopt *qopt, } if (tb[TCA_MQPRIO_MAX_RATE64]) { - if (priv->shaper != TC_MQPRIO_SHAPER_BW_RATE) + if (priv->shaper != TC_MQPRIO_SHAPER_BW_RATE) { + NL_SET_ERR_MSG_ATTR(extack, tb[TCA_MQPRIO_MAX_RATE64], + "max_rate accepted only when shaper is in bw_rlimit mode"); return -EINVAL; + } i = 0; nla_for_each_nested(attr, tb[TCA_MQPRIO_MAX_RATE64], rem) { - if (nla_type(attr) != TCA_MQPRIO_MAX_RATE64) + if (nla_type(attr) != TCA_MQPRIO_MAX_RATE64) { + NL_SET_ERR_MSG_ATTR(extack, attr, + "Attribute type expected to be TCA_MQPRIO_MAX_RATE64"); return -EINVAL; + } if (i >= qopt->num_tc) break; priv->max_rate[i] = *(u64 *)nla_data(attr); @@ -224,7 +240,7 @@ static int mqprio_init(struct Qdisc *sch, struct nlattr *opt, len = nla_len(opt) - NLA_ALIGN(sizeof(*qopt)); if (len > 0) { - err = mqprio_parse_nlattr(sch, qopt, opt); + err = mqprio_parse_nlattr(sch, qopt, opt, extack); if (err) return err; } From f40f7a858b3b4b2efdc22b524426ea51a0c004fd Mon Sep 17 00:00:00 2001 From: Lin Ma Date: Tue, 25 Jul 2023 10:42:27 +0800 Subject: [PATCH 048/203] net/sched: mqprio: Add length check for TCA_MQPRIO_{MAX/MIN}_RATE64 [ Upstream commit 6c58c8816abb7b93b21fa3b1d0c1726402e5e568 ] The nla_for_each_nested parsing in function mqprio_parse_nlattr() does not check the length of the nested attribute. This can lead to an out-of-attribute read and allow a malformed nlattr (e.g., length 0) to be viewed as 8 byte integer and passed to priv->max_rate/min_rate. This patch adds the check based on nla_len() when check the nla_type(), which ensures that the length of these two attribute must equals sizeof(u64). Fixes: 4e8b86c06269 ("mqprio: Introduce new hardware offload mode and shaper in mqprio") Reviewed-by: Victor Nogueira Signed-off-by: Lin Ma Link: https://lore.kernel.org/r/20230725024227.426561-1-linma@zju.edu.cn Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- net/sched/sch_mqprio.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/net/sched/sch_mqprio.c b/net/sched/sch_mqprio.c index 4ec222a5530d..56d3dc5e95c7 100644 --- a/net/sched/sch_mqprio.c +++ b/net/sched/sch_mqprio.c @@ -174,6 +174,13 @@ static int mqprio_parse_nlattr(struct Qdisc *sch, struct tc_mqprio_qopt *qopt, "Attribute type expected to be TCA_MQPRIO_MIN_RATE64"); return -EINVAL; } + + if (nla_len(attr) != sizeof(u64)) { + NL_SET_ERR_MSG_ATTR(extack, attr, + "Attribute TCA_MQPRIO_MIN_RATE64 expected to have 8 bytes length"); + return -EINVAL; + } + if (i >= qopt->num_tc) break; priv->min_rate[i] = *(u64 *)nla_data(attr); @@ -196,6 +203,13 @@ static int mqprio_parse_nlattr(struct Qdisc *sch, struct tc_mqprio_qopt *qopt, "Attribute type expected to be TCA_MQPRIO_MAX_RATE64"); return -EINVAL; } + + if (nla_len(attr) != sizeof(u64)) { + NL_SET_ERR_MSG_ATTR(extack, attr, + "Attribute TCA_MQPRIO_MAX_RATE64 expected to have 8 bytes length"); + return -EINVAL; + } + if (i >= qopt->num_tc) break; priv->max_rate[i] = *(u64 *)nla_data(attr); From 3661bab5afcb5fc20228d8996e1d24c1d8008831 Mon Sep 17 00:00:00 2001 From: Yuanjun Gong Date: Tue, 25 Jul 2023 11:27:26 +0800 Subject: [PATCH 049/203] benet: fix return value check in be_lancer_xmit_workarounds() [ Upstream commit 5c85f7065718a949902b238a6abd8fc907c5d3e0 ] in be_lancer_xmit_workarounds(), it should go to label 'tx_drop' if an unexpected value is returned by pskb_trim(). Fixes: 93040ae5cc8d ("be2net: Fix to trim skb for padded vlan packets to workaround an ASIC Bug") Signed-off-by: Yuanjun Gong Link: https://lore.kernel.org/r/20230725032726.15002-1-ruc_gongyuanjun@163.com Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- drivers/net/ethernet/emulex/benet/be_main.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/emulex/benet/be_main.c b/drivers/net/ethernet/emulex/benet/be_main.c index 81be560a2643..52b399aa3213 100644 --- a/drivers/net/ethernet/emulex/benet/be_main.c +++ b/drivers/net/ethernet/emulex/benet/be_main.c @@ -1139,7 +1139,8 @@ static struct sk_buff *be_lancer_xmit_workarounds(struct be_adapter *adapter, (lancer_chip(adapter) || BE3_chip(adapter) || skb_vlan_tag_present(skb)) && is_ipv4_pkt(skb)) { ip = (struct iphdr *)ip_hdr(skb); - pskb_trim(skb, eth_hdr_len + ntohs(ip->tot_len)); + if (unlikely(pskb_trim(skb, eth_hdr_len + ntohs(ip->tot_len)))) + goto tx_drop; } /* If vlan tag is already inlined in the packet, skip HW VLAN From d03de937765f82c48876c615a46ce6ad2ea0c2c7 Mon Sep 17 00:00:00 2001 From: Yuanjun Gong Date: Tue, 25 Jul 2023 14:48:10 +0800 Subject: [PATCH 050/203] tipc: check return value of pskb_trim() [ Upstream commit e46e06ffc6d667a89b979701288e2264f45e6a7b ] goto free_skb if an unexpected result is returned by pskb_tirm() in tipc_crypto_rcv_complete(). Fixes: fc1b6d6de220 ("tipc: introduce TIPC encryption & authentication") Signed-off-by: Yuanjun Gong Reviewed-by: Tung Nguyen Link: https://lore.kernel.org/r/20230725064810.5820-1-ruc_gongyuanjun@163.com Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- net/tipc/crypto.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/tipc/crypto.c b/net/tipc/crypto.c index de63d6d41645..2784d6989211 100644 --- a/net/tipc/crypto.c +++ b/net/tipc/crypto.c @@ -1964,7 +1964,8 @@ rcv: skb_reset_network_header(*skb); skb_pull(*skb, tipc_ehdr_size(ehdr)); - pskb_trim(*skb, (*skb)->len - aead->authsize); + if (pskb_trim(*skb, (*skb)->len - aead->authsize)) + goto free_skb; /* Validate TIPCv2 message */ if (unlikely(!tipc_msg_validate(skb))) { From 9d1fd118bcb478f57f2988358a1883d178f3306e Mon Sep 17 00:00:00 2001 From: Fedor Pchelkin Date: Wed, 26 Jul 2023 00:46:25 +0300 Subject: [PATCH 051/203] tipc: stop tipc crypto on failure in tipc_node_create [ Upstream commit de52e17326c3e9a719c9ead4adb03467b8fae0ef ] If tipc_link_bc_create() fails inside tipc_node_create() for a newly allocated tipc node then we should stop its tipc crypto and free the resources allocated with a call to tipc_crypto_start(). As the node ref is initialized to one to that point, just put the ref on tipc_link_bc_create() error case that would lead to tipc_node_free() be eventually executed and properly clean the node and its crypto resources. Found by Linux Verification Center (linuxtesting.org). Fixes: cb8092d70a6f ("tipc: move bc link creation back to tipc_node_create") Suggested-by: Xin Long Signed-off-by: Fedor Pchelkin Reviewed-by: Xin Long Link: https://lore.kernel.org/r/20230725214628.25246-1-pchelkin@ispras.ru Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- net/tipc/node.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/tipc/node.c b/net/tipc/node.c index 38f61dccb855..9e3cfeb82a23 100644 --- a/net/tipc/node.c +++ b/net/tipc/node.c @@ -567,7 +567,7 @@ update: n->capabilities, &n->bc_entry.inputq1, &n->bc_entry.namedq, snd_l, &n->bc_entry.link)) { pr_warn("Broadcast rcv link creation failed, no memory\n"); - kfree(n); + tipc_node_put(n); n = NULL; goto exit; } From 08aa5a5297e6d7d1b78ecafe7f7c47e339eabcde Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Thu, 29 Jun 2023 09:07:37 +0300 Subject: [PATCH 052/203] RDMA/mlx4: Make check for invalid flags stricter [ Upstream commit d64b1ee12a168030fbb3e0aebf7bce49e9a07589 ] This code is trying to ensure that only the flags specified in the list are allowed. The problem is that ucmd->rx_hash_fields_mask is a u64 and the flags are an enum which is treated as a u32 in this context. That means the test doesn't check whether the highest 32 bits are zero. Fixes: 4d02ebd9bbbd ("IB/mlx4: Fix RSS hash fields restrictions") Signed-off-by: Dan Carpenter Link: https://lore.kernel.org/r/233ed975-982d-422a-b498-410f71d8a101@moroto.mountain Signed-off-by: Leon Romanovsky Signed-off-by: Sasha Levin --- drivers/infiniband/hw/mlx4/qp.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c index 255194029e2d..50b355e34445 100644 --- a/drivers/infiniband/hw/mlx4/qp.c +++ b/drivers/infiniband/hw/mlx4/qp.c @@ -530,15 +530,15 @@ static int set_qp_rss(struct mlx4_ib_dev *dev, struct mlx4_ib_rss *rss_ctx, return (-EOPNOTSUPP); } - if (ucmd->rx_hash_fields_mask & ~(MLX4_IB_RX_HASH_SRC_IPV4 | - MLX4_IB_RX_HASH_DST_IPV4 | - MLX4_IB_RX_HASH_SRC_IPV6 | - MLX4_IB_RX_HASH_DST_IPV6 | - MLX4_IB_RX_HASH_SRC_PORT_TCP | - MLX4_IB_RX_HASH_DST_PORT_TCP | - MLX4_IB_RX_HASH_SRC_PORT_UDP | - MLX4_IB_RX_HASH_DST_PORT_UDP | - MLX4_IB_RX_HASH_INNER)) { + if (ucmd->rx_hash_fields_mask & ~(u64)(MLX4_IB_RX_HASH_SRC_IPV4 | + MLX4_IB_RX_HASH_DST_IPV4 | + MLX4_IB_RX_HASH_SRC_IPV6 | + MLX4_IB_RX_HASH_DST_IPV6 | + MLX4_IB_RX_HASH_SRC_PORT_TCP | + MLX4_IB_RX_HASH_DST_PORT_TCP | + MLX4_IB_RX_HASH_SRC_PORT_UDP | + MLX4_IB_RX_HASH_DST_PORT_UDP | + MLX4_IB_RX_HASH_INNER)) { pr_debug("RX Hash fields_mask has unsupported mask (0x%llx)\n", ucmd->rx_hash_fields_mask); return (-EOPNOTSUPP); From 5c9e03b8675623c5a028b2e2b73a873d1d8bbdba Mon Sep 17 00:00:00 2001 From: Dmitry Baryshkov Date: Fri, 7 Jul 2023 22:39:32 +0300 Subject: [PATCH 053/203] drm/msm/dpu: drop enum dpu_core_perf_data_bus_id [ Upstream commit e8383f5cf1b3573ce140a80bfbfd809278ab16d6 ] Drop the leftover of bus-client -> interconnect conversion, the enum dpu_core_perf_data_bus_id. Fixes: cb88482e2570 ("drm/msm/dpu: clean up references of DPU custom bus scaling") Reviewed-by: Konrad Dybcio Reviewed-by: Abhinav Kumar Signed-off-by: Dmitry Baryshkov Patchwork: https://patchwork.freedesktop.org/patch/546048/ Link: https://lore.kernel.org/r/20230707193942.3806526-2-dmitry.baryshkov@linaro.org Signed-off-by: Abhinav Kumar Signed-off-by: Sasha Levin --- drivers/gpu/drm/msm/disp/dpu1/dpu_core_perf.h | 13 ------------- 1 file changed, 13 deletions(-) diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_core_perf.h b/drivers/gpu/drm/msm/disp/dpu1/dpu_core_perf.h index cf4b9b5964c6..cd6c3518ba02 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_core_perf.h +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_core_perf.h @@ -14,19 +14,6 @@ #define DPU_PERF_DEFAULT_MAX_CORE_CLK_RATE 412500000 -/** - * enum dpu_core_perf_data_bus_id - data bus identifier - * @DPU_CORE_PERF_DATA_BUS_ID_MNOC: DPU/MNOC data bus - * @DPU_CORE_PERF_DATA_BUS_ID_LLCC: MNOC/LLCC data bus - * @DPU_CORE_PERF_DATA_BUS_ID_EBI: LLCC/EBI data bus - */ -enum dpu_core_perf_data_bus_id { - DPU_CORE_PERF_DATA_BUS_ID_MNOC, - DPU_CORE_PERF_DATA_BUS_ID_LLCC, - DPU_CORE_PERF_DATA_BUS_ID_EBI, - DPU_CORE_PERF_DATA_BUS_ID_MAX, -}; - /** * struct dpu_core_perf_params - definition of performance parameters * @max_per_pipe_ib: maximum instantaneous bandwidth request From 62a8a4cafa9610630fdddfd541b16bc92f48b1be Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Tue, 11 Jul 2023 10:54:07 -0700 Subject: [PATCH 054/203] drm/msm/adreno: Fix snapshot BINDLESS_DATA size [ Upstream commit bd846ceee9c478d0397428f02696602ba5eb264a ] The incorrect size was causing "CP | AHB bus error" when snapshotting the GPU state on a6xx gen4 (a660 family). Closes: https://gitlab.freedesktop.org/drm/msm/-/issues/26 Signed-off-by: Rob Clark Reviewed-by: Akhil P Oommen Fixes: 1707add81551 ("drm/msm/a6xx: Add a6xx gpu state") Patchwork: https://patchwork.freedesktop.org/patch/546763/ Signed-off-by: Sasha Levin --- drivers/gpu/drm/msm/adreno/a6xx_gpu_state.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu_state.h b/drivers/gpu/drm/msm/adreno/a6xx_gpu_state.h index 2fb58b7098e4..3bd2065a9d30 100644 --- a/drivers/gpu/drm/msm/adreno/a6xx_gpu_state.h +++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu_state.h @@ -200,7 +200,7 @@ static const struct a6xx_shader_block { SHADER(A6XX_SP_LB_3_DATA, 0x800), SHADER(A6XX_SP_LB_4_DATA, 0x800), SHADER(A6XX_SP_LB_5_DATA, 0x200), - SHADER(A6XX_SP_CB_BINDLESS_DATA, 0x2000), + SHADER(A6XX_SP_CB_BINDLESS_DATA, 0x800), SHADER(A6XX_SP_CB_LEGACY_DATA, 0x280), SHADER(A6XX_SP_UAV_DATA, 0x80), SHADER(A6XX_SP_INST_TAG, 0x80), From b0100bdb9dcd405f00248b1d4b6426bf14a2e576 Mon Sep 17 00:00:00 2001 From: Thomas Bogendoerfer Date: Thu, 13 Jul 2023 16:16:58 +0200 Subject: [PATCH 055/203] RDMA/mthca: Fix crash when polling CQ for shared QPs [ Upstream commit dc52aadbc1849cbe3fcf6bc54d35f6baa396e0a1 ] Commit 21c2fe94abb2 ("RDMA/mthca: Combine special QP struct with mthca QP") introduced a new struct mthca_sqp which doesn't contain struct mthca_qp any longer. Placing a pointer of this new struct into qptable leads to crashes, because mthca_poll_one() expects a qp pointer. Fix this by putting the correct pointer into qptable. Fixes: 21c2fe94abb2 ("RDMA/mthca: Combine special QP struct with mthca QP") Signed-off-by: Thomas Bogendoerfer Link: https://lore.kernel.org/r/20230713141658.9426-1-tbogendoerfer@suse.de Signed-off-by: Leon Romanovsky Signed-off-by: Sasha Levin --- drivers/infiniband/hw/mthca/mthca_qp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/mthca/mthca_qp.c b/drivers/infiniband/hw/mthca/mthca_qp.c index 08a2a7afafd3..3f57f7dfb822 100644 --- a/drivers/infiniband/hw/mthca/mthca_qp.c +++ b/drivers/infiniband/hw/mthca/mthca_qp.c @@ -1390,7 +1390,7 @@ int mthca_alloc_sqp(struct mthca_dev *dev, if (mthca_array_get(&dev->qp_table.qp, mqpn)) err = -EBUSY; else - mthca_array_set(&dev->qp_table.qp, mqpn, qp->sqp); + mthca_array_set(&dev->qp_table.qp, mqpn, qp); spin_unlock_irq(&dev->qp_table.lock); if (err) From 94bac776cd273b694d3dbb7b092de120013b3af5 Mon Sep 17 00:00:00 2001 From: Gaosheng Cui Date: Mon, 17 Jul 2023 09:47:38 +0800 Subject: [PATCH 056/203] drm/msm: Fix IS_ERR_OR_NULL() vs NULL check in a5xx_submit_in_rb() [ Upstream commit 6e8a996563ecbe68e49c49abd4aaeef69f11f2dc ] The msm_gem_get_vaddr() returns an ERR_PTR() on failure, and a null is catastrophic here, so we should use IS_ERR_OR_NULL() to check the return value. Fixes: 6a8bd08d0465 ("drm/msm: add sudo flag to submit ioctl") Signed-off-by: Gaosheng Cui Reviewed-by: Dmitry Baryshkov Reviewed-by: Abhinav Kumar Reviewed-by: Akhil P Oommen Patchwork: https://patchwork.freedesktop.org/patch/547712/ Signed-off-by: Rob Clark Signed-off-by: Sasha Levin --- drivers/gpu/drm/msm/adreno/a5xx_gpu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/msm/adreno/a5xx_gpu.c b/drivers/gpu/drm/msm/adreno/a5xx_gpu.c index 0fcba2bc26b8..9ae0e60ecac3 100644 --- a/drivers/gpu/drm/msm/adreno/a5xx_gpu.c +++ b/drivers/gpu/drm/msm/adreno/a5xx_gpu.c @@ -81,7 +81,7 @@ static void a5xx_submit_in_rb(struct msm_gpu *gpu, struct msm_gem_submit *submit * since we've already mapped it once in * submit_reloc() */ - if (WARN_ON(!ptr)) + if (WARN_ON(IS_ERR_OR_NULL(ptr))) return; for (i = 0; i < dwords; i++) { From ab6d14bc404100a8afe8446866fba70e0d30993a Mon Sep 17 00:00:00 2001 From: Matus Gajdos Date: Wed, 19 Jul 2023 18:47:29 +0200 Subject: [PATCH 057/203] ASoC: fsl_spdif: Silence output on stop [ Upstream commit 0e4c2b6b0c4a4b4014d9424c27e5e79d185229c5 ] Clear TX registers on stop to prevent the SPDIF interface from sending last written word over and over again. Fixes: a2388a498ad2 ("ASoC: fsl: Add S/PDIF CPU DAI driver") Signed-off-by: Matus Gajdos Reviewed-by: Fabio Estevam Link: https://lore.kernel.org/r/20230719164729.19969-1-matuszpd@gmail.com Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- sound/soc/fsl/fsl_spdif.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sound/soc/fsl/fsl_spdif.c b/sound/soc/fsl/fsl_spdif.c index d01e8d516df1..64b85b786bf6 100644 --- a/sound/soc/fsl/fsl_spdif.c +++ b/sound/soc/fsl/fsl_spdif.c @@ -612,6 +612,8 @@ static int fsl_spdif_trigger(struct snd_pcm_substream *substream, case SNDRV_PCM_TRIGGER_PAUSE_PUSH: regmap_update_bits(regmap, REG_SPDIF_SCR, dmaen, 0); regmap_update_bits(regmap, REG_SPDIF_SIE, intr, 0); + regmap_write(regmap, REG_SPDIF_STL, 0x0); + regmap_write(regmap, REG_SPDIF_STR, 0x0); break; default: return -EINVAL; From d82bfe9686f3f952f4bbee1e77ef4c81ce04fe79 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Thu, 6 Jul 2023 13:14:12 -0700 Subject: [PATCH 058/203] block: Fix a source code comment in include/uapi/linux/blkzoned.h [ Upstream commit e0933b526fbfd937c4a8f4e35fcdd49f0e22d411 ] Fix the symbolic names for zone conditions in the blkzoned.h header file. Cc: Hannes Reinecke Cc: Damien Le Moal Fixes: 6a0cb1bc106f ("block: Implement support for zoned block devices") Signed-off-by: Bart Van Assche Reviewed-by: Damien Le Moal Link: https://lore.kernel.org/r/20230706201422.3987341-1-bvanassche@acm.org Signed-off-by: Jens Axboe Signed-off-by: Sasha Levin --- include/uapi/linux/blkzoned.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/include/uapi/linux/blkzoned.h b/include/uapi/linux/blkzoned.h index 656a326821a2..321965feee35 100644 --- a/include/uapi/linux/blkzoned.h +++ b/include/uapi/linux/blkzoned.h @@ -51,13 +51,13 @@ enum blk_zone_type { * * The Zone Condition state machine in the ZBC/ZAC standards maps the above * deinitions as: - * - ZC1: Empty | BLK_ZONE_EMPTY + * - ZC1: Empty | BLK_ZONE_COND_EMPTY * - ZC2: Implicit Open | BLK_ZONE_COND_IMP_OPEN * - ZC3: Explicit Open | BLK_ZONE_COND_EXP_OPEN - * - ZC4: Closed | BLK_ZONE_CLOSED - * - ZC5: Full | BLK_ZONE_FULL - * - ZC6: Read Only | BLK_ZONE_READONLY - * - ZC7: Offline | BLK_ZONE_OFFLINE + * - ZC4: Closed | BLK_ZONE_COND_CLOSED + * - ZC5: Full | BLK_ZONE_COND_FULL + * - ZC6: Read Only | BLK_ZONE_COND_READONLY + * - ZC7: Offline | BLK_ZONE_COND_OFFLINE * * Conditions 0x5 to 0xC are reserved by the current ZBC/ZAC spec and should * be considered invalid. From a43c761a7e1cdc44c34e0d696d525d4ce3d6926b Mon Sep 17 00:00:00 2001 From: Yu Kuai Date: Sat, 8 Jul 2023 17:21:51 +0800 Subject: [PATCH 059/203] dm raid: fix missing reconfig_mutex unlock in raid_ctr() error paths [ Upstream commit bae3028799dc4f1109acc4df37c8ff06f2d8f1a0 ] In the error paths 'bad_stripe_cache' and 'bad_check_reshape', 'reconfig_mutex' is still held after raid_ctr() returns. Fixes: 9dbd1aa3a81c ("dm raid: add reshaping support to the target") Signed-off-by: Yu Kuai Signed-off-by: Mike Snitzer Signed-off-by: Sasha Levin --- drivers/md/dm-raid.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/md/dm-raid.c b/drivers/md/dm-raid.c index a2d09c9c6e9f..1a25c8b01b6a 100644 --- a/drivers/md/dm-raid.c +++ b/drivers/md/dm-raid.c @@ -3278,15 +3278,19 @@ size_check: /* Try to adjust the raid4/5/6 stripe cache size to the stripe size */ if (rs_is_raid456(rs)) { r = rs_set_raid456_stripe_cache(rs); - if (r) + if (r) { + mddev_unlock(&rs->md); goto bad_stripe_cache; + } } /* Now do an early reshape check */ if (test_bit(RT_FLAG_RESHAPE_RS, &rs->runtime_flags)) { r = rs_check_reshape(rs); - if (r) + if (r) { + mddev_unlock(&rs->md); goto bad_check_reshape; + } /* Restore new, ctr requested layout to perform check */ rs_config_restore(rs, &rs_layout); @@ -3295,6 +3299,7 @@ size_check: r = rs->md.pers->check_reshape(&rs->md); if (r) { ti->error = "Reshape check failed"; + mddev_unlock(&rs->md); goto bad_check_reshape; } } From d6a1cf4ee5eba1f0b6640ddaf5afe04ee47c949c Mon Sep 17 00:00:00 2001 From: Yu Kuai Date: Sat, 8 Jul 2023 17:21:52 +0800 Subject: [PATCH 060/203] dm raid: clean up four equivalent goto tags in raid_ctr() [ Upstream commit e74c874eabe2e9173a8fbdad616cd89c70eb8ffd ] There are four equivalent goto tags in raid_ctr(), clean them up to use just one. There is no functional change and this is preparation to fix raid_ctr()'s unprotected md_stop(). Signed-off-by: Yu Kuai Signed-off-by: Mike Snitzer Stable-dep-of: 7d5fff8982a2 ("dm raid: protect md_stop() with 'reconfig_mutex'") Signed-off-by: Sasha Levin --- drivers/md/dm-raid.c | 27 +++++++++------------------ 1 file changed, 9 insertions(+), 18 deletions(-) diff --git a/drivers/md/dm-raid.c b/drivers/md/dm-raid.c index 1a25c8b01b6a..f3a489b1b6e9 100644 --- a/drivers/md/dm-raid.c +++ b/drivers/md/dm-raid.c @@ -3258,8 +3258,7 @@ size_check: r = md_start(&rs->md); if (r) { ti->error = "Failed to start raid array"; - mddev_unlock(&rs->md); - goto bad_md_start; + goto bad_unlock; } /* If raid4/5/6 journal mode explicitly requested (only possible with journal dev) -> set it */ @@ -3267,8 +3266,7 @@ size_check: r = r5c_journal_mode_set(&rs->md, rs->journal_dev.mode); if (r) { ti->error = "Failed to set raid4/5/6 journal mode"; - mddev_unlock(&rs->md); - goto bad_journal_mode_set; + goto bad_unlock; } } @@ -3278,19 +3276,15 @@ size_check: /* Try to adjust the raid4/5/6 stripe cache size to the stripe size */ if (rs_is_raid456(rs)) { r = rs_set_raid456_stripe_cache(rs); - if (r) { - mddev_unlock(&rs->md); - goto bad_stripe_cache; - } + if (r) + goto bad_unlock; } /* Now do an early reshape check */ if (test_bit(RT_FLAG_RESHAPE_RS, &rs->runtime_flags)) { r = rs_check_reshape(rs); - if (r) { - mddev_unlock(&rs->md); - goto bad_check_reshape; - } + if (r) + goto bad_unlock; /* Restore new, ctr requested layout to perform check */ rs_config_restore(rs, &rs_layout); @@ -3299,8 +3293,7 @@ size_check: r = rs->md.pers->check_reshape(&rs->md); if (r) { ti->error = "Reshape check failed"; - mddev_unlock(&rs->md); - goto bad_check_reshape; + goto bad_unlock; } } } @@ -3311,10 +3304,8 @@ size_check: mddev_unlock(&rs->md); return 0; -bad_md_start: -bad_journal_mode_set: -bad_stripe_cache: -bad_check_reshape: +bad_unlock: + mddev_unlock(&rs->md); md_stop(&rs->md); bad: raid_set_free(rs); From 82ce0ae87a96fde2824cf0727d01c5bef6fb7baa Mon Sep 17 00:00:00 2001 From: Yu Kuai Date: Sat, 8 Jul 2023 17:21:53 +0800 Subject: [PATCH 061/203] dm raid: protect md_stop() with 'reconfig_mutex' [ Upstream commit 7d5fff8982a2199d49ec067818af7d84d4f95ca0 ] __md_stop_writes() and __md_stop() will modify many fields that are protected by 'reconfig_mutex', and all the callers will grab 'reconfig_mutex' except for md_stop(). Also, update md_stop() to make certain 'reconfig_mutex' is held using lockdep_assert_held(). Fixes: 9d09e663d550 ("dm: raid456 basic support") Signed-off-by: Yu Kuai Signed-off-by: Mike Snitzer Signed-off-by: Sasha Levin --- drivers/md/dm-raid.c | 4 +++- drivers/md/md.c | 2 ++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/md/dm-raid.c b/drivers/md/dm-raid.c index f3a489b1b6e9..140bdf2a6ee1 100644 --- a/drivers/md/dm-raid.c +++ b/drivers/md/dm-raid.c @@ -3305,8 +3305,8 @@ size_check: return 0; bad_unlock: - mddev_unlock(&rs->md); md_stop(&rs->md); + mddev_unlock(&rs->md); bad: raid_set_free(rs); @@ -3317,7 +3317,9 @@ static void raid_dtr(struct dm_target *ti) { struct raid_set *rs = ti->private; + mddev_lock_nointr(&rs->md); md_stop(&rs->md); + mddev_unlock(&rs->md); raid_set_free(rs); } diff --git a/drivers/md/md.c b/drivers/md/md.c index ae0a857d6076..6efe49f7bdf5 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -6316,6 +6316,8 @@ static void __md_stop(struct mddev *mddev) void md_stop(struct mddev *mddev) { + lockdep_assert_held(&mddev->reconfig_mutex); + /* stop the array and free an attached data structures. * This is called from dm-raid */ From e410839c0cd863a1595c41820916289bc86272dd Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 26 Jul 2023 22:33:22 +0200 Subject: [PATCH 062/203] ata: pata_ns87415: mark ns87560_tf_read static [ Upstream commit 3fc2febb0f8ffae354820c1772ec008733237cfa ] The global function triggers a warning because of the missing prototype drivers/ata/pata_ns87415.c:263:6: warning: no previous prototype for 'ns87560_tf_read' [-Wmissing-prototypes] 263 | void ns87560_tf_read(struct ata_port *ap, struct ata_taskfile *tf) There are no other references to this, so just make it static. Fixes: c4b5b7b6c4423 ("pata_ns87415: Initial cut at 87415/87560 IDE support") Reviewed-by: Sergey Shtylyov Reviewed-by: Serge Semin Signed-off-by: Arnd Bergmann Signed-off-by: Damien Le Moal Signed-off-by: Sasha Levin --- drivers/ata/pata_ns87415.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/ata/pata_ns87415.c b/drivers/ata/pata_ns87415.c index 1532b2e3c672..921738577440 100644 --- a/drivers/ata/pata_ns87415.c +++ b/drivers/ata/pata_ns87415.c @@ -260,7 +260,7 @@ static u8 ns87560_check_status(struct ata_port *ap) * LOCKING: * Inherited from caller. */ -void ns87560_tf_read(struct ata_port *ap, struct ata_taskfile *tf) +static void ns87560_tf_read(struct ata_port *ap, struct ata_taskfile *tf) { struct ata_ioports *ioaddr = &ap->ioaddr; From 0efbdbc4530c8f859f0a658ab41a18770ce98542 Mon Sep 17 00:00:00 2001 From: Zheng Yejian Date: Mon, 24 Jul 2023 13:40:40 +0800 Subject: [PATCH 063/203] ring-buffer: Fix wrong stat of cpu_buffer->read [ Upstream commit 2d093282b0d4357373497f65db6a05eb0c28b7c8 ] When pages are removed in rb_remove_pages(), 'cpu_buffer->read' is set to 0 in order to make sure any read iterators reset themselves. However, this will mess 'entries' stating, see following steps: # cd /sys/kernel/tracing/ # 1. Enlarge ring buffer prepare for later reducing: # echo 20 > per_cpu/cpu0/buffer_size_kb # 2. Write a log into ring buffer of cpu0: # taskset -c 0 echo "hello1" > trace_marker # 3. Read the log: # cat per_cpu/cpu0/trace_pipe <...>-332 [000] ..... 62.406844: tracing_mark_write: hello1 # 4. Stop reading and see the stats, now 0 entries, and 1 event readed: # cat per_cpu/cpu0/stats entries: 0 [...] read events: 1 # 5. Reduce the ring buffer # echo 7 > per_cpu/cpu0/buffer_size_kb # 6. Now entries became unexpected 1 because actually no entries!!! # cat per_cpu/cpu0/stats entries: 1 [...] read events: 0 To fix it, introduce 'page_removed' field to count total removed pages since last reset, then use it to let read iterators reset themselves instead of changing the 'read' pointer. Link: https://lore.kernel.org/linux-trace-kernel/20230724054040.3489499-1-zhengyejian1@huawei.com Cc: Cc: Fixes: 83f40318dab0 ("ring-buffer: Make removal of ring buffer pages atomic") Signed-off-by: Zheng Yejian Signed-off-by: Steven Rostedt (Google) Signed-off-by: Sasha Levin --- kernel/trace/ring_buffer.c | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index 593e446f6c48..3b8c53264441 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -526,6 +526,8 @@ struct ring_buffer_per_cpu { rb_time_t write_stamp; rb_time_t before_stamp; u64 read_stamp; + /* pages removed since last reset */ + unsigned long pages_removed; /* ring buffer pages to update, > 0 to add, < 0 to remove */ long nr_pages_to_update; struct list_head new_pages; /* new pages to add */ @@ -561,6 +563,7 @@ struct ring_buffer_iter { struct buffer_page *head_page; struct buffer_page *cache_reader_page; unsigned long cache_read; + unsigned long cache_pages_removed; u64 read_stamp; u64 page_stamp; struct ring_buffer_event *event; @@ -1833,6 +1836,8 @@ rb_remove_pages(struct ring_buffer_per_cpu *cpu_buffer, unsigned long nr_pages) to_remove = rb_list_head(to_remove)->next; head_bit |= (unsigned long)to_remove & RB_PAGE_HEAD; } + /* Read iterators need to reset themselves when some pages removed */ + cpu_buffer->pages_removed += nr_removed; next_page = rb_list_head(to_remove)->next; @@ -1854,12 +1859,6 @@ rb_remove_pages(struct ring_buffer_per_cpu *cpu_buffer, unsigned long nr_pages) cpu_buffer->head_page = list_entry(next_page, struct buffer_page, list); - /* - * change read pointer to make sure any read iterators reset - * themselves - */ - cpu_buffer->read = 0; - /* pages are removed, resume tracing and then free the pages */ atomic_dec(&cpu_buffer->record_disabled); raw_spin_unlock_irq(&cpu_buffer->reader_lock); @@ -4105,6 +4104,7 @@ static void rb_iter_reset(struct ring_buffer_iter *iter) iter->cache_reader_page = iter->head_page; iter->cache_read = cpu_buffer->read; + iter->cache_pages_removed = cpu_buffer->pages_removed; if (iter->head) { iter->read_stamp = cpu_buffer->read_stamp; @@ -4558,12 +4558,13 @@ rb_iter_peek(struct ring_buffer_iter *iter, u64 *ts) buffer = cpu_buffer->buffer; /* - * Check if someone performed a consuming read to - * the buffer. A consuming read invalidates the iterator - * and we need to reset the iterator in this case. + * Check if someone performed a consuming read to the buffer + * or removed some pages from the buffer. In these cases, + * iterator was invalidated and we need to reset it. */ if (unlikely(iter->cache_read != cpu_buffer->read || - iter->cache_reader_page != cpu_buffer->reader_page)) + iter->cache_reader_page != cpu_buffer->reader_page || + iter->cache_pages_removed != cpu_buffer->pages_removed)) rb_iter_reset(iter); again: @@ -5005,6 +5006,7 @@ rb_reset_cpu(struct ring_buffer_per_cpu *cpu_buffer) cpu_buffer->last_overrun = 0; rb_head_page_activate(cpu_buffer); + cpu_buffer->pages_removed = 0; } /* Must have disabled the cpu buffer then done a synchronize_rcu */ From a6d2fd1703cdc8ecfc3e73987e0fb7474ae2b074 Mon Sep 17 00:00:00 2001 From: Zheng Yejian Date: Wed, 26 Jul 2023 17:58:04 +0800 Subject: [PATCH 064/203] tracing: Fix warning in trace_buffered_event_disable() [ Upstream commit dea499781a1150d285c62b26659f62fb00824fce ] Warning happened in trace_buffered_event_disable() at WARN_ON_ONCE(!trace_buffered_event_ref) Call Trace: ? __warn+0xa5/0x1b0 ? trace_buffered_event_disable+0x189/0x1b0 __ftrace_event_enable_disable+0x19e/0x3e0 free_probe_data+0x3b/0xa0 unregister_ftrace_function_probe_func+0x6b8/0x800 event_enable_func+0x2f0/0x3d0 ftrace_process_regex.isra.0+0x12d/0x1b0 ftrace_filter_write+0xe6/0x140 vfs_write+0x1c9/0x6f0 [...] The cause of the warning is in __ftrace_event_enable_disable(), trace_buffered_event_enable() was called once while trace_buffered_event_disable() was called twice. Reproduction script show as below, for analysis, see the comments: ``` #!/bin/bash cd /sys/kernel/tracing/ # 1. Register a 'disable_event' command, then: # 1) SOFT_DISABLED_BIT was set; # 2) trace_buffered_event_enable() was called first time; echo 'cmdline_proc_show:disable_event:initcall:initcall_finish' > \ set_ftrace_filter # 2. Enable the event registered, then: # 1) SOFT_DISABLED_BIT was cleared; # 2) trace_buffered_event_disable() was called first time; echo 1 > events/initcall/initcall_finish/enable # 3. Try to call into cmdline_proc_show(), then SOFT_DISABLED_BIT was # set again!!! cat /proc/cmdline # 4. Unregister the 'disable_event' command, then: # 1) SOFT_DISABLED_BIT was cleared again; # 2) trace_buffered_event_disable() was called second time!!! echo '!cmdline_proc_show:disable_event:initcall:initcall_finish' > \ set_ftrace_filter ``` To fix it, IIUC, we can change to call trace_buffered_event_enable() at fist time soft-mode enabled, and call trace_buffered_event_disable() at last time soft-mode disabled. Link: https://lore.kernel.org/linux-trace-kernel/20230726095804.920457-1-zhengyejian1@huawei.com Cc: Fixes: 0fc1b09ff1ff ("tracing: Use temp buffer when filtering events") Signed-off-by: Zheng Yejian Signed-off-by: Steven Rostedt (Google) Signed-off-by: Sasha Levin --- kernel/trace/trace_events.c | 14 ++++---------- 1 file changed, 4 insertions(+), 10 deletions(-) diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index f8ed66f38175..a46d34d840f6 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -371,7 +371,6 @@ static int __ftrace_event_enable_disable(struct trace_event_file *file, { struct trace_event_call *call = file->event_call; struct trace_array *tr = file->tr; - unsigned long file_flags = file->flags; int ret = 0; int disable; @@ -395,6 +394,8 @@ static int __ftrace_event_enable_disable(struct trace_event_file *file, break; disable = file->flags & EVENT_FILE_FL_SOFT_DISABLED; clear_bit(EVENT_FILE_FL_SOFT_MODE_BIT, &file->flags); + /* Disable use of trace_buffered_event */ + trace_buffered_event_disable(); } else disable = !(file->flags & EVENT_FILE_FL_SOFT_MODE); @@ -433,6 +434,8 @@ static int __ftrace_event_enable_disable(struct trace_event_file *file, if (atomic_inc_return(&file->sm_ref) > 1) break; set_bit(EVENT_FILE_FL_SOFT_MODE_BIT, &file->flags); + /* Enable use of trace_buffered_event */ + trace_buffered_event_enable(); } if (!(file->flags & EVENT_FILE_FL_ENABLED)) { @@ -472,15 +475,6 @@ static int __ftrace_event_enable_disable(struct trace_event_file *file, break; } - /* Enable or disable use of trace_buffered_event */ - if ((file_flags & EVENT_FILE_FL_SOFT_DISABLED) != - (file->flags & EVENT_FILE_FL_SOFT_DISABLED)) { - if (file->flags & EVENT_FILE_FL_SOFT_DISABLED) - trace_buffered_event_enable(); - else - trace_buffered_event_disable(); - } - return ret; } From da1b105dc66d74a748bc413f1cd0c2830a9781d7 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Tue, 4 Jul 2023 17:09:09 +0300 Subject: [PATCH 065/203] Revert "usb: gadget: tegra-xudc: Fix error check in tegra_xudc_powerdomain_init()" commit a8291be6b5dd465c22af229483dbac543a91e24e upstream. This reverts commit f08aa7c80dac27ee00fa6827f447597d2fba5465. The reverted commit was based on static analysis and a misunderstanding of how PTR_ERR() and NULLs are supposed to work. When a function returns both pointer errors and NULL then normally the NULL means "continue operating without a feature because it was deliberately turned off". The NULL should not be treated as a failure. If a driver cannot work when that feature is disabled then the KConfig should enforce that the function cannot return NULL. We should not need to test for it. In this driver, the bug means that probe cannot succeed when CONFIG_PM is disabled. Signed-off-by: Dan Carpenter Fixes: f08aa7c80dac ("usb: gadget: tegra-xudc: Fix error check in tegra_xudc_powerdomain_init()") Cc: stable Link: https://lore.kernel.org/r/ZKQoBa84U/ykEh3C@moroto Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/udc/tegra-xudc.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/usb/gadget/udc/tegra-xudc.c b/drivers/usb/gadget/udc/tegra-xudc.c index 66d5f6a85c84..c5f0fbb8ffe4 100644 --- a/drivers/usb/gadget/udc/tegra-xudc.c +++ b/drivers/usb/gadget/udc/tegra-xudc.c @@ -3693,15 +3693,15 @@ static int tegra_xudc_powerdomain_init(struct tegra_xudc *xudc) int err; xudc->genpd_dev_device = dev_pm_domain_attach_by_name(dev, "dev"); - if (IS_ERR_OR_NULL(xudc->genpd_dev_device)) { - err = PTR_ERR(xudc->genpd_dev_device) ? : -ENODATA; + if (IS_ERR(xudc->genpd_dev_device)) { + err = PTR_ERR(xudc->genpd_dev_device); dev_err(dev, "failed to get device power domain: %d\n", err); return err; } xudc->genpd_dev_ss = dev_pm_domain_attach_by_name(dev, "ss"); - if (IS_ERR_OR_NULL(xudc->genpd_dev_ss)) { - err = PTR_ERR(xudc->genpd_dev_ss) ? : -ENODATA; + if (IS_ERR(xudc->genpd_dev_ss)) { + err = PTR_ERR(xudc->genpd_dev_ss); dev_err(dev, "failed to get SuperSpeed power domain: %d\n", err); return err; } From 68e6287ac61dc22513cd39f02b9ac1fef28513e4 Mon Sep 17 00:00:00 2001 From: Zqiang Date: Fri, 14 Jul 2023 15:40:11 +0800 Subject: [PATCH 066/203] USB: gadget: Fix the memory leak in raw_gadget driver commit 83e30f2bf86ef7c38fbd476ed81a88522b620628 upstream. Currently, increasing raw_dev->count happens before invoke the raw_queue_event(), if the raw_queue_event() return error, invoke raw_release() will not trigger the dev_free() to be called. [ 268.905865][ T5067] raw-gadget.0 gadget.0: failed to queue event [ 268.912053][ T5067] udc dummy_udc.0: failed to start USB Raw Gadget: -12 [ 268.918885][ T5067] raw-gadget.0: probe of gadget.0 failed with error -12 [ 268.925956][ T5067] UDC core: USB Raw Gadget: couldn't find an available UDC or it's busy [ 268.934657][ T5067] misc raw-gadget: fail, usb_gadget_register_driver returned -16 BUG: memory leak [] kmalloc_trace+0x24/0x90 mm/slab_common.c:1076 [] kmalloc include/linux/slab.h:582 [inline] [] kzalloc include/linux/slab.h:703 [inline] [] dev_new drivers/usb/gadget/legacy/raw_gadget.c:191 [inline] [] raw_open+0x45/0x110 drivers/usb/gadget/legacy/raw_gadget.c:385 [] misc_open+0x1a9/0x1f0 drivers/char/misc.c:165 [] kmalloc_trace+0x24/0x90 mm/slab_common.c:1076 [] kmalloc include/linux/slab.h:582 [inline] [] raw_ioctl_init+0xdf/0x410 drivers/usb/gadget/legacy/raw_gadget.c:460 [] raw_ioctl+0x5f9/0x1120 drivers/usb/gadget/legacy/raw_gadget.c:1250 [] vfs_ioctl fs/ioctl.c:51 [inline] [] kmalloc_trace+0x24/0x90 mm/slab_common.c:1076 [] kmalloc include/linux/slab.h:582 [inline] [] kzalloc include/linux/slab.h:703 [inline] [] dummy_alloc_request+0x5a/0xe0 drivers/usb/gadget/udc/dummy_hcd.c:665 [] usb_ep_alloc_request+0x22/0xd0 drivers/usb/gadget/udc/core.c:196 [] gadget_bind+0x6d/0x370 drivers/usb/gadget/legacy/raw_gadget.c:292 This commit therefore invoke kref_get() under the condition that raw_queue_event() return success. Reported-by: syzbot+feb045d335c1fdde5bf7@syzkaller.appspotmail.com Cc: stable Closes: https://syzkaller.appspot.com/bug?extid=feb045d335c1fdde5bf7 Signed-off-by: Zqiang Reviewed-by: Andrey Konovalov Tested-by: Andrey Konovalov Link: https://lore.kernel.org/r/20230714074011.20989-1-qiang.zhang1211@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/legacy/raw_gadget.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/drivers/usb/gadget/legacy/raw_gadget.c b/drivers/usb/gadget/legacy/raw_gadget.c index b496ca937dee..ddb39e672801 100644 --- a/drivers/usb/gadget/legacy/raw_gadget.c +++ b/drivers/usb/gadget/legacy/raw_gadget.c @@ -309,13 +309,15 @@ static int gadget_bind(struct usb_gadget *gadget, dev->eps_num = i; spin_unlock_irqrestore(&dev->lock, flags); + ret = raw_queue_event(dev, USB_RAW_EVENT_CONNECT, 0, NULL); + if (ret < 0) { + dev_err(&gadget->dev, "failed to queue event\n"); + set_gadget_data(gadget, NULL); + return ret; + } + /* Matches kref_put() in gadget_unbind(). */ kref_get(&dev->count); - - ret = raw_queue_event(dev, USB_RAW_EVENT_CONNECT, 0, NULL); - if (ret < 0) - dev_err(&gadget->dev, "failed to queue event\n"); - return ret; } From 9e671a6116f4fb171429148efc4ef1c1cce7dd60 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Thu, 13 Jul 2023 16:57:41 +0200 Subject: [PATCH 067/203] serial: qcom-geni: drop bogus runtime pm state update commit 4dd8752a14ca0303fbdf0a6c68ff65f0a50bd2fa upstream. The runtime PM state should not be changed by drivers that do not implement runtime PM even if it happens to work around a bug in PM core. With the wake irq arming now fixed, drop the bogus runtime PM state update which left the device in active state (and could potentially prevent a parent device from suspending). Fixes: f3974413cf02 ("tty: serial: qcom_geni_serial: Wakeup IRQ cleanup") Cc: 5.6+ # 5.6+ Signed-off-by: Johan Hovold Reviewed-by: Tony Lindgren Signed-off-by: Rafael J. Wysocki Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/qcom_geni_serial.c | 7 ------- 1 file changed, 7 deletions(-) diff --git a/drivers/tty/serial/qcom_geni_serial.c b/drivers/tty/serial/qcom_geni_serial.c index f50ffc8076d8..65a0c4e2bb29 100644 --- a/drivers/tty/serial/qcom_geni_serial.c +++ b/drivers/tty/serial/qcom_geni_serial.c @@ -1468,13 +1468,6 @@ static int qcom_geni_serial_probe(struct platform_device *pdev) goto err; } - /* - * Set pm_runtime status as ACTIVE so that wakeup_irq gets - * enabled/disabled from dev_pm_arm_wake_irq during system - * suspend/resume respectively. - */ - pm_runtime_set_active(&pdev->dev); - if (port->wakeup_irq > 0) { device_init_wakeup(&pdev->dev, true); ret = dev_pm_set_dedicated_wake_irq(&pdev->dev, From 6209a7383d3a893cc712b45358e2be007cd0f128 Mon Sep 17 00:00:00 2001 From: Ruihong Luo Date: Thu, 13 Jul 2023 08:42:36 +0800 Subject: [PATCH 068/203] serial: 8250_dw: Preserve original value of DLF register MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 748c5ea8b8796ae8ee80b8d3a3d940570b588d59 upstream. Preserve the original value of the Divisor Latch Fraction (DLF) register. When the DLF register is modified without preservation, it can disrupt the baudrate settings established by firmware or bootloader, leading to data corruption and the generation of unreadable or distorted characters. Fixes: 701c5e73b296 ("serial: 8250_dw: add fractional divisor support") Cc: stable Signed-off-by: Ruihong Luo Link: https://lore.kernel.org/stable/20230713004235.35904-1-colorsu1922%40gmail.com Reviewed-by: Ilpo Järvinen Reviewed-by: Andy Shevchenko Link: https://lore.kernel.org/r/20230713004235.35904-1-colorsu1922@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/8250/8250_dwlib.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/tty/serial/8250/8250_dwlib.c b/drivers/tty/serial/8250/8250_dwlib.c index 6d6a78eead3e..1cf229cca592 100644 --- a/drivers/tty/serial/8250/8250_dwlib.c +++ b/drivers/tty/serial/8250/8250_dwlib.c @@ -80,7 +80,7 @@ static void dw8250_set_divisor(struct uart_port *p, unsigned int baud, void dw8250_setup_port(struct uart_port *p) { struct uart_8250_port *up = up_to_u8250p(p); - u32 reg; + u32 reg, old_dlf; /* * If the Component Version Register returns zero, we know that @@ -93,9 +93,11 @@ void dw8250_setup_port(struct uart_port *p) dev_dbg(p->dev, "Designware UART version %c.%c%c\n", (reg >> 24) & 0xff, (reg >> 16) & 0xff, (reg >> 8) & 0xff); + /* Preserve value written by firmware or bootloader */ + old_dlf = dw8250_readl_ext(p, DW_UART_DLF); dw8250_writel_ext(p, DW_UART_DLF, ~0U); reg = dw8250_readl_ext(p, DW_UART_DLF); - dw8250_writel_ext(p, DW_UART_DLF, 0); + dw8250_writel_ext(p, DW_UART_DLF, old_dlf); if (reg) { struct dw8250_port_data *d = p->private_data; From 1037ee1dbf739a12b2861ea15197f3fb02fc975b Mon Sep 17 00:00:00 2001 From: Samuel Holland Date: Fri, 23 Jun 2023 23:01:59 -0700 Subject: [PATCH 069/203] serial: sifive: Fix sifive_serial_console_setup() section commit 9b8fef6345d5487137d4193bb0a0eae2203c284e upstream. This function is called indirectly from the platform driver probe function. Even if the driver is built in, it may be probed after free_initmem() due to deferral or unbinding/binding via sysfs. Thus the function cannot be marked as __init. Fixes: 45c054d0815b ("tty: serial: add driver for the SiFive UART") Cc: stable Signed-off-by: Samuel Holland Link: https://lore.kernel.org/r/20230624060159.3401369-1-samuel.holland@sifive.com Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/sifive.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/tty/serial/sifive.c b/drivers/tty/serial/sifive.c index 91952be01074..c234114b5052 100644 --- a/drivers/tty/serial/sifive.c +++ b/drivers/tty/serial/sifive.c @@ -844,7 +844,7 @@ static void sifive_serial_console_write(struct console *co, const char *s, local_irq_restore(flags); } -static int __init sifive_serial_console_setup(struct console *co, char *options) +static int sifive_serial_console_setup(struct console *co, char *options) { struct sifive_serial_port *ssp; int baud = SIFIVE_DEFAULT_BAUD_RATE; From c704cb21138bad853d96b5005c6d4722659cf95a Mon Sep 17 00:00:00 2001 From: Jerry Meng Date: Thu, 29 Jun 2023 17:35:22 +0800 Subject: [PATCH 070/203] USB: serial: option: support Quectel EM060K_128 commit 4f7cab49cecee16120d27c1734cfdf3d6c0e5329 upstream. EM060K_128 is EM060K's sub-model, having the same name "Quectel EM060K-GL" MBIM + GNSS + DIAG + NMEA + AT + QDSS + DPL T: Bus=03 Lev=01 Prnt=01 Port=01 Cnt=02 Dev#= 8 Spd=480 MxCh= 0 D: Ver= 2.00 Cls=00(>ifc ) Sub=00 Prot=00 MxPS=64 #Cfgs= 1 P: Vendor=2c7c ProdID=0128 Rev= 5.04 S: Manufacturer=Quectel S: Product=Quectel EM060K-GL S: SerialNumber=f6fa08b6 C:* #Ifs= 8 Cfg#= 1 Atr=a0 MxPwr=500mA A: FirstIf#= 0 IfCount= 2 Cls=02(comm.) Sub=0e Prot=00 I:* If#= 0 Alt= 0 #EPs= 1 Cls=02(comm.) Sub=0e Prot=00 Driver=cdc_mbim E: Ad=81(I) Atr=03(Int.) MxPS= 64 Ivl=32ms I: If#= 1 Alt= 0 #EPs= 0 Cls=0a(data ) Sub=00 Prot=02 Driver=cdc_mbim I:* If#= 1 Alt= 1 #EPs= 2 Cls=0a(data ) Sub=00 Prot=02 Driver=cdc_mbim E: Ad=8e(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=0f(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 2 Alt= 0 #EPs= 1 Cls=ff(vend.) Sub=ff Prot=ff Driver=(none) E: Ad=82(I) Atr=03(Int.) MxPS= 64 Ivl=32ms I:* If#= 3 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=ff Prot=30 Driver=option E: Ad=01(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=83(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 4 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=40 Driver=option E: Ad=85(I) Atr=03(Int.) MxPS= 10 Ivl=32ms E: Ad=84(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=02(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 5 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=40 Driver=option E: Ad=87(I) Atr=03(Int.) MxPS= 10 Ivl=32ms E: Ad=86(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=03(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 6 Alt= 0 #EPs= 1 Cls=ff(vend.) Sub=ff Prot=70 Driver=(none) E: Ad=88(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 7 Alt= 0 #EPs= 1 Cls=ff(vend.) Sub=ff Prot=80 Driver=(none) E: Ad=8f(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms Signed-off-by: Jerry Meng Cc: stable@vger.kernel.org Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/option.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c index 625d9dc776be..799c0b1cc54f 100644 --- a/drivers/usb/serial/option.c +++ b/drivers/usb/serial/option.c @@ -251,6 +251,7 @@ static void option_instat_callback(struct urb *urb); #define QUECTEL_PRODUCT_EM061K_LTA 0x0123 #define QUECTEL_PRODUCT_EM061K_LMS 0x0124 #define QUECTEL_PRODUCT_EC25 0x0125 +#define QUECTEL_PRODUCT_EM060K_128 0x0128 #define QUECTEL_PRODUCT_EG91 0x0191 #define QUECTEL_PRODUCT_EG95 0x0195 #define QUECTEL_PRODUCT_BG96 0x0296 @@ -1197,6 +1198,9 @@ static const struct usb_device_id option_ids[] = { { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EM060K, 0xff, 0x00, 0x40) }, { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EM060K, 0xff, 0xff, 0x30) }, { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EM060K, 0xff, 0xff, 0x40) }, + { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EM060K_128, 0xff, 0xff, 0x30) }, + { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EM060K_128, 0xff, 0x00, 0x40) }, + { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EM060K_128, 0xff, 0xff, 0x40) }, { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EM061K_LCN, 0xff, 0xff, 0x30) }, { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EM061K_LCN, 0xff, 0x00, 0x40) }, { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EM061K_LCN, 0xff, 0xff, 0x40) }, From fca2a74eee5a2de09766d0186f4ad52d776e177d Mon Sep 17 00:00:00 2001 From: Mohsen Tahmasebi Date: Mon, 10 Jul 2023 11:22:18 +0330 Subject: [PATCH 071/203] USB: serial: option: add Quectel EC200A module support commit 857ea9005806e2a458016880278f98715873e977 upstream. Add Quectel EC200A "DIAG, AT, MODEM": 0x6005: ECM / RNDIS + DIAG + AT + MODEM T: Bus=01 Lev=01 Prnt=02 Port=05 Cnt=01 Dev#= 8 Spd=480 MxCh= 0 D: Ver= 2.00 Cls=ef(misc ) Sub=02 Prot=01 MxPS=64 #Cfgs= 1 P: Vendor=2c7c ProdID=6005 Rev=03.18 S: Manufacturer=Android S: Product=Android S: SerialNumber=0000 C: #Ifs= 5 Cfg#= 1 Atr=e0 MxPwr=500mA I: If#= 0 Alt= 0 #EPs= 1 Cls=02(commc) Sub=06 Prot=00 Driver=cdc_ether E: Ad=87(I) Atr=03(Int.) MxPS= 64 Ivl=4096ms I: If#= 1 Alt= 1 #EPs= 2 Cls=0a(data ) Sub=00 Prot=00 Driver=cdc_ether E: Ad=0c(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=83(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms I: If#= 2 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=00 Prot=00 Driver=option E: Ad=0b(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=82(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms I: If#= 3 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=00 Driver=option E: Ad=0f(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=86(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=89(I) Atr=03(Int.) MxPS= 64 Ivl=4096ms I: If#= 4 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=00 Driver=option E: Ad=0a(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=81(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=88(I) Atr=03(Int.) MxPS= 64 Ivl=4096ms Signed-off-by: Mohsen Tahmasebi Tested-by: Mostafa Ghofrani Cc: stable@vger.kernel.org Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/option.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c index 799c0b1cc54f..0b3422c06ab9 100644 --- a/drivers/usb/serial/option.c +++ b/drivers/usb/serial/option.c @@ -269,6 +269,7 @@ static void option_instat_callback(struct urb *urb); #define QUECTEL_PRODUCT_RM520N 0x0801 #define QUECTEL_PRODUCT_EC200U 0x0901 #define QUECTEL_PRODUCT_EC200S_CN 0x6002 +#define QUECTEL_PRODUCT_EC200A 0x6005 #define QUECTEL_PRODUCT_EM061K_LWW 0x6008 #define QUECTEL_PRODUCT_EM061K_LCN 0x6009 #define QUECTEL_PRODUCT_EC200T 0x6026 @@ -1229,6 +1230,7 @@ static const struct usb_device_id option_ids[] = { { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_RM520N, 0xff, 0, 0) }, { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, 0x0900, 0xff, 0, 0), /* RM500U-CN */ .driver_info = ZLP }, + { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EC200A, 0xff, 0, 0) }, { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EC200U, 0xff, 0, 0) }, { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EC200S_CN, 0xff, 0, 0) }, { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EC200T, 0xff, 0, 0) }, From 81c54eef1dd7c3cc9c3c4fde83b3149d0e55b044 Mon Sep 17 00:00:00 2001 From: Oliver Neukum Date: Wed, 12 Jul 2023 16:16:41 +0200 Subject: [PATCH 072/203] USB: serial: simple: add Kaufmann RKS+CAN VCP commit dd92c8a1f99bcd166204ffc219ea5a23dd65d64f upstream. Add the device and product ID for this CAN bus interface / license dongle. The device is usable either directly from user space or can be attached to a kernel CAN interface with slcan_attach. Reported-by: Kaufmann Automotive GmbH Tested-by: Kaufmann Automotive GmbH Signed-off-by: Oliver Neukum [ johan: amend commit message and move entries in sort order ] Cc: stable@vger.kernel.org Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/usb-serial-simple.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/usb/serial/usb-serial-simple.c b/drivers/usb/serial/usb-serial-simple.c index 4c6747889a19..3c552e4b87ce 100644 --- a/drivers/usb/serial/usb-serial-simple.c +++ b/drivers/usb/serial/usb-serial-simple.c @@ -63,6 +63,11 @@ DEVICE(flashloader, FLASHLOADER_IDS); 0x01) } DEVICE(google, GOOGLE_IDS); +/* KAUFMANN RKS+CAN VCP */ +#define KAUFMANN_IDS() \ + { USB_DEVICE(0x16d0, 0x0870) } +DEVICE(kaufmann, KAUFMANN_IDS); + /* Libtransistor USB console */ #define LIBTRANSISTOR_IDS() \ { USB_DEVICE(0x1209, 0x8b00) } @@ -124,6 +129,7 @@ static struct usb_serial_driver * const serial_drivers[] = { &funsoft_device, &flashloader_device, &google_device, + &kaufmann_device, &libtransistor_device, &vivopay_device, &moto_modem_device, @@ -142,6 +148,7 @@ static const struct usb_device_id id_table[] = { FUNSOFT_IDS(), FLASHLOADER_IDS(), GOOGLE_IDS(), + KAUFMANN_IDS(), LIBTRANSISTOR_IDS(), VIVOPAY_IDS(), MOTO_IDS(), From d5db33a667f5ee181cc4cb0d2d2d5b0407e431ca Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Thu, 20 Jul 2023 09:53:57 +0200 Subject: [PATCH 073/203] USB: serial: simple: sort driver entries commit d245aedc00775c4d7265a9f4522cc4e1fd34d102 upstream. Sort the driver symbols alphabetically in order to make it more obvious where new driver entries should be added. Cc: stable@vger.kernel.org Acked-by: Greg Kroah-Hartman Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/usb-serial-simple.c | 66 +++++++++++++------------- 1 file changed, 33 insertions(+), 33 deletions(-) diff --git a/drivers/usb/serial/usb-serial-simple.c b/drivers/usb/serial/usb-serial-simple.c index 3c552e4b87ce..24b8772a345e 100644 --- a/drivers/usb/serial/usb-serial-simple.c +++ b/drivers/usb/serial/usb-serial-simple.c @@ -38,16 +38,6 @@ static struct usb_serial_driver vendor##_device = { \ { USB_DEVICE(0x0a21, 0x8001) } /* MMT-7305WW */ DEVICE(carelink, CARELINK_IDS); -/* ZIO Motherboard USB driver */ -#define ZIO_IDS() \ - { USB_DEVICE(0x1CBE, 0x0103) } -DEVICE(zio, ZIO_IDS); - -/* Funsoft Serial USB driver */ -#define FUNSOFT_IDS() \ - { USB_DEVICE(0x1404, 0xcddc) } -DEVICE(funsoft, FUNSOFT_IDS); - /* Infineon Flashloader driver */ #define FLASHLOADER_IDS() \ { USB_DEVICE_INTERFACE_CLASS(0x058b, 0x0041, USB_CLASS_CDC_DATA) }, \ @@ -55,6 +45,11 @@ DEVICE(funsoft, FUNSOFT_IDS); { USB_DEVICE(0x8087, 0x0801) } DEVICE(flashloader, FLASHLOADER_IDS); +/* Funsoft Serial USB driver */ +#define FUNSOFT_IDS() \ + { USB_DEVICE(0x1404, 0xcddc) } +DEVICE(funsoft, FUNSOFT_IDS); + /* Google Serial USB SubClass */ #define GOOGLE_IDS() \ { USB_VENDOR_AND_INTERFACE_INFO(0x18d1, \ @@ -63,6 +58,11 @@ DEVICE(flashloader, FLASHLOADER_IDS); 0x01) } DEVICE(google, GOOGLE_IDS); +/* HP4x (48/49) Generic Serial driver */ +#define HP4X_IDS() \ + { USB_DEVICE(0x03f0, 0x0121) } +DEVICE(hp4x, HP4X_IDS); + /* KAUFMANN RKS+CAN VCP */ #define KAUFMANN_IDS() \ { USB_DEVICE(0x16d0, 0x0870) } @@ -73,11 +73,6 @@ DEVICE(kaufmann, KAUFMANN_IDS); { USB_DEVICE(0x1209, 0x8b00) } DEVICE(libtransistor, LIBTRANSISTOR_IDS); -/* ViVOpay USB Serial Driver */ -#define VIVOPAY_IDS() \ - { USB_DEVICE(0x1d5f, 0x1004) } /* ViVOpay 8800 */ -DEVICE(vivopay, VIVOPAY_IDS); - /* Motorola USB Phone driver */ #define MOTO_IDS() \ { USB_DEVICE(0x05c6, 0x3197) }, /* unknown Motorola phone */ \ @@ -106,10 +101,10 @@ DEVICE(nokia, NOKIA_IDS); { USB_DEVICE(0x09d7, 0x0100) } /* NovAtel FlexPack GPS */ DEVICE_N(novatel_gps, NOVATEL_IDS, 3); -/* HP4x (48/49) Generic Serial driver */ -#define HP4X_IDS() \ - { USB_DEVICE(0x03f0, 0x0121) } -DEVICE(hp4x, HP4X_IDS); +/* Siemens USB/MPI adapter */ +#define SIEMENS_IDS() \ + { USB_DEVICE(0x908, 0x0004) } +DEVICE(siemens_mpi, SIEMENS_IDS); /* Suunto ANT+ USB Driver */ #define SUUNTO_IDS() \ @@ -117,47 +112,52 @@ DEVICE(hp4x, HP4X_IDS); { USB_DEVICE(0x0fcf, 0x1009) } /* Dynastream ANT USB-m Stick */ DEVICE(suunto, SUUNTO_IDS); -/* Siemens USB/MPI adapter */ -#define SIEMENS_IDS() \ - { USB_DEVICE(0x908, 0x0004) } -DEVICE(siemens_mpi, SIEMENS_IDS); +/* ViVOpay USB Serial Driver */ +#define VIVOPAY_IDS() \ + { USB_DEVICE(0x1d5f, 0x1004) } /* ViVOpay 8800 */ +DEVICE(vivopay, VIVOPAY_IDS); + +/* ZIO Motherboard USB driver */ +#define ZIO_IDS() \ + { USB_DEVICE(0x1CBE, 0x0103) } +DEVICE(zio, ZIO_IDS); /* All of the above structures mushed into two lists */ static struct usb_serial_driver * const serial_drivers[] = { &carelink_device, - &zio_device, - &funsoft_device, &flashloader_device, + &funsoft_device, &google_device, + &hp4x_device, &kaufmann_device, &libtransistor_device, - &vivopay_device, &moto_modem_device, &motorola_tetra_device, &nokia_device, &novatel_gps_device, - &hp4x_device, - &suunto_device, &siemens_mpi_device, + &suunto_device, + &vivopay_device, + &zio_device, NULL }; static const struct usb_device_id id_table[] = { CARELINK_IDS(), - ZIO_IDS(), - FUNSOFT_IDS(), FLASHLOADER_IDS(), + FUNSOFT_IDS(), GOOGLE_IDS(), + HP4X_IDS(), KAUFMANN_IDS(), LIBTRANSISTOR_IDS(), - VIVOPAY_IDS(), MOTO_IDS(), MOTOROLA_TETRA_IDS(), NOKIA_IDS(), NOVATEL_IDS(), - HP4X_IDS(), - SUUNTO_IDS(), SIEMENS_IDS(), + SUUNTO_IDS(), + VIVOPAY_IDS(), + ZIO_IDS(), { }, }; MODULE_DEVICE_TABLE(usb, id_table); From bf468806b63fde296238fc7bf2fcb6e123d8596b Mon Sep 17 00:00:00 2001 From: Marc Kleine-Budde Date: Tue, 18 Jul 2023 11:43:54 +0200 Subject: [PATCH 074/203] can: gs_usb: gs_can_close(): add missing set of CAN state to CAN_STATE_STOPPED commit f8a2da6ec2417cca169fa85a8ab15817bccbb109 upstream. After an initial link up the CAN device is in ERROR-ACTIVE mode. Due to a missing CAN_STATE_STOPPED in gs_can_close() it doesn't change to STOPPED after a link down: | ip link set dev can0 up | ip link set dev can0 down | ip --details link show can0 | 13: can0: mtu 16 qdisc pfifo_fast state DOWN mode DEFAULT group default qlen 10 | link/can promiscuity 0 allmulti 0 minmtu 0 maxmtu 0 | can state ERROR-ACTIVE restart-ms 1000 Add missing assignment of CAN_STATE_STOPPED in gs_can_close(). Cc: stable@vger.kernel.org Fixes: d08e973a77d1 ("can: gs_usb: Added support for the GS_USB CAN devices") Link: https://lore.kernel.org/all/20230718-gs_usb-fix-can-state-v1-1-f19738ae2c23@pengutronix.de Signed-off-by: Marc Kleine-Budde Signed-off-by: Greg Kroah-Hartman --- drivers/net/can/usb/gs_usb.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/can/usb/gs_usb.c b/drivers/net/can/usb/gs_usb.c index a879200eaab0..1f81293f137c 100644 --- a/drivers/net/can/usb/gs_usb.c +++ b/drivers/net/can/usb/gs_usb.c @@ -732,6 +732,8 @@ static int gs_can_close(struct net_device *netdev) usb_kill_anchored_urbs(&dev->tx_submitted); atomic_set(&dev->active_tx_urbs, 0); + dev->can.state = CAN_STATE_STOPPED; + /* reset the device */ rc = gs_cmd_reset(dev); if (rc < 0) From 75ad45cef6992caa3ce374f53f5b5d07221f3d97 Mon Sep 17 00:00:00 2001 From: Jakub Vanek Date: Fri, 14 Jul 2023 14:24:19 +0200 Subject: [PATCH 075/203] Revert "usb: dwc3: core: Enable AutoRetry feature in the controller" commit 734ae15ab95a18d3d425fc9cb38b7a627d786f08 upstream. This reverts commit b138e23d3dff90c0494925b4c1874227b81bddf7. AutoRetry has been found to sometimes cause controller freezes when communicating with buggy USB devices. This controller feature allows the controller in host mode to send non-terminating/burst retry ACKs instead of terminating retry ACKs to devices when a transaction error (CRC error or overflow) occurs. Unfortunately, if the USB device continues to respond with a CRC error, the controller will not complete endpoint-related commands while it keeps trying to auto-retry. [3] The xHCI driver will notice this once it tries to abort the transfer using a Stop Endpoint command and does not receive a completion in time. [1] This situation is reported to dmesg: [sda] tag#29 uas_eh_abort_handler 0 uas-tag 1 inflight: CMD IN [sda] tag#29 CDB: opcode=0x28 28 00 00 69 42 80 00 00 48 00 xhci-hcd: xHCI host not responding to stop endpoint command xhci-hcd: xHCI host controller not responding, assume dead xhci-hcd: HC died; cleaning up Some users observed this problem on an Odroid HC2 with the JMS578 USB3-to-SATA bridge. The issue can be triggered by starting a read-heavy workload on an attached SSD. After a while, the host controller would die and the SSD would disappear from the system. [1] Further analysis by Synopsys determined that controller revisions other than the one in Odroid HC2 are also affected by this. The recommended solution was to disable AutoRetry altogether. This change does not have a noticeable performance impact. [2] Revert the enablement commit. This will keep the AutoRetry bit in the default state configured during SoC design [2]. Fixes: b138e23d3dff ("usb: dwc3: core: Enable AutoRetry feature in the controller") Link: https://lore.kernel.org/r/a21f34c04632d250cd0a78c7c6f4a1c9c7a43142.camel@gmail.com/ [1] Link: https://lore.kernel.org/r/20230711214834.kyr6ulync32d4ktk@synopsys.com/ [2] Link: https://lore.kernel.org/r/20230712225518.2smu7wse6djc7l5o@synopsys.com/ [3] Cc: stable@vger.kernel.org Cc: Mauro Ribeiro Cc: Krzysztof Kozlowski Suggested-by: Thinh Nguyen Signed-off-by: Jakub Vanek Acked-by: Thinh Nguyen Link: https://lore.kernel.org/r/20230714122419.27741-1-linuxtardis@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/dwc3/core.c | 16 ---------------- drivers/usb/dwc3/core.h | 3 --- 2 files changed, 19 deletions(-) diff --git a/drivers/usb/dwc3/core.c b/drivers/usb/dwc3/core.c index 5709b959b1d9..62517f3bfb58 100644 --- a/drivers/usb/dwc3/core.c +++ b/drivers/usb/dwc3/core.c @@ -1066,22 +1066,6 @@ static int dwc3_core_init(struct dwc3 *dwc) dwc3_writel(dwc->regs, DWC3_GUCTL1, reg); } - if (dwc->dr_mode == USB_DR_MODE_HOST || - dwc->dr_mode == USB_DR_MODE_OTG) { - reg = dwc3_readl(dwc->regs, DWC3_GUCTL); - - /* - * Enable Auto retry Feature to make the controller operating in - * Host mode on seeing transaction errors(CRC errors or internal - * overrun scenerios) on IN transfers to reply to the device - * with a non-terminating retry ACK (i.e, an ACK transcation - * packet with Retry=1 & Nump != 0) - */ - reg |= DWC3_GUCTL_HSTINAUTORETRY; - - dwc3_writel(dwc->regs, DWC3_GUCTL, reg); - } - /* * Must config both number of packets and max burst settings to enable * RX and/or TX threshold. diff --git a/drivers/usb/dwc3/core.h b/drivers/usb/dwc3/core.h index cbebe541f7e8..291893d27429 100644 --- a/drivers/usb/dwc3/core.h +++ b/drivers/usb/dwc3/core.h @@ -249,9 +249,6 @@ #define DWC3_GCTL_GBLHIBERNATIONEN BIT(1) #define DWC3_GCTL_DSBLCLKGTNG BIT(0) -/* Global User Control Register */ -#define DWC3_GUCTL_HSTINAUTORETRY BIT(14) - /* Global User Control 1 Register */ #define DWC3_GUCTL1_PARKMODE_DISABLE_SS BIT(17) #define DWC3_GUCTL1_TX_IPGAP_LINECHECK_DIS BIT(28) From 05b201de44184574f95cf73dccd1c188ef32d1c7 Mon Sep 17 00:00:00 2001 From: Gratian Crisan Date: Wed, 26 Jul 2023 13:45:56 -0500 Subject: [PATCH 076/203] usb: dwc3: pci: skip BYT GPIO lookup table for hardwired phy commit b32b8f2b9542d8039f5468303a6ca78c1b5611a5 upstream. Hardware based on the Bay Trail / BYT SoCs require an external ULPI phy for USB device-mode. The phy chip usually has its 'reset' and 'chip select' lines connected to GPIOs described by ACPI fwnodes in the DSDT table. Because of hardware with missing ACPI resources for the 'reset' and 'chip select' GPIOs commit 5741022cbdf3 ("usb: dwc3: pci: Add GPIO lookup table on platforms without ACPI GPIO resources") introduced a fallback gpiod_lookup_table with hard-coded mappings for Bay Trail devices. However there are existing Bay Trail based devices, like the National Instruments cRIO-903x series, where the phy chip has its 'reset' and 'chip-select' lines always asserted in hardware via resistor pull-ups. On this hardware the phy chip is always enabled and the ACPI dsdt table is missing information not only for the 'chip-select' and 'reset' lines but also for the BYT GPIO controller itself "INT33FC". With the introduction of the gpiod_lookup_table initializing the USB device-mode on these hardware now errors out. The error comes from the gpiod_get_optional() calls in dwc3_pci_quirks() which will now return an -ENOENT error due to the missing ACPI entry for the INT33FC gpio controller used in the aforementioned table. This hardware used to work before because gpiod_get_optional() will return NULL instead of -ENOENT if no GPIO has been assigned to the requested function. The dwc3_pci_quirks() code for setting the 'cs' and 'reset' GPIOs was then skipped (due to the NULL return). This is the correct behavior in cases where the phy chip is hardwired and there are no GPIOs to control. Since the gpiod_lookup_table relies on the presence of INT33FC fwnode in ACPI tables only add the table if we know the entry for the INT33FC gpio controller is present. This allows Bay Trail based devices with hardwired dwc3 ULPI phys to continue working. Fixes: 5741022cbdf3 ("usb: dwc3: pci: Add GPIO lookup table on platforms without ACPI GPIO resources") Cc: stable Signed-off-by: Gratian Crisan Reviewed-by: Hans de Goede Link: https://lore.kernel.org/r/20230726184555.218091-2-gratian.crisan@ni.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/dwc3/dwc3-pci.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/usb/dwc3/dwc3-pci.c b/drivers/usb/dwc3/dwc3-pci.c index a5a8c5712bce..9f420cc8d7c7 100644 --- a/drivers/usb/dwc3/dwc3-pci.c +++ b/drivers/usb/dwc3/dwc3-pci.c @@ -173,10 +173,12 @@ static int dwc3_pci_quirks(struct dwc3_pci *dwc) /* * A lot of BYT devices lack ACPI resource entries for - * the GPIOs, add a fallback mapping to the reference + * the GPIOs. If the ACPI entry for the GPIO controller + * is present add a fallback mapping to the reference * design GPIOs which all boards seem to use. */ - gpiod_add_lookup_table(&platform_bytcr_gpios); + if (acpi_dev_present("INT33FC", NULL, -1)) + gpiod_add_lookup_table(&platform_bytcr_gpios); /* * These GPIOs will turn on the USB2 PHY. Note that we have to From 96c433aff5fd427fde29aba18dbec3df60e8c538 Mon Sep 17 00:00:00 2001 From: Jisheng Zhang Date: Wed, 28 Jun 2023 00:20:18 +0800 Subject: [PATCH 077/203] usb: dwc3: don't reset device side if dwc3 was configured as host-only commit e835c0a4e23c38531dcee5ef77e8d1cf462658c7 upstream. Commit c4a5153e87fd ("usb: dwc3: core: Power-off core/PHYs on system_suspend in host mode") replaces check for HOST only dr_mode with current_dr_role. But during booting, the current_dr_role isn't initialized, thus the device side reset is always issued even if dwc3 was configured as host-only. What's more, on some platforms with host only dwc3, aways issuing device side reset by accessing device register block can cause kernel panic. Fixes: c4a5153e87fd ("usb: dwc3: core: Power-off core/PHYs on system_suspend in host mode") Cc: stable Signed-off-by: Jisheng Zhang Acked-by: Thinh Nguyen Link: https://lore.kernel.org/r/20230627162018.739-1-jszhang@kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/usb/dwc3/core.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/usb/dwc3/core.c b/drivers/usb/dwc3/core.c index 62517f3bfb58..b02b10acb884 100644 --- a/drivers/usb/dwc3/core.c +++ b/drivers/usb/dwc3/core.c @@ -275,9 +275,9 @@ int dwc3_core_soft_reset(struct dwc3 *dwc) /* * We're resetting only the device side because, if we're in host mode, * XHCI driver will reset the host block. If dwc3 was configured for - * host-only mode, then we can return early. + * host-only mode or current role is host, then we can return early. */ - if (dwc->current_dr_role == DWC3_GCTL_PRTCAP_HOST) + if (dwc->dr_mode == USB_DR_MODE_HOST || dwc->current_dr_role == DWC3_GCTL_PRTCAP_HOST) return 0; reg = dwc3_readl(dwc->regs, DWC3_DCTL); From 66a622275a11fb070b4dae3926d3a1a5d9cbecda Mon Sep 17 00:00:00 2001 From: Guiting Shen Date: Mon, 26 Jun 2023 23:27:13 +0800 Subject: [PATCH 078/203] usb: ohci-at91: Fix the unhandle interrupt when resume commit c55afcbeaa7a6f4fffdbc999a9bf3f0b29a5186f upstream. The ohci_hcd_at91_drv_suspend() sets ohci->rh_state to OHCI_RH_HALTED when suspend which will let the ohci_irq() skip the interrupt after resume. And nobody to handle this interrupt. According to the comment in ohci_hcd_at91_drv_suspend(), it need to reset when resume from suspend(MEM) to fix by setting "hibernated" argument of ohci_resume(). Signed-off-by: Guiting Shen Cc: stable Reviewed-by: Alan Stern Link: https://lore.kernel.org/r/20230626152713.18950-1-aarongt.shen@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/ohci-at91.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/usb/host/ohci-at91.c b/drivers/usb/host/ohci-at91.c index 99e994fd3d1d..2ba3c1b6ad6d 100644 --- a/drivers/usb/host/ohci-at91.c +++ b/drivers/usb/host/ohci-at91.c @@ -647,7 +647,13 @@ ohci_hcd_at91_drv_resume(struct device *dev) else at91_start_clock(ohci_at91); - ohci_resume(hcd, false); + /* + * According to the comment in ohci_hcd_at91_drv_suspend() + * we need to do a reset if the 48Mhz clock was stopped, + * that is, if ohci_at91->wakeup is clear. Tell ohci_resume() + * to reset in this case by setting its "hibernated" flag. + */ + ohci_resume(hcd, !ohci_at91->wakeup); return 0; } From d0b588bbe4d9d0777418415d870b455c1293c8b4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=81ukasz=20Bartosik?= Date: Mon, 24 Jul 2023 13:29:11 +0200 Subject: [PATCH 079/203] USB: quirks: add quirk for Focusrite Scarlett MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 9dc162e22387080e2d06de708b89920c0e158c9a upstream. The Focusrite Scarlett audio device does not behave correctly during resumes. Below is what happens during every resume (captured with Beagle 5000): // The Scarlett disconnects and is enumerated again. However from time to time it drops completely off the USB bus during resume. Below is captured occurrence of such an event: // // To fix the condition a user has to unplug and plug the device again. With USB_QUIRK_RESET_RESUME applied ("usbcore.quirks=1235:8211:b") for the Scarlett audio device the issue still reproduces. Applying USB_QUIRK_DISCONNECT_SUSPEND ("usbcore.quirks=1235:8211:m") fixed the issue and the Scarlett audio device didn't drop off the USB bus for ~5000 suspend/resume cycles where originally issue reproduced in ~100 or less suspend/resume cycles. Signed-off-by: Łukasz Bartosik Cc: stable Link: https://lore.kernel.org/r/20230724112911.1802577-1-lb@semihalf.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/quirks.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/usb/core/quirks.c b/drivers/usb/core/quirks.c index 4ac1c22f13be..856947620f14 100644 --- a/drivers/usb/core/quirks.c +++ b/drivers/usb/core/quirks.c @@ -437,6 +437,10 @@ static const struct usb_device_id usb_quirk_list[] = { /* novation SoundControl XL */ { USB_DEVICE(0x1235, 0x0061), .driver_info = USB_QUIRK_RESET_RESUME }, + /* Focusrite Scarlett Solo USB */ + { USB_DEVICE(0x1235, 0x8211), .driver_info = + USB_QUIRK_DISCONNECT_SUSPEND }, + /* Huawei 4G LTE module */ { USB_DEVICE(0x12d1, 0x15bb), .driver_info = USB_QUIRK_DISCONNECT_SUSPEND }, From b0fd110578e78c21063c73704f859375c4faabd0 Mon Sep 17 00:00:00 2001 From: Ricardo Ribalda Date: Wed, 19 Jul 2023 13:01:04 +0000 Subject: [PATCH 080/203] usb: xhci-mtk: set the dma max_seg_size commit 9fd10829a9eb482e192a845675ecc5480e0bfa10 upstream. Allow devices to have dma operations beyond 64K, and avoid warnings such as: DMA-API: xhci-mtk 11200000.usb: mapping sg segment longer than device claims to support [len=98304] [max=65536] Fixes: 0cbd4b34cda9 ("xhci: mediatek: support MTK xHCI host controller") Cc: stable Tested-by: Zubin Mithra Reported-by: Zubin Mithra Signed-off-by: Ricardo Ribalda Link: https://lore.kernel.org/r/20230628-mtk-usb-v2-1-c8c34eb9f229@chromium.org Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci-mtk.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/usb/host/xhci-mtk.c b/drivers/usb/host/xhci-mtk.c index 1c331577fca9..122777b21b24 100644 --- a/drivers/usb/host/xhci-mtk.c +++ b/drivers/usb/host/xhci-mtk.c @@ -535,6 +535,7 @@ static int xhci_mtk_probe(struct platform_device *pdev) } device_init_wakeup(dev, true); + dma_set_max_seg_size(dev, UINT_MAX); xhci = hcd_to_xhci(hcd); xhci->main_hcd = hcd; From 65cd02e3c37e431be841d859fab8d2e10ecc7bc8 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Tue, 4 Jul 2023 17:08:27 +0300 Subject: [PATCH 081/203] Revert "usb: xhci: tegra: Fix error check" commit 288b4fa1798e3637a9304c6e90a93d900e02369c upstream. This reverts commit 18fc7c435be3f17ea26a21b2e2312fcb9088e01f. The reverted commit was based on static analysis and a misunderstanding of how PTR_ERR() and NULLs are supposed to work. When a function returns both pointer errors and NULL then normally the NULL means "continue operating without a feature because it was deliberately turned off". The NULL should not be treated as a failure. If a driver cannot work when that feature is disabled then the KConfig should enforce that the function cannot return NULL. We should not need to test for it. In this code, the patch means that certain tegra_xusb_probe() will fail if the firmware supports power-domains but CONFIG_PM is disabled. Signed-off-by: Dan Carpenter Fixes: 18fc7c435be3 ("usb: xhci: tegra: Fix error check") Cc: stable Link: https://lore.kernel.org/r/8baace8d-fb4b-41a4-ad5f-848ae643a23b@moroto.mountain Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci-tegra.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/usb/host/xhci-tegra.c b/drivers/usb/host/xhci-tegra.c index 9fa4f8f39830..ffb09737b5d0 100644 --- a/drivers/usb/host/xhci-tegra.c +++ b/drivers/usb/host/xhci-tegra.c @@ -1042,15 +1042,15 @@ static int tegra_xusb_powerdomain_init(struct device *dev, int err; tegra->genpd_dev_host = dev_pm_domain_attach_by_name(dev, "xusb_host"); - if (IS_ERR_OR_NULL(tegra->genpd_dev_host)) { - err = PTR_ERR(tegra->genpd_dev_host) ? : -ENODATA; + if (IS_ERR(tegra->genpd_dev_host)) { + err = PTR_ERR(tegra->genpd_dev_host); dev_err(dev, "failed to get host pm-domain: %d\n", err); return err; } tegra->genpd_dev_ss = dev_pm_domain_attach_by_name(dev, "xusb_ss"); - if (IS_ERR_OR_NULL(tegra->genpd_dev_ss)) { - err = PTR_ERR(tegra->genpd_dev_ss) ? : -ENODATA; + if (IS_ERR(tegra->genpd_dev_ss)) { + err = PTR_ERR(tegra->genpd_dev_ss); dev_err(dev, "failed to get superspeed pm-domain: %d\n", err); return err; } From 0d5b23743bedfdbc385b53c69daefd43aa84e152 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Fri, 30 Jun 2023 09:14:20 +0200 Subject: [PATCH 082/203] Documentation: security-bugs.rst: update preferences when dealing with the linux-distros group commit 4fee0915e649bd0cea56dece6d96f8f4643df33c upstream. Because the linux-distros group forces reporters to release information about reported bugs, and they impose arbitrary deadlines in having those bugs fixed despite not actually being kernel developers, the kernel security team recommends not interacting with them at all as this just causes confusion and the early-release of reported security problems. Reviewed-by: Kees Cook Link: https://lore.kernel.org/r/2023063020-throat-pantyhose-f110@gregkh Signed-off-by: Greg Kroah-Hartman --- Documentation/admin-guide/security-bugs.rst | 24 ++++++++++----------- 1 file changed, 11 insertions(+), 13 deletions(-) diff --git a/Documentation/admin-guide/security-bugs.rst b/Documentation/admin-guide/security-bugs.rst index c32eb786201c..a70b474069b2 100644 --- a/Documentation/admin-guide/security-bugs.rst +++ b/Documentation/admin-guide/security-bugs.rst @@ -63,20 +63,18 @@ information submitted to the security list and any followup discussions of the report are treated confidentially even after the embargo has been lifted, in perpetuity. -Coordination ------------- +Coordination with other groups +------------------------------ -Fixes for sensitive bugs, such as those that might lead to privilege -escalations, may need to be coordinated with the private - mailing list so that distribution vendors -are well prepared to issue a fixed kernel upon public disclosure of the -upstream fix. Distros will need some time to test the proposed patch and -will generally request at least a few days of embargo, and vendor update -publication prefers to happen Tuesday through Thursday. When appropriate, -the security team can assist with this coordination, or the reporter can -include linux-distros from the start. In this case, remember to prefix -the email Subject line with "[vs]" as described in the linux-distros wiki: - +The kernel security team strongly recommends that reporters of potential +security issues NEVER contact the "linux-distros" mailing list until +AFTER discussing it with the kernel security team. Do not Cc: both +lists at once. You may contact the linux-distros mailing list after a +fix has been agreed on and you fully understand the requirements that +doing so will impose on you and the kernel community. + +The different lists have different goals and the linux-distros rules do +not contribute to actually fixing any potential security problems. CVE assignment -------------- From baef414b1ca020c88bda023b891cd15bcdb6c609 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Fri, 30 Jun 2023 09:14:21 +0200 Subject: [PATCH 083/203] Documentation: security-bugs.rst: clarify CVE handling commit 3c1897ae4b6bc7cc586eda2feaa2cd68325ec29c upstream. The kernel security team does NOT assign CVEs, so document that properly and provide the "if you want one, ask MITRE for it" response that we give on a weekly basis in the document, so we don't have to constantly say it to everyone who asks. Link: https://lore.kernel.org/r/2023063022-retouch-kerosene-7e4a@gregkh Signed-off-by: Greg Kroah-Hartman --- Documentation/admin-guide/security-bugs.rst | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/Documentation/admin-guide/security-bugs.rst b/Documentation/admin-guide/security-bugs.rst index a70b474069b2..d6d93e96128e 100644 --- a/Documentation/admin-guide/security-bugs.rst +++ b/Documentation/admin-guide/security-bugs.rst @@ -79,13 +79,12 @@ not contribute to actually fixing any potential security problems. CVE assignment -------------- -The security team does not normally assign CVEs, nor do we require them -for reports or fixes, as this can needlessly complicate the process and -may delay the bug handling. If a reporter wishes to have a CVE identifier -assigned ahead of public disclosure, they will need to contact the private -linux-distros list, described above. When such a CVE identifier is known -before a patch is provided, it is desirable to mention it in the commit -message if the reporter agrees. +The security team does not assign CVEs, nor do we require them for +reports or fixes, as this can needlessly complicate the process and may +delay the bug handling. If a reporter wishes to have a CVE identifier +assigned, they should find one by themselves, for example by contacting +MITRE directly. However under no circumstances will a patch inclusion +be delayed to wait for a CVE identifier to arrive. Non-disclosure agreements ------------------------- From caac4b6c15b66feae4d83f602e1e46f124540202 Mon Sep 17 00:00:00 2001 From: Zhang Shurong Date: Sun, 9 Jul 2023 13:50:07 +0800 Subject: [PATCH 084/203] staging: ks7010: potential buffer overflow in ks_wlan_set_encode_ext() commit 5f1c7031e044cb2fba82836d55cc235e2ad619dc upstream. The "exc->key_len" is a u16 that comes from the user. If it's over IW_ENCODING_TOKEN_MAX (64) that could lead to memory corruption. Fixes: b121d84882b9 ("staging: ks7010: simplify calls to memcpy()") Cc: stable Signed-off-by: Zhang Shurong Reviewed-by: Dan Carpenter Link: https://lore.kernel.org/r/tencent_5153B668C0283CAA15AA518325346E026A09@qq.com Signed-off-by: Greg Kroah-Hartman --- drivers/staging/ks7010/ks_wlan_net.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/staging/ks7010/ks_wlan_net.c b/drivers/staging/ks7010/ks_wlan_net.c index 09e7b4cd0138..604882279adc 100644 --- a/drivers/staging/ks7010/ks_wlan_net.c +++ b/drivers/staging/ks7010/ks_wlan_net.c @@ -1584,8 +1584,10 @@ static int ks_wlan_set_encode_ext(struct net_device *dev, commit |= SME_WEP_FLAG; } if (enc->key_len) { - memcpy(&key->key_val[0], &enc->key[0], enc->key_len); - key->key_len = enc->key_len; + int key_len = clamp_val(enc->key_len, 0, IW_ENCODING_TOKEN_MAX); + + memcpy(&key->key_val[0], &enc->key[0], key_len); + key->key_len = key_len; commit |= (SME_WEP_VAL1 << index); } break; From 8fc0eabaa73bbd9bd705577071564616da5c8c61 Mon Sep 17 00:00:00 2001 From: Chaoyuan Peng Date: Tue, 18 Jul 2023 04:39:43 +0000 Subject: [PATCH 085/203] tty: n_gsm: fix UAF in gsm_cleanup_mux commit 9b9c8195f3f0d74a826077fc1c01b9ee74907239 upstream. In gsm_cleanup_mux() the 'gsm->dlci' pointer was not cleaned properly, leaving it a dangling pointer after gsm_dlci_release. This leads to use-after-free where 'gsm->dlci[0]' are freed and accessed by the subsequent gsm_cleanup_mux(). Such is the case in the following call trace: __dump_stack lib/dump_stack.c:88 [inline] dump_stack_lvl+0x1e3/0x2cb lib/dump_stack.c:106 print_address_description+0x63/0x3b0 mm/kasan/report.c:248 __kasan_report mm/kasan/report.c:434 [inline] kasan_report+0x16b/0x1c0 mm/kasan/report.c:451 gsm_cleanup_mux+0x76a/0x850 drivers/tty/n_gsm.c:2397 gsm_config drivers/tty/n_gsm.c:2653 [inline] gsmld_ioctl+0xaae/0x15b0 drivers/tty/n_gsm.c:2986 tty_ioctl+0x8ff/0xc50 drivers/tty/tty_io.c:2816 vfs_ioctl fs/ioctl.c:51 [inline] __do_sys_ioctl fs/ioctl.c:874 [inline] __se_sys_ioctl+0xf1/0x160 fs/ioctl.c:860 do_syscall_x64 arch/x86/entry/common.c:50 [inline] do_syscall_64+0x3d/0xb0 arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x61/0xcb Allocated by task 3501: kasan_save_stack mm/kasan/common.c:38 [inline] kasan_set_track mm/kasan/common.c:46 [inline] set_alloc_info mm/kasan/common.c:434 [inline] ____kasan_kmalloc+0xba/0xf0 mm/kasan/common.c:513 kasan_kmalloc include/linux/kasan.h:264 [inline] kmem_cache_alloc_trace+0x143/0x290 mm/slub.c:3247 kmalloc include/linux/slab.h:591 [inline] kzalloc include/linux/slab.h:721 [inline] gsm_dlci_alloc+0x53/0x3a0 drivers/tty/n_gsm.c:1932 gsm_activate_mux+0x1c/0x330 drivers/tty/n_gsm.c:2438 gsm_config drivers/tty/n_gsm.c:2677 [inline] gsmld_ioctl+0xd46/0x15b0 drivers/tty/n_gsm.c:2986 tty_ioctl+0x8ff/0xc50 drivers/tty/tty_io.c:2816 vfs_ioctl fs/ioctl.c:51 [inline] __do_sys_ioctl fs/ioctl.c:874 [inline] __se_sys_ioctl+0xf1/0x160 fs/ioctl.c:860 do_syscall_x64 arch/x86/entry/common.c:50 [inline] do_syscall_64+0x3d/0xb0 arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x61/0xcb Freed by task 3501: kasan_save_stack mm/kasan/common.c:38 [inline] kasan_set_track+0x4b/0x80 mm/kasan/common.c:46 kasan_set_free_info+0x1f/0x40 mm/kasan/generic.c:360 ____kasan_slab_free+0xd8/0x120 mm/kasan/common.c:366 kasan_slab_free include/linux/kasan.h:230 [inline] slab_free_hook mm/slub.c:1705 [inline] slab_free_freelist_hook+0xdd/0x160 mm/slub.c:1731 slab_free mm/slub.c:3499 [inline] kfree+0xf1/0x270 mm/slub.c:4559 dlci_put drivers/tty/n_gsm.c:1988 [inline] gsm_dlci_release drivers/tty/n_gsm.c:2021 [inline] gsm_cleanup_mux+0x574/0x850 drivers/tty/n_gsm.c:2415 gsm_config drivers/tty/n_gsm.c:2653 [inline] gsmld_ioctl+0xaae/0x15b0 drivers/tty/n_gsm.c:2986 tty_ioctl+0x8ff/0xc50 drivers/tty/tty_io.c:2816 vfs_ioctl fs/ioctl.c:51 [inline] __do_sys_ioctl fs/ioctl.c:874 [inline] __se_sys_ioctl+0xf1/0x160 fs/ioctl.c:860 do_syscall_x64 arch/x86/entry/common.c:50 [inline] do_syscall_64+0x3d/0xb0 arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x61/0xcb Fixes: aa371e96f05d ("tty: n_gsm: fix restart handling via CLD command") Signed-off-by: Chaoyuan Peng Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/tty/n_gsm.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/tty/n_gsm.c b/drivers/tty/n_gsm.c index 23b014b8c919..d439afef9212 100644 --- a/drivers/tty/n_gsm.c +++ b/drivers/tty/n_gsm.c @@ -2178,8 +2178,10 @@ static void gsm_cleanup_mux(struct gsm_mux *gsm, bool disc) /* Free up any link layer users and finally the control channel */ for (i = NUM_DLCI - 1; i >= 0; i--) - if (gsm->dlci[i]) + if (gsm->dlci[i]) { gsm_dlci_release(gsm->dlci[i]); + gsm->dlci[i] = NULL; + } mutex_unlock(&gsm->mutex); /* Now wipe the queues */ tty_ldisc_flush(gsm->tty); From 7c6df7f0fc3dfc951b4f6ff2bfcf3209e8b96ef4 Mon Sep 17 00:00:00 2001 From: Luka Guzenko Date: Tue, 25 Jul 2023 13:15:09 +0200 Subject: [PATCH 086/203] ALSA: hda/relatek: Enable Mute LED on HP 250 G8 commit d510acb610e6aa07a04b688236868b2a5fd60deb upstream. This HP Notebook used ALC236 codec with COEF 0x07 idx 1 controlling the mute LED. Enable already existing quirk for this device. Signed-off-by: Luka Guzenko Cc: Link: https://lore.kernel.org/r/20230725111509.623773-1-l.guzenko@web.de Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/patch_realtek.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 6bfc7e28515a..db8593d79431 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -9015,6 +9015,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x103c, 0x880d, "HP EliteBook 830 G8 Notebook PC", ALC285_FIXUP_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x8811, "HP Spectre x360 15-eb1xxx", ALC285_FIXUP_HP_SPECTRE_X360_EB1), SND_PCI_QUIRK(0x103c, 0x8812, "HP Spectre x360 15-eb1xxx", ALC285_FIXUP_HP_SPECTRE_X360_EB1), + SND_PCI_QUIRK(0x103c, 0x881d, "HP 250 G8 Notebook PC", ALC236_FIXUP_HP_MUTE_LED_COEFBIT2), SND_PCI_QUIRK(0x103c, 0x8846, "HP EliteBook 850 G8 Notebook PC", ALC285_FIXUP_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x8847, "HP EliteBook x360 830 G8 Notebook PC", ALC285_FIXUP_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x884b, "HP EliteBook 840 Aero G8 Notebook PC", ALC285_FIXUP_HP_GPIO_LED), From 5e0e4e72ca21f58acdab429ebe05673792378f23 Mon Sep 17 00:00:00 2001 From: Gilles Buloz Date: Mon, 24 Jul 2023 08:04:44 +0000 Subject: [PATCH 087/203] hwmon: (nct7802) Fix for temp6 (PECI1) processed even if PECI1 disabled commit 54685abe660a59402344d5045ce08c43c6a5ac42 upstream. Because of hex value 0x46 used instead of decimal 46, the temp6 (PECI1) temperature is always declared visible and then displayed even if disabled in the chip Signed-off-by: Gilles Buloz Link: https://lore.kernel.org/r/DU0PR10MB62526435ADBC6A85243B90E08002A@DU0PR10MB6252.EURPRD10.PROD.OUTLOOK.COM Fixes: fcdc5739dce03 ("hwmon: (nct7802) add temperature sensor type attribute") Cc: stable@vger.kernel.org Signed-off-by: Guenter Roeck Signed-off-by: Greg Kroah-Hartman --- drivers/hwmon/nct7802.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/hwmon/nct7802.c b/drivers/hwmon/nct7802.c index 604af2f6103a..88eddb8d61d3 100644 --- a/drivers/hwmon/nct7802.c +++ b/drivers/hwmon/nct7802.c @@ -708,7 +708,7 @@ static umode_t nct7802_temp_is_visible(struct kobject *kobj, if (index >= 38 && index < 46 && !(reg & 0x01)) /* PECI 0 */ return 0; - if (index >= 0x46 && (!(reg & 0x02))) /* PECI 1 */ + if (index >= 46 && !(reg & 0x02)) /* PECI 1 */ return 0; return attr->mode; From c5c797ccc3b58aef012a9f8d344580b28e4e77dc Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Fri, 21 Jul 2023 10:49:21 +0100 Subject: [PATCH 088/203] btrfs: check for commit error at btrfs_attach_transaction_barrier() commit b28ff3a7d7e97456fd86b68d24caa32e1cfa7064 upstream. btrfs_attach_transaction_barrier() is used to get a handle pointing to the current running transaction if the transaction has not started its commit yet (its state is < TRANS_STATE_COMMIT_START). If the transaction commit has started, then we wait for the transaction to commit and finish before returning - however we completely ignore if the transaction was aborted due to some error during its commit, we simply return ERR_PT(-ENOENT), which makes the caller assume everything is fine and no errors happened. This could make an fsync return success (0) to user space when in fact we had a transaction abort and the target inode changes were therefore not persisted. Fix this by checking for the return value from btrfs_wait_for_commit(), and if it returned an error, return it back to the caller. Fixes: d4edf39bd5db ("Btrfs: fix uncompleted transaction") CC: stable@vger.kernel.org # 4.19+ Reviewed-by: Qu Wenruo Signed-off-by: Filipe Manana Reviewed-by: David Sterba Signed-off-by: David Sterba Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/transaction.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 8daa9e4eb1d2..ff372f1226a3 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -821,8 +821,13 @@ btrfs_attach_transaction_barrier(struct btrfs_root *root) trans = start_transaction(root, 0, TRANS_ATTACH, BTRFS_RESERVE_NO_FLUSH, true); - if (trans == ERR_PTR(-ENOENT)) - btrfs_wait_for_commit(root->fs_info, 0); + if (trans == ERR_PTR(-ENOENT)) { + int ret; + + ret = btrfs_wait_for_commit(root->fs_info, 0); + if (ret) + return ERR_PTR(ret); + } return trans; } From 4a871fcebc0290c88d41a3ec19f99e31ec7d6606 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Mon, 24 Jul 2023 17:00:49 +0200 Subject: [PATCH 089/203] file: always lock position for FMODE_ATOMIC_POS commit 20ea1e7d13c1b544fe67c4a8dc3943bb1ab33e6f upstream. The pidfd_getfd() system call allows a caller with ptrace_may_access() abilities on another process to steal a file descriptor from this process. This system call is used by debuggers, container runtimes, system call supervisors, networking proxies etc. So while it is a special interest system call it is used in common tools. That ability ends up breaking our long-time optimization in fdget_pos(), which "knew" that if we had exclusive access to the file descriptor nobody else could access it, and we didn't need the lock for the file position. That check for file_count(file) was always fairly subtle - it depended on __fdget() not incrementing the file count for single-threaded processes and thus included that as part of the rule - but it did mean that we didn't need to take the lock in all those traditional unix process contexts. So it's sad to see this go, and I'd love to have some way to re-instate the optimization. At the same time, the lock obviously isn't ever contended in the case we optimized, so all we were optimizing away is the atomics and the cacheline dirtying. Let's see if anybody even notices that the optimization is gone. Link: https://lore.kernel.org/linux-fsdevel/20230724-vfs-fdget_pos-v1-1-a4abfd7103f3@kernel.org/ Fixes: 8649c322f75c ("pid: Implement pidfd_getfd syscall") Cc: stable@kernel.org Signed-off-by: Christian Brauner Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- fs/file.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/fs/file.c b/fs/file.c index 173d318208b8..2feeccec3f4d 100644 --- a/fs/file.c +++ b/fs/file.c @@ -1013,10 +1013,8 @@ unsigned long __fdget_pos(unsigned int fd) struct file *file = (struct file *)(v & ~3); if (file && (file->f_mode & FMODE_ATOMIC_POS)) { - if (file_count(file) > 1) { - v |= FDPUT_POS_UNLOCK; - mutex_lock(&file->f_pos_lock); - } + v |= FDPUT_POS_UNLOCK; + mutex_lock(&file->f_pos_lock); } return v; } From 5be81139d2ffc5f7bcc8996d65c8028f7a83a714 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 18 Jul 2023 08:38:37 -0400 Subject: [PATCH 090/203] nfsd: Remove incorrect check in nfsd4_validate_stateid commit f75546f58a70da5cfdcec5a45ffc377885ccbee8 upstream. If the client is calling TEST_STATEID, then it is because some event occurred that requires it to check all the stateids for validity and call FREE_STATEID on the ones that have been revoked. In this case, either the stateid exists in the list of stateids associated with that nfs4_client, in which case it should be tested, or it does not. There are no additional conditions to be considered. Reported-by: "Frank Ch. Eigler" Fixes: 7df302f75ee2 ("NFSD: TEST_STATEID should not return NFS4ERR_STALE_STATEID") Cc: stable@vger.kernel.org # v5.7+ Signed-off-by: Trond Myklebust Reviewed-by: Jeff Layton Signed-off-by: Chuck Lever Signed-off-by: Greg Kroah-Hartman --- fs/nfsd/nfs4state.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index cb13a1649632..1c1b231b2ab3 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -5656,8 +5656,6 @@ static __be32 nfsd4_validate_stateid(struct nfs4_client *cl, stateid_t *stateid) if (ZERO_STATEID(stateid) || ONE_STATEID(stateid) || CLOSE_STATEID(stateid)) return status; - if (!same_clid(&stateid->si_opaque.so_clid, &cl->cl_clientid)) - return status; spin_lock(&cl->cl_lock); s = find_stateid_locked(cl, stateid); if (!s) From b1867cddd780965e049e6ef9bf3cdad7ae6ea184 Mon Sep 17 00:00:00 2001 From: Alexander Steffen Date: Tue, 13 Jun 2023 20:02:56 +0200 Subject: [PATCH 091/203] tpm_tis: Explicitly check for error code commit 513253f8c293c0c8bd46d09d337fc892bf8f9f48 upstream. recv_data either returns the number of received bytes, or a negative value representing an error code. Adding the return value directly to the total number of received bytes therefore looks a little weird, since it might add a negative error code to a sum of bytes. The following check for size < expected usually makes the function return ETIME in that case, so it does not cause too many problems in practice. But to make the code look cleaner and because the caller might still be interested in the original error code, explicitly check for the presence of an error code and pass that through. Cc: stable@vger.kernel.org Fixes: cb5354253af2 ("[PATCH] tpm: spacing cleanups 2") Signed-off-by: Alexander Steffen Reviewed-by: Jarkko Sakkinen Signed-off-by: Jarkko Sakkinen Signed-off-by: Greg Kroah-Hartman --- drivers/char/tpm/tpm_tis_core.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/char/tpm/tpm_tis_core.c b/drivers/char/tpm/tpm_tis_core.c index 365761055df3..d7c440ac465f 100644 --- a/drivers/char/tpm/tpm_tis_core.c +++ b/drivers/char/tpm/tpm_tis_core.c @@ -314,6 +314,7 @@ static int tpm_tis_recv(struct tpm_chip *chip, u8 *buf, size_t count) int size = 0; int status; u32 expected; + int rc; if (count < TPM_HEADER_SIZE) { size = -EIO; @@ -333,8 +334,13 @@ static int tpm_tis_recv(struct tpm_chip *chip, u8 *buf, size_t count) goto out; } - size += recv_data(chip, &buf[TPM_HEADER_SIZE], - expected - TPM_HEADER_SIZE); + rc = recv_data(chip, &buf[TPM_HEADER_SIZE], + expected - TPM_HEADER_SIZE); + if (rc < 0) { + size = rc; + goto out; + } + size += rc; if (size < expected) { dev_err(&chip->dev, "Unable to read remainder of result\n"); size = -ETIME; From 57b8db5800a5da054f146939fcf9764daf0c2475 Mon Sep 17 00:00:00 2001 From: Jonas Gorski Date: Thu, 29 Jun 2023 09:26:20 +0200 Subject: [PATCH 092/203] irq-bcm6345-l1: Do not assume a fixed block to cpu mapping MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 55ad24857341c36616ecc1d9580af5626c226cf1 ] The irq to block mapping is fixed, and interrupts from the first block will always be routed to the first parent IRQ. But the parent interrupts themselves can be routed to any available CPU. This is used by the bootloader to map the first parent interrupt to the boot CPU, regardless wether the boot CPU is the first one or the second one. When booting from the second CPU, the assumption that the first block's IRQ is mapped to the first CPU breaks, and the system hangs because interrupts do not get routed correctly. Fix this by passing the appropriate bcm6434_l1_cpu to the interrupt handler instead of the chip itself, so the handler always has the right block. Fixes: c7c42ec2baa1 ("irqchips/bmips: Add bcm6345-l1 interrupt controller") Signed-off-by: Jonas Gorski Reviewed-by: Philippe Mathieu-Daudé Reviewed-by: Florian Fainelli Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20230629072620.62527-1-jonas.gorski@gmail.com Signed-off-by: Sasha Levin --- drivers/irqchip/irq-bcm6345-l1.c | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/drivers/irqchip/irq-bcm6345-l1.c b/drivers/irqchip/irq-bcm6345-l1.c index 1bd0621c4ce2..4827a1183247 100644 --- a/drivers/irqchip/irq-bcm6345-l1.c +++ b/drivers/irqchip/irq-bcm6345-l1.c @@ -82,6 +82,7 @@ struct bcm6345_l1_chip { }; struct bcm6345_l1_cpu { + struct bcm6345_l1_chip *intc; void __iomem *map_base; unsigned int parent_irq; u32 enable_cache[]; @@ -115,17 +116,11 @@ static inline unsigned int cpu_for_irq(struct bcm6345_l1_chip *intc, static void bcm6345_l1_irq_handle(struct irq_desc *desc) { - struct bcm6345_l1_chip *intc = irq_desc_get_handler_data(desc); - struct bcm6345_l1_cpu *cpu; + struct bcm6345_l1_cpu *cpu = irq_desc_get_handler_data(desc); + struct bcm6345_l1_chip *intc = cpu->intc; struct irq_chip *chip = irq_desc_get_chip(desc); unsigned int idx; -#ifdef CONFIG_SMP - cpu = intc->cpus[cpu_logical_map(smp_processor_id())]; -#else - cpu = intc->cpus[0]; -#endif - chained_irq_enter(chip, desc); for (idx = 0; idx < intc->n_words; idx++) { @@ -257,6 +252,7 @@ static int __init bcm6345_l1_init_one(struct device_node *dn, if (!cpu) return -ENOMEM; + cpu->intc = intc; cpu->map_base = ioremap(res.start, sz); if (!cpu->map_base) return -ENOMEM; @@ -272,7 +268,7 @@ static int __init bcm6345_l1_init_one(struct device_node *dn, return -EINVAL; } irq_set_chained_handler_and_data(cpu->parent_irq, - bcm6345_l1_irq_handle, intc); + bcm6345_l1_irq_handle, cpu); return 0; } From 90cd5ab951ea202b83d7b49b61bfb6014b5fc848 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Sat, 17 Jun 2023 08:32:42 +0100 Subject: [PATCH 093/203] irqchip/gic-v4.1: Properly lock VPEs when doing a directLPI invalidation [ Upstream commit 926846a703cbf5d0635cc06e67d34b228746554b ] We normally rely on the irq_to_cpuid_[un]lock() primitives to make sure nothing will change col->idx while performing a LPI invalidation. However, these primitives do not cover VPE doorbells, and we have some open-coded locking for that. Unfortunately, this locking is pretty bogus. Instead, extend the above primitives to cover VPE doorbells and convert the whole thing to it. Fixes: f3a059219bc7 ("irqchip/gic-v4.1: Ensure mutual exclusion between vPE affinity change and RD access") Reported-by: Kunkun Jiang Signed-off-by: Marc Zyngier Cc: Zenghui Yu Cc: wanghaibin.wang@huawei.com Tested-by: Kunkun Jiang Reviewed-by: Zenghui Yu Link: https://lore.kernel.org/r/20230617073242.3199746-1-maz@kernel.org Signed-off-by: Sasha Levin --- drivers/irqchip/irq-gic-v3-its.c | 75 ++++++++++++++++++++------------ 1 file changed, 46 insertions(+), 29 deletions(-) diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c index 5ec091c64d47..f1fa98e5ea13 100644 --- a/drivers/irqchip/irq-gic-v3-its.c +++ b/drivers/irqchip/irq-gic-v3-its.c @@ -267,13 +267,23 @@ static void vpe_to_cpuid_unlock(struct its_vpe *vpe, unsigned long flags) raw_spin_unlock_irqrestore(&vpe->vpe_lock, flags); } +static struct irq_chip its_vpe_irq_chip; + static int irq_to_cpuid_lock(struct irq_data *d, unsigned long *flags) { - struct its_vlpi_map *map = get_vlpi_map(d); + struct its_vpe *vpe = NULL; int cpu; - if (map) { - cpu = vpe_to_cpuid_lock(map->vpe, flags); + if (d->chip == &its_vpe_irq_chip) { + vpe = irq_data_get_irq_chip_data(d); + } else { + struct its_vlpi_map *map = get_vlpi_map(d); + if (map) + vpe = map->vpe; + } + + if (vpe) { + cpu = vpe_to_cpuid_lock(vpe, flags); } else { /* Physical LPIs are already locked via the irq_desc lock */ struct its_device *its_dev = irq_data_get_irq_chip_data(d); @@ -287,10 +297,18 @@ static int irq_to_cpuid_lock(struct irq_data *d, unsigned long *flags) static void irq_to_cpuid_unlock(struct irq_data *d, unsigned long flags) { - struct its_vlpi_map *map = get_vlpi_map(d); + struct its_vpe *vpe = NULL; - if (map) - vpe_to_cpuid_unlock(map->vpe, flags); + if (d->chip == &its_vpe_irq_chip) { + vpe = irq_data_get_irq_chip_data(d); + } else { + struct its_vlpi_map *map = get_vlpi_map(d); + if (map) + vpe = map->vpe; + } + + if (vpe) + vpe_to_cpuid_unlock(vpe, flags); } static struct its_collection *valid_col(struct its_collection *col) @@ -1422,13 +1440,28 @@ static void wait_for_syncr(void __iomem *rdbase) cpu_relax(); } +static void __direct_lpi_inv(struct irq_data *d, u64 val) +{ + void __iomem *rdbase; + unsigned long flags; + int cpu; + + /* Target the redistributor this LPI is currently routed to */ + cpu = irq_to_cpuid_lock(d, &flags); + raw_spin_lock(&gic_data_rdist_cpu(cpu)->rd_lock); + + rdbase = per_cpu_ptr(gic_rdists->rdist, cpu)->rd_base; + gic_write_lpir(val, rdbase + GICR_INVLPIR); + wait_for_syncr(rdbase); + + raw_spin_unlock(&gic_data_rdist_cpu(cpu)->rd_lock); + irq_to_cpuid_unlock(d, flags); +} + static void direct_lpi_inv(struct irq_data *d) { struct its_vlpi_map *map = get_vlpi_map(d); - void __iomem *rdbase; - unsigned long flags; u64 val; - int cpu; if (map) { struct its_device *its_dev = irq_data_get_irq_chip_data(d); @@ -1442,15 +1475,7 @@ static void direct_lpi_inv(struct irq_data *d) val = d->hwirq; } - /* Target the redistributor this LPI is currently routed to */ - cpu = irq_to_cpuid_lock(d, &flags); - raw_spin_lock(&gic_data_rdist_cpu(cpu)->rd_lock); - rdbase = per_cpu_ptr(gic_rdists->rdist, cpu)->rd_base; - gic_write_lpir(val, rdbase + GICR_INVLPIR); - - wait_for_syncr(rdbase); - raw_spin_unlock(&gic_data_rdist_cpu(cpu)->rd_lock); - irq_to_cpuid_unlock(d, flags); + __direct_lpi_inv(d, val); } static void lpi_update_config(struct irq_data *d, u8 clr, u8 set) @@ -3916,18 +3941,10 @@ static void its_vpe_send_inv(struct irq_data *d) { struct its_vpe *vpe = irq_data_get_irq_chip_data(d); - if (gic_rdists->has_direct_lpi) { - void __iomem *rdbase; - - /* Target the redistributor this VPE is currently known on */ - raw_spin_lock(&gic_data_rdist_cpu(vpe->col_idx)->rd_lock); - rdbase = per_cpu_ptr(gic_rdists->rdist, vpe->col_idx)->rd_base; - gic_write_lpir(d->parent_data->hwirq, rdbase + GICR_INVLPIR); - wait_for_syncr(rdbase); - raw_spin_unlock(&gic_data_rdist_cpu(vpe->col_idx)->rd_lock); - } else { + if (gic_rdists->has_direct_lpi) + __direct_lpi_inv(d, d->parent_data->hwirq); + else its_vpe_send_cmd(vpe, its_send_inv); - } } static void its_vpe_mask_irq(struct irq_data *d) From 96c73bc9efef5d1d2a9d6a1ef0da9609f7694109 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Tue, 13 Jul 2021 09:32:59 -0700 Subject: [PATCH 094/203] KVM: VMX: Invert handling of CR0.WP for EPT without unrestricted guest [ Upstream commit ee5a5584cba316bc90bc2fad0c6d10b71f1791cb ] Opt-in to forcing CR0.WP=1 for shadow paging, and stop lying about WP being "always on" for unrestricted guest. In addition to making KVM a wee bit more honest, this paves the way for additional cleanup. No functional change intended. Signed-off-by: Sean Christopherson Message-Id: <20210713163324.627647-22-seanjc@google.com> Signed-off-by: Paolo Bonzini Stable-dep-of: c4abd7352023 ("KVM: VMX: Don't fudge CR0 and CR4 for restricted L2 guest") Signed-off-by: Sasha Levin --- arch/x86/kvm/vmx/vmx.c | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index 9aedc7b06da7..574acfa98fa9 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -135,8 +135,7 @@ module_param(allow_smaller_maxphyaddr, bool, S_IRUGO); #define KVM_VM_CR0_ALWAYS_OFF (X86_CR0_NW | X86_CR0_CD) #define KVM_VM_CR0_ALWAYS_ON_UNRESTRICTED_GUEST X86_CR0_NE #define KVM_VM_CR0_ALWAYS_ON \ - (KVM_VM_CR0_ALWAYS_ON_UNRESTRICTED_GUEST | \ - X86_CR0_WP | X86_CR0_PG | X86_CR0_PE) + (KVM_VM_CR0_ALWAYS_ON_UNRESTRICTED_GUEST | X86_CR0_PG | X86_CR0_PE) #define KVM_VM_CR4_ALWAYS_ON_UNRESTRICTED_GUEST X86_CR4_VMXE #define KVM_PMODE_VM_CR4_ALWAYS_ON (X86_CR4_PAE | X86_CR4_VMXE) @@ -3064,9 +3063,7 @@ void ept_save_pdptrs(struct kvm_vcpu *vcpu) kvm_register_mark_dirty(vcpu, VCPU_EXREG_PDPTR); } -static void ept_update_paging_mode_cr0(unsigned long *hw_cr0, - unsigned long cr0, - struct kvm_vcpu *vcpu) +static void ept_update_paging_mode_cr0(unsigned long cr0, struct kvm_vcpu *vcpu) { struct vcpu_vmx *vmx = to_vmx(vcpu); @@ -3085,9 +3082,6 @@ static void ept_update_paging_mode_cr0(unsigned long *hw_cr0, vcpu->arch.cr0 = cr0; vmx_set_cr4(vcpu, kvm_read_cr4(vcpu)); } - - if (!(cr0 & X86_CR0_WP)) - *hw_cr0 &= ~X86_CR0_WP; } void vmx_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) @@ -3100,6 +3094,8 @@ void vmx_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) hw_cr0 |= KVM_VM_CR0_ALWAYS_ON_UNRESTRICTED_GUEST; else { hw_cr0 |= KVM_VM_CR0_ALWAYS_ON; + if (!enable_ept) + hw_cr0 |= X86_CR0_WP; if (vmx->rmode.vm86_active && (cr0 & X86_CR0_PE)) enter_pmode(vcpu); @@ -3118,7 +3114,7 @@ void vmx_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) #endif if (enable_ept && !is_unrestricted_guest(vcpu)) - ept_update_paging_mode_cr0(&hw_cr0, cr0, vcpu); + ept_update_paging_mode_cr0(cr0, vcpu); vmcs_writel(CR0_READ_SHADOW, cr0); vmcs_writel(GUEST_CR0, hw_cr0); From 7fc51da40b9d14491d90f21fc2ac769241b6840b Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Tue, 13 Jul 2021 09:33:01 -0700 Subject: [PATCH 095/203] KVM: VMX: Fold ept_update_paging_mode_cr0() back into vmx_set_cr0() [ Upstream commit c834fd7fc1308a0e0429d203a6c3af528cd902fa ] Move the CR0/CR3/CR4 shenanigans for EPT without unrestricted guest back into vmx_set_cr0(). This will allow a future patch to eliminate the rather gross stuffing of vcpu->arch.cr0 in the paging transition cases by snapshotting the old CR0. No functional change intended. Signed-off-by: Sean Christopherson Message-Id: <20210713163324.627647-24-seanjc@google.com> Signed-off-by: Paolo Bonzini Stable-dep-of: c4abd7352023 ("KVM: VMX: Don't fudge CR0 and CR4 for restricted L2 guest") Signed-off-by: Sasha Levin --- arch/x86/kvm/vmx/vmx.c | 40 +++++++++++++++++----------------------- 1 file changed, 17 insertions(+), 23 deletions(-) diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index 574acfa98fa9..b9abe08c9d59 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -3063,27 +3063,6 @@ void ept_save_pdptrs(struct kvm_vcpu *vcpu) kvm_register_mark_dirty(vcpu, VCPU_EXREG_PDPTR); } -static void ept_update_paging_mode_cr0(unsigned long cr0, struct kvm_vcpu *vcpu) -{ - struct vcpu_vmx *vmx = to_vmx(vcpu); - - if (!kvm_register_is_available(vcpu, VCPU_EXREG_CR3)) - vmx_cache_reg(vcpu, VCPU_EXREG_CR3); - if (!(cr0 & X86_CR0_PG)) { - /* From paging/starting to nonpaging */ - exec_controls_setbit(vmx, CPU_BASED_CR3_LOAD_EXITING | - CPU_BASED_CR3_STORE_EXITING); - vcpu->arch.cr0 = cr0; - vmx_set_cr4(vcpu, kvm_read_cr4(vcpu)); - } else if (!is_paging(vcpu)) { - /* From nonpaging to paging */ - exec_controls_clearbit(vmx, CPU_BASED_CR3_LOAD_EXITING | - CPU_BASED_CR3_STORE_EXITING); - vcpu->arch.cr0 = cr0; - vmx_set_cr4(vcpu, kvm_read_cr4(vcpu)); - } -} - void vmx_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) { struct vcpu_vmx *vmx = to_vmx(vcpu); @@ -3113,8 +3092,23 @@ void vmx_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) } #endif - if (enable_ept && !is_unrestricted_guest(vcpu)) - ept_update_paging_mode_cr0(cr0, vcpu); + if (enable_ept && !is_unrestricted_guest(vcpu)) { + if (!kvm_register_is_available(vcpu, VCPU_EXREG_CR3)) + vmx_cache_reg(vcpu, VCPU_EXREG_CR3); + if (!(cr0 & X86_CR0_PG)) { + /* From paging/starting to nonpaging */ + exec_controls_setbit(vmx, CPU_BASED_CR3_LOAD_EXITING | + CPU_BASED_CR3_STORE_EXITING); + vcpu->arch.cr0 = cr0; + vmx_set_cr4(vcpu, kvm_read_cr4(vcpu)); + } else if (!is_paging(vcpu)) { + /* From nonpaging to paging */ + exec_controls_clearbit(vmx, CPU_BASED_CR3_LOAD_EXITING | + CPU_BASED_CR3_STORE_EXITING); + vcpu->arch.cr0 = cr0; + vmx_set_cr4(vcpu, kvm_read_cr4(vcpu)); + } + } vmcs_writel(CR0_READ_SHADOW, cr0); vmcs_writel(GUEST_CR0, hw_cr0); From c710ff061237f10269eb18c4a8f435a64138b26a Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Tue, 13 Jul 2021 09:33:02 -0700 Subject: [PATCH 096/203] KVM: nVMX: Do not clear CR3 load/store exiting bits if L1 wants 'em [ Upstream commit 470750b3425513b9f63f176e564e63e0e7998afc ] Keep CR3 load/store exiting enable as needed when running L2 in order to honor L1's desires. This fixes a largely theoretical bug where L1 could intercept CR3 but not CR0.PG and end up not getting the desired CR3 exits when L2 enables paging. In other words, the existing !is_paging() check inadvertantly handles the normal case for L2 where vmx_set_cr0() is called during VM-Enter, which is guaranteed to run with paging enabled, and thus will never clear the bits. Removing the !is_paging() check will also allow future consolidation and cleanup of the related code. From a performance perspective, this is all a nop, as the VMCS controls shadow will optimize away the VMWRITE when the controls are in the desired state. Add a comment explaining why CR3 is intercepted, with a big disclaimer about not querying the old CR3. Because vmx_set_cr0() is used for flows that are not directly tied to MOV CR3, e.g. vCPU RESET/INIT and nested VM-Enter, it's possible that is_paging() is not synchronized with CR3 load/store exiting. This is actually guaranteed in the current code, as KVM starts with CR3 interception disabled. Obviously that can be fixed, but there's no good reason to play whack-a-mole, and it tends to end poorly, e.g. descriptor table exiting for UMIP emulation attempted to be precise in the past and ended up botching the interception toggling. Fixes: fe3ef05c7572 ("KVM: nVMX: Prepare vmcs02 from vmcs01 and vmcs12") Signed-off-by: Sean Christopherson Message-Id: <20210713163324.627647-25-seanjc@google.com> Signed-off-by: Paolo Bonzini Stable-dep-of: c4abd7352023 ("KVM: VMX: Don't fudge CR0 and CR4 for restricted L2 guest") Signed-off-by: Sasha Levin --- arch/x86/kvm/vmx/vmx.c | 46 +++++++++++++++++++++++++++++++++--------- 1 file changed, 37 insertions(+), 9 deletions(-) diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index b9abe08c9d59..ca51df0df3f9 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -3063,10 +3063,14 @@ void ept_save_pdptrs(struct kvm_vcpu *vcpu) kvm_register_mark_dirty(vcpu, VCPU_EXREG_PDPTR); } +#define CR3_EXITING_BITS (CPU_BASED_CR3_LOAD_EXITING | \ + CPU_BASED_CR3_STORE_EXITING) + void vmx_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) { struct vcpu_vmx *vmx = to_vmx(vcpu); unsigned long hw_cr0; + u32 tmp; hw_cr0 = (cr0 & ~KVM_VM_CR0_ALWAYS_OFF); if (is_unrestricted_guest(vcpu)) @@ -3093,18 +3097,42 @@ void vmx_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) #endif if (enable_ept && !is_unrestricted_guest(vcpu)) { + /* + * Ensure KVM has an up-to-date snapshot of the guest's CR3. If + * the below code _enables_ CR3 exiting, vmx_cache_reg() will + * (correctly) stop reading vmcs.GUEST_CR3 because it thinks + * KVM's CR3 is installed. + */ if (!kvm_register_is_available(vcpu, VCPU_EXREG_CR3)) vmx_cache_reg(vcpu, VCPU_EXREG_CR3); + + /* + * When running with EPT but not unrestricted guest, KVM must + * intercept CR3 accesses when paging is _disabled_. This is + * necessary because restricted guests can't actually run with + * paging disabled, and so KVM stuffs its own CR3 in order to + * run the guest when identity mapped page tables. + * + * Do _NOT_ check the old CR0.PG, e.g. to optimize away the + * update, it may be stale with respect to CR3 interception, + * e.g. after nested VM-Enter. + * + * Lastly, honor L1's desires, i.e. intercept CR3 loads and/or + * stores to forward them to L1, even if KVM does not need to + * intercept them to preserve its identity mapped page tables. + */ if (!(cr0 & X86_CR0_PG)) { - /* From paging/starting to nonpaging */ - exec_controls_setbit(vmx, CPU_BASED_CR3_LOAD_EXITING | - CPU_BASED_CR3_STORE_EXITING); - vcpu->arch.cr0 = cr0; - vmx_set_cr4(vcpu, kvm_read_cr4(vcpu)); - } else if (!is_paging(vcpu)) { - /* From nonpaging to paging */ - exec_controls_clearbit(vmx, CPU_BASED_CR3_LOAD_EXITING | - CPU_BASED_CR3_STORE_EXITING); + exec_controls_setbit(vmx, CR3_EXITING_BITS); + } else if (!is_guest_mode(vcpu)) { + exec_controls_clearbit(vmx, CR3_EXITING_BITS); + } else { + tmp = exec_controls_get(vmx); + tmp &= ~CR3_EXITING_BITS; + tmp |= get_vmcs12(vcpu)->cpu_based_vm_exec_control & CR3_EXITING_BITS; + exec_controls_set(vmx, tmp); + } + + if (!is_paging(vcpu) != !(cr0 & X86_CR0_PG)) { vcpu->arch.cr0 = cr0; vmx_set_cr4(vcpu, kvm_read_cr4(vcpu)); } From 836b13168336c000fa5bac258f11c555711268fc Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Tue, 13 Jun 2023 13:30:36 -0700 Subject: [PATCH 097/203] KVM: VMX: Don't fudge CR0 and CR4 for restricted L2 guest [ Upstream commit c4abd7352023aa96114915a0bb2b88016a425cda ] Stuff CR0 and/or CR4 to be compliant with a restricted guest if and only if KVM itself is not configured to utilize unrestricted guests, i.e. don't stuff CR0/CR4 for a restricted L2 that is running as the guest of an unrestricted L1. Any attempt to VM-Enter a restricted guest with invalid CR0/CR4 values should fail, i.e. in a nested scenario, KVM (as L0) should never observe a restricted L2 with incompatible CR0/CR4, since nested VM-Enter from L1 should have failed. And if KVM does observe an active, restricted L2 with incompatible state, e.g. due to a KVM bug, fudging CR0/CR4 instead of letting VM-Enter fail does more harm than good, as KVM will often neglect to undo the side effects, e.g. won't clear rmode.vm86_active on nested VM-Exit, and thus the damage can easily spill over to L1. On the other hand, letting VM-Enter fail due to bad guest state is more likely to contain the damage to L2 as KVM relies on hardware to perform most guest state consistency checks, i.e. KVM needs to be able to reflect a failed nested VM-Enter into L1 irrespective of (un)restricted guest behavior. Cc: Jim Mattson Cc: stable@vger.kernel.org Fixes: bddd82d19e2e ("KVM: nVMX: KVM needs to unset "unrestricted guest" VM-execution control in vmcs02 if vmcs12 doesn't set it") Signed-off-by: Sean Christopherson Message-Id: <20230613203037.1968489-3-seanjc@google.com> Signed-off-by: Paolo Bonzini Signed-off-by: Sasha Levin --- arch/x86/kvm/vmx/vmx.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index ca51df0df3f9..2445c6103895 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -1519,6 +1519,11 @@ void vmx_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags) struct vcpu_vmx *vmx = to_vmx(vcpu); unsigned long old_rflags; + /* + * Unlike CR0 and CR4, RFLAGS handling requires checking if the vCPU + * is an unrestricted guest in order to mark L2 as needing emulation + * if L1 runs L2 as a restricted guest. + */ if (is_unrestricted_guest(vcpu)) { kvm_register_mark_available(vcpu, VCPU_EXREG_RFLAGS); vmx->rflags = rflags; @@ -3073,7 +3078,7 @@ void vmx_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) u32 tmp; hw_cr0 = (cr0 & ~KVM_VM_CR0_ALWAYS_OFF); - if (is_unrestricted_guest(vcpu)) + if (enable_unrestricted_guest) hw_cr0 |= KVM_VM_CR0_ALWAYS_ON_UNRESTRICTED_GUEST; else { hw_cr0 |= KVM_VM_CR0_ALWAYS_ON; @@ -3096,7 +3101,7 @@ void vmx_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) } #endif - if (enable_ept && !is_unrestricted_guest(vcpu)) { + if (enable_ept && !enable_unrestricted_guest) { /* * Ensure KVM has an up-to-date snapshot of the guest's CR3. If * the below code _enables_ CR3 exiting, vmx_cache_reg() will @@ -3231,7 +3236,7 @@ void vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) unsigned long hw_cr4; hw_cr4 = (cr4_read_shadow() & X86_CR4_MCE) | (cr4 & ~X86_CR4_MCE); - if (is_unrestricted_guest(vcpu)) + if (enable_unrestricted_guest) hw_cr4 |= KVM_VM_CR4_ALWAYS_ON_UNRESTRICTED_GUEST; else if (vmx->rmode.vm86_active) hw_cr4 |= KVM_RMODE_VM_CR4_ALWAYS_ON; @@ -3251,7 +3256,7 @@ void vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) vcpu->arch.cr4 = cr4; kvm_register_mark_available(vcpu, VCPU_EXREG_CR4); - if (!is_unrestricted_guest(vcpu)) { + if (!enable_unrestricted_guest) { if (enable_ept) { if (!is_paging(vcpu)) { hw_cr4 &= ~X86_CR4_PAE; From 223ecf871b42fda915b1eba88b85a0d785436795 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Tue, 6 Apr 2021 22:41:32 +0200 Subject: [PATCH 098/203] staging: rtl8712: Use constants from [ Upstream commit f179515da9780c4cd37bee76c3cbb6f7364451d6 ] Some constants defined in wifi.h are already defined in with some other (but similar) names. Be consistent and use the ones from . The conversions made are: _SSID_IE_ --> WLAN_EID_SSID _SUPPORTEDRATES_IE_ --> WLAN_EID_SUPP_RATES _DSSET_IE_ --> WLAN_EID_DS_PARAMS _IBSS_PARA_IE_ --> WLAN_EID_IBSS_PARAMS _ERPINFO_IE_ --> WLAN_EID_ERP_INFO _EXT_SUPPORTEDRATES_IE_ --> WLAN_EID_EXT_SUPP_RATES _HT_CAPABILITY_IE_ --> WLAN_EID_HT_CAPABILITY _HT_EXTRA_INFO_IE_ --> WLAN_EID_HT_OPERATION (not used) _HT_ADD_INFO_IE_ --> WLAN_EID_HT_OPERATION _VENDOR_SPECIFIC_IE_ --> WLAN_EID_VENDOR_SPECIFIC _RESERVED47_ --> (not used) Signed-off-by: Christophe JAILLET Link: https://lore.kernel.org/r/fe35fb45323adc3a30f31b7280cec7700fd325d8.1617741313.git.christophe.jaillet@wanadoo.fr Signed-off-by: Greg Kroah-Hartman Stable-dep-of: ac83631230f7 ("staging: r8712: Fix memory leak in _r8712_init_xmit_priv()") Signed-off-by: Sasha Levin --- drivers/staging/rtl8712/ieee80211.c | 12 ++++++------ drivers/staging/rtl8712/rtl871x_ioctl_linux.c | 8 ++++---- drivers/staging/rtl8712/rtl871x_mlme.c | 10 +++++----- drivers/staging/rtl8712/rtl871x_xmit.c | 3 ++- drivers/staging/rtl8712/wifi.h | 15 --------------- 5 files changed, 17 insertions(+), 31 deletions(-) diff --git a/drivers/staging/rtl8712/ieee80211.c b/drivers/staging/rtl8712/ieee80211.c index b4a099169c7c..8075ed2ba61e 100644 --- a/drivers/staging/rtl8712/ieee80211.c +++ b/drivers/staging/rtl8712/ieee80211.c @@ -181,25 +181,25 @@ int r8712_generate_ie(struct registry_priv *registrypriv) sz += 2; ie += 2; /*SSID*/ - ie = r8712_set_ie(ie, _SSID_IE_, dev_network->Ssid.SsidLength, + ie = r8712_set_ie(ie, WLAN_EID_SSID, dev_network->Ssid.SsidLength, dev_network->Ssid.Ssid, &sz); /*supported rates*/ set_supported_rate(dev_network->rates, registrypriv->wireless_mode); rate_len = r8712_get_rateset_len(dev_network->rates); if (rate_len > 8) { - ie = r8712_set_ie(ie, _SUPPORTEDRATES_IE_, 8, + ie = r8712_set_ie(ie, WLAN_EID_SUPP_RATES, 8, dev_network->rates, &sz); - ie = r8712_set_ie(ie, _EXT_SUPPORTEDRATES_IE_, (rate_len - 8), + ie = r8712_set_ie(ie, WLAN_EID_EXT_SUPP_RATES, (rate_len - 8), (dev_network->rates + 8), &sz); } else { - ie = r8712_set_ie(ie, _SUPPORTEDRATES_IE_, + ie = r8712_set_ie(ie, WLAN_EID_SUPP_RATES, rate_len, dev_network->rates, &sz); } /*DS parameter set*/ - ie = r8712_set_ie(ie, _DSSET_IE_, 1, + ie = r8712_set_ie(ie, WLAN_EID_DS_PARAMS, 1, (u8 *)&dev_network->Configuration.DSConfig, &sz); /*IBSS Parameter Set*/ - ie = r8712_set_ie(ie, _IBSS_PARA_IE_, 2, + ie = r8712_set_ie(ie, WLAN_EID_IBSS_PARAMS, 2, (u8 *)&dev_network->Configuration.ATIMWindow, &sz); return sz; } diff --git a/drivers/staging/rtl8712/rtl871x_ioctl_linux.c b/drivers/staging/rtl8712/rtl871x_ioctl_linux.c index 2a661b04cd25..15c6ac518c16 100644 --- a/drivers/staging/rtl8712/rtl871x_ioctl_linux.c +++ b/drivers/staging/rtl8712/rtl871x_ioctl_linux.c @@ -236,7 +236,7 @@ static char *translate_scan(struct _adapter *padapter, start = iwe_stream_add_point(info, start, stop, &iwe, pnetwork->network.Ssid.Ssid); /* parsing HT_CAP_IE */ - p = r8712_get_ie(&pnetwork->network.IEs[12], _HT_CAPABILITY_IE_, + p = r8712_get_ie(&pnetwork->network.IEs[12], WLAN_EID_HT_CAPABILITY, &ht_ielen, pnetwork->network.IELength - 12); if (p && ht_ielen > 0) ht_cap = true; @@ -567,7 +567,7 @@ static int r871x_set_wpa_ie(struct _adapter *padapter, char *pie, while (cnt < ielen) { eid = buf[cnt]; - if ((eid == _VENDOR_SPECIFIC_IE_) && + if ((eid == WLAN_EID_VENDOR_SPECIFIC) && (!memcmp(&buf[cnt + 2], wps_oui, 4))) { netdev_info(padapter->pnetdev, "r8712u: SET WPS_IE\n"); padapter->securitypriv.wps_ie_len = @@ -609,7 +609,7 @@ static int r8711_wx_get_name(struct net_device *dev, if (check_fwstate(pmlmepriv, _FW_LINKED | WIFI_ADHOC_MASTER_STATE) == true) { /* parsing HT_CAP_IE */ - p = r8712_get_ie(&pcur_bss->IEs[12], _HT_CAPABILITY_IE_, + p = r8712_get_ie(&pcur_bss->IEs[12], WLAN_EID_HT_CAPABILITY, &ht_ielen, pcur_bss->IELength - 12); if (p && ht_ielen > 0) ht_cap = true; @@ -1403,7 +1403,7 @@ static int r8711_wx_get_rate(struct net_device *dev, i = 0; if (!check_fwstate(pmlmepriv, _FW_LINKED | WIFI_ADHOC_MASTER_STATE)) return -ENOLINK; - p = r8712_get_ie(&pcur_bss->IEs[12], _HT_CAPABILITY_IE_, &ht_ielen, + p = r8712_get_ie(&pcur_bss->IEs[12], WLAN_EID_HT_CAPABILITY, &ht_ielen, pcur_bss->IELength - 12); if (p && ht_ielen > 0) { ht_cap = true; diff --git a/drivers/staging/rtl8712/rtl871x_mlme.c b/drivers/staging/rtl8712/rtl871x_mlme.c index 6074383ec0b5..250cb0c4ed08 100644 --- a/drivers/staging/rtl8712/rtl871x_mlme.c +++ b/drivers/staging/rtl8712/rtl871x_mlme.c @@ -1649,11 +1649,11 @@ unsigned int r8712_restructure_ht_ie(struct _adapter *padapter, u8 *in_ie, struct ht_priv *phtpriv = &pmlmepriv->htpriv; phtpriv->ht_option = 0; - p = r8712_get_ie(in_ie + 12, _HT_CAPABILITY_IE_, &ielen, in_len - 12); + p = r8712_get_ie(in_ie + 12, WLAN_EID_HT_CAPABILITY, &ielen, in_len - 12); if (p && (ielen > 0)) { if (pqospriv->qos_option == 0) { out_len = *pout_len; - r8712_set_ie(out_ie + out_len, _VENDOR_SPECIFIC_IE_, + r8712_set_ie(out_ie + out_len, WLAN_EID_VENDOR_SPECIFIC, _WMM_IE_Length_, WMM_IE, pout_len); pqospriv->qos_option = 1; } @@ -1667,7 +1667,7 @@ unsigned int r8712_restructure_ht_ie(struct _adapter *padapter, u8 *in_ie, IEEE80211_HT_CAP_DSSSCCK40); ht_capie.ampdu_params_info = (IEEE80211_HT_AMPDU_PARM_FACTOR & 0x03) | (IEEE80211_HT_AMPDU_PARM_DENSITY & 0x00); - r8712_set_ie(out_ie + out_len, _HT_CAPABILITY_IE_, + r8712_set_ie(out_ie + out_len, WLAN_EID_HT_CAPABILITY, sizeof(struct rtl_ieee80211_ht_cap), (unsigned char *)&ht_capie, pout_len); phtpriv->ht_option = 1; @@ -1698,7 +1698,7 @@ static void update_ht_cap(struct _adapter *padapter, u8 *pie, uint ie_len) /*check Max Rx A-MPDU Size*/ len = 0; p = r8712_get_ie(pie + sizeof(struct NDIS_802_11_FIXED_IEs), - _HT_CAPABILITY_IE_, + WLAN_EID_HT_CAPABILITY, &len, ie_len - sizeof(struct NDIS_802_11_FIXED_IEs)); if (p && len > 0) { @@ -1733,7 +1733,7 @@ static void update_ht_cap(struct _adapter *padapter, u8 *pie, uint ie_len) } len = 0; p = r8712_get_ie(pie + sizeof(struct NDIS_802_11_FIXED_IEs), - _HT_ADD_INFO_IE_, &len, + WLAN_EID_HT_OPERATION, &len, ie_len - sizeof(struct NDIS_802_11_FIXED_IEs)); } diff --git a/drivers/staging/rtl8712/rtl871x_xmit.c b/drivers/staging/rtl8712/rtl871x_xmit.c index fd99782a400a..15491859aeda 100644 --- a/drivers/staging/rtl8712/rtl871x_xmit.c +++ b/drivers/staging/rtl8712/rtl871x_xmit.c @@ -22,6 +22,7 @@ #include "osdep_intf.h" #include "usb_ops.h" +#include static const u8 P802_1H_OUI[P80211_OUI_LEN] = {0x00, 0x00, 0xf8}; static const u8 RFC1042_OUI[P80211_OUI_LEN] = {0x00, 0x00, 0x00}; @@ -709,7 +710,7 @@ void r8712_update_protection(struct _adapter *padapter, u8 *ie, uint ie_len) break; case AUTO_VCS: default: - perp = r8712_get_ie(ie, _ERPINFO_IE_, &erp_len, ie_len); + perp = r8712_get_ie(ie, WLAN_EID_ERP_INFO, &erp_len, ie_len); if (!perp) { pxmitpriv->vcs = NONE_VCS; } else { diff --git a/drivers/staging/rtl8712/wifi.h b/drivers/staging/rtl8712/wifi.h index 601d4ff607bc..9bb310b24589 100644 --- a/drivers/staging/rtl8712/wifi.h +++ b/drivers/staging/rtl8712/wifi.h @@ -374,21 +374,6 @@ static inline unsigned char *get_hdr_bssid(unsigned char *pframe) #define _FIXED_IE_LENGTH_ _BEACON_IE_OFFSET_ -#define _SSID_IE_ 0 -#define _SUPPORTEDRATES_IE_ 1 -#define _DSSET_IE_ 3 -#define _IBSS_PARA_IE_ 6 -#define _ERPINFO_IE_ 42 -#define _EXT_SUPPORTEDRATES_IE_ 50 - -#define _HT_CAPABILITY_IE_ 45 -#define _HT_EXTRA_INFO_IE_ 61 -#define _HT_ADD_INFO_IE_ 61 /* _HT_EXTRA_INFO_IE_ */ - -#define _VENDOR_SPECIFIC_IE_ 221 - -#define _RESERVED47_ 47 - /* --------------------------------------------------------------------------- * Below is the fixed elements... * --------------------------------------------------------------------------- From fc511ae405f7ba29fbcb0246061ec15c272386e1 Mon Sep 17 00:00:00 2001 From: Larry Finger Date: Fri, 14 Jul 2023 12:54:17 -0500 Subject: [PATCH 099/203] staging: r8712: Fix memory leak in _r8712_init_xmit_priv() [ Upstream commit ac83631230f77dda94154ed0ebfd368fc81c70a3 ] In the above mentioned routine, memory is allocated in several places. If the first succeeds and a later one fails, the routine will leak memory. This patch fixes commit 2865d42c78a9 ("staging: r8712u: Add the new driver to the mainline kernel"). A potential memory leak in r8712_xmit_resource_alloc() is also addressed. Fixes: 2865d42c78a9 ("staging: r8712u: Add the new driver to the mainline kernel") Reported-by: syzbot+cf71097ffb6755df8251@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/x/log.txt?x=11ac3fa0a80000 Cc: stable@vger.kernel.org Cc: Nam Cao Signed-off-by: Larry Finger Reviewed-by: Nam Cao Link: https://lore.kernel.org/r/20230714175417.18578-1-Larry.Finger@lwfinger.net Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- drivers/staging/rtl8712/rtl871x_xmit.c | 43 ++++++++++++++++++++------ drivers/staging/rtl8712/xmit_linux.c | 6 ++++ 2 files changed, 40 insertions(+), 9 deletions(-) diff --git a/drivers/staging/rtl8712/rtl871x_xmit.c b/drivers/staging/rtl8712/rtl871x_xmit.c index 15491859aeda..eb6493047aaf 100644 --- a/drivers/staging/rtl8712/rtl871x_xmit.c +++ b/drivers/staging/rtl8712/rtl871x_xmit.c @@ -22,6 +22,7 @@ #include "osdep_intf.h" #include "usb_ops.h" +#include #include static const u8 P802_1H_OUI[P80211_OUI_LEN] = {0x00, 0x00, 0xf8}; @@ -56,6 +57,7 @@ int _r8712_init_xmit_priv(struct xmit_priv *pxmitpriv, sint i; struct xmit_buf *pxmitbuf; struct xmit_frame *pxframe; + int j; memset((unsigned char *)pxmitpriv, 0, sizeof(struct xmit_priv)); spin_lock_init(&pxmitpriv->lock); @@ -118,11 +120,8 @@ int _r8712_init_xmit_priv(struct xmit_priv *pxmitpriv, _init_queue(&pxmitpriv->pending_xmitbuf_queue); pxmitpriv->pallocated_xmitbuf = kmalloc(NR_XMITBUFF * sizeof(struct xmit_buf) + 4, GFP_ATOMIC); - if (!pxmitpriv->pallocated_xmitbuf) { - kfree(pxmitpriv->pallocated_frame_buf); - pxmitpriv->pallocated_frame_buf = NULL; - return -ENOMEM; - } + if (!pxmitpriv->pallocated_xmitbuf) + goto clean_up_frame_buf; pxmitpriv->pxmitbuf = pxmitpriv->pallocated_xmitbuf + 4 - ((addr_t)(pxmitpriv->pallocated_xmitbuf) & 3); pxmitbuf = (struct xmit_buf *)pxmitpriv->pxmitbuf; @@ -130,13 +129,17 @@ int _r8712_init_xmit_priv(struct xmit_priv *pxmitpriv, INIT_LIST_HEAD(&pxmitbuf->list); pxmitbuf->pallocated_buf = kmalloc(MAX_XMITBUF_SZ + XMITBUF_ALIGN_SZ, GFP_ATOMIC); - if (!pxmitbuf->pallocated_buf) - return -ENOMEM; + if (!pxmitbuf->pallocated_buf) { + j = 0; + goto clean_up_alloc_buf; + } pxmitbuf->pbuf = pxmitbuf->pallocated_buf + XMITBUF_ALIGN_SZ - ((addr_t) (pxmitbuf->pallocated_buf) & (XMITBUF_ALIGN_SZ - 1)); - if (r8712_xmit_resource_alloc(padapter, pxmitbuf)) - return -ENOMEM; + if (r8712_xmit_resource_alloc(padapter, pxmitbuf)) { + j = 1; + goto clean_up_alloc_buf; + } list_add_tail(&pxmitbuf->list, &(pxmitpriv->free_xmitbuf_queue.queue)); pxmitbuf++; @@ -147,6 +150,28 @@ int _r8712_init_xmit_priv(struct xmit_priv *pxmitpriv, init_hwxmits(pxmitpriv->hwxmits, pxmitpriv->hwxmit_entry); tasklet_setup(&pxmitpriv->xmit_tasklet, r8712_xmit_bh); return 0; + +clean_up_alloc_buf: + if (j) { + /* failure happened in r8712_xmit_resource_alloc() + * delete extra pxmitbuf->pallocated_buf + */ + kfree(pxmitbuf->pallocated_buf); + } + for (j = 0; j < i; j++) { + int k; + + pxmitbuf--; /* reset pointer */ + kfree(pxmitbuf->pallocated_buf); + for (k = 0; k < 8; k++) /* delete xmit urb's */ + usb_free_urb(pxmitbuf->pxmit_urb[k]); + } + kfree(pxmitpriv->pallocated_xmitbuf); + pxmitpriv->pallocated_xmitbuf = NULL; +clean_up_frame_buf: + kfree(pxmitpriv->pallocated_frame_buf); + pxmitpriv->pallocated_frame_buf = NULL; + return -ENOMEM; } void _free_xmit_priv(struct xmit_priv *pxmitpriv) diff --git a/drivers/staging/rtl8712/xmit_linux.c b/drivers/staging/rtl8712/xmit_linux.c index 1f67d86c606f..9050e51aa407 100644 --- a/drivers/staging/rtl8712/xmit_linux.c +++ b/drivers/staging/rtl8712/xmit_linux.c @@ -119,6 +119,12 @@ int r8712_xmit_resource_alloc(struct _adapter *padapter, for (i = 0; i < 8; i++) { pxmitbuf->pxmit_urb[i] = usb_alloc_urb(0, GFP_KERNEL); if (!pxmitbuf->pxmit_urb[i]) { + int k; + + for (k = i - 1; k >= 0; k--) { + /* handle allocation errors part way through loop */ + usb_free_urb(pxmitbuf->pxmit_urb[k]); + } netdev_err(padapter->pnetdev, "pxmitbuf->pxmit_urb[i] == NULL\n"); return -ENOMEM; } From 3d10481a90c39497bc64350d4b03cb5ac0a62494 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Fri, 21 Jul 2023 10:49:20 +0100 Subject: [PATCH 100/203] btrfs: check if the transaction was aborted at btrfs_wait_for_commit() [ Upstream commit bf7ecbe9875061bf3fce1883e3b26b77f847d1e8 ] At btrfs_wait_for_commit() we wait for a transaction to finish and then always return 0 (success) without checking if it was aborted, in which case the transaction didn't happen due to some critical error. Fix this by checking if the transaction was aborted. Fixes: 462045928bda ("Btrfs: add START_SYNC, WAIT_SYNC ioctls") CC: stable@vger.kernel.org # 4.19+ Reviewed-by: Qu Wenruo Signed-off-by: Filipe Manana Reviewed-by: David Sterba Signed-off-by: David Sterba Signed-off-by: Sasha Levin --- fs/btrfs/transaction.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index ff372f1226a3..abd67f984fbc 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -891,6 +891,7 @@ int btrfs_wait_for_commit(struct btrfs_fs_info *fs_info, u64 transid) } wait_for_commit(cur_trans); + ret = cur_trans->aborted; btrfs_put_transaction(cur_trans); out: return ret; From 276738b382cf4067136b1ec52fdecc964d8e6105 Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Tue, 25 Jul 2023 03:20:49 -0400 Subject: [PATCH 101/203] virtio-net: fix race between set queues and probe commit 25266128fe16d5632d43ada34c847d7b8daba539 upstream. A race were found where set_channels could be called after registering but before virtnet_set_queues() in virtnet_probe(). Fixing this by moving the virtnet_set_queues() before netdevice registering. While at it, use _virtnet_set_queues() to avoid holding rtnl as the device is not even registered at that time. Cc: stable@vger.kernel.org Fixes: a220871be66f ("virtio-net: correctly enable multiqueue") Signed-off-by: Jason Wang Acked-by: Michael S. Tsirkin Reviewed-by: Xuan Zhuo Link: https://lore.kernel.org/r/20230725072049.617289-1-jasowang@redhat.com Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- drivers/net/virtio_net.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index 119a32f34b53..165149bcf0b1 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -3220,6 +3220,8 @@ static int virtnet_probe(struct virtio_device *vdev) } } + _virtnet_set_queues(vi, vi->curr_queue_pairs); + /* serialize netdev register + virtio_device_ready() with ndo_open() */ rtnl_lock(); @@ -3240,8 +3242,6 @@ static int virtnet_probe(struct virtio_device *vdev) goto free_unregister_netdev; } - virtnet_set_queues(vi, vi->curr_queue_pairs); - /* Assume link up if device can't report link status, otherwise get link status from config. */ netif_carrier_off(dev); From c837f121bc988412b382625478aae771c87ff59e Mon Sep 17 00:00:00 2001 From: Stefan Haberland Date: Fri, 21 Jul 2023 21:36:44 +0200 Subject: [PATCH 102/203] s390/dasd: fix hanging device after quiesce/resume commit 05f1d8ed03f547054efbc4d29bb7991c958ede95 upstream. Quiesce and resume are functions that tell the DASD driver to stop/resume issuing I/Os to a specific DASD. On resume dasd_schedule_block_bh() is called to kick handling of IO requests again. This does unfortunately not cover internal requests which are used for path verification for example. This could lead to a hanging device when a path event or anything else that triggers internal requests occurs on a quiesced device. Fix by also calling dasd_schedule_device_bh() which triggers handling of internal requests on resume. Fixes: 8e09f21574ea ("[S390] dasd: add hyper PAV support to DASD device driver, part 1") Cc: stable@vger.kernel.org Signed-off-by: Stefan Haberland Reviewed-by: Jan Hoeppner Link: https://lore.kernel.org/r/20230721193647.3889634-2-sth@linux.ibm.com Signed-off-by: Jens Axboe Signed-off-by: Greg Kroah-Hartman --- drivers/s390/block/dasd_ioctl.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/s390/block/dasd_ioctl.c b/drivers/s390/block/dasd_ioctl.c index 6d5c9cb83592..99b1b01e23e9 100644 --- a/drivers/s390/block/dasd_ioctl.c +++ b/drivers/s390/block/dasd_ioctl.c @@ -133,6 +133,7 @@ static int dasd_ioctl_resume(struct dasd_block *block) spin_unlock_irqrestore(get_ccwdev_lock(base->cdev), flags); dasd_schedule_block_bh(block); + dasd_schedule_device_bh(base); return 0; } From 4f669618c99f07578fd09ca453c7523b6e738375 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Sun, 23 Jul 2023 00:27:22 +0100 Subject: [PATCH 103/203] ASoC: wm8904: Fill the cache for WM8904_ADC_TEST_0 register commit f061e2be8689057cb4ec0dbffa9f03e1a23cdcb2 upstream. The WM8904_ADC_TEST_0 register is modified as part of updating the OSR controls but does not have a cache default, leading to errors when we try to modify these controls in cache only mode with no prior read: wm8904 3-001a: ASoC: error at snd_soc_component_update_bits on wm8904.3-001a for register: [0x000000c6] -16 Add a read of the register to probe() to fill the cache and avoid both the error messages and the misconfiguration of the chip which will result. Acked-by: Charles Keepax Signed-off-by: Mark Brown Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20230723-asoc-fix-wm8904-adc-test-read-v1-1-2cdf2edd83fd@kernel.org Signed-off-by: Mark Brown Signed-off-by: Greg Kroah-Hartman --- sound/soc/codecs/wm8904.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/sound/soc/codecs/wm8904.c b/sound/soc/codecs/wm8904.c index cc96c9bdff41..c90e776f7a54 100644 --- a/sound/soc/codecs/wm8904.c +++ b/sound/soc/codecs/wm8904.c @@ -2306,6 +2306,9 @@ static int wm8904_i2c_probe(struct i2c_client *i2c, regmap_update_bits(wm8904->regmap, WM8904_BIAS_CONTROL_0, WM8904_POBCTRL, 0); + /* Fill the cache for the ADC test register */ + regmap_read(wm8904->regmap, WM8904_ADC_TEST_0, &val); + /* Can leave the device powered off until we need it */ regcache_cache_only(wm8904->regmap, true); regulator_bulk_disable(ARRAY_SIZE(wm8904->supplies), wm8904->supplies); From aae65b1793bbe1ae36b6b31ce93695976e3c4acd Mon Sep 17 00:00:00 2001 From: Xiubo Li Date: Thu, 20 Jul 2023 11:33:55 +0800 Subject: [PATCH 104/203] ceph: never send metrics if disable_send_metrics is set commit 50164507f6b7b7ed85d8c3ac0266849fbd908db7 upstream. Even the 'disable_send_metrics' is true so when the session is being opened it will always trigger to send the metric for the first time. Cc: stable@vger.kernel.org Signed-off-by: Xiubo Li Reviewed-by: Venky Shankar Reviewed-by: Jeff Layton Signed-off-by: Ilya Dryomov Signed-off-by: Greg Kroah-Hartman --- fs/ceph/metric.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/ceph/metric.c b/fs/ceph/metric.c index 9e0a0e26294e..906e446abb46 100644 --- a/fs/ceph/metric.c +++ b/fs/ceph/metric.c @@ -130,7 +130,7 @@ static void metric_delayed_work(struct work_struct *work) struct ceph_mds_client *mdsc = container_of(m, struct ceph_mds_client, metric); - if (mdsc->stopping) + if (mdsc->stopping || disable_send_metrics) return; if (!m->session || !check_session_state(m->session)) { From c0f2b2b020961f1346c8f07e9de43e6cca48726a Mon Sep 17 00:00:00 2001 From: Joe Thornber Date: Tue, 25 Jul 2023 11:44:41 -0400 Subject: [PATCH 105/203] dm cache policy smq: ensure IO doesn't prevent cleaner policy progress commit 1e4ab7b4c881cf26c1c72b3f56519e03475486fb upstream. When using the cleaner policy to decommission the cache, there is never any writeback started from the cache as it is constantly delayed due to normal I/O keeping the device busy. Meaning @idle=false was always being passed to clean_target_met() Fix this by adding a specific 'cleaner' flag that is set when the cleaner policy is configured. This flag serves to always allow the cleaner's writeback work to be queued until the cache is decommissioned (even if the cache isn't idle). Reported-by: David Jeffery Fixes: b29d4986d0da ("dm cache: significant rework to leverage dm-bio-prison-v2") Cc: stable@vger.kernel.org Signed-off-by: Joe Thornber Signed-off-by: Mike Snitzer Signed-off-by: Greg Kroah-Hartman --- drivers/md/dm-cache-policy-smq.c | 28 ++++++++++++++++++---------- 1 file changed, 18 insertions(+), 10 deletions(-) diff --git a/drivers/md/dm-cache-policy-smq.c b/drivers/md/dm-cache-policy-smq.c index b61aac00ff40..859073193f5b 100644 --- a/drivers/md/dm-cache-policy-smq.c +++ b/drivers/md/dm-cache-policy-smq.c @@ -854,7 +854,13 @@ struct smq_policy { struct background_tracker *bg_work; - bool migrations_allowed; + bool migrations_allowed:1; + + /* + * If this is set the policy will try and clean the whole cache + * even if the device is not idle. + */ + bool cleaner:1; }; /*----------------------------------------------------------------*/ @@ -1133,7 +1139,7 @@ static bool clean_target_met(struct smq_policy *mq, bool idle) * Cache entries may not be populated. So we cannot rely on the * size of the clean queue. */ - if (idle) { + if (idle || mq->cleaner) { /* * We'd like to clean everything. */ @@ -1716,11 +1722,9 @@ static void calc_hotspot_params(sector_t origin_size, *hotspot_block_size /= 2u; } -static struct dm_cache_policy *__smq_create(dm_cblock_t cache_size, - sector_t origin_size, - sector_t cache_block_size, - bool mimic_mq, - bool migrations_allowed) +static struct dm_cache_policy * +__smq_create(dm_cblock_t cache_size, sector_t origin_size, sector_t cache_block_size, + bool mimic_mq, bool migrations_allowed, bool cleaner) { unsigned i; unsigned nr_sentinels_per_queue = 2u * NR_CACHE_LEVELS; @@ -1807,6 +1811,7 @@ static struct dm_cache_policy *__smq_create(dm_cblock_t cache_size, goto bad_btracker; mq->migrations_allowed = migrations_allowed; + mq->cleaner = cleaner; return &mq->policy; @@ -1830,21 +1835,24 @@ static struct dm_cache_policy *smq_create(dm_cblock_t cache_size, sector_t origin_size, sector_t cache_block_size) { - return __smq_create(cache_size, origin_size, cache_block_size, false, true); + return __smq_create(cache_size, origin_size, cache_block_size, + false, true, false); } static struct dm_cache_policy *mq_create(dm_cblock_t cache_size, sector_t origin_size, sector_t cache_block_size) { - return __smq_create(cache_size, origin_size, cache_block_size, true, true); + return __smq_create(cache_size, origin_size, cache_block_size, + true, true, false); } static struct dm_cache_policy *cleaner_create(dm_cblock_t cache_size, sector_t origin_size, sector_t cache_block_size) { - return __smq_create(cache_size, origin_size, cache_block_size, false, false); + return __smq_create(cache_size, origin_size, cache_block_size, + false, false, true); } /*----------------------------------------------------------------*/ From 4148f28f9824c00f81e41e729ee0a001d274a566 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Fri, 12 Mar 2021 09:34:39 +0100 Subject: [PATCH 106/203] drm/ttm: make ttm_bo_unpin more defensive MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 6c5403173a13a08ff61dbdafa4c0ed4a9dedbfe0 upstream. We seem to have some more driver bugs than thought. Signed-off-by: Christian König Fixes: deb0814b43f3 ("drm/ttm: add ttm_bo_pin()/ttm_bo_unpin() v2") Acked-by: Matthew Auld Link: https://patchwork.freedesktop.org/patch/msgid/20210312093810.2202-1-christian.koenig@amd.com Signed-off-by: Greg Kroah-Hartman --- include/drm/ttm/ttm_bo_api.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/include/drm/ttm/ttm_bo_api.h b/include/drm/ttm/ttm_bo_api.h index 33aca60870e2..09ccfee48fb4 100644 --- a/include/drm/ttm/ttm_bo_api.h +++ b/include/drm/ttm/ttm_bo_api.h @@ -628,8 +628,10 @@ static inline void ttm_bo_pin(struct ttm_buffer_object *bo) static inline void ttm_bo_unpin(struct ttm_buffer_object *bo) { dma_resv_assert_held(bo->base.resv); - WARN_ON_ONCE(!bo->pin_count); - --bo->pin_count; + if (bo->pin_count) + --bo->pin_count; + else + WARN_ON_ONCE(true); } int ttm_mem_evict_first(struct ttm_bo_device *bdev, From ad938dd2af281a63ed8fb66a7b169a937f0a8369 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Wed, 28 Dec 2022 22:21:49 +0100 Subject: [PATCH 107/203] ACPI: processor: perflib: Use the "no limit" frequency QoS commit c02d5feb6e2f60affc6ba8606d8d614c071e2ba6 upstream. When _PPC returns 0, it means that the CPU frequency is not limited by the platform firmware, so make acpi_processor_get_platform_limit() update the frequency QoS request used by it to "no limit" in that case. This addresses a problem with limiting CPU frequency artificially on some systems after CPU offline/online to the frequency that corresponds to the first entry in the _PSS return package. Reported-by: Pratyush Yadav Signed-off-by: Rafael J. Wysocki Reviewed-by: Pratyush Yadav Tested-by: Pratyush Yadav Tested-by: Hagar Hemdan Signed-off-by: Greg Kroah-Hartman --- drivers/acpi/processor_perflib.c | 22 +++++++++++++++++----- 1 file changed, 17 insertions(+), 5 deletions(-) diff --git a/drivers/acpi/processor_perflib.c b/drivers/acpi/processor_perflib.c index b04a68950ff1..2e13c395b6b7 100644 --- a/drivers/acpi/processor_perflib.c +++ b/drivers/acpi/processor_perflib.c @@ -56,6 +56,8 @@ static int acpi_processor_get_platform_limit(struct acpi_processor *pr) { acpi_status status = 0; unsigned long long ppc = 0; + s32 qos_value; + int index; int ret; if (!pr) @@ -75,17 +77,27 @@ static int acpi_processor_get_platform_limit(struct acpi_processor *pr) return -ENODEV; } - pr_debug("CPU %d: _PPC is %d - frequency %s limited\n", pr->id, - (int)ppc, ppc ? "" : "not"); + index = ppc; - pr->performance_platform_limit = (int)ppc; + pr_debug("CPU %d: _PPC is %d - frequency %s limited\n", pr->id, + index, index ? "is" : "is not"); + + pr->performance_platform_limit = index; if (ppc >= pr->performance->state_count || unlikely(!freq_qos_request_active(&pr->perflib_req))) return 0; - ret = freq_qos_update_request(&pr->perflib_req, - pr->performance->states[ppc].core_frequency * 1000); + /* + * If _PPC returns 0, it means that all of the available states can be + * used ("no limit"). + */ + if (index == 0) + qos_value = FREQ_QOS_MAX_DEFAULT_VALUE; + else + qos_value = pr->performance->states[index].core_frequency * 1000; + + ret = freq_qos_update_request(&pr->perflib_req, qos_value); if (ret < 0) { pr_warn("Failed to update perflib freq constraint: CPU%d (%d)\n", pr->id, ret); From a241fc02f1ce391da7b67d548a315dea575a9c6d Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Wed, 28 Dec 2022 22:24:10 +0100 Subject: [PATCH 108/203] ACPI: processor: perflib: Avoid updating frequency QoS unnecessarily commit 99387b016022c29234c4ebf9abd34358c6e56532 upstream. Modify acpi_processor_get_platform_limit() to avoid updating its frequency QoS request when the _PPC return value has not changed by comparing that value to the previous _PPC return value stored in the performance_platform_limit field of the struct acpi_processor corresponding to the given CPU. While at it, do the _PPC return value check against the state count earlier, to avoid setting performance_platform_limit to an invalid value, and make acpi_processor_ppc_init() use FREQ_QOS_MAX_DEFAULT_VALUE as the "no limit" frequency QoS for consistency. Signed-off-by: Rafael J. Wysocki Tested-by: Hagar Hemdan Signed-off-by: Greg Kroah-Hartman --- drivers/acpi/processor_perflib.c | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/drivers/acpi/processor_perflib.c b/drivers/acpi/processor_perflib.c index 2e13c395b6b7..fc42d649c7e4 100644 --- a/drivers/acpi/processor_perflib.c +++ b/drivers/acpi/processor_perflib.c @@ -79,13 +79,16 @@ static int acpi_processor_get_platform_limit(struct acpi_processor *pr) index = ppc; + if (pr->performance_platform_limit == index || + ppc >= pr->performance->state_count) + return 0; + pr_debug("CPU %d: _PPC is %d - frequency %s limited\n", pr->id, index, index ? "is" : "is not"); pr->performance_platform_limit = index; - if (ppc >= pr->performance->state_count || - unlikely(!freq_qos_request_active(&pr->perflib_req))) + if (unlikely(!freq_qos_request_active(&pr->perflib_req))) return 0; /* @@ -180,9 +183,16 @@ void acpi_processor_ppc_init(struct cpufreq_policy *policy) if (!pr) continue; + /* + * Reset performance_platform_limit in case there is a stale + * value in it, so as to make it match the "no limit" QoS value + * below. + */ + pr->performance_platform_limit = 0; + ret = freq_qos_add_request(&policy->constraints, - &pr->perflib_req, - FREQ_QOS_MAX, INT_MAX); + &pr->perflib_req, FREQ_QOS_MAX, + FREQ_QOS_MAX_DEFAULT_VALUE); if (ret < 0) pr_err("Failed to add freq constraint for CPU%d (%d)\n", cpu, ret); From f0acd6c3a2cd9d65f385d8ef9ecce206c1418743 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Wed, 28 Dec 2022 22:26:04 +0100 Subject: [PATCH 109/203] cpufreq: intel_pstate: Drop ACPI _PSS states table patching commit e8a0e30b742f76ebd0f3b196973df4bf65d8fbbb upstream. After making acpi_processor_get_platform_limit() use the "no limit" value for its frequency QoS request when _PPC returns 0, it is not necessary to replace the frequency corresponding to the first _PSS return package entry with the maximum turbo frequency of the given CPU in intel_pstate_init_acpi_perf_limits() any more, so drop the code doing that along with the comment explaining it. Signed-off-by: Rafael J. Wysocki Tested-by: Hagar Hemdan Signed-off-by: Greg Kroah-Hartman --- drivers/cpufreq/intel_pstate.c | 14 -------------- 1 file changed, 14 deletions(-) diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index 4b06b81d8bb0..4359ed1d3b7e 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -443,20 +443,6 @@ static void intel_pstate_init_acpi_perf_limits(struct cpufreq_policy *policy) (u32) cpu->acpi_perf_data.states[i].control); } - /* - * The _PSS table doesn't contain whole turbo frequency range. - * This just contains +1 MHZ above the max non turbo frequency, - * with control value corresponding to max turbo ratio. But - * when cpufreq set policy is called, it will call with this - * max frequency, which will cause a reduced performance as - * this driver uses real max turbo frequency as the max - * frequency. So correct this frequency in _PSS table to - * correct max turbo frequency based on the turbo state. - * Also need to convert to MHz as _PSS freq is in MHz. - */ - if (!global.turbo_disabled) - cpu->acpi_perf_data.states[0].core_frequency = - policy->cpuinfo.max_freq / 1000; cpu->valid_pss_table = true; pr_debug("_PPC limits will be enforced\n"); From 9164e27c5a8e4ec926133ecd471469a711667dc0 Mon Sep 17 00:00:00 2001 From: Matthieu Baerts Date: Tue, 4 Jul 2023 22:44:40 +0200 Subject: [PATCH 110/203] selftests: mptcp: depend on SYN_COOKIES commit 6c8880fcaa5c45355179b759c1d11737775e31fc upstream. MPTCP selftests are using TCP SYN Cookies for quite a while now, since v5.9. Some CIs don't have this config option enabled and this is causing issues in the tests: # ns1 MPTCP -> ns1 (10.0.1.1:10000 ) MPTCP (duration 167ms) sysctl: cannot stat /proc/sys/net/ipv4/tcp_syncookies: No such file or directory # [ OK ]./mptcp_connect.sh: line 554: [: -eq: unary operator expected There is no impact in the results but the test is not doing what it is supposed to do. Fixes: fed61c4b584c ("selftests: mptcp: make 2nd net namespace use tcp syn cookies unconditionally") Cc: stable@vger.kernel.org Signed-off-by: Matthieu Baerts Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- tools/testing/selftests/net/mptcp/config | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/testing/selftests/net/mptcp/config b/tools/testing/selftests/net/mptcp/config index 1a4c11a444d9..8867c40258b5 100644 --- a/tools/testing/selftests/net/mptcp/config +++ b/tools/testing/selftests/net/mptcp/config @@ -6,3 +6,4 @@ CONFIG_INET_DIAG=m CONFIG_INET_MPTCP_DIAG=m CONFIG_VETH=y CONFIG_NET_SCH_NETEM=m +CONFIG_SYN_COOKIES=y From 4d360a819453b0d695372e2f060224191e93177f Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Thu, 20 Jul 2023 13:16:53 -0600 Subject: [PATCH 111/203] io_uring: treat -EAGAIN for REQ_F_NOWAIT as final for io-wq commit a9be202269580ca611c6cebac90eaf1795497800 upstream. io-wq assumes that an issue is blocking, but it may not be if the request type has asked for a non-blocking attempt. If we get -EAGAIN for that case, then we need to treat it as a final result and not retry or arm poll for it. Cc: stable@vger.kernel.org # 5.10+ Link: https://github.com/axboe/liburing/issues/897 Signed-off-by: Jens Axboe Signed-off-by: Greg Kroah-Hartman --- io_uring/io_uring.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c index f84584b762d0..c18c4c025da4 100644 --- a/io_uring/io_uring.c +++ b/io_uring/io_uring.c @@ -6895,6 +6895,14 @@ static void io_wq_submit_work(struct io_wq_work *work) */ if (ret != -EAGAIN || !(req->ctx->flags & IORING_SETUP_IOPOLL)) break; + + /* + * If REQ_F_NOWAIT is set, then don't wait or retry with + * poll. -EAGAIN is final for that case. + */ + if (req->flags & REQ_F_NOWAIT) + break; + cond_resched(); } while (1); } From 7a413fa0472610c9eccff7c4d89a6918dab8949b Mon Sep 17 00:00:00 2001 From: Thomas Petazzoni Date: Thu, 13 Jul 2023 13:21:12 +0200 Subject: [PATCH 112/203] ASoC: cs42l51: fix driver to properly autoload with automatic module loading commit e51df4f81b02bcdd828a04de7c1eb6a92988b61e upstream. In commit 2cb1e0259f50 ("ASoC: cs42l51: re-hook of_match_table pointer"), 9 years ago, some random guy fixed the cs42l51 after it was split into a core part and an I2C part to properly match based on a Device Tree compatible string. However, the fix in this commit is wrong: the MODULE_DEVICE_TABLE(of, ....) is in the core part of the driver, not the I2C part. Therefore, automatic module loading based on module.alias, based on matching with the DT compatible string, loads the core part of the driver, but not the I2C part. And threfore, the i2c_driver is not registered, and the codec is not known to the system, nor matched with a DT node with the corresponding compatible string. In order to fix that, we move the MODULE_DEVICE_TABLE(of, ...) into the I2C part of the driver. The cs42l51_of_match[] array is also moved as well, as it is not possible to have this definition in one file, and the MODULE_DEVICE_TABLE(of, ...) invocation in another file, due to how MODULE_DEVICE_TABLE works. Thanks to this commit, the I2C part of the driver now properly autoloads, and thanks to its dependency on the core part, the core part gets autoloaded as well, resulting in a functional sound card without having to manually load kernel modules. Fixes: 2cb1e0259f50 ("ASoC: cs42l51: re-hook of_match_table pointer") Cc: stable@vger.kernel.org Signed-off-by: Thomas Petazzoni Link: https://lore.kernel.org/r/20230713112112.778576-1-thomas.petazzoni@bootlin.com Signed-off-by: Mark Brown Signed-off-by: Greg Kroah-Hartman --- sound/soc/codecs/cs42l51-i2c.c | 6 ++++++ sound/soc/codecs/cs42l51.c | 7 ------- sound/soc/codecs/cs42l51.h | 1 - 3 files changed, 6 insertions(+), 8 deletions(-) diff --git a/sound/soc/codecs/cs42l51-i2c.c b/sound/soc/codecs/cs42l51-i2c.c index 70260e0a8f09..3ff73367897d 100644 --- a/sound/soc/codecs/cs42l51-i2c.c +++ b/sound/soc/codecs/cs42l51-i2c.c @@ -19,6 +19,12 @@ static struct i2c_device_id cs42l51_i2c_id[] = { }; MODULE_DEVICE_TABLE(i2c, cs42l51_i2c_id); +const struct of_device_id cs42l51_of_match[] = { + { .compatible = "cirrus,cs42l51", }, + { } +}; +MODULE_DEVICE_TABLE(of, cs42l51_of_match); + static int cs42l51_i2c_probe(struct i2c_client *i2c, const struct i2c_device_id *id) { diff --git a/sound/soc/codecs/cs42l51.c b/sound/soc/codecs/cs42l51.c index c61b17dc2af8..4b026e1c3fe3 100644 --- a/sound/soc/codecs/cs42l51.c +++ b/sound/soc/codecs/cs42l51.c @@ -825,13 +825,6 @@ int __maybe_unused cs42l51_resume(struct device *dev) } EXPORT_SYMBOL_GPL(cs42l51_resume); -const struct of_device_id cs42l51_of_match[] = { - { .compatible = "cirrus,cs42l51", }, - { } -}; -MODULE_DEVICE_TABLE(of, cs42l51_of_match); -EXPORT_SYMBOL_GPL(cs42l51_of_match); - MODULE_AUTHOR("Arnaud Patard "); MODULE_DESCRIPTION("Cirrus Logic CS42L51 ALSA SoC Codec Driver"); MODULE_LICENSE("GPL"); diff --git a/sound/soc/codecs/cs42l51.h b/sound/soc/codecs/cs42l51.h index 9d06cf7f8876..4f13c38484b7 100644 --- a/sound/soc/codecs/cs42l51.h +++ b/sound/soc/codecs/cs42l51.h @@ -16,7 +16,6 @@ int cs42l51_probe(struct device *dev, struct regmap *regmap); int cs42l51_remove(struct device *dev); int __maybe_unused cs42l51_suspend(struct device *dev); int __maybe_unused cs42l51_resume(struct device *dev); -extern const struct of_device_id cs42l51_of_match[]; #define CS42L51_CHIP_ID 0x1B #define CS42L51_CHIP_REV_A 0x00 From 5601d812c8d89a30ea47b1e76d3a7ceeb603ca34 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Wed, 5 Jul 2023 14:46:45 +0800 Subject: [PATCH 113/203] kprobes/x86: Fix fall-through warnings for Clang [ Upstream commit e689b300c99ca2dd80d3f662e19499bba27cda09 ] In preparation to enable -Wimplicit-fallthrough for Clang, fix a warning by explicitly adding a break statement instead of just letting the code fall through to the next case. Signed-off-by: Gustavo A. R. Silva Signed-off-by: Peter Zijlstra (Intel) Link: https://github.com/KSPP/linux/issues/115 Signed-off-by: Li Huafei Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/kprobes/core.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c index 5de757099186..ed9a4fb87168 100644 --- a/arch/x86/kernel/kprobes/core.c +++ b/arch/x86/kernel/kprobes/core.c @@ -880,6 +880,7 @@ static void resume_execution(struct kprobe *p, struct pt_regs *regs, p->ainsn.boostable = true; goto no_change; } + break; default: break; } From ff97a14c8a86a20bf328d84d677891cb28ae746b Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Wed, 5 Jul 2023 14:46:46 +0800 Subject: [PATCH 114/203] x86/kprobes: Do not decode opcode in resume_execution() [ Upstream commit abd82e533d88df1521e3da6799b83ce88852ab88 ] Currently, kprobes decodes the opcode right after single-stepping in resume_execution(). But the opcode was already decoded while preparing arch_specific_insn in arch_copy_kprobe(). Decode the opcode in arch_copy_kprobe() instead of in resume_execution() and set some flags which classify the opcode for the resuming process. [ bp: Massage commit message. ] Signed-off-by: Masami Hiramatsu Signed-off-by: Borislav Petkov Acked-by: Steven Rostedt (VMware) Link: https://lkml.kernel.org/r/160830072561.349576.3014979564448023213.stgit@devnote2 Signed-off-by: Li Huafei Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/kprobes.h | 11 ++- arch/x86/kernel/kprobes/core.c | 168 +++++++++++++++------------------ 2 files changed, 81 insertions(+), 98 deletions(-) diff --git a/arch/x86/include/asm/kprobes.h b/arch/x86/include/asm/kprobes.h index 991a7ad540c7..d20a3d6be36e 100644 --- a/arch/x86/include/asm/kprobes.h +++ b/arch/x86/include/asm/kprobes.h @@ -58,14 +58,17 @@ struct arch_specific_insn { /* copy of the original instruction */ kprobe_opcode_t *insn; /* - * boostable = false: This instruction type is not boostable. - * boostable = true: This instruction has been boosted: we have + * boostable = 0: This instruction type is not boostable. + * boostable = 1: This instruction has been boosted: we have * added a relative jump after the instruction copy in insn, * so no single-step and fixup are needed (unless there's * a post_handler). */ - bool boostable; - bool if_modifier; + unsigned boostable:1; + unsigned if_modifier:1; + unsigned is_call:1; + unsigned is_pushf:1; + unsigned is_abs_ip:1; /* Number of bytes of text poked */ int tp_len; }; diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c index ed9a4fb87168..19ca5164be4d 100644 --- a/arch/x86/kernel/kprobes/core.c +++ b/arch/x86/kernel/kprobes/core.c @@ -133,26 +133,6 @@ void synthesize_relcall(void *dest, void *from, void *to) } NOKPROBE_SYMBOL(synthesize_relcall); -/* - * Skip the prefixes of the instruction. - */ -static kprobe_opcode_t *skip_prefixes(kprobe_opcode_t *insn) -{ - insn_attr_t attr; - - attr = inat_get_opcode_attribute((insn_byte_t)*insn); - while (inat_is_legacy_prefix(attr)) { - insn++; - attr = inat_get_opcode_attribute((insn_byte_t)*insn); - } -#ifdef CONFIG_X86_64 - if (inat_is_rex_prefix(attr)) - insn++; -#endif - return insn; -} -NOKPROBE_SYMBOL(skip_prefixes); - /* * Returns non-zero if INSN is boostable. * RIP relative instructions are adjusted at copying time in 64 bits mode @@ -326,25 +306,6 @@ static int can_probe(unsigned long paddr) return (addr == paddr); } -/* - * Returns non-zero if opcode modifies the interrupt flag. - */ -static int is_IF_modifier(kprobe_opcode_t *insn) -{ - /* Skip prefixes */ - insn = skip_prefixes(insn); - - switch (*insn) { - case 0xfa: /* cli */ - case 0xfb: /* sti */ - case 0xcf: /* iret/iretd */ - case 0x9d: /* popf/popfd */ - return 1; - } - - return 0; -} - /* * Copy an instruction with recovering modified instruction by kprobes * and adjust the displacement if the instruction uses the %rip-relative @@ -427,9 +388,9 @@ static int prepare_boost(kprobe_opcode_t *buf, struct kprobe *p, synthesize_reljump(buf + len, p->ainsn.insn + len, p->addr + insn->length); len += JMP32_INSN_SIZE; - p->ainsn.boostable = true; + p->ainsn.boostable = 1; } else { - p->ainsn.boostable = false; + p->ainsn.boostable = 0; } return len; @@ -466,6 +427,67 @@ void free_insn_page(void *page) module_memfree(page); } +static void set_resume_flags(struct kprobe *p, struct insn *insn) +{ + insn_byte_t opcode = insn->opcode.bytes[0]; + + switch (opcode) { + case 0xfa: /* cli */ + case 0xfb: /* sti */ + case 0x9d: /* popf/popfd */ + /* Check whether the instruction modifies Interrupt Flag or not */ + p->ainsn.if_modifier = 1; + break; + case 0x9c: /* pushfl */ + p->ainsn.is_pushf = 1; + break; + case 0xcf: /* iret */ + p->ainsn.if_modifier = 1; + fallthrough; + case 0xc2: /* ret/lret */ + case 0xc3: + case 0xca: + case 0xcb: + case 0xea: /* jmp absolute -- ip is correct */ + /* ip is already adjusted, no more changes required */ + p->ainsn.is_abs_ip = 1; + /* Without resume jump, this is boostable */ + p->ainsn.boostable = 1; + break; + case 0xe8: /* call relative - Fix return addr */ + p->ainsn.is_call = 1; + break; +#ifdef CONFIG_X86_32 + case 0x9a: /* call absolute -- same as call absolute, indirect */ + p->ainsn.is_call = 1; + p->ainsn.is_abs_ip = 1; + break; +#endif + case 0xff: + opcode = insn->opcode.bytes[1]; + if ((opcode & 0x30) == 0x10) { + /* + * call absolute, indirect + * Fix return addr; ip is correct. + * But this is not boostable + */ + p->ainsn.is_call = 1; + p->ainsn.is_abs_ip = 1; + break; + } else if (((opcode & 0x31) == 0x20) || + ((opcode & 0x31) == 0x21)) { + /* + * jmp near and far, absolute indirect + * ip is correct. + */ + p->ainsn.is_abs_ip = 1; + /* Without resume jump, this is boostable */ + p->ainsn.boostable = 1; + } + break; + } +} + static int arch_copy_kprobe(struct kprobe *p) { struct insn insn; @@ -483,8 +505,8 @@ static int arch_copy_kprobe(struct kprobe *p) */ len = prepare_boost(buf, p, &insn); - /* Check whether the instruction modifies Interrupt Flag or not */ - p->ainsn.if_modifier = is_IF_modifier(buf); + /* Analyze the opcode and set resume flags */ + set_resume_flags(p, &insn); /* Also, displacement change doesn't affect the first byte */ p->opcode = buf[0]; @@ -507,6 +529,9 @@ int arch_prepare_kprobe(struct kprobe *p) if (!can_probe((unsigned long)p->addr)) return -EILSEQ; + + memset(&p->ainsn, 0, sizeof(p->ainsn)); + /* insn: must be on special executable page on x86. */ p->ainsn.insn = get_insn_slot(); if (!p->ainsn.insn) @@ -822,11 +847,6 @@ NOKPROBE_SYMBOL(trampoline_handler); * 2) If the single-stepped instruction was a call, the return address * that is atop the stack is the address following the copied instruction. * We need to make it the address following the original instruction. - * - * If this is the first time we've single-stepped the instruction at - * this probepoint, and the instruction is boostable, boost it: add a - * jump instruction after the copied instruction, that jumps to the next - * instruction after the probepoint. */ static void resume_execution(struct kprobe *p, struct pt_regs *regs, struct kprobe_ctlblk *kcb) @@ -834,60 +854,20 @@ static void resume_execution(struct kprobe *p, struct pt_regs *regs, unsigned long *tos = stack_addr(regs); unsigned long copy_ip = (unsigned long)p->ainsn.insn; unsigned long orig_ip = (unsigned long)p->addr; - kprobe_opcode_t *insn = p->ainsn.insn; - - /* Skip prefixes */ - insn = skip_prefixes(insn); regs->flags &= ~X86_EFLAGS_TF; - switch (*insn) { - case 0x9c: /* pushfl */ + + /* Fixup the contents of top of stack */ + if (p->ainsn.is_pushf) { *tos &= ~(X86_EFLAGS_TF | X86_EFLAGS_IF); *tos |= kcb->kprobe_old_flags; - break; - case 0xc2: /* iret/ret/lret */ - case 0xc3: - case 0xca: - case 0xcb: - case 0xcf: - case 0xea: /* jmp absolute -- ip is correct */ - /* ip is already adjusted, no more changes required */ - p->ainsn.boostable = true; - goto no_change; - case 0xe8: /* call relative - Fix return addr */ + } else if (p->ainsn.is_call) { *tos = orig_ip + (*tos - copy_ip); - break; -#ifdef CONFIG_X86_32 - case 0x9a: /* call absolute -- same as call absolute, indirect */ - *tos = orig_ip + (*tos - copy_ip); - goto no_change; -#endif - case 0xff: - if ((insn[1] & 0x30) == 0x10) { - /* - * call absolute, indirect - * Fix return addr; ip is correct. - * But this is not boostable - */ - *tos = orig_ip + (*tos - copy_ip); - goto no_change; - } else if (((insn[1] & 0x31) == 0x20) || - ((insn[1] & 0x31) == 0x21)) { - /* - * jmp near and far, absolute indirect - * ip is correct. And this is boostable - */ - p->ainsn.boostable = true; - goto no_change; - } - break; - default: - break; } - regs->ip += orig_ip - copy_ip; + if (!p->ainsn.is_abs_ip) + regs->ip += orig_ip - copy_ip; -no_change: restore_btf(); } NOKPROBE_SYMBOL(resume_execution); From 41db23bad95226fcf42517d44b489c1ef38acdab Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Wed, 5 Jul 2023 14:46:47 +0800 Subject: [PATCH 115/203] x86/kprobes: Retrieve correct opcode for group instruction [ Upstream commit d60ad3d46f1d04a282c56159f1deb675c12733fd ] Since the opcodes start from 0xff are group5 instruction group which is not 2 bytes opcode but the extended opcode determined by the MOD/RM byte. The commit abd82e533d88 ("x86/kprobes: Do not decode opcode in resume_execution()") used insn->opcode.bytes[1], but that is not correct. We have to refer the insn->modrm.bytes[1] instead. Fixes: abd82e533d88 ("x86/kprobes: Do not decode opcode in resume_execution()") Signed-off-by: Masami Hiramatsu Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/161469872400.49483.18214724458034233166.stgit@devnote2 Signed-off-by: Li Huafei Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/kprobes/core.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c index 19ca5164be4d..40d5c603ce8e 100644 --- a/arch/x86/kernel/kprobes/core.c +++ b/arch/x86/kernel/kprobes/core.c @@ -464,7 +464,11 @@ static void set_resume_flags(struct kprobe *p, struct insn *insn) break; #endif case 0xff: - opcode = insn->opcode.bytes[1]; + /* + * Since the 0xff is an extended group opcode, the instruction + * is determined by the MOD/RM byte. + */ + opcode = insn->modrm.bytes[0]; if ((opcode & 0x30) == 0x10) { /* * call absolute, indirect From 2c57553a77e18b973f85194193284c388ce58b2b Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Wed, 5 Jul 2023 14:46:48 +0800 Subject: [PATCH 116/203] x86/kprobes: Identify far indirect JMP correctly [ Upstream commit a194acd316f93f3435a64de3b37dca2b5a77b338 ] Since Grp5 far indirect JMP is FF "mod 101 r/m", it should be (modrm & 0x38) == 0x28, and near indirect JMP is also 0x38 == 0x20. So we can mask modrm with 0x30 and check 0x20. This is actually what the original code does, it also doesn't care the last bit. So the result code is same. Thus, I think this is just a cosmetic cleanup. Signed-off-by: Masami Hiramatsu Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/161469873475.49483.13257083019966335137.stgit@devnote2 Signed-off-by: Li Huafei Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/kprobes/core.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c index 40d5c603ce8e..d03baf1f4024 100644 --- a/arch/x86/kernel/kprobes/core.c +++ b/arch/x86/kernel/kprobes/core.c @@ -478,8 +478,7 @@ static void set_resume_flags(struct kprobe *p, struct insn *insn) p->ainsn.is_call = 1; p->ainsn.is_abs_ip = 1; break; - } else if (((opcode & 0x31) == 0x20) || - ((opcode & 0x31) == 0x21)) { + } else if ((opcode & 0x30) == 0x20) { /* * jmp near and far, absolute indirect * ip is correct. From ba7d1dae9fe866abe74bb1e849fb85983b7c4c37 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Wed, 5 Jul 2023 14:46:49 +0800 Subject: [PATCH 117/203] x86/kprobes: Use int3 instead of debug trap for single-step [ Upstream commit 6256e668b7af9d81472e03c6a171630c08f8858a ] Use int3 instead of debug trap exception for single-stepping the probed instructions. Some instructions which change the ip registers or modify IF flags are emulated because those are not able to be single-stepped by int3 or may allow the interrupt while single-stepping. This actually changes the kprobes behavior. - kprobes can not probe following instructions; int3, iret, far jmp/call which get absolute address as immediate, indirect far jmp/call, indirect near jmp/call with addressing by memory (register-based indirect jmp/call are OK), and vmcall/vmlaunch/vmresume/vmxoff. - If the kprobe post_handler doesn't set before registering, it may not be called in some case even if you set it afterwards. (IOW, kprobe booster is enabled at registration, user can not change it) But both are rare issue, unsupported instructions will not be used in the kernel (or rarely used), and post_handlers are rarely used (I don't see it except for the test code). Suggested-by: Andy Lutomirski Signed-off-by: Masami Hiramatsu Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/161469874601.49483.11985325887166921076.stgit@devnote2 [Huafei: Fix trivial conflict in arch/x86/kernel/kprobes/core.c due to the previously backported commit 6dd3b8c9f5881] Signed-off-by: Li Huafei Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/kprobes.h | 21 +- arch/x86/kernel/kprobes/core.c | 517 +++++++++++++++++++++------------ arch/x86/kernel/traps.c | 3 - 3 files changed, 353 insertions(+), 188 deletions(-) diff --git a/arch/x86/include/asm/kprobes.h b/arch/x86/include/asm/kprobes.h index d20a3d6be36e..bd7f5886a789 100644 --- a/arch/x86/include/asm/kprobes.h +++ b/arch/x86/include/asm/kprobes.h @@ -65,10 +65,22 @@ struct arch_specific_insn { * a post_handler). */ unsigned boostable:1; - unsigned if_modifier:1; - unsigned is_call:1; - unsigned is_pushf:1; - unsigned is_abs_ip:1; + unsigned char size; /* The size of insn */ + union { + unsigned char opcode; + struct { + unsigned char type; + } jcc; + struct { + unsigned char type; + unsigned char asize; + } loop; + struct { + unsigned char reg; + } indirect; + }; + s32 rel32; /* relative offset must be s32, s16, or s8 */ + void (*emulate_op)(struct kprobe *p, struct pt_regs *regs); /* Number of bytes of text poked */ int tp_len; }; @@ -107,7 +119,6 @@ extern int kprobe_fault_handler(struct pt_regs *regs, int trapnr); extern int kprobe_exceptions_notify(struct notifier_block *self, unsigned long val, void *data); extern int kprobe_int3_handler(struct pt_regs *regs); -extern int kprobe_debug_handler(struct pt_regs *regs); #else diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c index d03baf1f4024..7778b3791bee 100644 --- a/arch/x86/kernel/kprobes/core.c +++ b/arch/x86/kernel/kprobes/core.c @@ -185,6 +185,9 @@ int can_boost(struct insn *insn, void *addr) case 0xf0: /* clear and set flags are boostable */ return (opcode == 0xf5 || (0xf7 < opcode && opcode < 0xfe)); + case 0xff: + /* indirect jmp is boostable */ + return X86_MODRM_REG(insn->modrm.bytes[0]) == 4; default: /* call is not boostable */ return opcode != 0x9a; @@ -373,13 +376,14 @@ int __copy_instruction(u8 *dest, u8 *src, u8 *real, struct insn *insn) return insn->length; } -/* Prepare reljump right after instruction to boost */ -static int prepare_boost(kprobe_opcode_t *buf, struct kprobe *p, - struct insn *insn) +/* Prepare reljump or int3 right after instruction */ +static int prepare_singlestep(kprobe_opcode_t *buf, struct kprobe *p, + struct insn *insn) { int len = insn->length; - if (can_boost(insn, p->addr) && + if (!IS_ENABLED(CONFIG_PREEMPTION) && + !p->post_handler && can_boost(insn, p->addr) && MAX_INSN_SIZE - len >= JMP32_INSN_SIZE) { /* * These instructions can be executed directly if it @@ -390,7 +394,12 @@ static int prepare_boost(kprobe_opcode_t *buf, struct kprobe *p, len += JMP32_INSN_SIZE; p->ainsn.boostable = 1; } else { - p->ainsn.boostable = 0; + /* Otherwise, put an int3 for trapping singlestep */ + if (MAX_INSN_SIZE - len < INT3_INSN_SIZE) + return -ENOSPC; + + buf[len] = INT3_INSN_OPCODE; + len += INT3_INSN_SIZE; } return len; @@ -427,42 +436,232 @@ void free_insn_page(void *page) module_memfree(page); } -static void set_resume_flags(struct kprobe *p, struct insn *insn) +/* Kprobe x86 instruction emulation - only regs->ip or IF flag modifiers */ + +static void kprobe_emulate_ifmodifiers(struct kprobe *p, struct pt_regs *regs) +{ + switch (p->ainsn.opcode) { + case 0xfa: /* cli */ + regs->flags &= ~(X86_EFLAGS_IF); + break; + case 0xfb: /* sti */ + regs->flags |= X86_EFLAGS_IF; + break; + case 0x9c: /* pushf */ + int3_emulate_push(regs, regs->flags); + break; + case 0x9d: /* popf */ + regs->flags = int3_emulate_pop(regs); + break; + } + regs->ip = regs->ip - INT3_INSN_SIZE + p->ainsn.size; +} +NOKPROBE_SYMBOL(kprobe_emulate_ifmodifiers); + +static void kprobe_emulate_ret(struct kprobe *p, struct pt_regs *regs) +{ + int3_emulate_ret(regs); +} +NOKPROBE_SYMBOL(kprobe_emulate_ret); + +static void kprobe_emulate_call(struct kprobe *p, struct pt_regs *regs) +{ + unsigned long func = regs->ip - INT3_INSN_SIZE + p->ainsn.size; + + func += p->ainsn.rel32; + int3_emulate_call(regs, func); +} +NOKPROBE_SYMBOL(kprobe_emulate_call); + +static nokprobe_inline +void __kprobe_emulate_jmp(struct kprobe *p, struct pt_regs *regs, bool cond) +{ + unsigned long ip = regs->ip - INT3_INSN_SIZE + p->ainsn.size; + + if (cond) + ip += p->ainsn.rel32; + int3_emulate_jmp(regs, ip); +} + +static void kprobe_emulate_jmp(struct kprobe *p, struct pt_regs *regs) +{ + __kprobe_emulate_jmp(p, regs, true); +} +NOKPROBE_SYMBOL(kprobe_emulate_jmp); + +static const unsigned long jcc_mask[6] = { + [0] = X86_EFLAGS_OF, + [1] = X86_EFLAGS_CF, + [2] = X86_EFLAGS_ZF, + [3] = X86_EFLAGS_CF | X86_EFLAGS_ZF, + [4] = X86_EFLAGS_SF, + [5] = X86_EFLAGS_PF, +}; + +static void kprobe_emulate_jcc(struct kprobe *p, struct pt_regs *regs) +{ + bool invert = p->ainsn.jcc.type & 1; + bool match; + + if (p->ainsn.jcc.type < 0xc) { + match = regs->flags & jcc_mask[p->ainsn.jcc.type >> 1]; + } else { + match = ((regs->flags & X86_EFLAGS_SF) >> X86_EFLAGS_SF_BIT) ^ + ((regs->flags & X86_EFLAGS_OF) >> X86_EFLAGS_OF_BIT); + if (p->ainsn.jcc.type >= 0xe) + match = match && (regs->flags & X86_EFLAGS_ZF); + } + __kprobe_emulate_jmp(p, regs, (match && !invert) || (!match && invert)); +} +NOKPROBE_SYMBOL(kprobe_emulate_jcc); + +static void kprobe_emulate_loop(struct kprobe *p, struct pt_regs *regs) +{ + bool match; + + if (p->ainsn.loop.type != 3) { /* LOOP* */ + if (p->ainsn.loop.asize == 32) + match = ((*(u32 *)®s->cx)--) != 0; +#ifdef CONFIG_X86_64 + else if (p->ainsn.loop.asize == 64) + match = ((*(u64 *)®s->cx)--) != 0; +#endif + else + match = ((*(u16 *)®s->cx)--) != 0; + } else { /* JCXZ */ + if (p->ainsn.loop.asize == 32) + match = *(u32 *)(®s->cx) == 0; +#ifdef CONFIG_X86_64 + else if (p->ainsn.loop.asize == 64) + match = *(u64 *)(®s->cx) == 0; +#endif + else + match = *(u16 *)(®s->cx) == 0; + } + + if (p->ainsn.loop.type == 0) /* LOOPNE */ + match = match && !(regs->flags & X86_EFLAGS_ZF); + else if (p->ainsn.loop.type == 1) /* LOOPE */ + match = match && (regs->flags & X86_EFLAGS_ZF); + + __kprobe_emulate_jmp(p, regs, match); +} +NOKPROBE_SYMBOL(kprobe_emulate_loop); + +static const int addrmode_regoffs[] = { + offsetof(struct pt_regs, ax), + offsetof(struct pt_regs, cx), + offsetof(struct pt_regs, dx), + offsetof(struct pt_regs, bx), + offsetof(struct pt_regs, sp), + offsetof(struct pt_regs, bp), + offsetof(struct pt_regs, si), + offsetof(struct pt_regs, di), +#ifdef CONFIG_X86_64 + offsetof(struct pt_regs, r8), + offsetof(struct pt_regs, r9), + offsetof(struct pt_regs, r10), + offsetof(struct pt_regs, r11), + offsetof(struct pt_regs, r12), + offsetof(struct pt_regs, r13), + offsetof(struct pt_regs, r14), + offsetof(struct pt_regs, r15), +#endif +}; + +static void kprobe_emulate_call_indirect(struct kprobe *p, struct pt_regs *regs) +{ + unsigned long offs = addrmode_regoffs[p->ainsn.indirect.reg]; + + int3_emulate_call(regs, regs_get_register(regs, offs)); +} +NOKPROBE_SYMBOL(kprobe_emulate_call_indirect); + +static void kprobe_emulate_jmp_indirect(struct kprobe *p, struct pt_regs *regs) +{ + unsigned long offs = addrmode_regoffs[p->ainsn.indirect.reg]; + + int3_emulate_jmp(regs, regs_get_register(regs, offs)); +} +NOKPROBE_SYMBOL(kprobe_emulate_jmp_indirect); + +static int prepare_emulation(struct kprobe *p, struct insn *insn) { insn_byte_t opcode = insn->opcode.bytes[0]; switch (opcode) { case 0xfa: /* cli */ case 0xfb: /* sti */ + case 0x9c: /* pushfl */ case 0x9d: /* popf/popfd */ - /* Check whether the instruction modifies Interrupt Flag or not */ - p->ainsn.if_modifier = 1; + /* + * IF modifiers must be emulated since it will enable interrupt while + * int3 single stepping. + */ + p->ainsn.emulate_op = kprobe_emulate_ifmodifiers; + p->ainsn.opcode = opcode; break; - case 0x9c: /* pushfl */ - p->ainsn.is_pushf = 1; - break; - case 0xcf: /* iret */ - p->ainsn.if_modifier = 1; - fallthrough; case 0xc2: /* ret/lret */ case 0xc3: case 0xca: case 0xcb: - case 0xea: /* jmp absolute -- ip is correct */ - /* ip is already adjusted, no more changes required */ - p->ainsn.is_abs_ip = 1; - /* Without resume jump, this is boostable */ - p->ainsn.boostable = 1; + p->ainsn.emulate_op = kprobe_emulate_ret; break; - case 0xe8: /* call relative - Fix return addr */ - p->ainsn.is_call = 1; + case 0x9a: /* far call absolute -- segment is not supported */ + case 0xea: /* far jmp absolute -- segment is not supported */ + case 0xcc: /* int3 */ + case 0xcf: /* iret -- in-kernel IRET is not supported */ + return -EOPNOTSUPP; break; -#ifdef CONFIG_X86_32 - case 0x9a: /* call absolute -- same as call absolute, indirect */ - p->ainsn.is_call = 1; - p->ainsn.is_abs_ip = 1; + case 0xe8: /* near call relative */ + p->ainsn.emulate_op = kprobe_emulate_call; + if (insn->immediate.nbytes == 2) + p->ainsn.rel32 = *(s16 *)&insn->immediate.value; + else + p->ainsn.rel32 = *(s32 *)&insn->immediate.value; + break; + case 0xeb: /* short jump relative */ + case 0xe9: /* near jump relative */ + p->ainsn.emulate_op = kprobe_emulate_jmp; + if (insn->immediate.nbytes == 1) + p->ainsn.rel32 = *(s8 *)&insn->immediate.value; + else if (insn->immediate.nbytes == 2) + p->ainsn.rel32 = *(s16 *)&insn->immediate.value; + else + p->ainsn.rel32 = *(s32 *)&insn->immediate.value; + break; + case 0x70 ... 0x7f: + /* 1 byte conditional jump */ + p->ainsn.emulate_op = kprobe_emulate_jcc; + p->ainsn.jcc.type = opcode & 0xf; + p->ainsn.rel32 = *(char *)insn->immediate.bytes; + break; + case 0x0f: + opcode = insn->opcode.bytes[1]; + if ((opcode & 0xf0) == 0x80) { + /* 2 bytes Conditional Jump */ + p->ainsn.emulate_op = kprobe_emulate_jcc; + p->ainsn.jcc.type = opcode & 0xf; + if (insn->immediate.nbytes == 2) + p->ainsn.rel32 = *(s16 *)&insn->immediate.value; + else + p->ainsn.rel32 = *(s32 *)&insn->immediate.value; + } else if (opcode == 0x01 && + X86_MODRM_REG(insn->modrm.bytes[0]) == 0 && + X86_MODRM_MOD(insn->modrm.bytes[0]) == 3) { + /* VM extensions - not supported */ + return -EOPNOTSUPP; + } + break; + case 0xe0: /* Loop NZ */ + case 0xe1: /* Loop */ + case 0xe2: /* Loop */ + case 0xe3: /* J*CXZ */ + p->ainsn.emulate_op = kprobe_emulate_loop; + p->ainsn.loop.type = opcode & 0x3; + p->ainsn.loop.asize = insn->addr_bytes * 8; + p->ainsn.rel32 = *(s8 *)&insn->immediate.value; break; -#endif case 0xff: /* * Since the 0xff is an extended group opcode, the instruction @@ -470,46 +669,57 @@ static void set_resume_flags(struct kprobe *p, struct insn *insn) */ opcode = insn->modrm.bytes[0]; if ((opcode & 0x30) == 0x10) { - /* - * call absolute, indirect - * Fix return addr; ip is correct. - * But this is not boostable - */ - p->ainsn.is_call = 1; - p->ainsn.is_abs_ip = 1; - break; + if ((opcode & 0x8) == 0x8) + return -EOPNOTSUPP; /* far call */ + /* call absolute, indirect */ + p->ainsn.emulate_op = kprobe_emulate_call_indirect; } else if ((opcode & 0x30) == 0x20) { - /* - * jmp near and far, absolute indirect - * ip is correct. - */ - p->ainsn.is_abs_ip = 1; - /* Without resume jump, this is boostable */ - p->ainsn.boostable = 1; - } + if ((opcode & 0x8) == 0x8) + return -EOPNOTSUPP; /* far jmp */ + /* jmp near absolute indirect */ + p->ainsn.emulate_op = kprobe_emulate_jmp_indirect; + } else + break; + + if (insn->addr_bytes != sizeof(unsigned long)) + return -EOPNOTSUPP; /* Don't support differnt size */ + if (X86_MODRM_MOD(opcode) != 3) + return -EOPNOTSUPP; /* TODO: support memory addressing */ + + p->ainsn.indirect.reg = X86_MODRM_RM(opcode); +#ifdef CONFIG_X86_64 + if (X86_REX_B(insn->rex_prefix.value)) + p->ainsn.indirect.reg += 8; +#endif + break; + default: break; } + p->ainsn.size = insn->length; + + return 0; } static int arch_copy_kprobe(struct kprobe *p) { struct insn insn; kprobe_opcode_t buf[MAX_INSN_SIZE]; - int len; + int ret, len; /* Copy an instruction with recovering if other optprobe modifies it.*/ len = __copy_instruction(buf, p->addr, p->ainsn.insn, &insn); if (!len) return -EINVAL; - /* - * __copy_instruction can modify the displacement of the instruction, - * but it doesn't affect boostable check. - */ - len = prepare_boost(buf, p, &insn); + /* Analyze the opcode and setup emulate functions */ + ret = prepare_emulation(p, &insn); + if (ret < 0) + return ret; - /* Analyze the opcode and set resume flags */ - set_resume_flags(p, &insn); + /* Add int3 for single-step or booster jmp */ + len = prepare_singlestep(buf, p, &insn); + if (len < 0) + return len; /* Also, displacement change doesn't affect the first byte */ p->opcode = buf[0]; @@ -602,29 +812,7 @@ set_current_kprobe(struct kprobe *p, struct pt_regs *regs, { __this_cpu_write(current_kprobe, p); kcb->kprobe_saved_flags = kcb->kprobe_old_flags - = (regs->flags & (X86_EFLAGS_TF | X86_EFLAGS_IF)); - if (p->ainsn.if_modifier) - kcb->kprobe_saved_flags &= ~X86_EFLAGS_IF; -} - -static nokprobe_inline void clear_btf(void) -{ - if (test_thread_flag(TIF_BLOCKSTEP)) { - unsigned long debugctl = get_debugctlmsr(); - - debugctl &= ~DEBUGCTLMSR_BTF; - update_debugctlmsr(debugctl); - } -} - -static nokprobe_inline void restore_btf(void) -{ - if (test_thread_flag(TIF_BLOCKSTEP)) { - unsigned long debugctl = get_debugctlmsr(); - - debugctl |= DEBUGCTLMSR_BTF; - update_debugctlmsr(debugctl); - } + = (regs->flags & X86_EFLAGS_IF); } void arch_prepare_kretprobe(struct kretprobe_instance *ri, struct pt_regs *regs) @@ -639,6 +827,22 @@ void arch_prepare_kretprobe(struct kretprobe_instance *ri, struct pt_regs *regs) } NOKPROBE_SYMBOL(arch_prepare_kretprobe); +static void kprobe_post_process(struct kprobe *cur, struct pt_regs *regs, + struct kprobe_ctlblk *kcb) +{ + if ((kcb->kprobe_status != KPROBE_REENTER) && cur->post_handler) { + kcb->kprobe_status = KPROBE_HIT_SSDONE; + cur->post_handler(cur, regs, 0); + } + + /* Restore back the original saved kprobes variables and continue. */ + if (kcb->kprobe_status == KPROBE_REENTER) + restore_previous_kprobe(kcb); + else + reset_current_kprobe(); +} +NOKPROBE_SYMBOL(kprobe_post_process); + static void setup_singlestep(struct kprobe *p, struct pt_regs *regs, struct kprobe_ctlblk *kcb, int reenter) { @@ -646,7 +850,7 @@ static void setup_singlestep(struct kprobe *p, struct pt_regs *regs, return; #if !defined(CONFIG_PREEMPTION) - if (p->ainsn.boostable && !p->post_handler) { + if (p->ainsn.boostable) { /* Boost up -- we can execute copied instructions directly */ if (!reenter) reset_current_kprobe(); @@ -665,18 +869,50 @@ static void setup_singlestep(struct kprobe *p, struct pt_regs *regs, kcb->kprobe_status = KPROBE_REENTER; } else kcb->kprobe_status = KPROBE_HIT_SS; - /* Prepare real single stepping */ - clear_btf(); - regs->flags |= X86_EFLAGS_TF; + + if (p->ainsn.emulate_op) { + p->ainsn.emulate_op(p, regs); + kprobe_post_process(p, regs, kcb); + return; + } + + /* Disable interrupt, and set ip register on trampoline */ regs->flags &= ~X86_EFLAGS_IF; - /* single step inline if the instruction is an int3 */ - if (p->opcode == INT3_INSN_OPCODE) - regs->ip = (unsigned long)p->addr; - else - regs->ip = (unsigned long)p->ainsn.insn; + regs->ip = (unsigned long)p->ainsn.insn; } NOKPROBE_SYMBOL(setup_singlestep); +/* + * Called after single-stepping. p->addr is the address of the + * instruction whose first byte has been replaced by the "int3" + * instruction. To avoid the SMP problems that can occur when we + * temporarily put back the original opcode to single-step, we + * single-stepped a copy of the instruction. The address of this + * copy is p->ainsn.insn. We also doesn't use trap, but "int3" again + * right after the copied instruction. + * Different from the trap single-step, "int3" single-step can not + * handle the instruction which changes the ip register, e.g. jmp, + * call, conditional jmp, and the instructions which changes the IF + * flags because interrupt must be disabled around the single-stepping. + * Such instructions are software emulated, but others are single-stepped + * using "int3". + * + * When the 2nd "int3" handled, the regs->ip and regs->flags needs to + * be adjusted, so that we can resume execution on correct code. + */ +static void resume_singlestep(struct kprobe *p, struct pt_regs *regs, + struct kprobe_ctlblk *kcb) +{ + unsigned long copy_ip = (unsigned long)p->ainsn.insn; + unsigned long orig_ip = (unsigned long)p->addr; + + /* Restore saved interrupt flag and ip register */ + regs->flags |= kcb->kprobe_saved_flags; + /* Note that regs->ip is executed int3 so must be a step back */ + regs->ip += (orig_ip - copy_ip) - INT3_INSN_SIZE; +} +NOKPROBE_SYMBOL(resume_singlestep); + /* * We have reentered the kprobe_handler(), since another probe was hit while * within the handler. We save the original kprobes variables and just single @@ -712,6 +948,12 @@ static int reenter_kprobe(struct kprobe *p, struct pt_regs *regs, } NOKPROBE_SYMBOL(reenter_kprobe); +static int nokprobe_inline kprobe_is_ss(struct kprobe_ctlblk *kcb) +{ + return (kcb->kprobe_status == KPROBE_HIT_SS || + kcb->kprobe_status == KPROBE_REENTER); +} + /* * Interrupts are disabled on entry as trap3 is an interrupt gate and they * remain disabled throughout this function. @@ -756,7 +998,18 @@ int kprobe_int3_handler(struct pt_regs *regs) reset_current_kprobe(); return 1; } - } else if (*addr != INT3_INSN_OPCODE) { + } else if (kprobe_is_ss(kcb)) { + p = kprobe_running(); + if ((unsigned long)p->ainsn.insn < regs->ip && + (unsigned long)p->ainsn.insn + MAX_INSN_SIZE > regs->ip) { + /* Most provably this is the second int3 for singlestep */ + resume_singlestep(p, regs, kcb); + kprobe_post_process(p, regs, kcb); + return 1; + } + } + + if (*addr != INT3_INSN_OPCODE) { /* * The breakpoint instruction was removed right * after we hit it. Another cpu has removed @@ -829,91 +1082,6 @@ __used __visible void *trampoline_handler(struct pt_regs *regs) } NOKPROBE_SYMBOL(trampoline_handler); -/* - * Called after single-stepping. p->addr is the address of the - * instruction whose first byte has been replaced by the "int 3" - * instruction. To avoid the SMP problems that can occur when we - * temporarily put back the original opcode to single-step, we - * single-stepped a copy of the instruction. The address of this - * copy is p->ainsn.insn. - * - * This function prepares to return from the post-single-step - * interrupt. We have to fix up the stack as follows: - * - * 0) Except in the case of absolute or indirect jump or call instructions, - * the new ip is relative to the copied instruction. We need to make - * it relative to the original instruction. - * - * 1) If the single-stepped instruction was pushfl, then the TF and IF - * flags are set in the just-pushed flags, and may need to be cleared. - * - * 2) If the single-stepped instruction was a call, the return address - * that is atop the stack is the address following the copied instruction. - * We need to make it the address following the original instruction. - */ -static void resume_execution(struct kprobe *p, struct pt_regs *regs, - struct kprobe_ctlblk *kcb) -{ - unsigned long *tos = stack_addr(regs); - unsigned long copy_ip = (unsigned long)p->ainsn.insn; - unsigned long orig_ip = (unsigned long)p->addr; - - regs->flags &= ~X86_EFLAGS_TF; - - /* Fixup the contents of top of stack */ - if (p->ainsn.is_pushf) { - *tos &= ~(X86_EFLAGS_TF | X86_EFLAGS_IF); - *tos |= kcb->kprobe_old_flags; - } else if (p->ainsn.is_call) { - *tos = orig_ip + (*tos - copy_ip); - } - - if (!p->ainsn.is_abs_ip) - regs->ip += orig_ip - copy_ip; - - restore_btf(); -} -NOKPROBE_SYMBOL(resume_execution); - -/* - * Interrupts are disabled on entry as trap1 is an interrupt gate and they - * remain disabled throughout this function. - */ -int kprobe_debug_handler(struct pt_regs *regs) -{ - struct kprobe *cur = kprobe_running(); - struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); - - if (!cur) - return 0; - - resume_execution(cur, regs, kcb); - regs->flags |= kcb->kprobe_saved_flags; - - if ((kcb->kprobe_status != KPROBE_REENTER) && cur->post_handler) { - kcb->kprobe_status = KPROBE_HIT_SSDONE; - cur->post_handler(cur, regs, 0); - } - - /* Restore back the original saved kprobes variables and continue. */ - if (kcb->kprobe_status == KPROBE_REENTER) { - restore_previous_kprobe(kcb); - goto out; - } - reset_current_kprobe(); -out: - /* - * if somebody else is singlestepping across a probe point, flags - * will have TF set, in which case, continue the remaining processing - * of do_debug, as if this is not a probe hit. - */ - if (regs->flags & X86_EFLAGS_TF) - return 0; - - return 1; -} -NOKPROBE_SYMBOL(kprobe_debug_handler); - int kprobe_fault_handler(struct pt_regs *regs, int trapnr) { struct kprobe *cur = kprobe_running(); @@ -931,20 +1099,9 @@ int kprobe_fault_handler(struct pt_regs *regs, int trapnr) * normal page fault. */ regs->ip = (unsigned long)cur->addr; - /* - * Trap flag (TF) has been set here because this fault - * happened where the single stepping will be done. - * So clear it by resetting the current kprobe: - */ - regs->flags &= ~X86_EFLAGS_TF; - /* - * Since the single step (trap) has been cancelled, - * we need to restore BTF here. - */ - restore_btf(); /* - * If the TF flag was set before the kprobe hit, + * If the IF flag was set before the kprobe hit, * don't touch it: */ regs->flags |= kcb->kprobe_old_flags; diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index 3780c728345c..98838b784524 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -917,9 +917,6 @@ static __always_inline void exc_debug_kernel(struct pt_regs *regs, if ((dr6 & DR_STEP) && is_sysenter_singlestep(regs)) dr6 &= ~DR_STEP; - if (kprobe_debug_handler(regs)) - goto out; - /* * The kernel doesn't use INT1 */ From 2b5afe25f5b6eb64a1d519be627164049f9a48a6 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Wed, 5 Jul 2023 14:46:50 +0800 Subject: [PATCH 118/203] x86/kprobes: Fix to identify indirect jmp and others using range case [ Upstream commit 2f706e0e5e263c0d204e37ea496cbb0e98aac2d2 ] Fix can_boost() to identify indirect jmp and others using range case correctly. Since the condition in switch statement is opcode & 0xf0, it can not evaluate to 0xff case. This should be under the 0xf0 case. However, there is no reason to use the conbinations of the bit-masked condition and lower bit checking. Use range case to clean up the switch statement too. Fixes: 6256e668b7 ("x86/kprobes: Use int3 instead of debug trap for single-step") Reported-by: Colin Ian King Signed-off-by: Masami Hiramatsu Signed-off-by: Ingo Molnar Link: https://lore.kernel.org/r/161666692308.1120877.4675552834049546493.stgit@devnote2 Signed-off-by: Li Huafei Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/kprobes/core.c | 44 ++++++++++++++++------------------ 1 file changed, 20 insertions(+), 24 deletions(-) diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c index 7778b3791bee..109221af5d49 100644 --- a/arch/x86/kernel/kprobes/core.c +++ b/arch/x86/kernel/kprobes/core.c @@ -165,32 +165,28 @@ int can_boost(struct insn *insn, void *addr) opcode = insn->opcode.bytes[0]; - switch (opcode & 0xf0) { - case 0x60: - /* can't boost "bound" */ - return (opcode != 0x62); - case 0x70: - return 0; /* can't boost conditional jump */ - case 0x90: - return opcode != 0x9a; /* can't boost call far */ - case 0xc0: - /* can't boost software-interruptions */ - return (0xc1 < opcode && opcode < 0xcc) || opcode == 0xcf; - case 0xd0: - /* can boost AA* and XLAT */ - return (opcode == 0xd4 || opcode == 0xd5 || opcode == 0xd7); - case 0xe0: - /* can boost in/out and absolute jmps */ - return ((opcode & 0x04) || opcode == 0xea); - case 0xf0: - /* clear and set flags are boostable */ - return (opcode == 0xf5 || (0xf7 < opcode && opcode < 0xfe)); - case 0xff: - /* indirect jmp is boostable */ + switch (opcode) { + case 0x62: /* bound */ + case 0x70 ... 0x7f: /* Conditional jumps */ + case 0x9a: /* Call far */ + case 0xc0 ... 0xc1: /* Grp2 */ + case 0xcc ... 0xce: /* software exceptions */ + case 0xd0 ... 0xd3: /* Grp2 */ + case 0xd6: /* (UD) */ + case 0xd8 ... 0xdf: /* ESC */ + case 0xe0 ... 0xe3: /* LOOP*, JCXZ */ + case 0xe8 ... 0xe9: /* near Call, JMP */ + case 0xeb: /* Short JMP */ + case 0xf0 ... 0xf4: /* LOCK/REP, HLT */ + case 0xf6 ... 0xf7: /* Grp3 */ + case 0xfe: /* Grp4 */ + /* ... are not boostable */ + return 0; + case 0xff: /* Grp5 */ + /* Only indirect jmp is boostable */ return X86_MODRM_REG(insn->modrm.bytes[0]) == 4; default: - /* call is not boostable */ - return opcode != 0x9a; + return 1; } } From f07f3938c813baa06dc05f43a170a6eb83428e4f Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Wed, 5 Jul 2023 14:46:51 +0800 Subject: [PATCH 119/203] x86/kprobes: Move 'inline' to the beginning of the kprobe_is_ss() declaration [ Upstream commit 2304d14db6595bea5292bece06c4c625b12d8f89 ] Address this GCC warning: arch/x86/kernel/kprobes/core.c:940:1: warning: 'inline' is not at beginning of declaration [-Wold-style-declaration] 940 | static int nokprobe_inline kprobe_is_ss(struct kprobe_ctlblk *kcb) | ^~~~~~ [ mingo: Tidied up the changelog. ] Fixes: 6256e668b7af: ("x86/kprobes: Use int3 instead of debug trap for single-step") Reported-by: Hulk Robot Signed-off-by: Wei Yongjun Signed-off-by: Ingo Molnar Acked-by: Masami Hiramatsu Link: https://lore.kernel.org/r/20210324144502.1154883-1-weiyongjun1@huawei.com Signed-off-by: Li Huafei Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/kprobes/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c index 109221af5d49..07f222743811 100644 --- a/arch/x86/kernel/kprobes/core.c +++ b/arch/x86/kernel/kprobes/core.c @@ -944,7 +944,7 @@ static int reenter_kprobe(struct kprobe *p, struct pt_regs *regs, } NOKPROBE_SYMBOL(reenter_kprobe); -static int nokprobe_inline kprobe_is_ss(struct kprobe_ctlblk *kcb) +static nokprobe_inline int kprobe_is_ss(struct kprobe_ctlblk *kcb) { return (kcb->kprobe_status == KPROBE_HIT_SS || kcb->kprobe_status == KPROBE_REENTER); From edc2ac7c7265b33660fa0190898966b49966b855 Mon Sep 17 00:00:00 2001 From: "Masami Hiramatsu (Google)" Date: Wed, 5 Jul 2023 14:46:52 +0800 Subject: [PATCH 120/203] x86/kprobes: Update kcb status flag after singlestepping MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit dec8784c9088b131a1523f582c2194cfc8107dc0 ] Fix kprobes to update kcb (kprobes control block) status flag to KPROBE_HIT_SSDONE even if the kp->post_handler is not set. This bug may cause a kernel panic if another INT3 user runs right after kprobes because kprobe_int3_handler() misunderstands the INT3 is kprobe's single stepping INT3. Fixes: 6256e668b7af ("x86/kprobes: Use int3 instead of debug trap for single-step") Reported-by: Daniel Müller Signed-off-by: Masami Hiramatsu (Google) Signed-off-by: Ingo Molnar Tested-by: Daniel Müller Cc: stable@vger.kernel.org Link: https://lore.kernel.org/all/20220727210136.jjgc3lpqeq42yr3m@muellerd-fedora-PC2BDTX9 Link: https://lore.kernel.org/r/165942025658.342061.12452378391879093249.stgit@devnote2 Signed-off-by: Li Huafei Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/kprobes/core.c | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c index 07f222743811..bde43592f54a 100644 --- a/arch/x86/kernel/kprobes/core.c +++ b/arch/x86/kernel/kprobes/core.c @@ -826,16 +826,20 @@ NOKPROBE_SYMBOL(arch_prepare_kretprobe); static void kprobe_post_process(struct kprobe *cur, struct pt_regs *regs, struct kprobe_ctlblk *kcb) { - if ((kcb->kprobe_status != KPROBE_REENTER) && cur->post_handler) { - kcb->kprobe_status = KPROBE_HIT_SSDONE; - cur->post_handler(cur, regs, 0); - } - /* Restore back the original saved kprobes variables and continue. */ - if (kcb->kprobe_status == KPROBE_REENTER) + if (kcb->kprobe_status == KPROBE_REENTER) { + /* This will restore both kcb and current_kprobe */ restore_previous_kprobe(kcb); - else + } else { + /* + * Always update the kcb status because + * reset_curent_kprobe() doesn't update kcb. + */ + kcb->kprobe_status = KPROBE_HIT_SSDONE; + if (cur->post_handler) + cur->post_handler(cur, regs, 0); reset_current_kprobe(); + } } NOKPROBE_SYMBOL(kprobe_post_process); From 058c0cbd251a1525a603c313931a0580d14adcfd Mon Sep 17 00:00:00 2001 From: Nadav Amit Date: Wed, 5 Jul 2023 14:46:53 +0800 Subject: [PATCH 121/203] x86/kprobes: Fix JNG/JNLE emulation [ Upstream commit 8924779df820c53875abaeb10c648e9cb75b46d4 ] When kprobes emulates JNG/JNLE instructions on x86 it uses the wrong condition. For JNG (opcode: 0F 8E), according to Intel SDM, the jump is performed if (ZF == 1 or SF != OF). However the kernel emulation currently uses 'and' instead of 'or'. As a result, setting a kprobe on JNG/JNLE might cause the kernel to behave incorrectly whenever the kprobe is hit. Fix by changing the 'and' to 'or'. Fixes: 6256e668b7af ("x86/kprobes: Use int3 instead of debug trap for single-step") Signed-off-by: Nadav Amit Signed-off-by: Ingo Molnar Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20220813225943.143767-1-namit@vmware.com Signed-off-by: Li Huafei Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/kprobes/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c index bde43592f54a..c78b4946385e 100644 --- a/arch/x86/kernel/kprobes/core.c +++ b/arch/x86/kernel/kprobes/core.c @@ -505,7 +505,7 @@ static void kprobe_emulate_jcc(struct kprobe *p, struct pt_regs *regs) match = ((regs->flags & X86_EFLAGS_SF) >> X86_EFLAGS_SF_BIT) ^ ((regs->flags & X86_EFLAGS_OF) >> X86_EFLAGS_OF_BIT); if (p->ainsn.jcc.type >= 0xe) - match = match && (regs->flags & X86_EFLAGS_ZF); + match = match || (regs->flags & X86_EFLAGS_ZF); } __kprobe_emulate_jmp(p, regs, (match && !invert) || (!match && invert)); } From a883b98dc737b47cc275306ef81f86efd471b9b5 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Tue, 1 Aug 2023 08:39:47 -0600 Subject: [PATCH 122/203] io_uring: gate iowait schedule on having pending requests Commit 7b72d661f1f2f950ab8c12de7e2bc48bdac8ed69 upstream. A previous commit made all cqring waits marked as iowait, as a way to improve performance for short schedules with pending IO. However, for use cases that have a special reaper thread that does nothing but wait on events on the ring, this causes a cosmetic issue where we know have one core marked as being "busy" with 100% iowait. While this isn't a grave issue, it is confusing to users. Rather than always mark us as being in iowait, gate setting of current->in_iowait to 1 by whether or not the waiting task has pending requests. Cc: stable@vger.kernel.org Link: https://lore.kernel.org/io-uring/CAMEGJJ2RxopfNQ7GNLhr7X9=bHXKo+G5OOe0LUq=+UgLXsv1Xg@mail.gmail.com/ Link: https://bugzilla.kernel.org/show_bug.cgi?id=217699 Link: https://bugzilla.kernel.org/show_bug.cgi?id=217700 Reported-by: Oleksandr Natalenko Reported-by: Phil Elwell Tested-by: Andres Freund Fixes: 8a796565cec3 ("io_uring: Use io_schedule* in cqring wait") Signed-off-by: Jens Axboe Signed-off-by: Greg Kroah-Hartman --- io_uring/io_uring.c | 23 +++++++++++++++++------ 1 file changed, 17 insertions(+), 6 deletions(-) diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c index c18c4c025da4..92eb4769b0a3 100644 --- a/io_uring/io_uring.c +++ b/io_uring/io_uring.c @@ -7631,12 +7631,21 @@ static int io_run_task_work_sig(void) return -EINTR; } +static bool current_pending_io(void) +{ + struct io_uring_task *tctx = current->io_uring; + + if (!tctx) + return false; + return percpu_counter_read_positive(&tctx->inflight); +} + /* when returns >0, the caller should retry */ static inline int io_cqring_wait_schedule(struct io_ring_ctx *ctx, struct io_wait_queue *iowq, ktime_t *timeout) { - int token, ret; + int io_wait, ret; /* make sure we run task_work before checking for signals */ ret = io_run_task_work_sig(); @@ -7647,15 +7656,17 @@ static inline int io_cqring_wait_schedule(struct io_ring_ctx *ctx, return 1; /* - * Use io_schedule_prepare/finish, so cpufreq can take into account - * that the task is waiting for IO - turns out to be important for low - * QD IO. + * Mark us as being in io_wait if we have pending requests, so cpufreq + * can take into account that the task is waiting for IO - turns out + * to be important for low QD IO. */ - token = io_schedule_prepare(); + io_wait = current->in_iowait; + if (current_pending_io()) + current->in_iowait = 1; ret = 1; if (!schedule_hrtimeout(timeout, HRTIMER_MODE_ABS)) ret = -ETIME; - io_schedule_finish(token); + current->in_iowait = io_wait; return ret; } From 3f7395c382040f0cf274fd6fac35519b800afc19 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 16 Nov 2022 22:40:17 +0100 Subject: [PATCH 123/203] perf: Fix function pointer case commit 1af6239d1d3e61d33fd2f0ba53d3d1a67cc50574 upstream. With the advent of CFI it is no longer acceptible to cast function pointers. The robot complains thusly: kernel-events-core.c:warning:cast-from-int-(-)(struct-perf_cpu_pmu_context-)-to-remote_function_f-(aka-int-(-)(void-)-)-converts-to-incompatible-function-type Reported-by: kernel test robot Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Cixi Geng Signed-off-by: Greg Kroah-Hartman --- kernel/events/core.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/kernel/events/core.c b/kernel/events/core.c index 53f36bbaf0c6..8c5400fd227b 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -1222,6 +1222,11 @@ static int perf_mux_hrtimer_restart(struct perf_cpu_context *cpuctx) return 0; } +static int perf_mux_hrtimer_restart_ipi(void *arg) +{ + return perf_mux_hrtimer_restart(arg); +} + void perf_pmu_disable(struct pmu *pmu) { int *count = this_cpu_ptr(pmu->pmu_disable_count); @@ -10772,8 +10777,7 @@ perf_event_mux_interval_ms_store(struct device *dev, cpuctx = per_cpu_ptr(pmu->pmu_cpu_context, cpu); cpuctx->hrtimer_interval = ns_to_ktime(NSEC_PER_MSEC * timer); - cpu_function_call(cpu, - (remote_function_f)perf_mux_hrtimer_restart, cpuctx); + cpu_function_call(cpu, perf_mux_hrtimer_restart_ipi, cpuctx); } cpus_read_unlock(); mutex_unlock(&mux_interval_mutex); From 832a4d4cdb3f612d5079aadcc67a44f18fa7b5b7 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Thu, 5 Aug 2021 10:42:00 -0700 Subject: [PATCH 124/203] loop: Select I/O scheduler 'none' from inside add_disk() commit 2112f5c1330a671fa852051d85cb9eadc05d7eb7 upstream. We noticed that the user interface of Android devices becomes very slow under memory pressure. This is because Android uses the zram driver on top of the loop driver for swapping, because under memory pressure the swap code alternates reads and writes quickly, because mq-deadline is the default scheduler for loop devices and because mq-deadline delays writes by five seconds for such a workload with default settings. Fix this by making the kernel select I/O scheduler 'none' from inside add_disk() for loop devices. This default can be overridden at any time from user space, e.g. via a udev rule. This approach has an advantage compared to changing the I/O scheduler from userspace from 'mq-deadline' into 'none', namely that synchronize_rcu() does not get called. This patch changes the default I/O scheduler for loop devices from 'mq-deadline' into 'none'. Additionally, this patch reduces the Android boot time on my test setup with 0.5 seconds compared to configuring the loop I/O scheduler from user space. Cc: Christoph Hellwig Cc: Ming Lei Cc: Tetsuo Handa Cc: Martijn Coenen Cc: Jaegeuk Kim Signed-off-by: Bart Van Assche Link: https://lore.kernel.org/r/20210805174200.3250718-3-bvanassche@acm.org Signed-off-by: Jens Axboe Signed-off-by: Greg Kroah-Hartman --- drivers/block/loop.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/block/loop.c b/drivers/block/loop.c index d86fbea54652..7444cc2a6c86 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -2109,7 +2109,8 @@ static int loop_add(struct loop_device **l, int i) lo->tag_set.queue_depth = 128; lo->tag_set.numa_node = NUMA_NO_NODE; lo->tag_set.cmd_size = sizeof(struct loop_cmd); - lo->tag_set.flags = BLK_MQ_F_SHOULD_MERGE | BLK_MQ_F_STACKING; + lo->tag_set.flags = BLK_MQ_F_SHOULD_MERGE | BLK_MQ_F_STACKING | + BLK_MQ_F_NO_SCHED; lo->tag_set.driver_data = lo; err = blk_mq_alloc_tag_set(&lo->tag_set); From 94f8447ab8021f98867e04385a353ab8b87f6219 Mon Sep 17 00:00:00 2001 From: Hugo Villeneuve Date: Tue, 4 Jul 2023 09:48:00 -0400 Subject: [PATCH 125/203] arm64: dts: imx8mn-var-som: add missing pull-up for onboard PHY reset pinmux [ Upstream commit 253be5b53c2792fb4384f8005b05421e6f040ee3 ] For SOMs with an onboard PHY, the RESET_N pull-up resistor is currently deactivated in the pinmux configuration. When the pinmux code selects the GPIO function for this pin, with a default direction of input, this prevents the RESET_N pin from being taken to the proper 3.3V level (deasserted), and this results in the PHY being not detected since it is held in reset. Taken from RESET_N pin description in ADIN13000 datasheet: This pin requires a 1K pull-up resistor to AVDD_3P3. Activate the pull-up resistor to fix the issue. Fixes: ade0176dd8a0 ("arm64: dts: imx8mn-var-som: Add Variscite VAR-SOM-MX8MN System on Module") Signed-off-by: Hugo Villeneuve Reviewed-by: Fabio Estevam Signed-off-by: Shawn Guo Signed-off-by: Sasha Levin --- arch/arm64/boot/dts/freescale/imx8mn-var-som.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/boot/dts/freescale/imx8mn-var-som.dtsi b/arch/arm64/boot/dts/freescale/imx8mn-var-som.dtsi index 24f9e8fd0c8b..9c6c21cc6c6c 100644 --- a/arch/arm64/boot/dts/freescale/imx8mn-var-som.dtsi +++ b/arch/arm64/boot/dts/freescale/imx8mn-var-som.dtsi @@ -351,7 +351,7 @@ MX8MN_IOMUXC_ENET_RXC_ENET1_RGMII_RXC 0x91 MX8MN_IOMUXC_ENET_RX_CTL_ENET1_RGMII_RX_CTL 0x91 MX8MN_IOMUXC_ENET_TX_CTL_ENET1_RGMII_TX_CTL 0x1f - MX8MN_IOMUXC_GPIO1_IO09_GPIO1_IO9 0x19 + MX8MN_IOMUXC_GPIO1_IO09_GPIO1_IO9 0x159 >; }; From 4241cfc973ad8bc3d3a68b532b014e880c21b35f Mon Sep 17 00:00:00 2001 From: "ndesaulniers@google.com" Date: Tue, 1 Aug 2023 15:22:17 -0700 Subject: [PATCH 126/203] word-at-a-time: use the same return type for has_zero regardless of endianness [ Upstream commit 79e8328e5acbe691bbde029a52c89d70dcbc22f3 ] Compiling big-endian targets with Clang produces the diagnostic: fs/namei.c:2173:13: warning: use of bitwise '|' with boolean operands [-Wbitwise-instead-of-logical] } while (!(has_zero(a, &adata, &constants) | has_zero(b, &bdata, &constants))); ~^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ || fs/namei.c:2173:13: note: cast one or both operands to int to silence this warning It appears that when has_zero was introduced, two definitions were produced with different signatures (in particular different return types). Looking at the usage in hash_name() in fs/namei.c, I suspect that has_zero() is meant to be invoked twice per while loop iteration; using logical-or would not update `bdata` when `a` did not have zeros. So I think it's preferred to always return an unsigned long rather than a bool than update the while loop in hash_name() to use a logical-or rather than bitwise-or. [ Also changed powerpc version to do the same - Linus ] Link: https://github.com/ClangBuiltLinux/linux/issues/1832 Link: https://lore.kernel.org/lkml/20230801-bitwise-v1-1-799bec468dc4@google.com/ Fixes: 36126f8f2ed8 ("word-at-a-time: make the interfaces truly generic") Debugged-by: Nathan Chancellor Signed-off-by: Nick Desaulniers Acked-by: Heiko Carstens Cc: Arnd Bergmann Signed-off-by: Linus Torvalds Signed-off-by: Sasha Levin --- arch/powerpc/include/asm/word-at-a-time.h | 2 +- include/asm-generic/word-at-a-time.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/include/asm/word-at-a-time.h b/arch/powerpc/include/asm/word-at-a-time.h index f3f4710d4ff5..99129b0cd8b8 100644 --- a/arch/powerpc/include/asm/word-at-a-time.h +++ b/arch/powerpc/include/asm/word-at-a-time.h @@ -34,7 +34,7 @@ static inline long find_zero(unsigned long mask) return leading_zero_bits >> 3; } -static inline bool has_zero(unsigned long val, unsigned long *data, const struct word_at_a_time *c) +static inline unsigned long has_zero(unsigned long val, unsigned long *data, const struct word_at_a_time *c) { unsigned long rhs = val | c->low_bits; *data = rhs; diff --git a/include/asm-generic/word-at-a-time.h b/include/asm-generic/word-at-a-time.h index 20c93f08c993..95a1d214108a 100644 --- a/include/asm-generic/word-at-a-time.h +++ b/include/asm-generic/word-at-a-time.h @@ -38,7 +38,7 @@ static inline long find_zero(unsigned long mask) return (mask >> 8) ? byte : byte + 1; } -static inline bool has_zero(unsigned long val, unsigned long *data, const struct word_at_a_time *c) +static inline unsigned long has_zero(unsigned long val, unsigned long *data, const struct word_at_a_time *c) { unsigned long rhs = val | c->low_bits; *data = rhs; From b4bdcbc0e39138a064c63ab7ffd5e3c962e881e9 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Thu, 27 Jul 2023 20:29:39 +0200 Subject: [PATCH 127/203] KVM: s390: fix sthyi error handling [ Upstream commit 0c02cc576eac161601927b41634f80bfd55bfa9e ] Commit 9fb6c9b3fea1 ("s390/sthyi: add cache to store hypervisor info") added cache handling for store hypervisor info. This also changed the possible return code for sthyi_fill(). Instead of only returning a condition code like the sthyi instruction would do, it can now also return a negative error value (-ENOMEM). handle_styhi() was not changed accordingly. In case of an error, the negative error value would incorrectly injected into the guest PSW. Add proper error handling to prevent this, and update the comment which describes the possible return values of sthyi_fill(). Fixes: 9fb6c9b3fea1 ("s390/sthyi: add cache to store hypervisor info") Reviewed-by: Christian Borntraeger Link: https://lore.kernel.org/r/20230727182939.2050744-1-hca@linux.ibm.com Signed-off-by: Heiko Carstens Signed-off-by: Sasha Levin --- arch/s390/kernel/sthyi.c | 6 +++--- arch/s390/kvm/intercept.c | 9 ++++++--- 2 files changed, 9 insertions(+), 6 deletions(-) diff --git a/arch/s390/kernel/sthyi.c b/arch/s390/kernel/sthyi.c index 888cc2f166db..ce6084e28d90 100644 --- a/arch/s390/kernel/sthyi.c +++ b/arch/s390/kernel/sthyi.c @@ -460,9 +460,9 @@ static int sthyi_update_cache(u64 *rc) * * Fills the destination with system information returned by the STHYI * instruction. The data is generated by emulation or execution of STHYI, - * if available. The return value is the condition code that would be - * returned, the rc parameter is the return code which is passed in - * register R2 + 1. + * if available. The return value is either a negative error value or + * the condition code that would be returned, the rc parameter is the + * return code which is passed in register R2 + 1. */ int sthyi_fill(void *dst, u64 *rc) { diff --git a/arch/s390/kvm/intercept.c b/arch/s390/kvm/intercept.c index 5be68190901f..8bf72a323e4f 100644 --- a/arch/s390/kvm/intercept.c +++ b/arch/s390/kvm/intercept.c @@ -387,8 +387,8 @@ static int handle_partial_execution(struct kvm_vcpu *vcpu) */ int handle_sthyi(struct kvm_vcpu *vcpu) { - int reg1, reg2, r = 0; - u64 code, addr, cc = 0, rc = 0; + int reg1, reg2, cc = 0, r = 0; + u64 code, addr, rc = 0; struct sthyi_sctns *sctns = NULL; if (!test_kvm_facility(vcpu->kvm, 74)) @@ -419,7 +419,10 @@ int handle_sthyi(struct kvm_vcpu *vcpu) return -ENOMEM; cc = sthyi_fill(sctns, &rc); - + if (cc < 0) { + free_page((unsigned long)sctns); + return cc; + } out: if (!cc) { if (kvm_s390_pv_cpu_is_protected(vcpu)) { From ef3d0a732d69bb2fc7bdce159d7d4f259f0c7359 Mon Sep 17 00:00:00 2001 From: Ilan Peer Date: Sun, 23 Jul 2023 23:10:43 +0300 Subject: [PATCH 128/203] wifi: cfg80211: Fix return value in scan logic [ Upstream commit fd7f08d92fcd7cc3eca0dd6c853f722a4c6176df ] The reporter noticed a warning when running iwlwifi: WARNING: CPU: 8 PID: 659 at mm/page_alloc.c:4453 __alloc_pages+0x329/0x340 As cfg80211_parse_colocated_ap() is not expected to return a negative value return 0 and not a negative value if cfg80211_calc_short_ssid() fails. Fixes: c8cb5b854b40f ("nl80211/cfg80211: support 6 GHz scanning") Closes: https://bugzilla.kernel.org/show_bug.cgi?id=217675 Signed-off-by: Ilan Peer Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/20230723201043.3007430-1-ilan.peer@intel.com Signed-off-by: Sasha Levin --- net/wireless/scan.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/wireless/scan.c b/net/wireless/scan.c index 671c7f83d5fc..f59691936e5b 100644 --- a/net/wireless/scan.c +++ b/net/wireless/scan.c @@ -641,7 +641,7 @@ static int cfg80211_parse_colocated_ap(const struct cfg80211_bss_ies *ies, ret = cfg80211_calc_short_ssid(ies, &ssid_elem, &s_ssid_tmp); if (ret) - return ret; + return 0; /* RNR IE may contain more than one NEIGHBOR_AP_INFO */ while (pos + sizeof(*ap_info) <= end) { From 165159854757dbae0dfd1812b27051da35aa6223 Mon Sep 17 00:00:00 2001 From: Zhengchao Shao Date: Wed, 5 Jul 2023 20:15:27 +0800 Subject: [PATCH 129/203] net/mlx5: DR, fix memory leak in mlx5dr_cmd_create_reformat_ctx [ Upstream commit 5dd77585dd9d0e03dd1bceb95f0269a7eaf6b936 ] when mlx5_cmd_exec failed in mlx5dr_cmd_create_reformat_ctx, the memory pointed by 'in' is not released, which will cause memory leak. Move memory release after mlx5_cmd_exec. Fixes: 1d9186476e12 ("net/mlx5: DR, Add direct rule command utilities") Signed-off-by: Zhengchao Shao Reviewed-by: Leon Romanovsky Signed-off-by: Saeed Mahameed Signed-off-by: Sasha Levin --- drivers/net/ethernet/mellanox/mlx5/core/steering/dr_cmd.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_cmd.c index fd56cae0d54f..4549840fb91a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_cmd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_cmd.c @@ -425,11 +425,12 @@ int mlx5dr_cmd_create_reformat_ctx(struct mlx5_core_dev *mdev, err = mlx5_cmd_exec(mdev, in, inlen, out, sizeof(out)); if (err) - return err; + goto err_free_in; *reformat_id = MLX5_GET(alloc_packet_reformat_context_out, out, packet_reformat_id); - kvfree(in); +err_free_in: + kvfree(in); return err; } From 2cf67912078f9ecdd57fab008784fdf4ea41d772 Mon Sep 17 00:00:00 2001 From: Yuanjun Gong Date: Tue, 25 Jul 2023 14:56:55 +0800 Subject: [PATCH 130/203] net/mlx5e: fix return value check in mlx5e_ipsec_remove_trailer() [ Upstream commit e5bcb7564d3bd0c88613c76963c5349be9c511c5 ] mlx5e_ipsec_remove_trailer() should return an error code if function pskb_trim() returns an unexpected value. Fixes: 2ac9cfe78223 ("net/mlx5e: IPSec, Add Innova IPSec offload TX data path") Signed-off-by: Yuanjun Gong Reviewed-by: Leon Romanovsky Signed-off-by: Saeed Mahameed Signed-off-by: Sasha Levin --- drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.c index a9b45606dbdb..76ef8a009d6e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.c @@ -121,7 +121,9 @@ static int mlx5e_ipsec_remove_trailer(struct sk_buff *skb, struct xfrm_state *x) trailer_len = alen + plen + 2; - pskb_trim(skb, skb->len - trailer_len); + ret = pskb_trim(skb, skb->len - trailer_len); + if (unlikely(ret)) + return ret; if (skb->protocol == htons(ETH_P_IP)) { ipv4hdr->tot_len = htons(ntohs(ipv4hdr->tot_len) - trailer_len); ip_send_check(ipv4hdr); From 6b93c510684a802e58da9c04aa05ee3aad74d6ed Mon Sep 17 00:00:00 2001 From: Lin Ma Date: Tue, 25 Jul 2023 10:33:30 +0800 Subject: [PATCH 131/203] bpf: Add length check for SK_DIAG_BPF_STORAGE_REQ_MAP_FD parsing [ Upstream commit bcc29b7f5af6797702c2306a7aacb831fc5ce9cb ] The nla_for_each_nested parsing in function bpf_sk_storage_diag_alloc does not check the length of the nested attribute. This can lead to an out-of-attribute read and allow a malformed nlattr (e.g., length 0) to be viewed as a 4 byte integer. This patch adds an additional check when the nlattr is getting counted. This makes sure the latter nla_get_u32 can access the attributes with the correct length. Fixes: 1ed4d92458a9 ("bpf: INET_DIAG support in bpf_sk_storage") Suggested-by: Jakub Kicinski Signed-off-by: Lin Ma Reviewed-by: Jakub Kicinski Link: https://lore.kernel.org/r/20230725023330.422856-1-linma@zju.edu.cn Signed-off-by: Martin KaFai Lau Signed-off-by: Sasha Levin --- net/core/bpf_sk_storage.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/net/core/bpf_sk_storage.c b/net/core/bpf_sk_storage.c index d67d06d6b817..a811fe0f0f6f 100644 --- a/net/core/bpf_sk_storage.c +++ b/net/core/bpf_sk_storage.c @@ -446,8 +446,11 @@ bpf_sk_storage_diag_alloc(const struct nlattr *nla_stgs) return ERR_PTR(-EPERM); nla_for_each_nested(nla, nla_stgs, rem) { - if (nla_type(nla) == SK_DIAG_BPF_STORAGE_REQ_MAP_FD) + if (nla_type(nla) == SK_DIAG_BPF_STORAGE_REQ_MAP_FD) { + if (nla_len(nla) != sizeof(u32)) + return ERR_PTR(-EINVAL); nr_maps++; + } } diag = kzalloc(sizeof(*diag) + sizeof(diag->maps[0]) * nr_maps, From abb0172fa8dc4a4ec51aa992b7269ed65959f310 Mon Sep 17 00:00:00 2001 From: Lin Ma Date: Wed, 26 Jul 2023 15:53:14 +0800 Subject: [PATCH 132/203] rtnetlink: let rtnl_bridge_setlink checks IFLA_BRIDGE_MODE length [ Upstream commit d73ef2d69c0dba5f5a1cb9600045c873bab1fb7f ] There are totally 9 ndo_bridge_setlink handlers in the current kernel, which are 1) bnxt_bridge_setlink, 2) be_ndo_bridge_setlink 3) i40e_ndo_bridge_setlink 4) ice_bridge_setlink 5) ixgbe_ndo_bridge_setlink 6) mlx5e_bridge_setlink 7) nfp_net_bridge_setlink 8) qeth_l2_bridge_setlink 9) br_setlink. By investigating the code, we find that 1-7 parse and use nlattr IFLA_BRIDGE_MODE but 3 and 4 forget to do the nla_len check. This can lead to an out-of-attribute read and allow a malformed nlattr (e.g., length 0) to be viewed as a 2 byte integer. To avoid such issues, also for other ndo_bridge_setlink handlers in the future. This patch adds the nla_len check in rtnl_bridge_setlink and does an early error return if length mismatches. To make it works, the break is removed from the parsing for IFLA_BRIDGE_FLAGS to make sure this nla_for_each_nested iterates every attribute. Fixes: b1edc14a3fbf ("ice: Implement ice_bridge_getlink and ice_bridge_setlink") Fixes: 51616018dd1b ("i40e: Add support for getlink, setlink ndo ops") Suggested-by: Jakub Kicinski Signed-off-by: Lin Ma Acked-by: Nikolay Aleksandrov Reviewed-by: Hangbin Liu Link: https://lore.kernel.org/r/20230726075314.1059224-1-linma@zju.edu.cn Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- net/core/rtnetlink.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index d3c03ebf06a5..ce37a052b9c3 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -4897,13 +4897,17 @@ static int rtnl_bridge_setlink(struct sk_buff *skb, struct nlmsghdr *nlh, br_spec = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg), IFLA_AF_SPEC); if (br_spec) { nla_for_each_nested(attr, br_spec, rem) { - if (nla_type(attr) == IFLA_BRIDGE_FLAGS) { + if (nla_type(attr) == IFLA_BRIDGE_FLAGS && !have_flags) { if (nla_len(attr) < sizeof(flags)) return -EINVAL; have_flags = true; flags = nla_get_u16(attr); - break; + } + + if (nla_type(attr) == IFLA_BRIDGE_MODE) { + if (nla_len(attr) < sizeof(u16)) + return -EINVAL; } } } From eb8031b7ce0c5ce6869632058fad7345feb4328e Mon Sep 17 00:00:00 2001 From: Yuanjun Gong Date: Thu, 27 Jul 2023 01:05:06 +0800 Subject: [PATCH 133/203] net: dsa: fix value check in bcm_sf2_sw_probe() [ Upstream commit dadc5b86cc9459581f37fe755b431adc399ea393 ] in bcm_sf2_sw_probe(), check the return value of clk_prepare_enable() and return the error code if clk_prepare_enable() returns an unexpected value. Fixes: e9ec5c3bd238 ("net: dsa: bcm_sf2: request and handle clocks") Signed-off-by: Yuanjun Gong Reviewed-by: Florian Fainelli Link: https://lore.kernel.org/r/20230726170506.16547-1-ruc_gongyuanjun@163.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/dsa/bcm_sf2.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/net/dsa/bcm_sf2.c b/drivers/net/dsa/bcm_sf2.c index c6563d212476..f2f890e559f3 100644 --- a/drivers/net/dsa/bcm_sf2.c +++ b/drivers/net/dsa/bcm_sf2.c @@ -1301,7 +1301,9 @@ static int bcm_sf2_sw_probe(struct platform_device *pdev) if (IS_ERR(priv->clk)) return PTR_ERR(priv->clk); - clk_prepare_enable(priv->clk); + ret = clk_prepare_enable(priv->clk); + if (ret) + return ret; priv->clk_mdiv = devm_clk_get_optional(&pdev->dev, "sw_switch_mdiv"); if (IS_ERR(priv->clk_mdiv)) { @@ -1309,7 +1311,9 @@ static int bcm_sf2_sw_probe(struct platform_device *pdev) goto out_clk; } - clk_prepare_enable(priv->clk_mdiv); + ret = clk_prepare_enable(priv->clk_mdiv); + if (ret) + goto out_clk; ret = bcm_sf2_sw_rst(priv); if (ret) { From e68929f11b19f6ecea4737d8ded616473bf0e32b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Georg=20M=C3=BCller?= Date: Fri, 28 Jul 2023 17:18:12 +0200 Subject: [PATCH 134/203] perf test uprobe_from_different_cu: Skip if there is no gcc MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 98ce8e4a9dcfb448b30a2d7a16190f4a00382377 ] Without gcc, the test will fail. On cleanup, ignore probe removal errors. Otherwise, in case of an error adding the probe, the temporary directory is not removed. Fixes: 56cbeacf14353057 ("perf probe: Add test for regression introduced by switch to die_get_decl_file()") Signed-off-by: Georg Müller Acked-by: Ian Rogers Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Georg Müller Cc: Ingo Molnar Cc: Jiri Olsa Cc: Mark Rutland Cc: Masami Hiramatsu Cc: Namhyung Kim Cc: Peter Zijlstra Link: https://lore.kernel.org/r/20230728151812.454806-2-georgmueller@gmx.net Link: https://lore.kernel.org/r/CAP-5=fUP6UuLgRty3t2=fQsQi3k4hDMz415vWdp1x88QMvZ8ug@mail.gmail.com/ Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Sasha Levin --- tools/perf/tests/shell/test_uprobe_from_different_cu.sh | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/tools/perf/tests/shell/test_uprobe_from_different_cu.sh b/tools/perf/tests/shell/test_uprobe_from_different_cu.sh index 00d2e0e2e0c2..319f36ebb9a4 100644 --- a/tools/perf/tests/shell/test_uprobe_from_different_cu.sh +++ b/tools/perf/tests/shell/test_uprobe_from_different_cu.sh @@ -4,6 +4,12 @@ set -e +# skip if there's no gcc +if ! [ -x "$(command -v gcc)" ]; then + echo "failed: no gcc compiler" + exit 2 +fi + temp_dir=$(mktemp -d /tmp/perf-uprobe-different-cu-sh.XXXXXXXXXX) cleanup() @@ -11,7 +17,7 @@ cleanup() trap - EXIT TERM INT if [[ "${temp_dir}" =~ ^/tmp/perf-uprobe-different-cu-sh.*$ ]]; then echo "--- Cleaning up ---" - perf probe -x ${temp_dir}/testfile -d foo + perf probe -x ${temp_dir}/testfile -d foo || true rm -f "${temp_dir}/"* rmdir "${temp_dir}" fi From d163337bef2025da6e7ffa3ff2ce24c0a79c91b4 Mon Sep 17 00:00:00 2001 From: Jamal Hadi Salim Date: Wed, 26 Jul 2023 09:51:51 -0400 Subject: [PATCH 135/203] net: sched: cls_u32: Fix match key mis-addressing [ Upstream commit e68409db995380d1badacba41ff24996bd396171 ] A match entry is uniquely identified with an "address" or "path" in the form of: hashtable ID(12b):bucketid(8b):nodeid(12b). When creating table match entries all of hash table id, bucket id and node (match entry id) are needed to be either specified by the user or reasonable in-kernel defaults are used. The in-kernel default for a table id is 0x800(omnipresent root table); for bucketid it is 0x0. Prior to this fix there was none for a nodeid i.e. the code assumed that the user passed the correct nodeid and if the user passes a nodeid of 0 (as Mingi Cho did) then that is what was used. But nodeid of 0 is reserved for identifying the table. This is not a problem until we dump. The dump code notices that the nodeid is zero and assumes it is referencing a table and therefore references table struct tc_u_hnode instead of what was created i.e match entry struct tc_u_knode. Ming does an equivalent of: tc filter add dev dummy0 parent 10: prio 1 handle 0x1000 \ protocol ip u32 match ip src 10.0.0.1/32 classid 10:1 action ok Essentially specifying a table id 0, bucketid 1 and nodeid of zero Tableid 0 is remapped to the default of 0x800. Bucketid 1 is ignored and defaults to 0x00. Nodeid was assumed to be what Ming passed - 0x000 dumping before fix shows: ~$ tc filter ls dev dummy0 parent 10: filter protocol ip pref 1 u32 chain 0 filter protocol ip pref 1 u32 chain 0 fh 800: ht divisor 1 filter protocol ip pref 1 u32 chain 0 fh 800: ht divisor -30591 Note that the last line reports a table instead of a match entry (you can tell this because it says "ht divisor..."). As a result of reporting the wrong data type (misinterpretting of struct tc_u_knode as being struct tc_u_hnode) the divisor is reported with value of -30591. Ming identified this as part of the heap address (physmap_base is 0xffff8880 (-30591 - 1)). The fix is to ensure that when table entry matches are added and no nodeid is specified (i.e nodeid == 0) then we get the next available nodeid from the table's pool. After the fix, this is what the dump shows: $ tc filter ls dev dummy0 parent 10: filter protocol ip pref 1 u32 chain 0 filter protocol ip pref 1 u32 chain 0 fh 800: ht divisor 1 filter protocol ip pref 1 u32 chain 0 fh 800::800 order 2048 key ht 800 bkt 0 flowid 10:1 not_in_hw match 0a000001/ffffffff at 12 action order 1: gact action pass random type none pass val 0 index 1 ref 1 bind 1 Reported-by: Mingi Cho Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Jamal Hadi Salim Link: https://lore.kernel.org/r/20230726135151.416917-1-jhs@mojatatu.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- net/sched/cls_u32.c | 56 ++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 50 insertions(+), 6 deletions(-) diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c index 1ac8ff445a6d..04e933530cad 100644 --- a/net/sched/cls_u32.c +++ b/net/sched/cls_u32.c @@ -999,18 +999,62 @@ static int u32_change(struct net *net, struct sk_buff *in_skb, return -EINVAL; } + /* At this point, we need to derive the new handle that will be used to + * uniquely map the identity of this table match entry. The + * identity of the entry that we need to construct is 32 bits made of: + * htid(12b):bucketid(8b):node/entryid(12b) + * + * At this point _we have the table(ht)_ in which we will insert this + * entry. We carry the table's id in variable "htid". + * Note that earlier code picked the ht selection either by a) the user + * providing the htid specified via TCA_U32_HASH attribute or b) when + * no such attribute is passed then the root ht, is default to at ID + * 0x[800][00][000]. Rule: the root table has a single bucket with ID 0. + * If OTOH the user passed us the htid, they may also pass a bucketid of + * choice. 0 is fine. For example a user htid is 0x[600][01][000] it is + * indicating hash bucketid of 1. Rule: the entry/node ID _cannot_ be + * passed via the htid, so even if it was non-zero it will be ignored. + * + * We may also have a handle, if the user passed one. The handle also + * carries the same addressing of htid(12b):bucketid(8b):node/entryid(12b). + * Rule: the bucketid on the handle is ignored even if one was passed; + * rather the value on "htid" is always assumed to be the bucketid. + */ if (handle) { + /* Rule: The htid from handle and tableid from htid must match */ if (TC_U32_HTID(handle) && TC_U32_HTID(handle ^ htid)) { NL_SET_ERR_MSG_MOD(extack, "Handle specified hash table address mismatch"); return -EINVAL; } - handle = htid | TC_U32_NODE(handle); - err = idr_alloc_u32(&ht->handle_idr, NULL, &handle, handle, - GFP_KERNEL); - if (err) - return err; - } else + /* Ok, so far we have a valid htid(12b):bucketid(8b) but we + * need to finalize the table entry identification with the last + * part - the node/entryid(12b)). Rule: Nodeid _cannot be 0_ for + * entries. Rule: nodeid of 0 is reserved only for tables(see + * earlier code which processes TC_U32_DIVISOR attribute). + * Rule: The nodeid can only be derived from the handle (and not + * htid). + * Rule: if the handle specified zero for the node id example + * 0x60000000, then pick a new nodeid from the pool of IDs + * this hash table has been allocating from. + * If OTOH it is specified (i.e for example the user passed a + * handle such as 0x60000123), then we use it generate our final + * handle which is used to uniquely identify the match entry. + */ + if (!TC_U32_NODE(handle)) { + handle = gen_new_kid(ht, htid); + } else { + handle = htid | TC_U32_NODE(handle); + err = idr_alloc_u32(&ht->handle_idr, NULL, &handle, + handle, GFP_KERNEL); + if (err) + return err; + } + } else { + /* The user did not give us a handle; lets just generate one + * from the table's pool of nodeids. + */ handle = gen_new_kid(ht, htid); + } if (tb[TCA_U32_SEL] == NULL) { NL_SET_ERR_MSG_MOD(extack, "Selector not specified"); From 14db69381dd837828c421b5511753febd6f1c557 Mon Sep 17 00:00:00 2001 From: Chengfeng Ye Date: Thu, 27 Jul 2023 08:56:19 +0000 Subject: [PATCH 136/203] mISDN: hfcpci: Fix potential deadlock on &hc->lock [ Upstream commit 56c6be35fcbed54279df0a2c9e60480a61841d6f ] As &hc->lock is acquired by both timer _hfcpci_softirq() and hardirq hfcpci_int(), the timer should disable irq before lock acquisition otherwise deadlock could happen if the timmer is preemtped by the hadr irq. Possible deadlock scenario: hfcpci_softirq() (timer) -> _hfcpci_softirq() -> spin_lock(&hc->lock); -> hfcpci_int() -> spin_lock(&hc->lock); (deadlock here) This flaw was found by an experimental static analysis tool I am developing for irq-related deadlock. The tentative patch fixes the potential deadlock by spin_lock_irq() in timer. Fixes: b36b654a7e82 ("mISDN: Create /sys/class/mISDN") Signed-off-by: Chengfeng Ye Link: https://lore.kernel.org/r/20230727085619.7419-1-dg573847474@gmail.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/isdn/hardware/mISDN/hfcpci.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/isdn/hardware/mISDN/hfcpci.c b/drivers/isdn/hardware/mISDN/hfcpci.c index eba58b99cd29..d6cf01c32a33 100644 --- a/drivers/isdn/hardware/mISDN/hfcpci.c +++ b/drivers/isdn/hardware/mISDN/hfcpci.c @@ -839,7 +839,7 @@ hfcpci_fill_fifo(struct bchannel *bch) *z1t = cpu_to_le16(new_z1); /* now send data */ if (bch->tx_idx < bch->tx_skb->len) return; - dev_kfree_skb(bch->tx_skb); + dev_kfree_skb_any(bch->tx_skb); if (get_next_bframe(bch)) goto next_t_frame; return; @@ -895,7 +895,7 @@ hfcpci_fill_fifo(struct bchannel *bch) } bz->za[new_f1].z1 = cpu_to_le16(new_z1); /* for next buffer */ bz->f1 = new_f1; /* next frame */ - dev_kfree_skb(bch->tx_skb); + dev_kfree_skb_any(bch->tx_skb); get_next_bframe(bch); } @@ -1119,7 +1119,7 @@ tx_birq(struct bchannel *bch) if (bch->tx_skb && bch->tx_idx < bch->tx_skb->len) hfcpci_fill_fifo(bch); else { - dev_kfree_skb(bch->tx_skb); + dev_kfree_skb_any(bch->tx_skb); if (get_next_bframe(bch)) hfcpci_fill_fifo(bch); } @@ -2277,7 +2277,7 @@ _hfcpci_softirq(struct device *dev, void *unused) return 0; if (hc->hw.int_m2 & HFCPCI_IRQ_ENABLE) { - spin_lock(&hc->lock); + spin_lock_irq(&hc->lock); bch = Sel_BCS(hc, hc->hw.bswapped ? 2 : 1); if (bch && bch->state == ISDN_P_B_RAW) { /* B1 rx&tx */ main_rec_hfcpci(bch); @@ -2288,7 +2288,7 @@ _hfcpci_softirq(struct device *dev, void *unused) main_rec_hfcpci(bch); tx_birq(bch); } - spin_unlock(&hc->lock); + spin_unlock_irq(&hc->lock); } return 0; } From c7ebe08f4081fe69c719c5690c3211d125f625b6 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 28 Jul 2023 15:03:10 +0000 Subject: [PATCH 137/203] net: annotate data-races around sk->sk_max_pacing_rate [ Upstream commit ea7f45ef77b39e72244d282e47f6cb1ef4135cd2 ] sk_getsockopt() runs locklessly. This means sk->sk_max_pacing_rate can be read while other threads are changing its value. Fixes: 62748f32d501 ("net: introduce SO_MAX_PACING_RATE") Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- net/core/sock.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/net/core/sock.c b/net/core/sock.c index 4e00c6e2cb43..c7ba7d82eb36 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -1183,7 +1183,8 @@ set_sndbuf: cmpxchg(&sk->sk_pacing_status, SK_PACING_NONE, SK_PACING_NEEDED); - sk->sk_max_pacing_rate = ulval; + /* Pairs with READ_ONCE() from sk_getsockopt() */ + WRITE_ONCE(sk->sk_max_pacing_rate, ulval); sk->sk_pacing_rate = min(sk->sk_pacing_rate, ulval); break; } @@ -1551,12 +1552,14 @@ int sock_getsockopt(struct socket *sock, int level, int optname, #endif case SO_MAX_PACING_RATE: + /* The READ_ONCE() pair with the WRITE_ONCE() in sk_setsockopt() */ if (sizeof(v.ulval) != sizeof(v.val) && len >= sizeof(v.ulval)) { lv = sizeof(v.ulval); - v.ulval = sk->sk_max_pacing_rate; + v.ulval = READ_ONCE(sk->sk_max_pacing_rate); } else { /* 32bit version */ - v.val = min_t(unsigned long, sk->sk_max_pacing_rate, ~0U); + v.val = min_t(unsigned long, ~0U, + READ_ONCE(sk->sk_max_pacing_rate)); } break; From 427c611d846d9c5198f498d2f13ff940683606d4 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 28 Jul 2023 15:03:11 +0000 Subject: [PATCH 138/203] net: add missing READ_ONCE(sk->sk_rcvlowat) annotation [ Upstream commit e6d12bdb435d23ff6c1890c852d85408a2f496ee ] In a prior commit, I forgot to change sk_getsockopt() when reading sk->sk_rcvlowat locklessly. Fixes: eac66402d1c3 ("net: annotate sk->sk_rcvlowat lockless reads") Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- net/core/sock.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/core/sock.c b/net/core/sock.c index c7ba7d82eb36..66aab0981f66 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -1423,7 +1423,7 @@ int sock_getsockopt(struct socket *sock, int level, int optname, break; case SO_RCVLOWAT: - v.val = sk->sk_rcvlowat; + v.val = READ_ONCE(sk->sk_rcvlowat); break; case SO_SNDLOWAT: From dc20f7bada00b550b3fd2855ec910a33460ed357 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 28 Jul 2023 15:03:13 +0000 Subject: [PATCH 139/203] net: add missing READ_ONCE(sk->sk_sndbuf) annotation [ Upstream commit 74bc084327c643499474ba75df485607da37dd6e ] In a prior commit, I forgot to change sk_getsockopt() when reading sk->sk_sndbuf locklessly. Fixes: e292f05e0df7 ("tcp: annotate sk->sk_sndbuf lockless reads") Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- net/core/sock.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/core/sock.c b/net/core/sock.c index 66aab0981f66..6f422882d34f 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -1332,7 +1332,7 @@ int sock_getsockopt(struct socket *sock, int level, int optname, break; case SO_SNDBUF: - v.val = sk->sk_sndbuf; + v.val = READ_ONCE(sk->sk_sndbuf); break; case SO_RCVBUF: From 11e0590af33383ea3a745ae7452dea4f1f19eba2 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 28 Jul 2023 15:03:14 +0000 Subject: [PATCH 140/203] net: add missing READ_ONCE(sk->sk_rcvbuf) annotation [ Upstream commit b4b553253091cafe9ec38994acf42795e073bef5 ] In a prior commit, I forgot to change sk_getsockopt() when reading sk->sk_rcvbuf locklessly. Fixes: ebb3b78db7bf ("tcp: annotate sk->sk_rcvbuf lockless reads") Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- net/core/sock.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/core/sock.c b/net/core/sock.c index 6f422882d34f..708018b58e90 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -1336,7 +1336,7 @@ int sock_getsockopt(struct socket *sock, int level, int optname, break; case SO_RCVBUF: - v.val = sk->sk_rcvbuf; + v.val = READ_ONCE(sk->sk_rcvbuf); break; case SO_REUSEADDR: From ad417bab9d5b0bb5d8c24423c8d80beb5858bd08 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 28 Jul 2023 15:03:16 +0000 Subject: [PATCH 141/203] net: add missing data-race annotations around sk->sk_peek_off [ Upstream commit 11695c6e966b0ec7ed1d16777d294cef865a5c91 ] sk_getsockopt() runs locklessly, thus we need to annotate the read of sk->sk_peek_off. While we are at it, add corresponding annotations to sk_set_peek_off() and unix_set_peek_off(). Fixes: b9bb53f3836f ("sock: convert sk_peek_offset functions to WRITE_ONCE") Signed-off-by: Eric Dumazet Cc: Willem de Bruijn Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- net/core/sock.c | 4 ++-- net/unix/af_unix.c | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/net/core/sock.c b/net/core/sock.c index 708018b58e90..a241734b1024 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -1517,7 +1517,7 @@ int sock_getsockopt(struct socket *sock, int level, int optname, if (!sock->ops->set_peek_off) return -EOPNOTSUPP; - v.val = sk->sk_peek_off; + v.val = READ_ONCE(sk->sk_peek_off); break; case SO_NOFCS: v.val = sock_flag(sk, SOCK_NOFCS); @@ -2745,7 +2745,7 @@ EXPORT_SYMBOL(__sk_mem_reclaim); int sk_set_peek_off(struct sock *sk, int val) { - sk->sk_peek_off = val; + WRITE_ONCE(sk->sk_peek_off, val); return 0; } EXPORT_SYMBOL_GPL(sk_set_peek_off); diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 2fe0efcbfed1..3aa783a23c5f 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -697,7 +697,7 @@ static int unix_set_peek_off(struct sock *sk, int val) if (mutex_lock_interruptible(&u->iolock)) return -EINTR; - sk->sk_peek_off = val; + WRITE_ONCE(sk->sk_peek_off, val); mutex_unlock(&u->iolock); return 0; From 268b29ef1947718582936e5e6b3d851bafe19271 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 28 Jul 2023 15:03:17 +0000 Subject: [PATCH 142/203] net: add missing data-race annotation for sk_ll_usec [ Upstream commit e5f0d2dd3c2faa671711dac6d3ff3cef307bcfe3 ] In a prior commit I forgot that sk_getsockopt() reads sk->sk_ll_usec without holding a lock. Fixes: 0dbffbb5335a ("net: annotate data race around sk_ll_usec") Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- net/core/sock.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/core/sock.c b/net/core/sock.c index a241734b1024..98f4b4a80de4 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -1547,7 +1547,7 @@ int sock_getsockopt(struct socket *sock, int level, int optname, #ifdef CONFIG_NET_RX_BUSY_POLL case SO_BUSY_POLL: - v.val = sk->sk_ll_usec; + v.val = READ_ONCE(sk->sk_ll_usec); break; #endif From b4256c99a7116c9514224847e8aaee2ecf110a0a Mon Sep 17 00:00:00 2001 From: valis Date: Sat, 29 Jul 2023 08:32:00 -0400 Subject: [PATCH 143/203] net/sched: cls_u32: No longer copy tcf_result on update to avoid use-after-free [ Upstream commit 3044b16e7c6fe5d24b1cdbcf1bd0a9d92d1ebd81 ] When u32_change() is called on an existing filter, the whole tcf_result struct is always copied into the new instance of the filter. This causes a problem when updating a filter bound to a class, as tcf_unbind_filter() is always called on the old instance in the success path, decreasing filter_cnt of the still referenced class and allowing it to be deleted, leading to a use-after-free. Fix this by no longer copying the tcf_result struct from the old filter. Fixes: de5df63228fc ("net: sched: cls_u32 changes to knode must appear atomic to readers") Reported-by: valis Reported-by: M A Ramdhan Signed-off-by: valis Signed-off-by: Jamal Hadi Salim Reviewed-by: Victor Nogueira Reviewed-by: Pedro Tammela Reviewed-by: M A Ramdhan Link: https://lore.kernel.org/r/20230729123202.72406-2-jhs@mojatatu.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- net/sched/cls_u32.c | 1 - 1 file changed, 1 deletion(-) diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c index 04e933530cad..b2d2ba561eba 100644 --- a/net/sched/cls_u32.c +++ b/net/sched/cls_u32.c @@ -812,7 +812,6 @@ static struct tc_u_knode *u32_init_knode(struct net *net, struct tcf_proto *tp, new->ifindex = n->ifindex; new->fshift = n->fshift; - new->res = n->res; new->flags = n->flags; RCU_INIT_POINTER(new->ht_down, ht); From a8d478200b104ff356f51e1f63499fe46ba8c9b8 Mon Sep 17 00:00:00 2001 From: valis Date: Sat, 29 Jul 2023 08:32:01 -0400 Subject: [PATCH 144/203] net/sched: cls_fw: No longer copy tcf_result on update to avoid use-after-free [ Upstream commit 76e42ae831991c828cffa8c37736ebfb831ad5ec ] When fw_change() is called on an existing filter, the whole tcf_result struct is always copied into the new instance of the filter. This causes a problem when updating a filter bound to a class, as tcf_unbind_filter() is always called on the old instance in the success path, decreasing filter_cnt of the still referenced class and allowing it to be deleted, leading to a use-after-free. Fix this by no longer copying the tcf_result struct from the old filter. Fixes: e35a8ee5993b ("net: sched: fw use RCU") Reported-by: valis Reported-by: Bing-Jhong Billy Jheng Signed-off-by: valis Signed-off-by: Jamal Hadi Salim Reviewed-by: Victor Nogueira Reviewed-by: Pedro Tammela Reviewed-by: M A Ramdhan Link: https://lore.kernel.org/r/20230729123202.72406-3-jhs@mojatatu.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- net/sched/cls_fw.c | 1 - 1 file changed, 1 deletion(-) diff --git a/net/sched/cls_fw.c b/net/sched/cls_fw.c index 41f0898a5a56..08c41f1976c4 100644 --- a/net/sched/cls_fw.c +++ b/net/sched/cls_fw.c @@ -266,7 +266,6 @@ static int fw_change(struct net *net, struct sk_buff *in_skb, return -ENOBUFS; fnew->id = f->id; - fnew->res = f->res; fnew->ifindex = f->ifindex; fnew->tp = f->tp; From aaa71c4e8ad98828ed50dde3eec8e0d545a117f7 Mon Sep 17 00:00:00 2001 From: valis Date: Sat, 29 Jul 2023 08:32:02 -0400 Subject: [PATCH 145/203] net/sched: cls_route: No longer copy tcf_result on update to avoid use-after-free [ Upstream commit b80b829e9e2c1b3f7aae34855e04d8f6ecaf13c8 ] When route4_change() is called on an existing filter, the whole tcf_result struct is always copied into the new instance of the filter. This causes a problem when updating a filter bound to a class, as tcf_unbind_filter() is always called on the old instance in the success path, decreasing filter_cnt of the still referenced class and allowing it to be deleted, leading to a use-after-free. Fix this by no longer copying the tcf_result struct from the old filter. Fixes: 1109c00547fc ("net: sched: RCU cls_route") Reported-by: valis Reported-by: Bing-Jhong Billy Jheng Signed-off-by: valis Signed-off-by: Jamal Hadi Salim Reviewed-by: Victor Nogueira Reviewed-by: Pedro Tammela Reviewed-by: M A Ramdhan Link: https://lore.kernel.org/r/20230729123202.72406-4-jhs@mojatatu.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- net/sched/cls_route.c | 1 - 1 file changed, 1 deletion(-) diff --git a/net/sched/cls_route.c b/net/sched/cls_route.c index b775e681cb56..1ad4b3e60eb3 100644 --- a/net/sched/cls_route.c +++ b/net/sched/cls_route.c @@ -511,7 +511,6 @@ static int route4_change(struct net *net, struct sk_buff *in_skb, if (fold) { f->id = fold->id; f->iif = fold->iif; - f->res = fold->res; f->handle = fold->handle; f->tp = fold->tp; From a0e42f4bd496760669c52c4937ac970479b17fe7 Mon Sep 17 00:00:00 2001 From: Tomas Glozar Date: Fri, 28 Jul 2023 08:44:11 +0200 Subject: [PATCH 146/203] bpf: sockmap: Remove preempt_disable in sock_map_sk_acquire [ Upstream commit 13d2618b48f15966d1adfe1ff6a1985f5eef40ba ] Disabling preemption in sock_map_sk_acquire conflicts with GFP_ATOMIC allocation later in sk_psock_init_link on PREEMPT_RT kernels, since GFP_ATOMIC might sleep on RT (see bpf: Make BPF and PREEMPT_RT co-exist patchset notes for details). This causes calling bpf_map_update_elem on BPF_MAP_TYPE_SOCKMAP maps to BUG (sleeping function called from invalid context) on RT kernels. preempt_disable was introduced together with lock_sk and rcu_read_lock in commit 99ba2b5aba24e ("bpf: sockhash, disallow bpf_tcp_close and update in parallel"), probably to match disabled migration of BPF programs, and is no longer necessary. Remove preempt_disable to fix BUG in sock_map_update_common on RT. Signed-off-by: Tomas Glozar Reviewed-by: Jakub Sitnicki Link: https://lore.kernel.org/all/20200224140131.461979697@linutronix.de/ Fixes: 99ba2b5aba24 ("bpf: sockhash, disallow bpf_tcp_close and update in parallel") Reviewed-by: John Fastabend Link: https://lore.kernel.org/r/20230728064411.305576-1-tglozar@redhat.com Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- net/core/sock_map.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/net/core/sock_map.c b/net/core/sock_map.c index ee5d3f49b0b5..f375ef150149 100644 --- a/net/core/sock_map.c +++ b/net/core/sock_map.c @@ -122,7 +122,6 @@ static void sock_map_sk_acquire(struct sock *sk) __acquires(&sk->sk_lock.slock) { lock_sock(sk); - preempt_disable(); rcu_read_lock(); } @@ -130,7 +129,6 @@ static void sock_map_sk_release(struct sock *sk) __releases(&sk->sk_lock.slock) { rcu_read_unlock(); - preempt_enable(); release_sock(sk); } From 20fe059c1d47bdb2603606cd909e37bf8066dfe2 Mon Sep 17 00:00:00 2001 From: Yang Yingliang Date: Thu, 15 Sep 2022 19:42:14 +0800 Subject: [PATCH 147/203] net: ll_temac: Switch to use dev_err_probe() helper [ Upstream commit 75ae8c284c00dc3584b7c173f6fcf96ee15bd02c ] dev_err() can be replace with dev_err_probe() which will check if error code is -EPROBE_DEFER. Signed-off-by: Yang Yingliang Signed-off-by: David S. Miller Stable-dep-of: ef45e8400f5b ("net: ll_temac: fix error checking of irq_of_parse_and_map()") Signed-off-by: Sasha Levin --- drivers/net/ethernet/xilinx/ll_temac_main.c | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) diff --git a/drivers/net/ethernet/xilinx/ll_temac_main.c b/drivers/net/ethernet/xilinx/ll_temac_main.c index 130f4b707bdc..2f27e93370c6 100644 --- a/drivers/net/ethernet/xilinx/ll_temac_main.c +++ b/drivers/net/ethernet/xilinx/ll_temac_main.c @@ -1550,16 +1550,12 @@ static int temac_probe(struct platform_device *pdev) } /* Error handle returned DMA RX and TX interrupts */ - if (lp->rx_irq < 0) { - if (lp->rx_irq != -EPROBE_DEFER) - dev_err(&pdev->dev, "could not get DMA RX irq\n"); - return lp->rx_irq; - } - if (lp->tx_irq < 0) { - if (lp->tx_irq != -EPROBE_DEFER) - dev_err(&pdev->dev, "could not get DMA TX irq\n"); - return lp->tx_irq; - } + if (lp->rx_irq < 0) + return dev_err_probe(&pdev->dev, lp->rx_irq, + "could not get DMA RX irq\n"); + if (lp->tx_irq < 0) + return dev_err_probe(&pdev->dev, lp->tx_irq, + "could not get DMA TX irq\n"); if (temac_np) { /* Retrieve the MAC address */ From 216092963ce5699a7540035ec8135f5bd4942207 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Mon, 31 Jul 2023 10:42:32 +0300 Subject: [PATCH 148/203] net: ll_temac: fix error checking of irq_of_parse_and_map() [ Upstream commit ef45e8400f5bb66b03cc949f76c80e2a118447de ] Most kernel functions return negative error codes but some irq functions return zero on error. In this code irq_of_parse_and_map(), returns zero and platform_get_irq() returns negative error codes. We need to handle both cases appropriately. Fixes: 8425c41d1ef7 ("net: ll_temac: Extend support to non-device-tree platforms") Signed-off-by: Dan Carpenter Acked-by: Esben Haabendal Reviewed-by: Yang Yingliang Reviewed-by: Harini Katakam Link: https://lore.kernel.org/r/3d0aef75-06e0-45a5-a2a6-2cc4738d4143@moroto.mountain Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/ethernet/xilinx/ll_temac_main.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/xilinx/ll_temac_main.c b/drivers/net/ethernet/xilinx/ll_temac_main.c index 2f27e93370c6..da136abba152 100644 --- a/drivers/net/ethernet/xilinx/ll_temac_main.c +++ b/drivers/net/ethernet/xilinx/ll_temac_main.c @@ -1550,12 +1550,16 @@ static int temac_probe(struct platform_device *pdev) } /* Error handle returned DMA RX and TX interrupts */ - if (lp->rx_irq < 0) - return dev_err_probe(&pdev->dev, lp->rx_irq, + if (lp->rx_irq <= 0) { + rc = lp->rx_irq ?: -EINVAL; + return dev_err_probe(&pdev->dev, rc, "could not get DMA RX irq\n"); - if (lp->tx_irq < 0) - return dev_err_probe(&pdev->dev, lp->tx_irq, + } + if (lp->tx_irq <= 0) { + rc = lp->tx_irq ?: -EINVAL; + return dev_err_probe(&pdev->dev, rc, "could not get DMA TX irq\n"); + } if (temac_np) { /* Retrieve the MAC address */ From 6d4f24736df9039c5174a80ec14e167d117d693f Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Mon, 31 Jul 2023 11:48:32 +0100 Subject: [PATCH 149/203] net: netsec: Ignore 'phy-mode' on SynQuacer in DT mode [ Upstream commit f3bb7759a924713bc54d15f6d0d70733b5935fad ] As documented in acd7aaf51b20 ("netsec: ignore 'phy-mode' device property on ACPI systems") the SocioNext SynQuacer platform ships with firmware defining the PHY mode as RGMII even though the physical configuration of the PHY is for TX and RX delays. Since bbc4d71d63549bc ("net: phy: realtek: fix rtl8211e rx/tx delay config") this has caused misconfiguration of the PHY, rendering the network unusable. This was worked around for ACPI by ignoring the phy-mode property but the system is also used with DT. For DT instead if we're running on a SynQuacer force a working PHY mode, as well as the standard EDK2 firmware with DT there are also some of these systems that use u-boot and might not initialise the PHY if not netbooting. Newer firmware imagaes for at least EDK2 are available from Linaro so print a warning when doing this. Fixes: 533dd11a12f6 ("net: socionext: Add Synquacer NetSec driver") Signed-off-by: Mark Brown Acked-by: Ard Biesheuvel Acked-by: Ilias Apalodimas Reviewed-by: Andrew Lunn Link: https://lore.kernel.org/r/20230731-synquacer-net-v3-1-944be5f06428@kernel.org Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/ethernet/socionext/netsec.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/drivers/net/ethernet/socionext/netsec.c b/drivers/net/ethernet/socionext/netsec.c index b9acee214bb6..bb8631558181 100644 --- a/drivers/net/ethernet/socionext/netsec.c +++ b/drivers/net/ethernet/socionext/netsec.c @@ -1845,6 +1845,17 @@ static int netsec_of_probe(struct platform_device *pdev, return err; } + /* + * SynQuacer is physically configured with TX and RX delays + * but the standard firmware claimed otherwise for a long + * time, ignore it. + */ + if (of_machine_is_compatible("socionext,developer-box") && + priv->phy_interface != PHY_INTERFACE_MODE_RGMII_ID) { + dev_warn(&pdev->dev, "Outdated firmware reports incorrect PHY mode, overriding\n"); + priv->phy_interface = PHY_INTERFACE_MODE_RGMII_ID; + } + priv->phy_np = of_parse_phandle(pdev->dev.of_node, "phy-handle", 0); if (!priv->phy_np) { dev_err(&pdev->dev, "missing required property 'phy-handle'\n"); From 8e309f43d0ca4051d20736c06a6f84bbddd881da Mon Sep 17 00:00:00 2001 From: Lin Ma Date: Tue, 1 Aug 2023 09:32:48 +0800 Subject: [PATCH 150/203] net: dcb: choose correct policy to parse DCB_ATTR_BCN [ Upstream commit 31d49ba033095f6e8158c60f69714a500922e0c3 ] The dcbnl_bcn_setcfg uses erroneous policy to parse tb[DCB_ATTR_BCN], which is introduced in commit 859ee3c43812 ("DCB: Add support for DCB BCN"). Please see the comment in below code static int dcbnl_bcn_setcfg(...) { ... ret = nla_parse_nested_deprecated(..., dcbnl_pfc_up_nest, .. ) // !!! dcbnl_pfc_up_nest for attributes // DCB_PFC_UP_ATTR_0 to DCB_PFC_UP_ATTR_ALL in enum dcbnl_pfc_up_attrs ... for (i = DCB_BCN_ATTR_RP_0; i <= DCB_BCN_ATTR_RP_7; i++) { // !!! DCB_BCN_ATTR_RP_0 to DCB_BCN_ATTR_RP_7 in enum dcbnl_bcn_attrs ... value_byte = nla_get_u8(data[i]); ... } ... for (i = DCB_BCN_ATTR_BCNA_0; i <= DCB_BCN_ATTR_RI; i++) { // !!! DCB_BCN_ATTR_BCNA_0 to DCB_BCN_ATTR_RI in enum dcbnl_bcn_attrs ... value_int = nla_get_u32(data[i]); ... } ... } That is, the nla_parse_nested_deprecated uses dcbnl_pfc_up_nest attributes to parse nlattr defined in dcbnl_pfc_up_attrs. But the following access code fetch each nlattr as dcbnl_bcn_attrs attributes. By looking up the associated nla_policy for dcbnl_bcn_attrs. We can find the beginning part of these two policies are "same". static const struct nla_policy dcbnl_pfc_up_nest[...] = { [DCB_PFC_UP_ATTR_0] = {.type = NLA_U8}, [DCB_PFC_UP_ATTR_1] = {.type = NLA_U8}, [DCB_PFC_UP_ATTR_2] = {.type = NLA_U8}, [DCB_PFC_UP_ATTR_3] = {.type = NLA_U8}, [DCB_PFC_UP_ATTR_4] = {.type = NLA_U8}, [DCB_PFC_UP_ATTR_5] = {.type = NLA_U8}, [DCB_PFC_UP_ATTR_6] = {.type = NLA_U8}, [DCB_PFC_UP_ATTR_7] = {.type = NLA_U8}, [DCB_PFC_UP_ATTR_ALL] = {.type = NLA_FLAG}, }; static const struct nla_policy dcbnl_bcn_nest[...] = { [DCB_BCN_ATTR_RP_0] = {.type = NLA_U8}, [DCB_BCN_ATTR_RP_1] = {.type = NLA_U8}, [DCB_BCN_ATTR_RP_2] = {.type = NLA_U8}, [DCB_BCN_ATTR_RP_3] = {.type = NLA_U8}, [DCB_BCN_ATTR_RP_4] = {.type = NLA_U8}, [DCB_BCN_ATTR_RP_5] = {.type = NLA_U8}, [DCB_BCN_ATTR_RP_6] = {.type = NLA_U8}, [DCB_BCN_ATTR_RP_7] = {.type = NLA_U8}, [DCB_BCN_ATTR_RP_ALL] = {.type = NLA_FLAG}, // from here is somewhat different [DCB_BCN_ATTR_BCNA_0] = {.type = NLA_U32}, ... [DCB_BCN_ATTR_ALL] = {.type = NLA_FLAG}, }; Therefore, the current code is buggy and this nla_parse_nested_deprecated could overflow the dcbnl_pfc_up_nest and use the adjacent nla_policy to parse attributes from DCB_BCN_ATTR_BCNA_0. Hence use the correct policy dcbnl_bcn_nest to parse the nested tb[DCB_ATTR_BCN] TLV. Fixes: 859ee3c43812 ("DCB: Add support for DCB BCN") Signed-off-by: Lin Ma Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/20230801013248.87240-1-linma@zju.edu.cn Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- net/dcb/dcbnl.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/dcb/dcbnl.c b/net/dcb/dcbnl.c index 2535d3dfb92c..c0fb70936ca1 100644 --- a/net/dcb/dcbnl.c +++ b/net/dcb/dcbnl.c @@ -946,7 +946,7 @@ static int dcbnl_bcn_setcfg(struct net_device *netdev, struct nlmsghdr *nlh, return -EOPNOTSUPP; ret = nla_parse_nested_deprecated(data, DCB_BCN_ATTR_MAX, - tb[DCB_ATTR_BCN], dcbnl_pfc_up_nest, + tb[DCB_ATTR_BCN], dcbnl_bcn_nest, NULL); if (ret) return ret; From c33d5a5c5b2c79326190885040f1643793c67b29 Mon Sep 17 00:00:00 2001 From: Alexandra Winter Date: Tue, 1 Aug 2023 10:00:16 +0200 Subject: [PATCH 151/203] s390/qeth: Don't call dev_close/dev_open (DOWN/UP) [ Upstream commit 1cfef80d4c2b2c599189f36f36320b205d9447d9 ] dev_close() and dev_open() are issued to change the interface state to DOWN or UP (dev->flags IFF_UP). When the netdev is set DOWN it loses e.g its Ipv6 addresses and routes. We don't want this in cases of device recovery (triggered by hardware or software) or when the qeth device is set offline. Setting a qeth device offline or online and device recovery actions call netif_device_detach() and/or netif_device_attach(). That will reset or set the LOWER_UP indication i.e. change the dev->state Bit __LINK_STATE_PRESENT. That is enough to e.g. cause bond failovers, and still preserves the interface settings that are handled by the network stack. Don't call dev_open() nor dev_close() from the qeth device driver. Let the network stack handle this. Fixes: d4560150cb47 ("s390/qeth: call dev_close() during recovery") Signed-off-by: Alexandra Winter Reviewed-by: Wenjia Zhang Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/s390/net/qeth_core.h | 1 - drivers/s390/net/qeth_core_main.c | 2 -- drivers/s390/net/qeth_l2_main.c | 9 ++++++--- drivers/s390/net/qeth_l3_main.c | 8 +++++--- 4 files changed, 11 insertions(+), 9 deletions(-) diff --git a/drivers/s390/net/qeth_core.h b/drivers/s390/net/qeth_core.h index bf8404b0e74f..2544edd4d2b5 100644 --- a/drivers/s390/net/qeth_core.h +++ b/drivers/s390/net/qeth_core.h @@ -719,7 +719,6 @@ struct qeth_card_info { u16 chid; u8 ids_valid:1; /* cssid,iid,chid */ u8 dev_addr_is_registered:1; - u8 open_when_online:1; u8 promisc_mode:1; u8 use_v1_blkt:1; u8 is_vm_nic:1; diff --git a/drivers/s390/net/qeth_core_main.c b/drivers/s390/net/qeth_core_main.c index 7b0155b0e99e..73d564906d04 100644 --- a/drivers/s390/net/qeth_core_main.c +++ b/drivers/s390/net/qeth_core_main.c @@ -5351,8 +5351,6 @@ int qeth_set_offline(struct qeth_card *card, const struct qeth_discipline *disc, qeth_clear_ipacmd_list(card); rtnl_lock(); - card->info.open_when_online = card->dev->flags & IFF_UP; - dev_close(card->dev); netif_device_detach(card->dev); netif_carrier_off(card->dev); rtnl_unlock(); diff --git a/drivers/s390/net/qeth_l2_main.c b/drivers/s390/net/qeth_l2_main.c index cfc931f2b7e2..1797addf69b6 100644 --- a/drivers/s390/net/qeth_l2_main.c +++ b/drivers/s390/net/qeth_l2_main.c @@ -2270,9 +2270,12 @@ static int qeth_l2_set_online(struct qeth_card *card, bool carrier_ok) qeth_enable_hw_features(dev); qeth_l2_enable_brport_features(card); - if (card->info.open_when_online) { - card->info.open_when_online = 0; - dev_open(dev, NULL); + if (netif_running(dev)) { + local_bh_disable(); + napi_schedule(&card->napi); + /* kick-start the NAPI softirq: */ + local_bh_enable(); + qeth_l2_set_rx_mode(dev); } rtnl_unlock(); } diff --git a/drivers/s390/net/qeth_l3_main.c b/drivers/s390/net/qeth_l3_main.c index 291861c9b956..d8cdf9024126 100644 --- a/drivers/s390/net/qeth_l3_main.c +++ b/drivers/s390/net/qeth_l3_main.c @@ -2037,9 +2037,11 @@ static int qeth_l3_set_online(struct qeth_card *card, bool carrier_ok) netif_device_attach(dev); qeth_enable_hw_features(dev); - if (card->info.open_when_online) { - card->info.open_when_online = 0; - dev_open(dev, NULL); + if (netif_running(dev)) { + local_bh_disable(); + napi_schedule(&card->napi); + /* kick-start the NAPI softirq: */ + local_bh_enable(); } rtnl_unlock(); } From 1683124129a4263dd5bce2475bab110e95fa0346 Mon Sep 17 00:00:00 2001 From: Yue Haibing Date: Tue, 1 Aug 2023 14:43:18 +0800 Subject: [PATCH 152/203] ip6mr: Fix skb_under_panic in ip6mr_cache_report() [ Upstream commit 30e0191b16e8a58e4620fa3e2839ddc7b9d4281c ] skbuff: skb_under_panic: text:ffffffff88771f69 len:56 put:-4 head:ffff88805f86a800 data:ffff887f5f86a850 tail:0x88 end:0x2c0 dev:pim6reg ------------[ cut here ]------------ kernel BUG at net/core/skbuff.c:192! invalid opcode: 0000 [#1] PREEMPT SMP KASAN CPU: 2 PID: 22968 Comm: kworker/2:11 Not tainted 6.5.0-rc3-00044-g0a8db05b571a #236 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.15.0-1 04/01/2014 Workqueue: ipv6_addrconf addrconf_dad_work RIP: 0010:skb_panic+0x152/0x1d0 Call Trace: skb_push+0xc4/0xe0 ip6mr_cache_report+0xd69/0x19b0 reg_vif_xmit+0x406/0x690 dev_hard_start_xmit+0x17e/0x6e0 __dev_queue_xmit+0x2d6a/0x3d20 vlan_dev_hard_start_xmit+0x3ab/0x5c0 dev_hard_start_xmit+0x17e/0x6e0 __dev_queue_xmit+0x2d6a/0x3d20 neigh_connected_output+0x3ed/0x570 ip6_finish_output2+0x5b5/0x1950 ip6_finish_output+0x693/0x11c0 ip6_output+0x24b/0x880 NF_HOOK.constprop.0+0xfd/0x530 ndisc_send_skb+0x9db/0x1400 ndisc_send_rs+0x12a/0x6c0 addrconf_dad_completed+0x3c9/0xea0 addrconf_dad_work+0x849/0x1420 process_one_work+0xa22/0x16e0 worker_thread+0x679/0x10c0 ret_from_fork+0x28/0x60 ret_from_fork_asm+0x11/0x20 When setup a vlan device on dev pim6reg, DAD ns packet may sent on reg_vif_xmit(). reg_vif_xmit() ip6mr_cache_report() skb_push(skb, -skb_network_offset(pkt));//skb_network_offset(pkt) is 4 And skb_push declared as: void *skb_push(struct sk_buff *skb, unsigned int len); skb->data -= len; //0xffff88805f86a84c - 0xfffffffc = 0xffff887f5f86a850 skb->data is set to 0xffff887f5f86a850, which is invalid mem addr, lead to skb_push() fails. Fixes: 14fb64e1f449 ("[IPV6] MROUTE: Support PIM-SM (SSM).") Signed-off-by: Yue Haibing Reviewed-by: Eric Dumazet Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- net/ipv6/ip6mr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c index 5f0ac47acc74..c758d0cc6146 100644 --- a/net/ipv6/ip6mr.c +++ b/net/ipv6/ip6mr.c @@ -1069,7 +1069,7 @@ static int ip6mr_cache_report(struct mr_table *mrt, struct sk_buff *pkt, And all this only to mangle msg->im6_msgtype and to set msg->im6_mbz to "mbz" :-) */ - skb_push(skb, -skb_network_offset(pkt)); + __skb_pull(skb, skb_network_offset(pkt)); skb_push(skb, sizeof(*msg)); skb_reset_transport_header(skb); From c650597647ecb318d02372277bdfd866c6829f78 Mon Sep 17 00:00:00 2001 From: Benjamin Poirier Date: Mon, 31 Jul 2023 16:02:08 -0400 Subject: [PATCH 153/203] vxlan: Fix nexthop hash size [ Upstream commit 0756384fb1bd38adb2ebcfd1307422f433a1d772 ] The nexthop code expects a 31 bit hash, such as what is returned by fib_multipath_hash() and rt6_multipath_hash(). Passing the 32 bit hash returned by skb_get_hash() can lead to problems related to the fact that 'int hash' is a negative number when the MSB is set. In the case of hash threshold nexthop groups, nexthop_select_path_hthr() will disproportionately select the first nexthop group entry. In the case of resilient nexthop groups, nexthop_select_path_res() may do an out of bounds access in nh_buckets[], for example: hash = -912054133 num_nh_buckets = 2 bucket_index = 65535 which leads to the following panic: BUG: unable to handle page fault for address: ffffc900025910c8 PGD 100000067 P4D 100000067 PUD 10026b067 PMD 0 Oops: 0002 [#1] PREEMPT SMP KASAN NOPTI CPU: 4 PID: 856 Comm: kworker/4:3 Not tainted 6.5.0-rc2+ #34 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.16.2-debian-1.16.2-1 04/01/2014 Workqueue: ipv6_addrconf addrconf_dad_work RIP: 0010:nexthop_select_path+0x197/0xbf0 Code: c1 e4 05 be 08 00 00 00 4c 8b 35 a4 14 7e 01 4e 8d 6c 25 00 4a 8d 7c 25 08 48 01 dd e8 c2 25 15 ff 49 8d 7d 08 e8 39 13 15 ff <4d> 89 75 08 48 89 ef e8 7d 12 15 ff 48 8b 5d 00 e8 14 55 2f 00 85 RSP: 0018:ffff88810c36f260 EFLAGS: 00010246 RAX: 0000000000000000 RBX: 00000000002000c0 RCX: ffffffffaf02dd77 RDX: dffffc0000000000 RSI: 0000000000000008 RDI: ffffc900025910c8 RBP: ffffc900025910c0 R08: 0000000000000001 R09: fffff520004b2219 R10: ffffc900025910cf R11: 31392d2068736168 R12: 00000000002000c0 R13: ffffc900025910c0 R14: 00000000fffef608 R15: ffff88811840e900 FS: 0000000000000000(0000) GS:ffff8881f7000000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: ffffc900025910c8 CR3: 0000000129d00000 CR4: 0000000000750ee0 PKRU: 55555554 Call Trace: ? __die+0x23/0x70 ? page_fault_oops+0x1ee/0x5c0 ? __pfx_is_prefetch.constprop.0+0x10/0x10 ? __pfx_page_fault_oops+0x10/0x10 ? search_bpf_extables+0xfe/0x1c0 ? fixup_exception+0x3b/0x470 ? exc_page_fault+0xf6/0x110 ? asm_exc_page_fault+0x26/0x30 ? nexthop_select_path+0x197/0xbf0 ? nexthop_select_path+0x197/0xbf0 ? lock_is_held_type+0xe7/0x140 vxlan_xmit+0x5b2/0x2340 ? __lock_acquire+0x92b/0x3370 ? __pfx_vxlan_xmit+0x10/0x10 ? __pfx___lock_acquire+0x10/0x10 ? __pfx_register_lock_class+0x10/0x10 ? skb_network_protocol+0xce/0x2d0 ? dev_hard_start_xmit+0xca/0x350 ? __pfx_vxlan_xmit+0x10/0x10 dev_hard_start_xmit+0xca/0x350 __dev_queue_xmit+0x513/0x1e20 ? __pfx___dev_queue_xmit+0x10/0x10 ? __pfx_lock_release+0x10/0x10 ? mark_held_locks+0x44/0x90 ? skb_push+0x4c/0x80 ? eth_header+0x81/0xe0 ? __pfx_eth_header+0x10/0x10 ? neigh_resolve_output+0x215/0x310 ? ip6_finish_output2+0x2ba/0xc90 ip6_finish_output2+0x2ba/0xc90 ? lock_release+0x236/0x3e0 ? ip6_mtu+0xbb/0x240 ? __pfx_ip6_finish_output2+0x10/0x10 ? find_held_lock+0x83/0xa0 ? lock_is_held_type+0xe7/0x140 ip6_finish_output+0x1ee/0x780 ip6_output+0x138/0x460 ? __pfx_ip6_output+0x10/0x10 ? __pfx___lock_acquire+0x10/0x10 ? __pfx_ip6_finish_output+0x10/0x10 NF_HOOK.constprop.0+0xc0/0x420 ? __pfx_NF_HOOK.constprop.0+0x10/0x10 ? ndisc_send_skb+0x2c0/0x960 ? __pfx_lock_release+0x10/0x10 ? __local_bh_enable_ip+0x93/0x110 ? lock_is_held_type+0xe7/0x140 ndisc_send_skb+0x4be/0x960 ? __pfx_ndisc_send_skb+0x10/0x10 ? mark_held_locks+0x65/0x90 ? find_held_lock+0x83/0xa0 ndisc_send_ns+0xb0/0x110 ? __pfx_ndisc_send_ns+0x10/0x10 addrconf_dad_work+0x631/0x8e0 ? lock_acquire+0x180/0x3f0 ? __pfx_addrconf_dad_work+0x10/0x10 ? mark_held_locks+0x24/0x90 process_one_work+0x582/0x9c0 ? __pfx_process_one_work+0x10/0x10 ? __pfx_do_raw_spin_lock+0x10/0x10 ? mark_held_locks+0x24/0x90 worker_thread+0x93/0x630 ? __kthread_parkme+0xdc/0x100 ? __pfx_worker_thread+0x10/0x10 kthread+0x1a5/0x1e0 ? __pfx_kthread+0x10/0x10 ret_from_fork+0x34/0x60 ? __pfx_kthread+0x10/0x10 ret_from_fork_asm+0x1b/0x30 RIP: 0000:0x0 Code: Unable to access opcode bytes at 0xffffffffffffffd6. RSP: 0000:0000000000000000 EFLAGS: 00000000 ORIG_RAX: 0000000000000000 RAX: 0000000000000000 RBX: 0000000000000000 RCX: 0000000000000000 RDX: 0000000000000000 RSI: 0000000000000000 RDI: 0000000000000000 RBP: 0000000000000000 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000000 R12: 0000000000000000 R13: 0000000000000000 R14: 0000000000000000 R15: 0000000000000000 Modules linked in: CR2: ffffc900025910c8 ---[ end trace 0000000000000000 ]--- RIP: 0010:nexthop_select_path+0x197/0xbf0 Code: c1 e4 05 be 08 00 00 00 4c 8b 35 a4 14 7e 01 4e 8d 6c 25 00 4a 8d 7c 25 08 48 01 dd e8 c2 25 15 ff 49 8d 7d 08 e8 39 13 15 ff <4d> 89 75 08 48 89 ef e8 7d 12 15 ff 48 8b 5d 00 e8 14 55 2f 00 85 RSP: 0018:ffff88810c36f260 EFLAGS: 00010246 RAX: 0000000000000000 RBX: 00000000002000c0 RCX: ffffffffaf02dd77 RDX: dffffc0000000000 RSI: 0000000000000008 RDI: ffffc900025910c8 RBP: ffffc900025910c0 R08: 0000000000000001 R09: fffff520004b2219 R10: ffffc900025910cf R11: 31392d2068736168 R12: 00000000002000c0 R13: ffffc900025910c0 R14: 00000000fffef608 R15: ffff88811840e900 FS: 0000000000000000(0000) GS:ffff8881f7000000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: ffffffffffffffd6 CR3: 0000000129d00000 CR4: 0000000000750ee0 PKRU: 55555554 Kernel panic - not syncing: Fatal exception in interrupt Kernel Offset: 0x2ca00000 from 0xffffffff81000000 (relocation range: 0xffffffff80000000-0xffffffffbfffffff) ---[ end Kernel panic - not syncing: Fatal exception in interrupt ]--- Fix this problem by ensuring the MSB of hash is 0 using a right shift - the same approach used in fib_multipath_hash() and rt6_multipath_hash(). Fixes: 1274e1cc4226 ("vxlan: ecmp support for mac fdb entries") Signed-off-by: Benjamin Poirier Reviewed-by: Ido Schimmel Reviewed-by: Simon Horman Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- include/net/vxlan.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/net/vxlan.h b/include/net/vxlan.h index cf1d870f7b9a..e149a0b6f9a3 100644 --- a/include/net/vxlan.h +++ b/include/net/vxlan.h @@ -497,12 +497,12 @@ static inline void vxlan_flag_attr_error(int attrtype, } static inline bool vxlan_fdb_nh_path_select(struct nexthop *nh, - int hash, + u32 hash, struct vxlan_rdst *rdst) { struct fib_nh_common *nhc; - nhc = nexthop_path_fdb_result(nh, hash); + nhc = nexthop_path_fdb_result(nh, hash >> 1); if (unlikely(!nhc)) return false; From 7a634336a03bd80b57e8914c0580543d5152c429 Mon Sep 17 00:00:00 2001 From: Jianbo Liu Date: Mon, 31 Jul 2023 14:58:40 +0300 Subject: [PATCH 154/203] net/mlx5: fs_core: Make find_closest_ft more generic [ Upstream commit 618d28a535a0582617465d14e05f3881736a2962 ] As find_closest_ft_recursive is called to find the closest FT, the first parameter of find_closest_ft can be changed from fs_prio to fs_node. Thus this function is extended to find the closest FT for the nodes of any type, not only prios, but also the sub namespaces. Signed-off-by: Jianbo Liu Signed-off-by: Leon Romanovsky Link: https://lore.kernel.org/r/d3962c2b443ec8dde7a740dc742a1f052d5e256c.1690803944.git.leonro@nvidia.com Signed-off-by: Jakub Kicinski Stable-dep-of: c635ca45a7a2 ("net/mlx5: fs_core: Skip the FTs in the same FS_TYPE_PRIO_CHAINS fs_prio") Signed-off-by: Sasha Levin --- .../net/ethernet/mellanox/mlx5/core/fs_core.c | 29 +++++++++---------- 1 file changed, 14 insertions(+), 15 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c index 4bdcceffe9d3..692fe3e5d076 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c @@ -818,18 +818,17 @@ static struct mlx5_flow_table *find_closest_ft_recursive(struct fs_node *root, return ft; } -/* If reverse is false then return the first flow table in next priority of - * prio in the tree, else return the last flow table in the previous priority - * of prio in the tree. +/* If reverse is false then return the first flow table next to the passed node + * in the tree, else return the last flow table before the node in the tree. */ -static struct mlx5_flow_table *find_closest_ft(struct fs_prio *prio, bool reverse) +static struct mlx5_flow_table *find_closest_ft(struct fs_node *node, bool reverse) { struct mlx5_flow_table *ft = NULL; struct fs_node *curr_node; struct fs_node *parent; - parent = prio->node.parent; - curr_node = &prio->node; + parent = node->parent; + curr_node = node; while (!ft && parent) { ft = find_closest_ft_recursive(parent, &curr_node->list, reverse); curr_node = parent; @@ -839,15 +838,15 @@ static struct mlx5_flow_table *find_closest_ft(struct fs_prio *prio, bool revers } /* Assuming all the tree is locked by mutex chain lock */ -static struct mlx5_flow_table *find_next_chained_ft(struct fs_prio *prio) +static struct mlx5_flow_table *find_next_chained_ft(struct fs_node *node) { - return find_closest_ft(prio, false); + return find_closest_ft(node, false); } /* Assuming all the tree is locked by mutex chain lock */ -static struct mlx5_flow_table *find_prev_chained_ft(struct fs_prio *prio) +static struct mlx5_flow_table *find_prev_chained_ft(struct fs_node *node) { - return find_closest_ft(prio, true); + return find_closest_ft(node, true); } static struct mlx5_flow_table *find_next_fwd_ft(struct mlx5_flow_table *ft, @@ -859,7 +858,7 @@ static struct mlx5_flow_table *find_next_fwd_ft(struct mlx5_flow_table *ft, next_ns = flow_act->action & MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_NS; fs_get_obj(prio, next_ns ? ft->ns->node.parent : ft->node.parent); - return find_next_chained_ft(prio); + return find_next_chained_ft(&prio->node); } static int connect_fts_in_prio(struct mlx5_core_dev *dev, @@ -890,7 +889,7 @@ static int connect_prev_fts(struct mlx5_core_dev *dev, { struct mlx5_flow_table *prev_ft; - prev_ft = find_prev_chained_ft(prio); + prev_ft = find_prev_chained_ft(&prio->node); if (prev_ft) { struct fs_prio *prev_prio; @@ -1036,7 +1035,7 @@ static int connect_flow_table(struct mlx5_core_dev *dev, struct mlx5_flow_table if (err) return err; - next_ft = first_ft ? first_ft : find_next_chained_ft(prio); + next_ft = first_ft ? first_ft : find_next_chained_ft(&prio->node); err = connect_fwd_rules(dev, ft, next_ft); if (err) return err; @@ -1114,7 +1113,7 @@ static struct mlx5_flow_table *__mlx5_create_flow_table(struct mlx5_flow_namespa tree_init_node(&ft->node, del_hw_flow_table, del_sw_flow_table); log_table_sz = ft->max_fte ? ilog2(ft->max_fte) : 0; next_ft = unmanaged ? ft_attr->next_ft : - find_next_chained_ft(fs_prio); + find_next_chained_ft(&fs_prio->node); ft->def_miss_action = ns->def_miss_action; ft->ns = ns; err = root->cmds->create_flow_table(root, ft, log_table_sz, next_ft); @@ -2079,7 +2078,7 @@ static struct mlx5_flow_table *find_next_ft(struct mlx5_flow_table *ft) if (!list_is_last(&ft->node.list, &prio->node.children)) return list_next_entry(ft, node.list); - return find_next_chained_ft(prio); + return find_next_chained_ft(&prio->node); } static int update_root_ft_destroy(struct mlx5_flow_table *ft) From 80e9488ece3db108bd1eed2f7d4a9372c1b46ed8 Mon Sep 17 00:00:00 2001 From: Jianbo Liu Date: Mon, 31 Jul 2023 14:58:41 +0300 Subject: [PATCH 155/203] net/mlx5: fs_core: Skip the FTs in the same FS_TYPE_PRIO_CHAINS fs_prio [ Upstream commit c635ca45a7a2023904a1f851e99319af7b87017d ] In the cited commit, new type of FS_TYPE_PRIO_CHAINS fs_prio was added to support multiple parallel namespaces for multi-chains. And we skip all the flow tables under the fs_node of this type unconditionally, when searching for the next or previous flow table to connect for a new table. As this search function is also used for find new root table when the old one is being deleted, it will skip the entire FS_TYPE_PRIO_CHAINS fs_node next to the old root. However, new root table should be chosen from it if there is any table in it. Fix it by skipping only the flow tables in the same FS_TYPE_PRIO_CHAINS fs_node when finding the closest FT for a fs_node. Besides, complete the connecting from FTs of previous priority of prio because there should be multiple prevs after this fs_prio type is introduced. And also the next FT should be chosen from the first flow table next to the prio in the same FS_TYPE_PRIO_CHAINS fs_prio, if this prio is the first child. Fixes: 328edb499f99 ("net/mlx5: Split FDB fast path prio to multiple namespaces") Signed-off-by: Jianbo Liu Reviewed-by: Paul Blakey Signed-off-by: Leon Romanovsky Link: https://lore.kernel.org/r/7a95754df479e722038996c97c97b062b372591f.1690803944.git.leonro@nvidia.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- .../net/ethernet/mellanox/mlx5/core/fs_core.c | 80 +++++++++++++++++-- 1 file changed, 72 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c index 692fe3e5d076..4e8e3797aed0 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c @@ -802,7 +802,7 @@ static struct mlx5_flow_table *find_closest_ft_recursive(struct fs_node *root, struct fs_node *iter = list_entry(start, struct fs_node, list); struct mlx5_flow_table *ft = NULL; - if (!root || root->type == FS_TYPE_PRIO_CHAINS) + if (!root) return NULL; list_for_each_advance_continue(iter, &root->children, reverse) { @@ -818,19 +818,42 @@ static struct mlx5_flow_table *find_closest_ft_recursive(struct fs_node *root, return ft; } +static struct fs_node *find_prio_chains_parent(struct fs_node *parent, + struct fs_node **child) +{ + struct fs_node *node = NULL; + + while (parent && parent->type != FS_TYPE_PRIO_CHAINS) { + node = parent; + parent = parent->parent; + } + + if (child) + *child = node; + + return parent; +} + /* If reverse is false then return the first flow table next to the passed node * in the tree, else return the last flow table before the node in the tree. + * If skip is true, skip the flow tables in the same prio_chains prio. */ -static struct mlx5_flow_table *find_closest_ft(struct fs_node *node, bool reverse) +static struct mlx5_flow_table *find_closest_ft(struct fs_node *node, bool reverse, + bool skip) { + struct fs_node *prio_chains_parent = NULL; struct mlx5_flow_table *ft = NULL; struct fs_node *curr_node; struct fs_node *parent; + if (skip) + prio_chains_parent = find_prio_chains_parent(node, NULL); parent = node->parent; curr_node = node; while (!ft && parent) { - ft = find_closest_ft_recursive(parent, &curr_node->list, reverse); + if (parent != prio_chains_parent) + ft = find_closest_ft_recursive(parent, &curr_node->list, + reverse); curr_node = parent; parent = curr_node->parent; } @@ -840,13 +863,13 @@ static struct mlx5_flow_table *find_closest_ft(struct fs_node *node, bool revers /* Assuming all the tree is locked by mutex chain lock */ static struct mlx5_flow_table *find_next_chained_ft(struct fs_node *node) { - return find_closest_ft(node, false); + return find_closest_ft(node, false, true); } /* Assuming all the tree is locked by mutex chain lock */ static struct mlx5_flow_table *find_prev_chained_ft(struct fs_node *node) { - return find_closest_ft(node, true); + return find_closest_ft(node, true, true); } static struct mlx5_flow_table *find_next_fwd_ft(struct mlx5_flow_table *ft, @@ -882,21 +905,55 @@ static int connect_fts_in_prio(struct mlx5_core_dev *dev, return 0; } +static struct mlx5_flow_table *find_closet_ft_prio_chains(struct fs_node *node, + struct fs_node *parent, + struct fs_node **child, + bool reverse) +{ + struct mlx5_flow_table *ft; + + ft = find_closest_ft(node, reverse, false); + + if (ft && parent == find_prio_chains_parent(&ft->node, child)) + return ft; + + return NULL; +} + /* Connect flow tables from previous priority of prio to ft */ static int connect_prev_fts(struct mlx5_core_dev *dev, struct mlx5_flow_table *ft, struct fs_prio *prio) { + struct fs_node *prio_parent, *parent = NULL, *child, *node; struct mlx5_flow_table *prev_ft; + int err = 0; + + prio_parent = find_prio_chains_parent(&prio->node, &child); + + /* return directly if not under the first sub ns of prio_chains prio */ + if (prio_parent && !list_is_first(&child->list, &prio_parent->children)) + return 0; prev_ft = find_prev_chained_ft(&prio->node); - if (prev_ft) { + while (prev_ft) { struct fs_prio *prev_prio; fs_get_obj(prev_prio, prev_ft->node.parent); - return connect_fts_in_prio(dev, prev_prio, ft); + err = connect_fts_in_prio(dev, prev_prio, ft); + if (err) + break; + + if (!parent) { + parent = find_prio_chains_parent(&prev_prio->node, &child); + if (!parent) + break; + } + + node = child; + prev_ft = find_closet_ft_prio_chains(node, parent, &child, true); } - return 0; + return err; } static int update_root_ft_create(struct mlx5_flow_table *ft, struct fs_prio @@ -2072,12 +2129,19 @@ EXPORT_SYMBOL(mlx5_del_flow_rules); /* Assuming prio->node.children(flow tables) is sorted by level */ static struct mlx5_flow_table *find_next_ft(struct mlx5_flow_table *ft) { + struct fs_node *prio_parent, *child; struct fs_prio *prio; fs_get_obj(prio, ft->node.parent); if (!list_is_last(&ft->node.list, &prio->node.children)) return list_next_entry(ft, node.list); + + prio_parent = find_prio_chains_parent(&prio->node, &child); + + if (prio_parent && list_is_first(&child->list, &prio_parent->children)) + return find_closest_ft(&prio->node, false, false); + return find_next_chained_ft(&prio->node); } From 420aad608f7291cb768a32fe7c86884ffb2c1c70 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 2 Aug 2023 13:14:55 +0000 Subject: [PATCH 156/203] tcp_metrics: fix addr_same() helper [ Upstream commit e6638094d7af6c7b9dcca05ad009e79e31b4f670 ] Because v4 and v6 families use separate inetpeer trees (respectively net->ipv4.peers and net->ipv6.peers), inetpeer_addr_cmp(a, b) assumes a & b share the same family. tcp_metrics use a common hash table, where entries can have different families. We must therefore make sure to not call inetpeer_addr_cmp() if the families do not match. Fixes: d39d14ffa24c ("net: Add helper function to compare inetpeer addresses") Signed-off-by: Eric Dumazet Reviewed-by: David Ahern Reviewed-by: Kuniyuki Iwashima Link: https://lore.kernel.org/r/20230802131500.1478140-2-edumazet@google.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- net/ipv4/tcp_metrics.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv4/tcp_metrics.c b/net/ipv4/tcp_metrics.c index f3ca6eea2ca3..3fe7eb3bc84c 100644 --- a/net/ipv4/tcp_metrics.c +++ b/net/ipv4/tcp_metrics.c @@ -78,7 +78,7 @@ static void tcp_metric_set(struct tcp_metrics_block *tm, static bool addr_same(const struct inetpeer_addr *a, const struct inetpeer_addr *b) { - return inetpeer_addr_cmp(a, b) == 0; + return (a->family == b->family) && !inetpeer_addr_cmp(a, b); } struct tcpm_hash_bucket { From 289091eef30fbf863c8872e19b5624ccb977bfaf Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 2 Aug 2023 13:14:56 +0000 Subject: [PATCH 157/203] tcp_metrics: annotate data-races around tm->tcpm_stamp [ Upstream commit 949ad62a5d5311d36fce2e14fe5fed3f936da51c ] tm->tcpm_stamp can be read or written locklessly. Add needed READ_ONCE()/WRITE_ONCE() to document this. Also constify tcpm_check_stamp() dst argument. Fixes: 51c5d0c4b169 ("tcp: Maintain dynamic metrics in local cache.") Signed-off-by: Eric Dumazet Reviewed-by: David Ahern Reviewed-by: Kuniyuki Iwashima Link: https://lore.kernel.org/r/20230802131500.1478140-3-edumazet@google.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- net/ipv4/tcp_metrics.c | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/net/ipv4/tcp_metrics.c b/net/ipv4/tcp_metrics.c index 3fe7eb3bc84c..20d2addddd16 100644 --- a/net/ipv4/tcp_metrics.c +++ b/net/ipv4/tcp_metrics.c @@ -97,7 +97,7 @@ static void tcpm_suck_dst(struct tcp_metrics_block *tm, u32 msval; u32 val; - tm->tcpm_stamp = jiffies; + WRITE_ONCE(tm->tcpm_stamp, jiffies); val = 0; if (dst_metric_locked(dst, RTAX_RTT)) @@ -131,9 +131,15 @@ static void tcpm_suck_dst(struct tcp_metrics_block *tm, #define TCP_METRICS_TIMEOUT (60 * 60 * HZ) -static void tcpm_check_stamp(struct tcp_metrics_block *tm, struct dst_entry *dst) +static void tcpm_check_stamp(struct tcp_metrics_block *tm, + const struct dst_entry *dst) { - if (tm && unlikely(time_after(jiffies, tm->tcpm_stamp + TCP_METRICS_TIMEOUT))) + unsigned long limit; + + if (!tm) + return; + limit = READ_ONCE(tm->tcpm_stamp) + TCP_METRICS_TIMEOUT; + if (unlikely(time_after(jiffies, limit))) tcpm_suck_dst(tm, dst, false); } @@ -174,7 +180,8 @@ static struct tcp_metrics_block *tcpm_new(struct dst_entry *dst, oldest = deref_locked(tcp_metrics_hash[hash].chain); for (tm = deref_locked(oldest->tcpm_next); tm; tm = deref_locked(tm->tcpm_next)) { - if (time_before(tm->tcpm_stamp, oldest->tcpm_stamp)) + if (time_before(READ_ONCE(tm->tcpm_stamp), + READ_ONCE(oldest->tcpm_stamp))) oldest = tm; } tm = oldest; @@ -434,7 +441,7 @@ void tcp_update_metrics(struct sock *sk) tp->reordering); } } - tm->tcpm_stamp = jiffies; + WRITE_ONCE(tm->tcpm_stamp, jiffies); out_unlock: rcu_read_unlock(); } @@ -647,7 +654,7 @@ static int tcp_metrics_fill_info(struct sk_buff *msg, } if (nla_put_msecs(msg, TCP_METRICS_ATTR_AGE, - jiffies - tm->tcpm_stamp, + jiffies - READ_ONCE(tm->tcpm_stamp), TCP_METRICS_ATTR_PAD) < 0) goto nla_put_failure; From 57bcbbb208a0e2630e4fe524d765a26f9c429d11 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 2 Aug 2023 13:14:57 +0000 Subject: [PATCH 158/203] tcp_metrics: annotate data-races around tm->tcpm_lock [ Upstream commit 285ce119a3c6c4502585936650143e54c8692788 ] tm->tcpm_lock can be read or written locklessly. Add needed READ_ONCE()/WRITE_ONCE() to document this. Fixes: 51c5d0c4b169 ("tcp: Maintain dynamic metrics in local cache.") Signed-off-by: Eric Dumazet Reviewed-by: David Ahern Reviewed-by: Kuniyuki Iwashima Link: https://lore.kernel.org/r/20230802131500.1478140-4-edumazet@google.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- net/ipv4/tcp_metrics.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/net/ipv4/tcp_metrics.c b/net/ipv4/tcp_metrics.c index 20d2addddd16..6617cba19982 100644 --- a/net/ipv4/tcp_metrics.c +++ b/net/ipv4/tcp_metrics.c @@ -59,7 +59,8 @@ static inline struct net *tm_net(struct tcp_metrics_block *tm) static bool tcp_metric_locked(struct tcp_metrics_block *tm, enum tcp_metric_index idx) { - return tm->tcpm_lock & (1 << idx); + /* Paired with WRITE_ONCE() in tcpm_suck_dst() */ + return READ_ONCE(tm->tcpm_lock) & (1 << idx); } static u32 tcp_metric_get(struct tcp_metrics_block *tm, @@ -110,7 +111,8 @@ static void tcpm_suck_dst(struct tcp_metrics_block *tm, val |= 1 << TCP_METRIC_CWND; if (dst_metric_locked(dst, RTAX_REORDERING)) val |= 1 << TCP_METRIC_REORDERING; - tm->tcpm_lock = val; + /* Paired with READ_ONCE() in tcp_metric_locked() */ + WRITE_ONCE(tm->tcpm_lock, val); msval = dst_metric_raw(dst, RTAX_RTT); tm->tcpm_vals[TCP_METRIC_RTT] = msval * USEC_PER_MSEC; From 079afb1815043f3d048663abd1c8154d804fe524 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 2 Aug 2023 13:14:58 +0000 Subject: [PATCH 159/203] tcp_metrics: annotate data-races around tm->tcpm_vals[] [ Upstream commit 8c4d04f6b443869d25e59822f7cec88d647028a9 ] tm->tcpm_vals[] values can be read or written locklessly. Add needed READ_ONCE()/WRITE_ONCE() to document this, and force use of tcp_metric_get() and tcp_metric_set() Fixes: 51c5d0c4b169 ("tcp: Maintain dynamic metrics in local cache.") Signed-off-by: Eric Dumazet Reviewed-by: David Ahern Reviewed-by: Kuniyuki Iwashima Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- net/ipv4/tcp_metrics.c | 23 ++++++++++++++--------- 1 file changed, 14 insertions(+), 9 deletions(-) diff --git a/net/ipv4/tcp_metrics.c b/net/ipv4/tcp_metrics.c index 6617cba19982..6ea833474feb 100644 --- a/net/ipv4/tcp_metrics.c +++ b/net/ipv4/tcp_metrics.c @@ -63,17 +63,19 @@ static bool tcp_metric_locked(struct tcp_metrics_block *tm, return READ_ONCE(tm->tcpm_lock) & (1 << idx); } -static u32 tcp_metric_get(struct tcp_metrics_block *tm, +static u32 tcp_metric_get(const struct tcp_metrics_block *tm, enum tcp_metric_index idx) { - return tm->tcpm_vals[idx]; + /* Paired with WRITE_ONCE() in tcp_metric_set() */ + return READ_ONCE(tm->tcpm_vals[idx]); } static void tcp_metric_set(struct tcp_metrics_block *tm, enum tcp_metric_index idx, u32 val) { - tm->tcpm_vals[idx] = val; + /* Paired with READ_ONCE() in tcp_metric_get() */ + WRITE_ONCE(tm->tcpm_vals[idx], val); } static bool addr_same(const struct inetpeer_addr *a, @@ -115,13 +117,16 @@ static void tcpm_suck_dst(struct tcp_metrics_block *tm, WRITE_ONCE(tm->tcpm_lock, val); msval = dst_metric_raw(dst, RTAX_RTT); - tm->tcpm_vals[TCP_METRIC_RTT] = msval * USEC_PER_MSEC; + tcp_metric_set(tm, TCP_METRIC_RTT, msval * USEC_PER_MSEC); msval = dst_metric_raw(dst, RTAX_RTTVAR); - tm->tcpm_vals[TCP_METRIC_RTTVAR] = msval * USEC_PER_MSEC; - tm->tcpm_vals[TCP_METRIC_SSTHRESH] = dst_metric_raw(dst, RTAX_SSTHRESH); - tm->tcpm_vals[TCP_METRIC_CWND] = dst_metric_raw(dst, RTAX_CWND); - tm->tcpm_vals[TCP_METRIC_REORDERING] = dst_metric_raw(dst, RTAX_REORDERING); + tcp_metric_set(tm, TCP_METRIC_RTTVAR, msval * USEC_PER_MSEC); + tcp_metric_set(tm, TCP_METRIC_SSTHRESH, + dst_metric_raw(dst, RTAX_SSTHRESH)); + tcp_metric_set(tm, TCP_METRIC_CWND, + dst_metric_raw(dst, RTAX_CWND)); + tcp_metric_set(tm, TCP_METRIC_REORDERING, + dst_metric_raw(dst, RTAX_REORDERING)); if (fastopen_clear) { tm->tcpm_fastopen.mss = 0; tm->tcpm_fastopen.syn_loss = 0; @@ -667,7 +672,7 @@ static int tcp_metrics_fill_info(struct sk_buff *msg, if (!nest) goto nla_put_failure; for (i = 0; i < TCP_METRIC_MAX_KERNEL + 1; i++) { - u32 val = tm->tcpm_vals[i]; + u32 val = tcp_metric_get(tm, i); if (!val) continue; From 05d1dc88c40f106df9cbc7ab465797fc18a06ec1 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 2 Aug 2023 13:14:59 +0000 Subject: [PATCH 160/203] tcp_metrics: annotate data-races around tm->tcpm_net [ Upstream commit d5d986ce42c71a7562d32c4e21e026b0f87befec ] tm->tcpm_net can be read or written locklessly. Instead of changing write_pnet() and read_pnet() and potentially hurt performance, add the needed READ_ONCE()/WRITE_ONCE() in tm_net() and tcpm_new(). Fixes: 849e8a0ca8d5 ("tcp_metrics: Add a field tcpm_net and verify it matches on lookup") Signed-off-by: Eric Dumazet Reviewed-by: David Ahern Reviewed-by: Kuniyuki Iwashima Link: https://lore.kernel.org/r/20230802131500.1478140-6-edumazet@google.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- net/ipv4/tcp_metrics.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/net/ipv4/tcp_metrics.c b/net/ipv4/tcp_metrics.c index 6ea833474feb..b01cc3540748 100644 --- a/net/ipv4/tcp_metrics.c +++ b/net/ipv4/tcp_metrics.c @@ -40,7 +40,7 @@ struct tcp_fastopen_metrics { struct tcp_metrics_block { struct tcp_metrics_block __rcu *tcpm_next; - possible_net_t tcpm_net; + struct net *tcpm_net; struct inetpeer_addr tcpm_saddr; struct inetpeer_addr tcpm_daddr; unsigned long tcpm_stamp; @@ -51,9 +51,10 @@ struct tcp_metrics_block { struct rcu_head rcu_head; }; -static inline struct net *tm_net(struct tcp_metrics_block *tm) +static inline struct net *tm_net(const struct tcp_metrics_block *tm) { - return read_pnet(&tm->tcpm_net); + /* Paired with the WRITE_ONCE() in tcpm_new() */ + return READ_ONCE(tm->tcpm_net); } static bool tcp_metric_locked(struct tcp_metrics_block *tm, @@ -197,7 +198,9 @@ static struct tcp_metrics_block *tcpm_new(struct dst_entry *dst, if (!tm) goto out_unlock; } - write_pnet(&tm->tcpm_net, net); + /* Paired with the READ_ONCE() in tm_net() */ + WRITE_ONCE(tm->tcpm_net, net); + tm->tcpm_saddr = *saddr; tm->tcpm_daddr = *daddr; From 6d9f5f3d8920db72e82b5e216d1daa933d9d40d0 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 2 Aug 2023 13:15:00 +0000 Subject: [PATCH 161/203] tcp_metrics: fix data-race in tcpm_suck_dst() vs fastopen [ Upstream commit ddf251fa2bc1d3699eec0bae6ed0bc373b8fda79 ] Whenever tcpm_new() reclaims an old entry, tcpm_suck_dst() would overwrite data that could be read from tcp_fastopen_cache_get() or tcp_metrics_fill_info(). We need to acquire fastopen_seqlock to maintain consistency. For newly allocated objects, tcpm_new() can switch to kzalloc() to avoid an extra fastopen_seqlock acquisition. Fixes: 1fe4c481ba63 ("net-tcp: Fast Open client - cookie cache") Signed-off-by: Eric Dumazet Cc: Yuchung Cheng Reviewed-by: Kuniyuki Iwashima Link: https://lore.kernel.org/r/20230802131500.1478140-7-edumazet@google.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- net/ipv4/tcp_metrics.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/net/ipv4/tcp_metrics.c b/net/ipv4/tcp_metrics.c index b01cc3540748..a707fa1dbcaf 100644 --- a/net/ipv4/tcp_metrics.c +++ b/net/ipv4/tcp_metrics.c @@ -93,6 +93,7 @@ static struct tcpm_hash_bucket *tcp_metrics_hash __read_mostly; static unsigned int tcp_metrics_hash_log __read_mostly; static DEFINE_SPINLOCK(tcp_metrics_lock); +static DEFINE_SEQLOCK(fastopen_seqlock); static void tcpm_suck_dst(struct tcp_metrics_block *tm, const struct dst_entry *dst, @@ -129,11 +130,13 @@ static void tcpm_suck_dst(struct tcp_metrics_block *tm, tcp_metric_set(tm, TCP_METRIC_REORDERING, dst_metric_raw(dst, RTAX_REORDERING)); if (fastopen_clear) { + write_seqlock(&fastopen_seqlock); tm->tcpm_fastopen.mss = 0; tm->tcpm_fastopen.syn_loss = 0; tm->tcpm_fastopen.try_exp = 0; tm->tcpm_fastopen.cookie.exp = false; tm->tcpm_fastopen.cookie.len = 0; + write_sequnlock(&fastopen_seqlock); } } @@ -194,7 +197,7 @@ static struct tcp_metrics_block *tcpm_new(struct dst_entry *dst, } tm = oldest; } else { - tm = kmalloc(sizeof(*tm), GFP_ATOMIC); + tm = kzalloc(sizeof(*tm), GFP_ATOMIC); if (!tm) goto out_unlock; } @@ -204,7 +207,7 @@ static struct tcp_metrics_block *tcpm_new(struct dst_entry *dst, tm->tcpm_saddr = *saddr; tm->tcpm_daddr = *daddr; - tcpm_suck_dst(tm, dst, true); + tcpm_suck_dst(tm, dst, reclaim); if (likely(!reclaim)) { tm->tcpm_next = tcp_metrics_hash[hash].chain; @@ -556,8 +559,6 @@ bool tcp_peer_is_proven(struct request_sock *req, struct dst_entry *dst) return ret; } -static DEFINE_SEQLOCK(fastopen_seqlock); - void tcp_fastopen_cache_get(struct sock *sk, u16 *mss, struct tcp_fastopen_cookie *cookie) { From 181274d2f3de26cbf5fd66cc6987672d9048cb30 Mon Sep 17 00:00:00 2001 From: Steffen Maier Date: Mon, 24 Jul 2023 16:51:56 +0200 Subject: [PATCH 162/203] scsi: zfcp: Defer fc_rport blocking until after ADISC response commit e65851989001c0c9ba9177564b13b38201c0854c upstream. Storage devices are free to send RSCNs, e.g. for internal state changes. If this happens on all connected paths, zfcp risks temporarily losing all paths at the same time. This has strong requirements on multipath configuration such as "no_path_retry queue". Avoid such situations by deferring fc_rport blocking until after the ADISC response, when any actual state change of the remote port became clear. The already existing port recovery triggers explicitly block the fc_rport. The triggers are: on ADISC reject or timeout (typical cable pull case), and on ADISC indicating that the remote port has changed its WWPN or the port is meanwhile no longer open. As a side effect, this also removes a confusing direct function call to another work item function zfcp_scsi_rport_work() instead of scheduling that other work item. It was probably done that way to have the rport block side effect immediate and synchronous to the caller. Fixes: a2fa0aede07c ("[SCSI] zfcp: Block FC transport rports early on errors") Cc: stable@vger.kernel.org #v2.6.30+ Reviewed-by: Benjamin Block Reviewed-by: Fedor Loshakov Signed-off-by: Steffen Maier Link: https://lore.kernel.org/r/20230724145156.3920244-1-maier@linux.ibm.com Signed-off-by: Martin K. Petersen Signed-off-by: Greg Kroah-Hartman --- drivers/s390/scsi/zfcp_fc.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/drivers/s390/scsi/zfcp_fc.c b/drivers/s390/scsi/zfcp_fc.c index b61acbb09be3..d323f9985c48 100644 --- a/drivers/s390/scsi/zfcp_fc.c +++ b/drivers/s390/scsi/zfcp_fc.c @@ -534,8 +534,7 @@ static void zfcp_fc_adisc_handler(void *data) /* re-init to undo drop from zfcp_fc_adisc() */ port->d_id = ntoh24(adisc_resp->adisc_port_id); - /* port is good, unblock rport without going through erp */ - zfcp_scsi_schedule_rport_register(port); + /* port is still good, nothing to do */ out: atomic_andnot(ZFCP_STATUS_PORT_LINK_TEST, &port->status); put_device(&port->dev); @@ -595,9 +594,6 @@ void zfcp_fc_link_test_work(struct work_struct *work) int retval; set_worker_desc("zadisc%16llx", port->wwpn); /* < WORKER_DESC_LEN=24 */ - get_device(&port->dev); - port->rport_task = RPORT_DEL; - zfcp_scsi_rport_work(&port->rport_work); /* only issue one test command at one time per port */ if (atomic_read(&port->status) & ZFCP_STATUS_PORT_LINK_TEST) From 513bfdde8a3b58cdfac37083c38f7b0100f72ae6 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Tue, 1 Aug 2023 19:14:24 +0200 Subject: [PATCH 163/203] libceph: fix potential hang in ceph_osdc_notify() commit e6e2843230799230fc5deb8279728a7218b0d63c upstream. If the cluster becomes unavailable, ceph_osdc_notify() may hang even with osd_request_timeout option set because linger_notify_finish_wait() waits for MWatchNotify NOTIFY_COMPLETE message with no associated OSD request in flight -- it's completely asynchronous. Introduce an additional timeout, derived from the specified notify timeout. While at it, switch both waits to killable which is more correct. Cc: stable@vger.kernel.org Signed-off-by: Ilya Dryomov Reviewed-by: Dongsheng Yang Reviewed-by: Xiubo Li Signed-off-by: Greg Kroah-Hartman --- net/ceph/osd_client.c | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c index 1e9fab79e245..d594cd501861 100644 --- a/net/ceph/osd_client.c +++ b/net/ceph/osd_client.c @@ -3330,17 +3330,24 @@ static int linger_reg_commit_wait(struct ceph_osd_linger_request *lreq) int ret; dout("%s lreq %p linger_id %llu\n", __func__, lreq, lreq->linger_id); - ret = wait_for_completion_interruptible(&lreq->reg_commit_wait); + ret = wait_for_completion_killable(&lreq->reg_commit_wait); return ret ?: lreq->reg_commit_error; } -static int linger_notify_finish_wait(struct ceph_osd_linger_request *lreq) +static int linger_notify_finish_wait(struct ceph_osd_linger_request *lreq, + unsigned long timeout) { - int ret; + long left; dout("%s lreq %p linger_id %llu\n", __func__, lreq, lreq->linger_id); - ret = wait_for_completion_interruptible(&lreq->notify_finish_wait); - return ret ?: lreq->notify_finish_error; + left = wait_for_completion_killable_timeout(&lreq->notify_finish_wait, + ceph_timeout_jiffies(timeout)); + if (left <= 0) + left = left ?: -ETIMEDOUT; + else + left = lreq->notify_finish_error; /* completed */ + + return left; } /* @@ -4888,7 +4895,8 @@ int ceph_osdc_notify(struct ceph_osd_client *osdc, linger_submit(lreq); ret = linger_reg_commit_wait(lreq); if (!ret) - ret = linger_notify_finish_wait(lreq); + ret = linger_notify_finish_wait(lreq, + msecs_to_jiffies(2 * timeout * MSEC_PER_SEC)); else dout("lreq %p failed to initiate notify %d\n", lreq, ret); From b5b39ff6917f37a5a735c20a65c5106427270571 Mon Sep 17 00:00:00 2001 From: Ross Maynard Date: Mon, 31 Jul 2023 15:42:04 +1000 Subject: [PATCH 164/203] USB: zaurus: Add ID for A-300/B-500/C-700 commit b99225b4fe297d07400f9e2332ecd7347b224f8d upstream. The SL-A300, B500/5600, and C700 devices no longer auto-load because of "usbnet: Remove over-broad module alias from zaurus." This patch adds IDs for those 3 devices. Link: https://bugzilla.kernel.org/show_bug.cgi?id=217632 Fixes: 16adf5d07987 ("usbnet: Remove over-broad module alias from zaurus.") Signed-off-by: Ross Maynard Cc: stable@vger.kernel.org Acked-by: Greg Kroah-Hartman Reviewed-by: Andrew Lunn Link: https://lore.kernel.org/r/69b5423b-2013-9fc9-9569-58e707d9bafb@bigpond.com Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- drivers/net/usb/cdc_ether.c | 21 +++++++++++++++++++++ drivers/net/usb/zaurus.c | 21 +++++++++++++++++++++ 2 files changed, 42 insertions(+) diff --git a/drivers/net/usb/cdc_ether.c b/drivers/net/usb/cdc_ether.c index 935cd296887f..9f3446d6dde7 100644 --- a/drivers/net/usb/cdc_ether.c +++ b/drivers/net/usb/cdc_ether.c @@ -604,9 +604,23 @@ static const struct usb_device_id products[] = { .match_flags = USB_DEVICE_ID_MATCH_INT_INFO | USB_DEVICE_ID_MATCH_DEVICE, .idVendor = 0x04DD, + .idProduct = 0x8005, /* A-300 */ + ZAURUS_FAKE_INTERFACE, + .driver_info = 0, +}, { + .match_flags = USB_DEVICE_ID_MATCH_INT_INFO + | USB_DEVICE_ID_MATCH_DEVICE, + .idVendor = 0x04DD, .idProduct = 0x8006, /* B-500/SL-5600 */ ZAURUS_MASTER_INTERFACE, .driver_info = 0, +}, { + .match_flags = USB_DEVICE_ID_MATCH_INT_INFO + | USB_DEVICE_ID_MATCH_DEVICE, + .idVendor = 0x04DD, + .idProduct = 0x8006, /* B-500/SL-5600 */ + ZAURUS_FAKE_INTERFACE, + .driver_info = 0, }, { .match_flags = USB_DEVICE_ID_MATCH_INT_INFO | USB_DEVICE_ID_MATCH_DEVICE, @@ -614,6 +628,13 @@ static const struct usb_device_id products[] = { .idProduct = 0x8007, /* C-700 */ ZAURUS_MASTER_INTERFACE, .driver_info = 0, +}, { + .match_flags = USB_DEVICE_ID_MATCH_INT_INFO + | USB_DEVICE_ID_MATCH_DEVICE, + .idVendor = 0x04DD, + .idProduct = 0x8007, /* C-700 */ + ZAURUS_FAKE_INTERFACE, + .driver_info = 0, }, { .match_flags = USB_DEVICE_ID_MATCH_INT_INFO | USB_DEVICE_ID_MATCH_DEVICE, diff --git a/drivers/net/usb/zaurus.c b/drivers/net/usb/zaurus.c index 7984f2157d22..df3617c4c44e 100644 --- a/drivers/net/usb/zaurus.c +++ b/drivers/net/usb/zaurus.c @@ -289,9 +289,23 @@ static const struct usb_device_id products [] = { .match_flags = USB_DEVICE_ID_MATCH_INT_INFO | USB_DEVICE_ID_MATCH_DEVICE, .idVendor = 0x04DD, + .idProduct = 0x8005, /* A-300 */ + ZAURUS_FAKE_INTERFACE, + .driver_info = (unsigned long)&bogus_mdlm_info, +}, { + .match_flags = USB_DEVICE_ID_MATCH_INT_INFO + | USB_DEVICE_ID_MATCH_DEVICE, + .idVendor = 0x04DD, .idProduct = 0x8006, /* B-500/SL-5600 */ ZAURUS_MASTER_INTERFACE, .driver_info = ZAURUS_PXA_INFO, +}, { + .match_flags = USB_DEVICE_ID_MATCH_INT_INFO + | USB_DEVICE_ID_MATCH_DEVICE, + .idVendor = 0x04DD, + .idProduct = 0x8006, /* B-500/SL-5600 */ + ZAURUS_FAKE_INTERFACE, + .driver_info = (unsigned long)&bogus_mdlm_info, }, { .match_flags = USB_DEVICE_ID_MATCH_INT_INFO | USB_DEVICE_ID_MATCH_DEVICE, @@ -299,6 +313,13 @@ static const struct usb_device_id products [] = { .idProduct = 0x8007, /* C-700 */ ZAURUS_MASTER_INTERFACE, .driver_info = ZAURUS_PXA_INFO, +}, { + .match_flags = USB_DEVICE_ID_MATCH_INT_INFO + | USB_DEVICE_ID_MATCH_DEVICE, + .idVendor = 0x04DD, + .idProduct = 0x8007, /* C-700 */ + ZAURUS_FAKE_INTERFACE, + .driver_info = (unsigned long)&bogus_mdlm_info, }, { .match_flags = USB_DEVICE_ID_MATCH_INT_INFO | USB_DEVICE_ID_MATCH_DEVICE, From a4b2673e3c040bcd7cc8749e27607098aef9a2c2 Mon Sep 17 00:00:00 2001 From: Xiubo Li Date: Tue, 25 Jul 2023 12:03:59 +0800 Subject: [PATCH 165/203] ceph: defer stopping mdsc delayed_work commit e7e607bd00481745550389a29ecabe33e13d67cf upstream. Flushing the dirty buffer may take a long time if the cluster is overloaded or if there is network issue. So we should ping the MDSs periodically to keep alive, else the MDS will blocklist the kclient. Cc: stable@vger.kernel.org Link: https://tracker.ceph.com/issues/61843 Signed-off-by: Xiubo Li Reviewed-by: Milind Changire Signed-off-by: Ilya Dryomov Signed-off-by: Greg Kroah-Hartman --- fs/ceph/mds_client.c | 4 ++-- fs/ceph/mds_client.h | 5 +++++ fs/ceph/super.c | 10 ++++++++++ 3 files changed, 17 insertions(+), 2 deletions(-) diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index 87a9e9096421..df1ecb8bfebf 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -4511,7 +4511,7 @@ static void delayed_work(struct work_struct *work) dout("mdsc delayed_work\n"); - if (mdsc->stopping) + if (mdsc->stopping >= CEPH_MDSC_STOPPING_FLUSHED) return; mutex_lock(&mdsc->mutex); @@ -4701,7 +4701,7 @@ void send_flush_mdlog(struct ceph_mds_session *s) void ceph_mdsc_pre_umount(struct ceph_mds_client *mdsc) { dout("pre_umount\n"); - mdsc->stopping = 1; + mdsc->stopping = CEPH_MDSC_STOPPING_BEGIN; ceph_mdsc_iterate_sessions(mdsc, send_flush_mdlog, true); ceph_mdsc_iterate_sessions(mdsc, lock_unlock_session, false); diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h index a92e42e8a9f8..1c958510f00f 100644 --- a/fs/ceph/mds_client.h +++ b/fs/ceph/mds_client.h @@ -372,6 +372,11 @@ struct cap_wait { int want; }; +enum { + CEPH_MDSC_STOPPING_BEGIN = 1, + CEPH_MDSC_STOPPING_FLUSHED = 2, +}; + /* * mds client state */ diff --git a/fs/ceph/super.c b/fs/ceph/super.c index 08c8d34c9809..f2aff97348bc 100644 --- a/fs/ceph/super.c +++ b/fs/ceph/super.c @@ -1222,6 +1222,16 @@ static void ceph_kill_sb(struct super_block *s) ceph_mdsc_pre_umount(fsc->mdsc); flush_fs_workqueues(fsc); + /* + * Though the kill_anon_super() will finally trigger the + * sync_filesystem() anyway, we still need to do it here + * and then bump the stage of shutdown to stop the work + * queue as earlier as possible. + */ + sync_filesystem(s); + + fsc->mdsc->stopping = CEPH_MDSC_STOPPING_FLUSHED; + kill_anon_super(s); fsc->client->extra_mon_dispatch = NULL; From 79d16a84ea41272dfcb0c00f9798ddd0edd8098d Mon Sep 17 00:00:00 2001 From: gaoming Date: Wed, 5 Jul 2023 15:15:15 +0800 Subject: [PATCH 166/203] exfat: use kvmalloc_array/kvfree instead of kmalloc_array/kfree commit daf60d6cca26e50d65dac374db92e58de745ad26 upstream. The call stack shown below is a scenario in the Linux 4.19 kernel. Allocating memory failed where exfat fs use kmalloc_array due to system memory fragmentation, while the u-disk was inserted without recognition. Devices such as u-disk using the exfat file system are pluggable and may be insert into the system at any time. However, long-term running systems cannot guarantee the continuity of physical memory. Therefore, it's necessary to address this issue. Binder:2632_6: page allocation failure: order:4, mode:0x6040c0(GFP_KERNEL|__GFP_COMP), nodemask=(null) Call trace: [242178.097582] dump_backtrace+0x0/0x4 [242178.097589] dump_stack+0xf4/0x134 [242178.097598] warn_alloc+0xd8/0x144 [242178.097603] __alloc_pages_nodemask+0x1364/0x1384 [242178.097608] kmalloc_order+0x2c/0x510 [242178.097612] kmalloc_order_trace+0x40/0x16c [242178.097618] __kmalloc+0x360/0x408 [242178.097624] load_alloc_bitmap+0x160/0x284 [242178.097628] exfat_fill_super+0xa3c/0xe7c [242178.097635] mount_bdev+0x2e8/0x3a0 [242178.097638] exfat_fs_mount+0x40/0x50 [242178.097643] mount_fs+0x138/0x2e8 [242178.097649] vfs_kern_mount+0x90/0x270 [242178.097655] do_mount+0x798/0x173c [242178.097659] ksys_mount+0x114/0x1ac [242178.097665] __arm64_sys_mount+0x24/0x34 [242178.097671] el0_svc_common+0xb8/0x1b8 [242178.097676] el0_svc_handler+0x74/0x90 [242178.097681] el0_svc+0x8/0x340 By analyzing the exfat code,we found that continuous physical memory is not required here,so kvmalloc_array is used can solve this problem. Cc: stable@vger.kernel.org Signed-off-by: gaoming Signed-off-by: Namjae Jeon Signed-off-by: Greg Kroah-Hartman --- fs/exfat/balloc.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/fs/exfat/balloc.c b/fs/exfat/balloc.c index 258b6bb5762a..ab091440e8b9 100644 --- a/fs/exfat/balloc.c +++ b/fs/exfat/balloc.c @@ -69,7 +69,7 @@ static int exfat_allocate_bitmap(struct super_block *sb, } sbi->map_sectors = ((need_map_size - 1) >> (sb->s_blocksize_bits)) + 1; - sbi->vol_amap = kmalloc_array(sbi->map_sectors, + sbi->vol_amap = kvmalloc_array(sbi->map_sectors, sizeof(struct buffer_head *), GFP_KERNEL); if (!sbi->vol_amap) return -ENOMEM; @@ -84,7 +84,7 @@ static int exfat_allocate_bitmap(struct super_block *sb, while (j < i) brelse(sbi->vol_amap[j++]); - kfree(sbi->vol_amap); + kvfree(sbi->vol_amap); sbi->vol_amap = NULL; return -EIO; } @@ -138,7 +138,7 @@ void exfat_free_bitmap(struct exfat_sb_info *sbi) for (i = 0; i < sbi->map_sectors; i++) __brelse(sbi->vol_amap[i]); - kfree(sbi->vol_amap); + kvfree(sbi->vol_amap); } int exfat_set_bitmap(struct inode *inode, unsigned int clu) From ddf7cc7029738bd1c202fd9cd53f7b05e0b1a773 Mon Sep 17 00:00:00 2001 From: Sungjong Seo Date: Fri, 14 Jul 2023 17:43:54 +0900 Subject: [PATCH 167/203] exfat: release s_lock before calling dir_emit() commit ff84772fd45d486e4fc78c82e2f70ce5333543e6 upstream. There is a potential deadlock reported by syzbot as below: ====================================================== WARNING: possible circular locking dependency detected 6.4.0-next-20230707-syzkaller #0 Not tainted ------------------------------------------------------ syz-executor330/5073 is trying to acquire lock: ffff8880218527a0 (&mm->mmap_lock){++++}-{3:3}, at: mmap_read_lock_killable include/linux/mmap_lock.h:151 [inline] ffff8880218527a0 (&mm->mmap_lock){++++}-{3:3}, at: get_mmap_lock_carefully mm/memory.c:5293 [inline] ffff8880218527a0 (&mm->mmap_lock){++++}-{3:3}, at: lock_mm_and_find_vma+0x369/0x510 mm/memory.c:5344 but task is already holding lock: ffff888019f760e0 (&sbi->s_lock){+.+.}-{3:3}, at: exfat_iterate+0x117/0xb50 fs/exfat/dir.c:232 which lock already depends on the new lock. Chain exists of: &mm->mmap_lock --> mapping.invalidate_lock#3 --> &sbi->s_lock Possible unsafe locking scenario: CPU0 CPU1 ---- ---- lock(&sbi->s_lock); lock(mapping.invalidate_lock#3); lock(&sbi->s_lock); rlock(&mm->mmap_lock); Let's try to avoid above potential deadlock condition by moving dir_emit*() out of sbi->s_lock coverage. Fixes: ca06197382bd ("exfat: add directory operations") Cc: stable@vger.kernel.org #v5.7+ Reported-by: syzbot+1741a5d9b79989c10bdc@syzkaller.appspotmail.com Link: https://lore.kernel.org/lkml/00000000000078ee7e060066270b@google.com/T/#u Tested-by: syzbot+1741a5d9b79989c10bdc@syzkaller.appspotmail.com Signed-off-by: Sungjong Seo Signed-off-by: Namjae Jeon Signed-off-by: Greg Kroah-Hartman --- fs/exfat/dir.c | 27 ++++++++++++--------------- 1 file changed, 12 insertions(+), 15 deletions(-) diff --git a/fs/exfat/dir.c b/fs/exfat/dir.c index 6caded58cda5..093f79ae3c67 100644 --- a/fs/exfat/dir.c +++ b/fs/exfat/dir.c @@ -210,7 +210,10 @@ static void exfat_free_namebuf(struct exfat_dentry_namebuf *nb) exfat_init_namebuf(nb); } -/* skip iterating emit_dots when dir is empty */ +/* + * Before calling dir_emit*(), sbi->s_lock should be released + * because page fault can occur in dir_emit*(). + */ #define ITER_POS_FILLED_DOTS (2) static int exfat_iterate(struct file *filp, struct dir_context *ctx) { @@ -225,11 +228,10 @@ static int exfat_iterate(struct file *filp, struct dir_context *ctx) int err = 0, fake_offset = 0; exfat_init_namebuf(nb); - mutex_lock(&EXFAT_SB(sb)->s_lock); cpos = ctx->pos; if (!dir_emit_dots(filp, ctx)) - goto unlock; + goto out; if (ctx->pos == ITER_POS_FILLED_DOTS) { cpos = 0; @@ -241,16 +243,18 @@ static int exfat_iterate(struct file *filp, struct dir_context *ctx) /* name buffer should be allocated before use */ err = exfat_alloc_namebuf(nb); if (err) - goto unlock; + goto out; get_new: + mutex_lock(&EXFAT_SB(sb)->s_lock); + if (ei->flags == ALLOC_NO_FAT_CHAIN && cpos >= i_size_read(inode)) goto end_of_dir; err = exfat_readdir(inode, &cpos, &de); if (err) { /* - * At least we tried to read a sector. Move cpos to next sector - * position (should be aligned). + * At least we tried to read a sector. + * Move cpos to next sector position (should be aligned). */ if (err == -EIO) { cpos += 1 << (sb->s_blocksize_bits); @@ -273,16 +277,10 @@ get_new: inum = iunique(sb, EXFAT_ROOT_INO); } - /* - * Before calling dir_emit(), sb_lock should be released. - * Because page fault can occur in dir_emit() when the size - * of buffer given from user is larger than one page size. - */ mutex_unlock(&EXFAT_SB(sb)->s_lock); if (!dir_emit(ctx, nb->lfn, strlen(nb->lfn), inum, (de.attr & ATTR_SUBDIR) ? DT_DIR : DT_REG)) - goto out_unlocked; - mutex_lock(&EXFAT_SB(sb)->s_lock); + goto out; ctx->pos = cpos; goto get_new; @@ -290,9 +288,8 @@ end_of_dir: if (!cpos && fake_offset) cpos = ITER_POS_FILLED_DOTS; ctx->pos = cpos; -unlock: mutex_unlock(&EXFAT_SB(sb)->s_lock); -out_unlocked: +out: /* * To improve performance, free namebuf after unlock sb_lock. * If namebuf is not allocated, this function do nothing From 82dc2bffeabc4fb40e911ca708d2f46efb3c95c0 Mon Sep 17 00:00:00 2001 From: Olivier Maignial Date: Fri, 23 Jun 2023 17:33:36 +0200 Subject: [PATCH 168/203] mtd: spinand: toshiba: Fix ecc_get_status commit 8544cda94dae6be3f1359539079c68bb731428b1 upstream. Reading ECC status is failing. tx58cxgxsxraix_ecc_get_status() is using on-stack buffer for SPINAND_GET_FEATURE_OP() output. It is not suitable for DMA needs of spi-mem. Fix this by using the spi-mem operations dedicated buffer spinand->scratchbuf. See spinand->scratchbuf: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/include/linux/mtd/spinand.h?h=v6.3#n418 spi_mem_check_op(): https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/drivers/spi/spi-mem.c?h=v6.3#n199 Fixes: 10949af1681d ("mtd: spinand: Add initial support for Toshiba TC58CVG2S0H") Cc: stable@vger.kernel.org Signed-off-by: Olivier Maignial Signed-off-by: Miquel Raynal Link: https://lore.kernel.org/linux-mtd/DB4P250MB1032553D05FBE36DEE0D311EFE23A@DB4P250MB1032.EURP250.PROD.OUTLOOK.COM Signed-off-by: Greg Kroah-Hartman --- drivers/mtd/nand/spi/toshiba.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/mtd/nand/spi/toshiba.c b/drivers/mtd/nand/spi/toshiba.c index 6fe7bd2a94d2..daa49c060368 100644 --- a/drivers/mtd/nand/spi/toshiba.c +++ b/drivers/mtd/nand/spi/toshiba.c @@ -73,7 +73,7 @@ static int tx58cxgxsxraix_ecc_get_status(struct spinand_device *spinand, { struct nand_device *nand = spinand_to_nand(spinand); u8 mbf = 0; - struct spi_mem_op op = SPINAND_GET_FEATURE_OP(0x30, &mbf); + struct spi_mem_op op = SPINAND_GET_FEATURE_OP(0x30, spinand->scratchbuf); switch (status & STATUS_ECC_MASK) { case STATUS_ECC_NO_BITFLIPS: @@ -92,7 +92,7 @@ static int tx58cxgxsxraix_ecc_get_status(struct spinand_device *spinand, if (spi_mem_exec_op(spinand->spimem, &op)) return nanddev_get_ecc_requirements(nand)->strength; - mbf >>= 4; + mbf = *(spinand->scratchbuf) >> 4; if (WARN_ON(mbf > nanddev_get_ecc_requirements(nand)->strength || !mbf)) return nanddev_get_ecc_requirements(nand)->strength; From 8625802a1ba82062d0c0321a7aa6cc89f8ac4af2 Mon Sep 17 00:00:00 2001 From: Arseniy Krasnov Date: Wed, 5 Jul 2023 09:52:10 +0300 Subject: [PATCH 169/203] mtd: rawnand: meson: fix OOB available bytes for ECC commit 7e6b04f9238eab0f684fafd158c1f32ea65b9eaa upstream. It is incorrect to calculate number of OOB bytes for ECC engine using some "already known" ECC step size (1024 bytes here). Number of such bytes for ECC engine must be whole OOB except 2 bytes for bad block marker, while proper ECC step size and strength will be selected by ECC logic. Fixes: 8fae856c5350 ("mtd: rawnand: meson: add support for Amlogic NAND flash controller") Cc: Signed-off-by: Arseniy Krasnov Signed-off-by: Miquel Raynal Link: https://lore.kernel.org/linux-mtd/20230705065211.293500-1-AVKrasnov@sberdevices.ru Signed-off-by: Greg Kroah-Hartman --- drivers/mtd/nand/raw/meson_nand.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/mtd/nand/raw/meson_nand.c b/drivers/mtd/nand/raw/meson_nand.c index ee3976b7e197..6bb0fca4a91d 100644 --- a/drivers/mtd/nand/raw/meson_nand.c +++ b/drivers/mtd/nand/raw/meson_nand.c @@ -1180,7 +1180,6 @@ static int meson_nand_attach_chip(struct nand_chip *nand) struct meson_nfc *nfc = nand_get_controller_data(nand); struct meson_nfc_nand_chip *meson_chip = to_meson_nand(nand); struct mtd_info *mtd = nand_to_mtd(nand); - int nsectors = mtd->writesize / 1024; int ret; if (!mtd->name) { @@ -1198,7 +1197,7 @@ static int meson_nand_attach_chip(struct nand_chip *nand) nand->options |= NAND_NO_SUBPAGE_WRITE; ret = nand_ecc_choose_conf(nand, nfc->data->ecc_caps, - mtd->oobsize - 2 * nsectors); + mtd->oobsize - 2); if (ret) { dev_err(nfc->dev, "failed to ECC init\n"); return -EINVAL; From 8d104bfd41a9b3dc524f6d05600003b3db09fdef Mon Sep 17 00:00:00 2001 From: Dinh Nguyen Date: Tue, 11 Jul 2023 15:44:30 -0500 Subject: [PATCH 170/203] arm64: dts: stratix10: fix incorrect I2C property for SCL signal commit db66795f61354c373ecdadbdae1ed253a96c47cb upstream. The correct dts property for the SCL falling time is "i2c-scl-falling-time-ns". Fixes: c8da1d15b8a4 ("arm64: dts: stratix10: i2c clock running out of spec") Cc: stable@vger.kernel.org Signed-off-by: Dinh Nguyen Signed-off-by: Greg Kroah-Hartman --- arch/arm64/boot/dts/altera/socfpga_stratix10_socdk.dts | 2 +- arch/arm64/boot/dts/altera/socfpga_stratix10_socdk_nand.dts | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/boot/dts/altera/socfpga_stratix10_socdk.dts b/arch/arm64/boot/dts/altera/socfpga_stratix10_socdk.dts index 46e558ab7729..f0e8af12442a 100644 --- a/arch/arm64/boot/dts/altera/socfpga_stratix10_socdk.dts +++ b/arch/arm64/boot/dts/altera/socfpga_stratix10_socdk.dts @@ -129,7 +129,7 @@ status = "okay"; clock-frequency = <100000>; i2c-sda-falling-time-ns = <890>; /* hcnt */ - i2c-sdl-falling-time-ns = <890>; /* lcnt */ + i2c-scl-falling-time-ns = <890>; /* lcnt */ adc@14 { compatible = "lltc,ltc2497"; diff --git a/arch/arm64/boot/dts/altera/socfpga_stratix10_socdk_nand.dts b/arch/arm64/boot/dts/altera/socfpga_stratix10_socdk_nand.dts index f9b4a39683cf..92ac3c86ebd5 100644 --- a/arch/arm64/boot/dts/altera/socfpga_stratix10_socdk_nand.dts +++ b/arch/arm64/boot/dts/altera/socfpga_stratix10_socdk_nand.dts @@ -162,7 +162,7 @@ status = "okay"; clock-frequency = <100000>; i2c-sda-falling-time-ns = <890>; /* hcnt */ - i2c-sdl-falling-time-ns = <890>; /* lcnt */ + i2c-scl-falling-time-ns = <890>; /* lcnt */ adc@14 { compatible = "lltc,ltc2497"; From 5ea23f1cb67e4468db7ff651627892c9217fec24 Mon Sep 17 00:00:00 2001 From: Laszlo Ersek Date: Mon, 31 Jul 2023 18:42:36 +0200 Subject: [PATCH 171/203] net: tun_chr_open(): set sk_uid from current_fsuid() commit 9bc3047374d5bec163e83e743709e23753376f0c upstream. Commit a096ccca6e50 initializes the "sk_uid" field in the protocol socket (struct sock) from the "/dev/net/tun" device node's owner UID. Per original commit 86741ec25462 ("net: core: Add a UID field to struct sock.", 2016-11-04), that's wrong: the idea is to cache the UID of the userspace process that creates the socket. Commit 86741ec25462 mentions socket() and accept(); with "tun", the action that creates the socket is open("/dev/net/tun"). Therefore the device node's owner UID is irrelevant. In most cases, "/dev/net/tun" will be owned by root, so in practice, commit a096ccca6e50 has no observable effect: - before, "sk_uid" would be zero, due to undefined behavior (CVE-2023-1076), - after, "sk_uid" would be zero, due to "/dev/net/tun" being owned by root. What matters is the (fs)UID of the process performing the open(), so cache that in "sk_uid". Cc: Eric Dumazet Cc: Lorenzo Colitti Cc: Paolo Abeni Cc: Pietro Borrello Cc: netdev@vger.kernel.org Cc: stable@vger.kernel.org Fixes: a096ccca6e50 ("tun: tun_chr_open(): correctly initialize socket uid") Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2173435 Signed-off-by: Laszlo Ersek Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/tun.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/tun.c b/drivers/net/tun.c index f1d46aea8a2b..0e70877c932c 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -3457,7 +3457,7 @@ static int tun_chr_open(struct inode *inode, struct file * file) tfile->socket.file = file; tfile->socket.ops = &tun_socket_ops; - sock_init_data_uid(&tfile->socket, &tfile->sk, inode->i_uid); + sock_init_data_uid(&tfile->socket, &tfile->sk, current_fsuid()); tfile->sk.sk_write_space = tun_sock_write_space; tfile->sk.sk_sndbuf = INT_MAX; From 33a339e717be2c88b7ad11375165168d5b40e38e Mon Sep 17 00:00:00 2001 From: Laszlo Ersek Date: Mon, 31 Jul 2023 18:42:37 +0200 Subject: [PATCH 172/203] net: tap_open(): set sk_uid from current_fsuid() commit 5c9241f3ceab3257abe2923a59950db0dc8bb737 upstream. Commit 66b2c338adce initializes the "sk_uid" field in the protocol socket (struct sock) from the "/dev/tapX" device node's owner UID. Per original commit 86741ec25462 ("net: core: Add a UID field to struct sock.", 2016-11-04), that's wrong: the idea is to cache the UID of the userspace process that creates the socket. Commit 86741ec25462 mentions socket() and accept(); with "tap", the action that creates the socket is open("/dev/tapX"). Therefore the device node's owner UID is irrelevant. In most cases, "/dev/tapX" will be owned by root, so in practice, commit 66b2c338adce has no observable effect: - before, "sk_uid" would be zero, due to undefined behavior (CVE-2023-1076), - after, "sk_uid" would be zero, due to "/dev/tapX" being owned by root. What matters is the (fs)UID of the process performing the open(), so cache that in "sk_uid". Cc: Eric Dumazet Cc: Lorenzo Colitti Cc: Paolo Abeni Cc: Pietro Borrello Cc: netdev@vger.kernel.org Cc: stable@vger.kernel.org Fixes: 66b2c338adce ("tap: tap_open(): correctly initialize socket uid") Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2173435 Signed-off-by: Laszlo Ersek Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/tap.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/tap.c b/drivers/net/tap.c index f9b3eb2d8d8b..41ee56015a45 100644 --- a/drivers/net/tap.c +++ b/drivers/net/tap.c @@ -523,7 +523,7 @@ static int tap_open(struct inode *inode, struct file *file) q->sock.state = SS_CONNECTED; q->sock.file = file; q->sock.ops = &tap_socket_ops; - sock_init_data_uid(&q->sock, &q->sk, inode->i_uid); + sock_init_data_uid(&q->sock, &q->sk, current_fsuid()); q->sk.sk_write_space = tap_sock_write_space; q->sk.sk_destruct = tap_sock_destruct; q->flags = IFF_VNET_HDR | IFF_NO_PI | IFF_TAP; From 3048cb0dc0cc9dc74ed93690dffef00733bcad5b Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Tue, 25 Jul 2023 10:42:06 +0200 Subject: [PATCH 173/203] bpf: Disable preemption in bpf_event_output commit d62cc390c2e99ae267ffe4b8d7e2e08b6c758c32 upstream. We received report [1] of kernel crash, which is caused by using nesting protection without disabled preemption. The bpf_event_output can be called by programs executed by bpf_prog_run_array_cg function that disabled migration but keeps preemption enabled. This can cause task to be preempted by another one inside the nesting protection and lead eventually to two tasks using same perf_sample_data buffer and cause crashes like: BUG: kernel NULL pointer dereference, address: 0000000000000001 #PF: supervisor instruction fetch in kernel mode #PF: error_code(0x0010) - not-present page ... ? perf_output_sample+0x12a/0x9a0 ? finish_task_switch.isra.0+0x81/0x280 ? perf_event_output+0x66/0xa0 ? bpf_event_output+0x13a/0x190 ? bpf_event_output_data+0x22/0x40 ? bpf_prog_dfc84bbde731b257_cil_sock4_connect+0x40a/0xacb ? xa_load+0x87/0xe0 ? __cgroup_bpf_run_filter_sock_addr+0xc1/0x1a0 ? release_sock+0x3e/0x90 ? sk_setsockopt+0x1a1/0x12f0 ? udp_pre_connect+0x36/0x50 ? inet_dgram_connect+0x93/0xa0 ? __sys_connect+0xb4/0xe0 ? udp_setsockopt+0x27/0x40 ? __pfx_udp_push_pending_frames+0x10/0x10 ? __sys_setsockopt+0xdf/0x1a0 ? __x64_sys_connect+0xf/0x20 ? do_syscall_64+0x3a/0x90 ? entry_SYSCALL_64_after_hwframe+0x72/0xdc Fixing this by disabling preemption in bpf_event_output. [1] https://github.com/cilium/cilium/issues/26756 Cc: stable@vger.kernel.org Reported-by: Oleg "livelace" Popov Closes: https://github.com/cilium/cilium/issues/26756 Fixes: 2a916f2f546c ("bpf: Use migrate_disable/enable in array macros and cgroup/lirc code.") Acked-by: Hou Tao Signed-off-by: Jiri Olsa Link: https://lore.kernel.org/r/20230725084206.580930-3-jolsa@kernel.org Signed-off-by: Alexei Starovoitov Signed-off-by: Greg Kroah-Hartman --- kernel/trace/bpf_trace.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index 9e90d1e7af2c..1de9a6bf8471 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -970,7 +970,6 @@ static DEFINE_PER_CPU(struct bpf_trace_sample_data, bpf_misc_sds); u64 bpf_event_output(struct bpf_map *map, u64 flags, void *meta, u64 meta_size, void *ctx, u64 ctx_size, bpf_ctx_copy_t ctx_copy) { - int nest_level = this_cpu_inc_return(bpf_event_output_nest_level); struct perf_raw_frag frag = { .copy = ctx_copy, .size = ctx_size, @@ -987,8 +986,12 @@ u64 bpf_event_output(struct bpf_map *map, u64 flags, void *meta, u64 meta_size, }; struct perf_sample_data *sd; struct pt_regs *regs; + int nest_level; u64 ret; + preempt_disable(); + nest_level = this_cpu_inc_return(bpf_event_output_nest_level); + if (WARN_ON_ONCE(nest_level > ARRAY_SIZE(bpf_misc_sds.sds))) { ret = -EBUSY; goto out; @@ -1003,6 +1006,7 @@ u64 bpf_event_output(struct bpf_map *map, u64 flags, void *meta, u64 meta_size, ret = __bpf_perf_event_output(regs, map, flags, sd); out: this_cpu_dec(bpf_event_output_nest_level); + preempt_enable(); return ret; } From c9c78b91c7834de6e4ae49392c3177608bcd47ab Mon Sep 17 00:00:00 2001 From: Aleksa Sarai Date: Sun, 6 Aug 2023 02:11:58 +1000 Subject: [PATCH 174/203] open: make RESOLVE_CACHED correctly test for O_TMPFILE commit a0fc452a5d7fed986205539259df1d60546f536c upstream. O_TMPFILE is actually __O_TMPFILE|O_DIRECTORY. This means that the old fast-path check for RESOLVE_CACHED would reject all users passing O_DIRECTORY with -EAGAIN, when in fact the intended test was to check for __O_TMPFILE. Cc: stable@vger.kernel.org # v5.12+ Fixes: 99668f618062 ("fs: expose LOOKUP_CACHED through openat2() RESOLVE_CACHED") Signed-off-by: Aleksa Sarai Message-Id: <20230806-resolve_cached-o_tmpfile-v1-1-7ba16308465e@cyphar.com> Signed-off-by: Christian Brauner Signed-off-by: Greg Kroah-Hartman --- fs/open.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/open.c b/fs/open.c index 1ca4b236fdbe..83f62cf1432c 100644 --- a/fs/open.c +++ b/fs/open.c @@ -1101,7 +1101,7 @@ inline int build_open_flags(const struct open_how *how, struct open_flags *op) lookup_flags |= LOOKUP_IN_ROOT; if (how->resolve & RESOLVE_CACHED) { /* Don't bother even trying for create/truncate/tmpfile open */ - if (flags & (O_TRUNC | O_CREAT | O_TMPFILE)) + if (flags & (O_TRUNC | O_CREAT | __O_TMPFILE)) return -EAGAIN; lookup_flags |= LOOKUP_CACHED; } From d39971d902d067b4dc366981b75b17c8c57ed5d1 Mon Sep 17 00:00:00 2001 From: Guchun Chen Date: Mon, 24 Jul 2023 10:42:29 +0800 Subject: [PATCH 175/203] drm/ttm: check null pointer before accessing when swapping MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 2dedcf414bb01b8d966eb445db1d181d92304fb2 upstream. Add a check to avoid null pointer dereference as below: [ 90.002283] general protection fault, probably for non-canonical address 0xdffffc0000000000: 0000 [#1] PREEMPT SMP KASAN NOPTI [ 90.002292] KASAN: null-ptr-deref in range [0x0000000000000000-0x0000000000000007] [ 90.002346] ? exc_general_protection+0x159/0x240 [ 90.002352] ? asm_exc_general_protection+0x26/0x30 [ 90.002357] ? ttm_bo_evict_swapout_allowable+0x322/0x5e0 [ttm] [ 90.002365] ? ttm_bo_evict_swapout_allowable+0x42e/0x5e0 [ttm] [ 90.002373] ttm_bo_swapout+0x134/0x7f0 [ttm] [ 90.002383] ? __pfx_ttm_bo_swapout+0x10/0x10 [ttm] [ 90.002391] ? lock_acquire+0x44d/0x4f0 [ 90.002398] ? ttm_device_swapout+0xa5/0x260 [ttm] [ 90.002412] ? lock_acquired+0x355/0xa00 [ 90.002416] ? do_raw_spin_trylock+0xb6/0x190 [ 90.002421] ? __pfx_lock_acquired+0x10/0x10 [ 90.002426] ? ttm_global_swapout+0x25/0x210 [ttm] [ 90.002442] ttm_device_swapout+0x198/0x260 [ttm] [ 90.002456] ? __pfx_ttm_device_swapout+0x10/0x10 [ttm] [ 90.002472] ttm_global_swapout+0x75/0x210 [ttm] [ 90.002486] ttm_tt_populate+0x187/0x3f0 [ttm] [ 90.002501] ttm_bo_handle_move_mem+0x437/0x590 [ttm] [ 90.002517] ttm_bo_validate+0x275/0x430 [ttm] [ 90.002530] ? __pfx_ttm_bo_validate+0x10/0x10 [ttm] [ 90.002544] ? kasan_save_stack+0x33/0x60 [ 90.002550] ? kasan_set_track+0x25/0x30 [ 90.002554] ? __kasan_kmalloc+0x8f/0xa0 [ 90.002558] ? amdgpu_gtt_mgr_new+0x81/0x420 [amdgpu] [ 90.003023] ? ttm_resource_alloc+0xf6/0x220 [ttm] [ 90.003038] amdgpu_bo_pin_restricted+0x2dd/0x8b0 [amdgpu] [ 90.003210] ? __x64_sys_ioctl+0x131/0x1a0 [ 90.003210] ? do_syscall_64+0x60/0x90 Fixes: a2848d08742c ("drm/ttm: never consider pinned BOs for eviction&swap") Tested-by: Mikhail Gavrilov Signed-off-by: Guchun Chen Reviewed-by: Alex Deucher Reviewed-by: Christian König Cc: stable@vger.kernel.org Link: https://patchwork.freedesktop.org/patch/msgid/20230724024229.1118444-1-guchun.chen@amd.com Signed-off-by: Christian König Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/ttm/ttm_bo.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c index dca4dfdd332d..8fe3be20af62 100644 --- a/drivers/gpu/drm/ttm/ttm_bo.c +++ b/drivers/gpu/drm/ttm/ttm_bo.c @@ -674,7 +674,8 @@ static bool ttm_bo_evict_swapout_allowable(struct ttm_buffer_object *bo, if (bo->pin_count) { *locked = false; - *busy = false; + if (busy) + *busy = false; return false; } From 4f86da9abe31636a43e64944a6a3d00fb2652c61 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Thu, 3 Aug 2023 11:35:53 -0700 Subject: [PATCH 176/203] file: reinstate f_pos locking optimization for regular files commit 797964253d358cf8d705614dda394dbe30120223 upstream. In commit 20ea1e7d13c1 ("file: always lock position for FMODE_ATOMIC_POS") we ended up always taking the file pos lock, because pidfd_getfd() could get a reference to the file even when it didn't have an elevated file count due to threading of other sharing cases. But Mateusz Guzik reports that the extra locking is actually measurable, so let's re-introduce the optimization, and only force the locking for directory traversal. Directories need the lock for correctness reasons, while regular files only need it for "POSIX semantics". Since pidfd_getfd() is about debuggers etc special things that are _way_ outside of POSIX, we can relax the rules for that case. Reported-by: Mateusz Guzik Cc: Christian Brauner Link: https://lore.kernel.org/linux-fsdevel/20230803095311.ijpvhx3fyrbkasul@f/ Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- fs/file.c | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/fs/file.c b/fs/file.c index 2feeccec3f4d..d6bc73960e4a 100644 --- a/fs/file.c +++ b/fs/file.c @@ -1007,12 +1007,28 @@ unsigned long __fdget_raw(unsigned int fd) return __fget_light(fd, 0); } +/* + * Try to avoid f_pos locking. We only need it if the + * file is marked for FMODE_ATOMIC_POS, and it can be + * accessed multiple ways. + * + * Always do it for directories, because pidfd_getfd() + * can make a file accessible even if it otherwise would + * not be, and for directories this is a correctness + * issue, not a "POSIX requirement". + */ +static inline bool file_needs_f_pos_lock(struct file *file) +{ + return (file->f_mode & FMODE_ATOMIC_POS) && + (file_count(file) > 1 || S_ISDIR(file_inode(file)->i_mode)); +} + unsigned long __fdget_pos(unsigned int fd) { unsigned long v = __fdget(fd); struct file *file = (struct file *)(v & ~3); - if (file && (file->f_mode & FMODE_ATOMIC_POS)) { + if (file && file_needs_f_pos_lock(file)) { v |= FDPUT_POS_UNLOCK; mutex_lock(&file->f_pos_lock); } From 426656e8dd03ded47112e5e1488f0975a1f0d806 Mon Sep 17 00:00:00 2001 From: Douglas Anderson Date: Fri, 8 Jul 2022 17:09:52 -0700 Subject: [PATCH 177/203] tracing: Fix sleeping while atomic in kdb ftdump commit 495fcec8648cdfb483b5b9ab310f3839f07cb3b8 upstream. If you drop into kdb and type "ftdump" you'll get a sleeping while atomic warning from memory allocation in trace_find_next_entry(). This appears to have been caused by commit ff895103a84a ("tracing: Save off entry when peeking at next entry"), which added the allocation in that path. The problematic commit was already fixed by commit 8e99cf91b99b ("tracing: Do not allocate buffer in trace_find_next_entry() in atomic") but that fix missed the kdb case. The fix here is easy: just move the assignment of the static buffer to the place where it should have been to begin with: trace_init_global_iter(). That function is called in two places, once is right before the assignment of the static buffer added by the previous fix and once is in kdb. Note that it appears that there's a second static buffer that we need to assign that was added in commit efbbdaa22bb7 ("tracing: Show real address for trace event arguments"), so we'll move that too. Link: https://lkml.kernel.org/r/20220708170919.1.I75844e5038d9425add2ad853a608cb44bb39df40@changeid Fixes: ff895103a84a ("tracing: Save off entry when peeking at next entry") Fixes: efbbdaa22bb7 ("tracing: Show real address for trace event arguments") Signed-off-by: Douglas Anderson Signed-off-by: Steven Rostedt (Google) Signed-off-by: Greg Kroah-Hartman --- kernel/trace/trace.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index bed792f06575..7e99319bd536 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -9416,6 +9416,12 @@ void trace_init_global_iter(struct trace_iterator *iter) /* Output in nanoseconds only if we are using a clock in nanoseconds. */ if (trace_clocks[iter->tr->clock_id].in_ns) iter->iter_flags |= TRACE_FILE_TIME_IN_NS; + + /* Can not use kmalloc for iter.temp and iter.fmt */ + iter->temp = static_temp_buf; + iter->temp_size = STATIC_TEMP_BUF_SIZE; + iter->fmt = static_fmt_buf; + iter->fmt_size = STATIC_FMT_BUF_SIZE; } void ftrace_dump(enum ftrace_dump_mode oops_dump_mode) @@ -9449,11 +9455,6 @@ void ftrace_dump(enum ftrace_dump_mode oops_dump_mode) /* Simulate the iterator */ trace_init_global_iter(&iter); - /* Can not use kmalloc for iter.temp and iter.fmt */ - iter.temp = static_temp_buf; - iter.temp_size = STATIC_TEMP_BUF_SIZE; - iter.fmt = static_fmt_buf; - iter.fmt_size = STATIC_FMT_BUF_SIZE; for_each_tracing_cpu(cpu) { atomic_inc(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled); From 7f740bc696d4617f8ee44565e8ac0d36278a1e91 Mon Sep 17 00:00:00 2001 From: Prince Kumar Maurya Date: Tue, 30 May 2023 18:31:41 -0700 Subject: [PATCH 178/203] fs/sysv: Null check to prevent null-ptr-deref bug commit ea2b62f305893992156a798f665847e0663c9f41 upstream. sb_getblk(inode->i_sb, parent) return a null ptr and taking lock on that leads to the null-ptr-deref bug. Reported-by: syzbot+aad58150cbc64ba41bdc@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=aad58150cbc64ba41bdc Signed-off-by: Prince Kumar Maurya Message-Id: <20230531013141.19487-1-princekumarmaurya06@gmail.com> Signed-off-by: Christian Brauner Signed-off-by: Greg Kroah-Hartman --- fs/sysv/itree.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/fs/sysv/itree.c b/fs/sysv/itree.c index 31f66053e239..e3d1673b8ec9 100644 --- a/fs/sysv/itree.c +++ b/fs/sysv/itree.c @@ -145,6 +145,10 @@ static int alloc_branch(struct inode *inode, */ parent = block_to_cpu(SYSV_SB(inode->i_sb), branch[n-1].key); bh = sb_getblk(inode->i_sb, parent); + if (!bh) { + sysv_free_block(inode->i_sb, branch[n].key); + break; + } lock_buffer(bh); memset(bh->b_data, 0, blocksize); branch[n].bh = bh; From 06f87c96216bc5cd1094c23492274f77f1d5dd3b Mon Sep 17 00:00:00 2001 From: Sungwoo Kim Date: Wed, 31 May 2023 01:39:56 -0400 Subject: [PATCH 179/203] Bluetooth: L2CAP: Fix use-after-free in l2cap_sock_ready_cb commit 1728137b33c00d5a2b5110ed7aafb42e7c32e4a1 upstream. l2cap_sock_release(sk) frees sk. However, sk's children are still alive and point to the already free'd sk's address. To fix this, l2cap_sock_release(sk) also cleans sk's children. ================================================================== BUG: KASAN: use-after-free in l2cap_sock_ready_cb+0xb7/0x100 net/bluetooth/l2cap_sock.c:1650 Read of size 8 at addr ffff888104617aa8 by task kworker/u3:0/276 CPU: 0 PID: 276 Comm: kworker/u3:0 Not tainted 6.2.0-00001-gef397bd4d5fb-dirty #59 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.15.0-1 04/01/2014 Workqueue: hci2 hci_rx_work Call Trace: __dump_stack lib/dump_stack.c:88 [inline] dump_stack_lvl+0x72/0x95 lib/dump_stack.c:106 print_address_description mm/kasan/report.c:306 [inline] print_report+0x175/0x478 mm/kasan/report.c:417 kasan_report+0xb1/0x130 mm/kasan/report.c:517 l2cap_sock_ready_cb+0xb7/0x100 net/bluetooth/l2cap_sock.c:1650 l2cap_chan_ready+0x10e/0x1e0 net/bluetooth/l2cap_core.c:1386 l2cap_config_req+0x753/0x9f0 net/bluetooth/l2cap_core.c:4480 l2cap_bredr_sig_cmd net/bluetooth/l2cap_core.c:5739 [inline] l2cap_sig_channel net/bluetooth/l2cap_core.c:6509 [inline] l2cap_recv_frame+0xe2e/0x43c0 net/bluetooth/l2cap_core.c:7788 l2cap_recv_acldata+0x6ed/0x7e0 net/bluetooth/l2cap_core.c:8506 hci_acldata_packet net/bluetooth/hci_core.c:3813 [inline] hci_rx_work+0x66e/0xbc0 net/bluetooth/hci_core.c:4048 process_one_work+0x4ea/0x8e0 kernel/workqueue.c:2289 worker_thread+0x364/0x8e0 kernel/workqueue.c:2436 kthread+0x1b9/0x200 kernel/kthread.c:376 ret_from_fork+0x2c/0x50 arch/x86/entry/entry_64.S:308 Allocated by task 288: kasan_save_stack+0x22/0x50 mm/kasan/common.c:45 kasan_set_track+0x25/0x30 mm/kasan/common.c:52 ____kasan_kmalloc mm/kasan/common.c:374 [inline] __kasan_kmalloc+0x82/0x90 mm/kasan/common.c:383 kasan_kmalloc include/linux/kasan.h:211 [inline] __do_kmalloc_node mm/slab_common.c:968 [inline] __kmalloc+0x5a/0x140 mm/slab_common.c:981 kmalloc include/linux/slab.h:584 [inline] sk_prot_alloc+0x113/0x1f0 net/core/sock.c:2040 sk_alloc+0x36/0x3c0 net/core/sock.c:2093 l2cap_sock_alloc.constprop.0+0x39/0x1c0 net/bluetooth/l2cap_sock.c:1852 l2cap_sock_create+0x10d/0x220 net/bluetooth/l2cap_sock.c:1898 bt_sock_create+0x183/0x290 net/bluetooth/af_bluetooth.c:132 __sock_create+0x226/0x380 net/socket.c:1518 sock_create net/socket.c:1569 [inline] __sys_socket_create net/socket.c:1606 [inline] __sys_socket_create net/socket.c:1591 [inline] __sys_socket+0x112/0x200 net/socket.c:1639 __do_sys_socket net/socket.c:1652 [inline] __se_sys_socket net/socket.c:1650 [inline] __x64_sys_socket+0x40/0x50 net/socket.c:1650 do_syscall_x64 arch/x86/entry/common.c:50 [inline] do_syscall_64+0x3f/0x90 arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x72/0xdc Freed by task 288: kasan_save_stack+0x22/0x50 mm/kasan/common.c:45 kasan_set_track+0x25/0x30 mm/kasan/common.c:52 kasan_save_free_info+0x2e/0x50 mm/kasan/generic.c:523 ____kasan_slab_free mm/kasan/common.c:236 [inline] ____kasan_slab_free mm/kasan/common.c:200 [inline] __kasan_slab_free+0x10a/0x190 mm/kasan/common.c:244 kasan_slab_free include/linux/kasan.h:177 [inline] slab_free_hook mm/slub.c:1781 [inline] slab_free_freelist_hook mm/slub.c:1807 [inline] slab_free mm/slub.c:3787 [inline] __kmem_cache_free+0x88/0x1f0 mm/slub.c:3800 sk_prot_free net/core/sock.c:2076 [inline] __sk_destruct+0x347/0x430 net/core/sock.c:2168 sk_destruct+0x9c/0xb0 net/core/sock.c:2183 __sk_free+0x82/0x220 net/core/sock.c:2194 sk_free+0x7c/0xa0 net/core/sock.c:2205 sock_put include/net/sock.h:1991 [inline] l2cap_sock_kill+0x256/0x2b0 net/bluetooth/l2cap_sock.c:1257 l2cap_sock_release+0x1a7/0x220 net/bluetooth/l2cap_sock.c:1428 __sock_release+0x80/0x150 net/socket.c:650 sock_close+0x19/0x30 net/socket.c:1368 __fput+0x17a/0x5c0 fs/file_table.c:320 task_work_run+0x132/0x1c0 kernel/task_work.c:179 resume_user_mode_work include/linux/resume_user_mode.h:49 [inline] exit_to_user_mode_loop kernel/entry/common.c:171 [inline] exit_to_user_mode_prepare+0x113/0x120 kernel/entry/common.c:203 __syscall_exit_to_user_mode_work kernel/entry/common.c:285 [inline] syscall_exit_to_user_mode+0x21/0x50 kernel/entry/common.c:296 do_syscall_64+0x4c/0x90 arch/x86/entry/common.c:86 entry_SYSCALL_64_after_hwframe+0x72/0xdc The buggy address belongs to the object at ffff888104617800 which belongs to the cache kmalloc-1k of size 1024 The buggy address is located 680 bytes inside of 1024-byte region [ffff888104617800, ffff888104617c00) The buggy address belongs to the physical page: page:00000000dbca6a80 refcount:1 mapcount:0 mapping:0000000000000000 index:0xffff888104614000 pfn:0x104614 head:00000000dbca6a80 order:2 compound_mapcount:0 subpages_mapcount:0 compound_pincount:0 flags: 0x200000000010200(slab|head|node=0|zone=2) raw: 0200000000010200 ffff888100041dc0 ffffea0004212c10 ffffea0004234b10 raw: ffff888104614000 0000000000080002 00000001ffffffff 0000000000000000 page dumped because: kasan: bad access detected Memory state around the buggy address: ffff888104617980: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ffff888104617a00: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb >ffff888104617a80: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ^ ffff888104617b00: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ffff888104617b80: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ================================================================== Ack: This bug is found by FuzzBT with a modified Syzkaller. Other contributors are Ruoyu Wu and Hui Peng. Signed-off-by: Sungwoo Kim Signed-off-by: Luiz Augusto von Dentz Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- net/bluetooth/l2cap_sock.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/bluetooth/l2cap_sock.c b/net/bluetooth/l2cap_sock.c index a267c9b6bcef..756523e5402a 100644 --- a/net/bluetooth/l2cap_sock.c +++ b/net/bluetooth/l2cap_sock.c @@ -45,6 +45,7 @@ static const struct proto_ops l2cap_sock_ops; static void l2cap_sock_init(struct sock *sk, struct sock *parent); static struct sock *l2cap_sock_alloc(struct net *net, struct socket *sock, int proto, gfp_t prio, int kern); +static void l2cap_sock_cleanup_listen(struct sock *parent); bool l2cap_is_socket(struct socket *sock) { @@ -1414,6 +1415,7 @@ static int l2cap_sock_release(struct socket *sock) if (!sk) return 0; + l2cap_sock_cleanup_listen(sk); bt_sock_unlink(&l2cap_sk_list, sk); err = l2cap_sock_shutdown(sock, SHUT_RDWR); From ec0d0be41721aca683c5606354a58ee2c687e3f8 Mon Sep 17 00:00:00 2001 From: Alan Stern Date: Wed, 12 Jul 2023 10:15:10 -0400 Subject: [PATCH 180/203] net: usbnet: Fix WARNING in usbnet_start_xmit/usb_submit_urb commit 5e1627cb43ddf1b24b92eb26f8d958a3f5676ccb upstream. The syzbot fuzzer identified a problem in the usbnet driver: usb 1-1: BOGUS urb xfer, pipe 3 != type 1 WARNING: CPU: 0 PID: 754 at drivers/usb/core/urb.c:504 usb_submit_urb+0xed6/0x1880 drivers/usb/core/urb.c:504 Modules linked in: CPU: 0 PID: 754 Comm: kworker/0:2 Not tainted 6.4.0-rc7-syzkaller-00014-g692b7dc87ca6 #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 05/27/2023 Workqueue: mld mld_ifc_work RIP: 0010:usb_submit_urb+0xed6/0x1880 drivers/usb/core/urb.c:504 Code: 7c 24 18 e8 2c b4 5b fb 48 8b 7c 24 18 e8 42 07 f0 fe 41 89 d8 44 89 e1 4c 89 ea 48 89 c6 48 c7 c7 a0 c9 fc 8a e8 5a 6f 23 fb <0f> 0b e9 58 f8 ff ff e8 fe b3 5b fb 48 81 c5 c0 05 00 00 e9 84 f7 RSP: 0018:ffffc9000463f568 EFLAGS: 00010086 RAX: 0000000000000000 RBX: 0000000000000001 RCX: 0000000000000000 RDX: ffff88801eb28000 RSI: ffffffff814c03b7 RDI: 0000000000000001 RBP: ffff8881443b7190 R08: 0000000000000001 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000001 R12: 0000000000000003 R13: ffff88802a77cb18 R14: 0000000000000003 R15: ffff888018262500 FS: 0000000000000000(0000) GS:ffff8880b9800000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000556a99c15a18 CR3: 0000000028c71000 CR4: 0000000000350ef0 Call Trace: usbnet_start_xmit+0xfe5/0x2190 drivers/net/usb/usbnet.c:1453 __netdev_start_xmit include/linux/netdevice.h:4918 [inline] netdev_start_xmit include/linux/netdevice.h:4932 [inline] xmit_one net/core/dev.c:3578 [inline] dev_hard_start_xmit+0x187/0x700 net/core/dev.c:3594 ... This bug is caused by the fact that usbnet trusts the bulk endpoint addresses its probe routine receives in the driver_info structure, and it does not check to see that these endpoints actually exist and have the expected type and directions. The fix is simply to add such a check. Reported-and-tested-by: syzbot+63ee658b9a100ffadbe2@syzkaller.appspotmail.com Closes: https://lore.kernel.org/linux-usb/000000000000a56e9105d0cec021@google.com/ Signed-off-by: Alan Stern CC: Oliver Neukum Link: https://lore.kernel.org/r/ea152b6d-44df-4f8a-95c6-4db51143dcc1@rowland.harvard.edu Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- drivers/net/usb/usbnet.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/net/usb/usbnet.c b/drivers/net/usb/usbnet.c index 43d70348343b..481a41d879b5 100644 --- a/drivers/net/usb/usbnet.c +++ b/drivers/net/usb/usbnet.c @@ -1738,6 +1738,10 @@ usbnet_probe (struct usb_interface *udev, const struct usb_device_id *prod) } else if (!info->in || !info->out) status = usbnet_get_endpoints (dev, udev); else { + u8 ep_addrs[3] = { + info->in + USB_DIR_IN, info->out + USB_DIR_OUT, 0 + }; + dev->in = usb_rcvbulkpipe (xdev, info->in); dev->out = usb_sndbulkpipe (xdev, info->out); if (!(info->flags & FLAG_NO_SETINT)) @@ -1747,6 +1751,8 @@ usbnet_probe (struct usb_interface *udev, const struct usb_device_id *prod) else status = 0; + if (status == 0 && !usb_check_bulk_endpoints(udev, ep_addrs)) + status = -EINVAL; } if (status >= 0 && dev->status) status = init_status (dev, udev); From 7e4e87ec56aa6d008c64eab31b340a7c452b26cc Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Thu, 15 Jun 2023 13:38:48 +0200 Subject: [PATCH 181/203] fs: Protect reconfiguration of sb read-write from racing writes commit c541dce86c537714b6761a79a969c1623dfa222b upstream. The reconfigure / remount code takes a lot of effort to protect filesystem's reconfiguration code from racing writes on remounting read-only. However during remounting read-only filesystem to read-write mode userspace writes can start immediately once we clear SB_RDONLY flag. This is inconvenient for example for ext4 because we need to do some writes to the filesystem (such as preparation of quota files) before we can take userspace writes so we are clearing SB_RDONLY flag before we are fully ready to accept userpace writes and syzbot has found a way to exploit this [1]. Also as far as I'm reading the code the filesystem remount code was protected from racing writes in the legacy mount path by the mount's MNT_READONLY flag so this is relatively new problem. It is actually fairly easy to protect remount read-write from racing writes using sb->s_readonly_remount flag so let's just do that instead of having to workaround these races in the filesystem code. [1] https://lore.kernel.org/all/00000000000006a0df05f6667499@google.com/T/ Signed-off-by: Jan Kara Message-Id: <20230615113848.8439-1-jack@suse.cz> Signed-off-by: Christian Brauner Signed-off-by: Greg Kroah-Hartman --- fs/super.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/fs/super.c b/fs/super.c index 7629f9dd031c..f9795e72e3bf 100644 --- a/fs/super.c +++ b/fs/super.c @@ -906,6 +906,7 @@ int reconfigure_super(struct fs_context *fc) struct super_block *sb = fc->root->d_sb; int retval; bool remount_ro = false; + bool remount_rw = false; bool force = fc->sb_flags & SB_FORCE; if (fc->sb_flags_mask & ~MS_RMT_MASK) @@ -922,7 +923,7 @@ int reconfigure_super(struct fs_context *fc) if (!(fc->sb_flags & SB_RDONLY) && bdev_read_only(sb->s_bdev)) return -EACCES; #endif - + remount_rw = !(fc->sb_flags & SB_RDONLY) && sb_rdonly(sb); remount_ro = (fc->sb_flags & SB_RDONLY) && !sb_rdonly(sb); } @@ -952,6 +953,14 @@ int reconfigure_super(struct fs_context *fc) if (retval) return retval; } + } else if (remount_rw) { + /* + * We set s_readonly_remount here to protect filesystem's + * reconfigure code from writes from userspace until + * reconfigure finishes. + */ + sb->s_readonly_remount = 1; + smp_wmb(); } if (fc->ops->reconfigure) { From d328849fb63bfce7695245b932bc5f295bea5eb6 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Tue, 13 Jun 2023 12:25:52 +0200 Subject: [PATCH 182/203] ext2: Drop fragment support commit 404615d7f1dcd4cca200e9a7a9df3a1dcae1dd62 upstream. Ext2 has fields in superblock reserved for subblock allocation support. However that never landed. Drop the many years dead code. Reported-by: syzbot+af5e10f73dbff48f70af@syzkaller.appspotmail.com Signed-off-by: Jan Kara Signed-off-by: Greg Kroah-Hartman --- fs/ext2/ext2.h | 12 ------------ fs/ext2/super.c | 23 ++++------------------- 2 files changed, 4 insertions(+), 31 deletions(-) diff --git a/fs/ext2/ext2.h b/fs/ext2/ext2.h index f06367cfd764..fb2086bcbbf8 100644 --- a/fs/ext2/ext2.h +++ b/fs/ext2/ext2.h @@ -68,10 +68,7 @@ struct mb_cache; * second extended-fs super-block data in memory */ struct ext2_sb_info { - unsigned long s_frag_size; /* Size of a fragment in bytes */ - unsigned long s_frags_per_block;/* Number of fragments per block */ unsigned long s_inodes_per_block;/* Number of inodes per block */ - unsigned long s_frags_per_group;/* Number of fragments in a group */ unsigned long s_blocks_per_group;/* Number of blocks in a group */ unsigned long s_inodes_per_group;/* Number of inodes in a group */ unsigned long s_itb_per_group; /* Number of inode table blocks per group */ @@ -185,15 +182,6 @@ static inline struct ext2_sb_info *EXT2_SB(struct super_block *sb) #define EXT2_INODE_SIZE(s) (EXT2_SB(s)->s_inode_size) #define EXT2_FIRST_INO(s) (EXT2_SB(s)->s_first_ino) -/* - * Macro-instructions used to manage fragments - */ -#define EXT2_MIN_FRAG_SIZE 1024 -#define EXT2_MAX_FRAG_SIZE 4096 -#define EXT2_MIN_FRAG_LOG_SIZE 10 -#define EXT2_FRAG_SIZE(s) (EXT2_SB(s)->s_frag_size) -#define EXT2_FRAGS_PER_BLOCK(s) (EXT2_SB(s)->s_frags_per_block) - /* * Structure of a blocks group descriptor */ diff --git a/fs/ext2/super.c b/fs/ext2/super.c index ab01ec7ac48c..a810b9c9e8eb 100644 --- a/fs/ext2/super.c +++ b/fs/ext2/super.c @@ -673,10 +673,9 @@ static int ext2_setup_super (struct super_block * sb, es->s_max_mnt_count = cpu_to_le16(EXT2_DFL_MAX_MNT_COUNT); le16_add_cpu(&es->s_mnt_count, 1); if (test_opt (sb, DEBUG)) - ext2_msg(sb, KERN_INFO, "%s, %s, bs=%lu, fs=%lu, gc=%lu, " + ext2_msg(sb, KERN_INFO, "%s, %s, bs=%lu, gc=%lu, " "bpg=%lu, ipg=%lu, mo=%04lx]", EXT2FS_VERSION, EXT2FS_DATE, sb->s_blocksize, - sbi->s_frag_size, sbi->s_groups_count, EXT2_BLOCKS_PER_GROUP(sb), EXT2_INODES_PER_GROUP(sb), @@ -1014,14 +1013,7 @@ static int ext2_fill_super(struct super_block *sb, void *data, int silent) } } - sbi->s_frag_size = EXT2_MIN_FRAG_SIZE << - le32_to_cpu(es->s_log_frag_size); - if (sbi->s_frag_size == 0) - goto cantfind_ext2; - sbi->s_frags_per_block = sb->s_blocksize / sbi->s_frag_size; - sbi->s_blocks_per_group = le32_to_cpu(es->s_blocks_per_group); - sbi->s_frags_per_group = le32_to_cpu(es->s_frags_per_group); sbi->s_inodes_per_group = le32_to_cpu(es->s_inodes_per_group); sbi->s_inodes_per_block = sb->s_blocksize / EXT2_INODE_SIZE(sb); @@ -1047,11 +1039,10 @@ static int ext2_fill_super(struct super_block *sb, void *data, int silent) goto failed_mount; } - if (sb->s_blocksize != sbi->s_frag_size) { + if (es->s_log_frag_size != es->s_log_block_size) { ext2_msg(sb, KERN_ERR, - "error: fragsize %lu != blocksize %lu" - "(not supported yet)", - sbi->s_frag_size, sb->s_blocksize); + "error: fragsize log %u != blocksize log %u", + le32_to_cpu(es->s_log_frag_size), sb->s_blocksize_bits); goto failed_mount; } @@ -1061,12 +1052,6 @@ static int ext2_fill_super(struct super_block *sb, void *data, int silent) sbi->s_blocks_per_group); goto failed_mount; } - if (sbi->s_frags_per_group > sb->s_blocksize * 8) { - ext2_msg(sb, KERN_ERR, - "error: #fragments per group too big: %lu", - sbi->s_frags_per_group); - goto failed_mount; - } if (sbi->s_inodes_per_group < sbi->s_inodes_per_block || sbi->s_inodes_per_group > sb->s_blocksize * 8) { ext2_msg(sb, KERN_ERR, From 027710952b539cb09350e0482e27d072a24416bf Mon Sep 17 00:00:00 2001 From: Roger Quadros Date: Sun, 25 Jun 2023 00:10:21 +0530 Subject: [PATCH 183/203] mtd: rawnand: omap_elm: Fix incorrect type in assignment [ Upstream commit d8403b9eeee66d5dd81ecb9445800b108c267ce3 ] Once the ECC word endianness is converted to BE32, we force cast it to u32 so we can use elm_write_reg() which in turn uses writel(). Fixes below sparse warnings: drivers/mtd/nand/raw/omap_elm.c:180:37: sparse: expected unsigned int [usertype] val drivers/mtd/nand/raw/omap_elm.c:180:37: sparse: got restricted __be32 [usertype] drivers/mtd/nand/raw/omap_elm.c:185:37: sparse: expected unsigned int [usertype] val drivers/mtd/nand/raw/omap_elm.c:185:37: sparse: got restricted __be32 [usertype] drivers/mtd/nand/raw/omap_elm.c:190:37: sparse: expected unsigned int [usertype] val drivers/mtd/nand/raw/omap_elm.c:190:37: sparse: got restricted __be32 [usertype] >> drivers/mtd/nand/raw/omap_elm.c:200:40: sparse: sparse: restricted __be32 degrades to integer drivers/mtd/nand/raw/omap_elm.c:206:39: sparse: sparse: restricted __be32 degrades to integer drivers/mtd/nand/raw/omap_elm.c:210:37: sparse: expected unsigned int [assigned] [usertype] val drivers/mtd/nand/raw/omap_elm.c:210:37: sparse: got restricted __be32 [usertype] drivers/mtd/nand/raw/omap_elm.c:213:37: sparse: expected unsigned int [assigned] [usertype] val drivers/mtd/nand/raw/omap_elm.c:213:37: sparse: got restricted __be32 [usertype] drivers/mtd/nand/raw/omap_elm.c:216:37: sparse: expected unsigned int [assigned] [usertype] val drivers/mtd/nand/raw/omap_elm.c:216:37: sparse: got restricted __be32 [usertype] drivers/mtd/nand/raw/omap_elm.c:219:37: sparse: expected unsigned int [assigned] [usertype] val drivers/mtd/nand/raw/omap_elm.c:219:37: sparse: got restricted __be32 [usertype] drivers/mtd/nand/raw/omap_elm.c:222:37: sparse: expected unsigned int [assigned] [usertype] val drivers/mtd/nand/raw/omap_elm.c:222:37: sparse: got restricted __be32 [usertype] drivers/mtd/nand/raw/omap_elm.c:225:37: sparse: expected unsigned int [assigned] [usertype] val drivers/mtd/nand/raw/omap_elm.c:225:37: sparse: got restricted __be32 [usertype] drivers/mtd/nand/raw/omap_elm.c:228:39: sparse: sparse: restricted __be32 degrades to integer Fixes: bf22433575ef ("mtd: devices: elm: Add support for ELM error correction") Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202306212211.WDXokuWh-lkp@intel.com/ Signed-off-by: Roger Quadros Signed-off-by: Miquel Raynal Link: https://lore.kernel.org/linux-mtd/20230624184021.7740-1-rogerq@kernel.org Signed-off-by: Sasha Levin --- drivers/mtd/nand/raw/omap_elm.c | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/drivers/mtd/nand/raw/omap_elm.c b/drivers/mtd/nand/raw/omap_elm.c index 4b799521a427..dad17fa0b514 100644 --- a/drivers/mtd/nand/raw/omap_elm.c +++ b/drivers/mtd/nand/raw/omap_elm.c @@ -174,17 +174,17 @@ static void elm_load_syndrome(struct elm_info *info, switch (info->bch_type) { case BCH8_ECC: /* syndrome fragment 0 = ecc[9-12B] */ - val = cpu_to_be32(*(u32 *) &ecc[9]); + val = (__force u32)cpu_to_be32(*(u32 *)&ecc[9]); elm_write_reg(info, offset, val); /* syndrome fragment 1 = ecc[5-8B] */ offset += 4; - val = cpu_to_be32(*(u32 *) &ecc[5]); + val = (__force u32)cpu_to_be32(*(u32 *)&ecc[5]); elm_write_reg(info, offset, val); /* syndrome fragment 2 = ecc[1-4B] */ offset += 4; - val = cpu_to_be32(*(u32 *) &ecc[1]); + val = (__force u32)cpu_to_be32(*(u32 *)&ecc[1]); elm_write_reg(info, offset, val); /* syndrome fragment 3 = ecc[0B] */ @@ -194,35 +194,35 @@ static void elm_load_syndrome(struct elm_info *info, break; case BCH4_ECC: /* syndrome fragment 0 = ecc[20-52b] bits */ - val = (cpu_to_be32(*(u32 *) &ecc[3]) >> 4) | + val = ((__force u32)cpu_to_be32(*(u32 *)&ecc[3]) >> 4) | ((ecc[2] & 0xf) << 28); elm_write_reg(info, offset, val); /* syndrome fragment 1 = ecc[0-20b] bits */ offset += 4; - val = cpu_to_be32(*(u32 *) &ecc[0]) >> 12; + val = (__force u32)cpu_to_be32(*(u32 *)&ecc[0]) >> 12; elm_write_reg(info, offset, val); break; case BCH16_ECC: - val = cpu_to_be32(*(u32 *) &ecc[22]); + val = (__force u32)cpu_to_be32(*(u32 *)&ecc[22]); elm_write_reg(info, offset, val); offset += 4; - val = cpu_to_be32(*(u32 *) &ecc[18]); + val = (__force u32)cpu_to_be32(*(u32 *)&ecc[18]); elm_write_reg(info, offset, val); offset += 4; - val = cpu_to_be32(*(u32 *) &ecc[14]); + val = (__force u32)cpu_to_be32(*(u32 *)&ecc[14]); elm_write_reg(info, offset, val); offset += 4; - val = cpu_to_be32(*(u32 *) &ecc[10]); + val = (__force u32)cpu_to_be32(*(u32 *)&ecc[10]); elm_write_reg(info, offset, val); offset += 4; - val = cpu_to_be32(*(u32 *) &ecc[6]); + val = (__force u32)cpu_to_be32(*(u32 *)&ecc[6]); elm_write_reg(info, offset, val); offset += 4; - val = cpu_to_be32(*(u32 *) &ecc[2]); + val = (__force u32)cpu_to_be32(*(u32 *)&ecc[2]); elm_write_reg(info, offset, val); offset += 4; - val = cpu_to_be32(*(u32 *) &ecc[0]) >> 16; + val = (__force u32)cpu_to_be32(*(u32 *)&ecc[0]) >> 16; elm_write_reg(info, offset, val); break; default: From 1f09d67d390647f83f8f9d26382b0daa43756e6f Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Wed, 19 Jul 2023 23:55:01 +0200 Subject: [PATCH 184/203] mtd: rawnand: fsl_upm: Fix an off-by one test in fun_exec_op() [ Upstream commit c6abce60338aa2080973cd95be0aedad528bb41f ] 'op-cs' is copied in 'fun->mchip_number' which is used to access the 'mchip_offsets' and the 'rnb_gpio' arrays. These arrays have NAND_MAX_CHIPS elements, so the index must be below this limit. Fix the sanity check in order to avoid the NAND_MAX_CHIPS value. This would lead to out-of-bound accesses. Fixes: 54309d657767 ("mtd: rawnand: fsl_upm: Implement exec_op()") Signed-off-by: Christophe JAILLET Reviewed-by: Dan Carpenter Signed-off-by: Miquel Raynal Link: https://lore.kernel.org/linux-mtd/cd01cba1c7eda58bdabaae174c78c067325803d2.1689803636.git.christophe.jaillet@wanadoo.fr Signed-off-by: Sasha Levin --- drivers/mtd/nand/raw/fsl_upm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/mtd/nand/raw/fsl_upm.c b/drivers/mtd/nand/raw/fsl_upm.c index d5813b9abc8e..9f934466dd97 100644 --- a/drivers/mtd/nand/raw/fsl_upm.c +++ b/drivers/mtd/nand/raw/fsl_upm.c @@ -136,7 +136,7 @@ static int fun_exec_op(struct nand_chip *chip, const struct nand_operation *op, unsigned int i; int ret; - if (op->cs > NAND_MAX_CHIPS) + if (op->cs >= NAND_MAX_CHIPS) return -EINVAL; if (check_only) From fbb6657037d4d396a320b945e14c5db721bc1c3a Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Mon, 24 Jul 2023 23:43:20 +0530 Subject: [PATCH 185/203] powerpc/mm/altmap: Fix altmap boundary check [ Upstream commit 6722b25712054c0f903b839b8f5088438dd04df3 ] altmap->free includes the entire free space from which altmap blocks can be allocated. So when checking whether the kernel is doing altmap block free, compute the boundary correctly, otherwise memory hotunplug can fail. Fixes: 9ef34630a461 ("powerpc/mm: Fallback to RAM if the altmap is unusable") Signed-off-by: "Aneesh Kumar K.V" Reviewed-by: David Hildenbrand Signed-off-by: Michael Ellerman Link: https://msgid.link/20230724181320.471386-1-aneesh.kumar@linux.ibm.com Signed-off-by: Sasha Levin --- arch/powerpc/mm/init_64.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/arch/powerpc/mm/init_64.c b/arch/powerpc/mm/init_64.c index b76cd49d521b..db040f34c004 100644 --- a/arch/powerpc/mm/init_64.c +++ b/arch/powerpc/mm/init_64.c @@ -313,8 +313,7 @@ void __ref vmemmap_free(unsigned long start, unsigned long end, start = ALIGN_DOWN(start, page_size); if (altmap) { alt_start = altmap->base_pfn; - alt_end = altmap->base_pfn + altmap->reserve + - altmap->free + altmap->alloc + altmap->align; + alt_end = altmap->base_pfn + altmap->reserve + altmap->free; } pr_debug("vmemmap_free %lx...%lx\n", start, end); From c08de20e70414091994aefa4c62f2bc500705c2b Mon Sep 17 00:00:00 2001 From: Michael Jeanson Date: Tue, 14 Jun 2022 11:48:30 -0400 Subject: [PATCH 186/203] selftests/rseq: check if libc rseq support is registered [ Upstream commit d1a997ba4c1bf65497d956aea90de42a6398f73a ] When checking for libc rseq support in the library constructor, don't only depend on the symbols presence, check that the registration was completed. This targets a scenario where the libc has rseq support but it is not wired for the current architecture in 'bits/rseq.h', we want to fallback to our internal registration mechanism. Signed-off-by: Michael Jeanson Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Mathieu Desnoyers Link: https://lore.kernel.org/r/20220614154830.1367382-4-mjeanson@efficios.com Stable-dep-of: 3bcbc20942db ("selftests/rseq: Play nice with binaries statically linked against glibc 2.35+") Signed-off-by: Sasha Levin --- tools/testing/selftests/rseq/rseq.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/rseq/rseq.c b/tools/testing/selftests/rseq/rseq.c index 986b9458efb2..4177f9507bbe 100644 --- a/tools/testing/selftests/rseq/rseq.c +++ b/tools/testing/selftests/rseq/rseq.c @@ -111,7 +111,8 @@ void rseq_init(void) libc_rseq_offset_p = dlsym(RTLD_NEXT, "__rseq_offset"); libc_rseq_size_p = dlsym(RTLD_NEXT, "__rseq_size"); libc_rseq_flags_p = dlsym(RTLD_NEXT, "__rseq_flags"); - if (libc_rseq_size_p && libc_rseq_offset_p && libc_rseq_flags_p) { + if (libc_rseq_size_p && libc_rseq_offset_p && libc_rseq_flags_p && + *libc_rseq_size_p != 0) { /* rseq registration owned by glibc */ rseq_offset = *libc_rseq_offset_p; rseq_size = *libc_rseq_size_p; From 173d9c7090db0f908f4851304f2949f1e8c4d76a Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Fri, 21 Jul 2023 15:33:52 -0700 Subject: [PATCH 187/203] selftests/rseq: Play nice with binaries statically linked against glibc 2.35+ [ Upstream commit 3bcbc20942db5d738221cca31a928efc09827069 ] To allow running rseq and KVM's rseq selftests as statically linked binaries, initialize the various "trampoline" pointers to point directly at the expect glibc symbols, and skip the dlysm() lookups if the rseq size is non-zero, i.e. the binary is statically linked *and* the libc registered its own rseq. Define weak versions of the symbols so as not to break linking against libc versions that don't support rseq in any capacity. The KVM selftests in particular are often statically linked so that they can be run on targets with very limited runtime environments, i.e. test machines. Fixes: 233e667e1ae3 ("selftests/rseq: Uplift rseq selftests for compatibility with glibc-2.35") Cc: Aaron Lewis Cc: kvm@vger.kernel.org Cc: stable@vger.kernel.org Signed-off-by: Sean Christopherson Message-Id: <20230721223352.2333911-1-seanjc@google.com> Signed-off-by: Paolo Bonzini Signed-off-by: Sasha Levin --- tools/testing/selftests/rseq/rseq.c | 28 ++++++++++++++++++++++------ 1 file changed, 22 insertions(+), 6 deletions(-) diff --git a/tools/testing/selftests/rseq/rseq.c b/tools/testing/selftests/rseq/rseq.c index 4177f9507bbe..b736a5169aad 100644 --- a/tools/testing/selftests/rseq/rseq.c +++ b/tools/testing/selftests/rseq/rseq.c @@ -32,9 +32,17 @@ #include "../kselftest.h" #include "rseq.h" -static const ptrdiff_t *libc_rseq_offset_p; -static const unsigned int *libc_rseq_size_p; -static const unsigned int *libc_rseq_flags_p; +/* + * Define weak versions to play nice with binaries that are statically linked + * against a libc that doesn't support registering its own rseq. + */ +__weak ptrdiff_t __rseq_offset; +__weak unsigned int __rseq_size; +__weak unsigned int __rseq_flags; + +static const ptrdiff_t *libc_rseq_offset_p = &__rseq_offset; +static const unsigned int *libc_rseq_size_p = &__rseq_size; +static const unsigned int *libc_rseq_flags_p = &__rseq_flags; /* Offset from the thread pointer to the rseq area. */ ptrdiff_t rseq_offset; @@ -108,9 +116,17 @@ int rseq_unregister_current_thread(void) static __attribute__((constructor)) void rseq_init(void) { - libc_rseq_offset_p = dlsym(RTLD_NEXT, "__rseq_offset"); - libc_rseq_size_p = dlsym(RTLD_NEXT, "__rseq_size"); - libc_rseq_flags_p = dlsym(RTLD_NEXT, "__rseq_flags"); + /* + * If the libc's registered rseq size isn't already valid, it may be + * because the binary is dynamically linked and not necessarily due to + * libc not having registered a restartable sequence. Try to find the + * symbols if that's the case. + */ + if (!*libc_rseq_size_p) { + libc_rseq_offset_p = dlsym(RTLD_NEXT, "__rseq_offset"); + libc_rseq_size_p = dlsym(RTLD_NEXT, "__rseq_size"); + libc_rseq_flags_p = dlsym(RTLD_NEXT, "__rseq_flags"); + } if (libc_rseq_size_p && libc_rseq_offset_p && libc_rseq_flags_p && *libc_rseq_size_p != 0) { /* rseq registration owned by glibc */ From 662735bc11279f474c8abfd6c8d4faeddd08f359 Mon Sep 17 00:00:00 2001 From: Pierre-Louis Bossart Date: Tue, 26 Jan 2021 16:54:02 +0800 Subject: [PATCH 188/203] soundwire: bus: add better dev_dbg to track complete() calls [ Upstream commit f1b690261247c9895f7a4b05d14a4026739134eb ] Add a dev_dbg() log for both enumeration and initialization completion to better track suspend-resume issues. Signed-off-by: Pierre-Louis Bossart Reviewed-by: Rander Wang Reviewed-by: Chao Song Signed-off-by: Bard Liao Link: https://lore.kernel.org/r/20210126085402.4264-1-yung-chuan.liao@linux.intel.com Signed-off-by: Vinod Koul Stable-dep-of: c40d6b3249b1 ("soundwire: fix enumeration completion") Signed-off-by: Sasha Levin --- drivers/soundwire/bus.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/drivers/soundwire/bus.c b/drivers/soundwire/bus.c index 3317a02bcc17..9c7c8fa7f53e 100644 --- a/drivers/soundwire/bus.c +++ b/drivers/soundwire/bus.c @@ -797,7 +797,7 @@ static void sdw_modify_slave_status(struct sdw_slave *slave, if (status == SDW_SLAVE_UNATTACHED) { dev_dbg(&slave->dev, - "%s: initializing completion for Slave %d\n", + "%s: initializing enumeration and init completion for Slave %d\n", __func__, slave->dev_num); init_completion(&slave->enumeration_complete); @@ -806,7 +806,7 @@ static void sdw_modify_slave_status(struct sdw_slave *slave, } else if ((status == SDW_SLAVE_ATTACHED) && (slave->status == SDW_SLAVE_UNATTACHED)) { dev_dbg(&slave->dev, - "%s: signaling completion for Slave %d\n", + "%s: signaling enumeration completion for Slave %d\n", __func__, slave->dev_num); complete(&slave->enumeration_complete); @@ -1734,8 +1734,13 @@ int sdw_handle_slave_status(struct sdw_bus *bus, if (ret) dev_err(slave->bus->dev, "Update Slave status failed:%d\n", ret); - if (attached_initializing) + if (attached_initializing) { + dev_dbg(&slave->dev, + "%s: signaling initialization completion for Slave %d\n", + __func__, slave->dev_num); + complete(&slave->initialization_complete); + } } return ret; From 7d949774e7c1a93980db4961b898313ec09744c3 Mon Sep 17 00:00:00 2001 From: Pierre-Louis Bossart Date: Wed, 20 Apr 2022 10:32:41 +0800 Subject: [PATCH 189/203] soundwire: bus: pm_runtime_request_resume on peripheral attachment [ Upstream commit e557bca49b812908f380c56b5b4b2f273848b676 ] In typical use cases, the peripheral becomes pm_runtime active as a result of the ALSA/ASoC framework starting up a DAI. The parent/child hierarchy guarantees that the manager device will be fully resumed beforehand. There is however a corner case where the manager device may become pm_runtime active, but without ALSA/ASoC requesting any functionality from the peripherals. In this case, the hardware peripheral device will report as ATTACHED and its initialization routine will be executed. If this initialization routine initiates any sort of deferred processing, there is a possibility that the manager could suspend without the peripheral suspend sequence being invoked: from the pm_runtime framework perspective, the peripheral is *already* suspended. To avoid such disconnects between hardware state and pm_runtime state, this patch adds an asynchronous pm_request_resume() upon successful attach/initialization which will result in the proper resume/suspend sequence to be followed on the peripheral side. BugLink: https://github.com/thesofproject/linux/issues/3459 Signed-off-by: Pierre-Louis Bossart Reviewed-by: Ranjani Sridharan Reviewed-by: Rander Wang Signed-off-by: Bard Liao Link: https://lore.kernel.org/r/20220420023241.14335-4-yung-chuan.liao@linux.intel.com Signed-off-by: Vinod Koul Stable-dep-of: c40d6b3249b1 ("soundwire: fix enumeration completion") Signed-off-by: Sasha Levin --- drivers/soundwire/bus.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/drivers/soundwire/bus.c b/drivers/soundwire/bus.c index 9c7c8fa7f53e..831d2d751d5d 100644 --- a/drivers/soundwire/bus.c +++ b/drivers/soundwire/bus.c @@ -1740,6 +1740,18 @@ int sdw_handle_slave_status(struct sdw_bus *bus, __func__, slave->dev_num); complete(&slave->initialization_complete); + + /* + * If the manager became pm_runtime active, the peripherals will be + * restarted and attach, but their pm_runtime status may remain + * suspended. If the 'update_slave_status' callback initiates + * any sort of deferred processing, this processing would not be + * cancelled on pm_runtime suspend. + * To avoid such zombie states, we queue a request to resume. + * This would be a no-op in case the peripheral was being resumed + * by e.g. the ALSA/ASoC framework. + */ + pm_request_resume(&slave->dev); } } From 48d1d0ce0782f995fda678508fdae35c5e9593f0 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Wed, 5 Jul 2023 14:30:11 +0200 Subject: [PATCH 190/203] soundwire: fix enumeration completion [ Upstream commit c40d6b3249b11d60e09d81530588f56233d9aa44 ] The soundwire subsystem uses two completion structures that allow drivers to wait for soundwire device to become enumerated on the bus and initialised by their drivers, respectively. The code implementing the signalling is currently broken as it does not signal all current and future waiters and also uses the wrong reinitialisation function, which can potentially lead to memory corruption if there are still waiters on the queue. Not signalling future waiters specifically breaks sound card probe deferrals as codec drivers can not tell that the soundwire device is already attached when being reprobed. Some codec runtime PM implementations suffer from similar problems as waiting for enumeration during resume can also timeout despite the device already having been enumerated. Fixes: fb9469e54fa7 ("soundwire: bus: fix race condition with enumeration_complete signaling") Fixes: a90def068127 ("soundwire: bus: fix race condition with initialization_complete signaling") Cc: stable@vger.kernel.org # 5.7 Cc: Pierre-Louis Bossart Cc: Rander Wang Signed-off-by: Johan Hovold Reviewed-by: Pierre-Louis Bossart Link: https://lore.kernel.org/r/20230705123018.30903-2-johan+linaro@kernel.org Signed-off-by: Vinod Koul Signed-off-by: Sasha Levin --- drivers/soundwire/bus.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/soundwire/bus.c b/drivers/soundwire/bus.c index 831d2d751d5d..0e0a19030c35 100644 --- a/drivers/soundwire/bus.c +++ b/drivers/soundwire/bus.c @@ -800,8 +800,8 @@ static void sdw_modify_slave_status(struct sdw_slave *slave, "%s: initializing enumeration and init completion for Slave %d\n", __func__, slave->dev_num); - init_completion(&slave->enumeration_complete); - init_completion(&slave->initialization_complete); + reinit_completion(&slave->enumeration_complete); + reinit_completion(&slave->initialization_complete); } else if ((status == SDW_SLAVE_ATTACHED) && (slave->status == SDW_SLAVE_UNATTACHED)) { @@ -809,7 +809,7 @@ static void sdw_modify_slave_status(struct sdw_slave *slave, "%s: signaling enumeration completion for Slave %d\n", __func__, slave->dev_num); - complete(&slave->enumeration_complete); + complete_all(&slave->enumeration_complete); } slave->status = status; mutex_unlock(&slave->bus->bus_lock); @@ -1739,7 +1739,7 @@ int sdw_handle_slave_status(struct sdw_bus *bus, "%s: signaling initialization completion for Slave %d\n", __func__, slave->dev_num); - complete(&slave->initialization_complete); + complete_all(&slave->initialization_complete); /* * If the manager became pm_runtime active, the peripherals will be From 9a320469add4e635545b26f82c94fea8aad5f618 Mon Sep 17 00:00:00 2001 From: Chunfeng Yun Date: Mon, 25 Oct 2021 15:01:53 +0800 Subject: [PATCH 191/203] PM / wakeirq: support enabling wake-up irq after runtime_suspend called [ Upstream commit 259714100d98b50bf04d36a21bf50ca8b829fc11 ] When the dedicated wake IRQ is level trigger, and it uses the device's low-power status as the wakeup source, that means if the device is not in low-power state, the wake IRQ will be triggered if enabled; For this case, need enable the wake IRQ after running the device's ->runtime_suspend() which make it enter low-power state. e.g. Assume the wake IRQ is a low level trigger type, and the wakeup signal comes from the low-power status of the device. The wakeup signal is low level at running time (0), and becomes high level when the device enters low-power state (runtime_suspend (1) is called), a wakeup event at (2) make the device exit low-power state, then the wakeup signal also becomes low level. ------------------ | ^ ^| ---------------- | | -------------- |<---(0)--->|<--(1)--| (3) (2) (4) if enable the wake IRQ before running runtime_suspend during (0), a wake IRQ will arise, it causes resume immediately; it works if enable wake IRQ ( e.g. at (3) or (4)) after running ->runtime_suspend(). This patch introduces a new status WAKE_IRQ_DEDICATED_REVERSE to optionally support enabling wake IRQ after running ->runtime_suspend(). Suggested-by: Rafael J. Wysocki Signed-off-by: Chunfeng Yun Signed-off-by: Rafael J. Wysocki Stable-dep-of: 8527beb12087 ("PM: sleep: wakeirq: fix wake irq arming") Signed-off-by: Sasha Levin --- drivers/base/power/power.h | 7 ++- drivers/base/power/runtime.c | 6 ++- drivers/base/power/wakeirq.c | 101 +++++++++++++++++++++++++++-------- include/linux/pm_wakeirq.h | 9 +++- 4 files changed, 96 insertions(+), 27 deletions(-) diff --git a/drivers/base/power/power.h b/drivers/base/power/power.h index 54292cdd7808..0eb7f02b3ad5 100644 --- a/drivers/base/power/power.h +++ b/drivers/base/power/power.h @@ -25,8 +25,10 @@ extern u64 pm_runtime_active_time(struct device *dev); #define WAKE_IRQ_DEDICATED_ALLOCATED BIT(0) #define WAKE_IRQ_DEDICATED_MANAGED BIT(1) +#define WAKE_IRQ_DEDICATED_REVERSE BIT(2) #define WAKE_IRQ_DEDICATED_MASK (WAKE_IRQ_DEDICATED_ALLOCATED | \ - WAKE_IRQ_DEDICATED_MANAGED) + WAKE_IRQ_DEDICATED_MANAGED | \ + WAKE_IRQ_DEDICATED_REVERSE) struct wake_irq { struct device *dev; @@ -39,7 +41,8 @@ extern void dev_pm_arm_wake_irq(struct wake_irq *wirq); extern void dev_pm_disarm_wake_irq(struct wake_irq *wirq); extern void dev_pm_enable_wake_irq_check(struct device *dev, bool can_change_status); -extern void dev_pm_disable_wake_irq_check(struct device *dev); +extern void dev_pm_disable_wake_irq_check(struct device *dev, bool cond_disable); +extern void dev_pm_enable_wake_irq_complete(struct device *dev); #ifdef CONFIG_PM_SLEEP diff --git a/drivers/base/power/runtime.c b/drivers/base/power/runtime.c index 360094692d29..fbbc3ed143f2 100644 --- a/drivers/base/power/runtime.c +++ b/drivers/base/power/runtime.c @@ -675,6 +675,8 @@ static int rpm_suspend(struct device *dev, int rpmflags) if (retval) goto fail; + dev_pm_enable_wake_irq_complete(dev); + no_callback: __update_runtime_status(dev, RPM_SUSPENDED); pm_runtime_deactivate_timer(dev); @@ -720,7 +722,7 @@ static int rpm_suspend(struct device *dev, int rpmflags) return retval; fail: - dev_pm_disable_wake_irq_check(dev); + dev_pm_disable_wake_irq_check(dev, true); __update_runtime_status(dev, RPM_ACTIVE); dev->power.deferred_resume = false; wake_up_all(&dev->power.wait_queue); @@ -903,7 +905,7 @@ static int rpm_resume(struct device *dev, int rpmflags) callback = RPM_GET_CALLBACK(dev, runtime_resume); - dev_pm_disable_wake_irq_check(dev); + dev_pm_disable_wake_irq_check(dev, false); retval = rpm_callback(callback, dev); if (retval) { __update_runtime_status(dev, RPM_SUSPENDED); diff --git a/drivers/base/power/wakeirq.c b/drivers/base/power/wakeirq.c index 8e021082dba8..a6d53f0173d3 100644 --- a/drivers/base/power/wakeirq.c +++ b/drivers/base/power/wakeirq.c @@ -145,24 +145,7 @@ static irqreturn_t handle_threaded_wake_irq(int irq, void *_wirq) return IRQ_HANDLED; } -/** - * dev_pm_set_dedicated_wake_irq - Request a dedicated wake-up interrupt - * @dev: Device entry - * @irq: Device wake-up interrupt - * - * Unless your hardware has separate wake-up interrupts in addition - * to the device IO interrupts, you don't need this. - * - * Sets up a threaded interrupt handler for a device that has - * a dedicated wake-up interrupt in addition to the device IO - * interrupt. - * - * The interrupt starts disabled, and needs to be managed for - * the device by the bus code or the device driver using - * dev_pm_enable_wake_irq() and dev_pm_disable_wake_irq() - * functions. - */ -int dev_pm_set_dedicated_wake_irq(struct device *dev, int irq) +static int __dev_pm_set_dedicated_wake_irq(struct device *dev, int irq, unsigned int flag) { struct wake_irq *wirq; int err; @@ -200,7 +183,7 @@ int dev_pm_set_dedicated_wake_irq(struct device *dev, int irq) if (err) goto err_free_irq; - wirq->status = WAKE_IRQ_DEDICATED_ALLOCATED; + wirq->status = WAKE_IRQ_DEDICATED_ALLOCATED | flag; return err; @@ -213,8 +196,57 @@ err_free: return err; } + + +/** + * dev_pm_set_dedicated_wake_irq - Request a dedicated wake-up interrupt + * @dev: Device entry + * @irq: Device wake-up interrupt + * + * Unless your hardware has separate wake-up interrupts in addition + * to the device IO interrupts, you don't need this. + * + * Sets up a threaded interrupt handler for a device that has + * a dedicated wake-up interrupt in addition to the device IO + * interrupt. + * + * The interrupt starts disabled, and needs to be managed for + * the device by the bus code or the device driver using + * dev_pm_enable_wake_irq*() and dev_pm_disable_wake_irq*() + * functions. + */ +int dev_pm_set_dedicated_wake_irq(struct device *dev, int irq) +{ + return __dev_pm_set_dedicated_wake_irq(dev, irq, 0); +} EXPORT_SYMBOL_GPL(dev_pm_set_dedicated_wake_irq); +/** + * dev_pm_set_dedicated_wake_irq_reverse - Request a dedicated wake-up interrupt + * with reverse enable ordering + * @dev: Device entry + * @irq: Device wake-up interrupt + * + * Unless your hardware has separate wake-up interrupts in addition + * to the device IO interrupts, you don't need this. + * + * Sets up a threaded interrupt handler for a device that has a dedicated + * wake-up interrupt in addition to the device IO interrupt. It sets + * the status of WAKE_IRQ_DEDICATED_REVERSE to tell rpm_suspend() + * to enable dedicated wake-up interrupt after running the runtime suspend + * callback for @dev. + * + * The interrupt starts disabled, and needs to be managed for + * the device by the bus code or the device driver using + * dev_pm_enable_wake_irq*() and dev_pm_disable_wake_irq*() + * functions. + */ +int dev_pm_set_dedicated_wake_irq_reverse(struct device *dev, int irq) +{ + return __dev_pm_set_dedicated_wake_irq(dev, irq, WAKE_IRQ_DEDICATED_REVERSE); +} +EXPORT_SYMBOL_GPL(dev_pm_set_dedicated_wake_irq_reverse); + /** * dev_pm_enable_wake_irq - Enable device wake-up interrupt * @dev: Device @@ -285,27 +317,54 @@ void dev_pm_enable_wake_irq_check(struct device *dev, return; enable: - enable_irq(wirq->irq); + if (!can_change_status || !(wirq->status & WAKE_IRQ_DEDICATED_REVERSE)) + enable_irq(wirq->irq); } /** * dev_pm_disable_wake_irq_check - Checks and disables wake-up interrupt * @dev: Device + * @cond_disable: if set, also check WAKE_IRQ_DEDICATED_REVERSE * * Disables wake-up interrupt conditionally based on status. * Should be only called from rpm_suspend() and rpm_resume() path. */ -void dev_pm_disable_wake_irq_check(struct device *dev) +void dev_pm_disable_wake_irq_check(struct device *dev, bool cond_disable) { struct wake_irq *wirq = dev->power.wakeirq; if (!wirq || !(wirq->status & WAKE_IRQ_DEDICATED_MASK)) return; + if (cond_disable && (wirq->status & WAKE_IRQ_DEDICATED_REVERSE)) + return; + if (wirq->status & WAKE_IRQ_DEDICATED_MANAGED) disable_irq_nosync(wirq->irq); } +/** + * dev_pm_enable_wake_irq_complete - enable wake IRQ not enabled before + * @dev: Device using the wake IRQ + * + * Enable wake IRQ conditionally based on status, mainly used if want to + * enable wake IRQ after running ->runtime_suspend() which depends on + * WAKE_IRQ_DEDICATED_REVERSE. + * + * Should be only called from rpm_suspend() path. + */ +void dev_pm_enable_wake_irq_complete(struct device *dev) +{ + struct wake_irq *wirq = dev->power.wakeirq; + + if (!wirq || !(wirq->status & WAKE_IRQ_DEDICATED_MASK)) + return; + + if (wirq->status & WAKE_IRQ_DEDICATED_MANAGED && + wirq->status & WAKE_IRQ_DEDICATED_REVERSE) + enable_irq(wirq->irq); +} + /** * dev_pm_arm_wake_irq - Arm device wake-up * @wirq: Device wake-up interrupt diff --git a/include/linux/pm_wakeirq.h b/include/linux/pm_wakeirq.h index cd5b62db9084..e63a63aa47a3 100644 --- a/include/linux/pm_wakeirq.h +++ b/include/linux/pm_wakeirq.h @@ -17,8 +17,8 @@ #ifdef CONFIG_PM extern int dev_pm_set_wake_irq(struct device *dev, int irq); -extern int dev_pm_set_dedicated_wake_irq(struct device *dev, - int irq); +extern int dev_pm_set_dedicated_wake_irq(struct device *dev, int irq); +extern int dev_pm_set_dedicated_wake_irq_reverse(struct device *dev, int irq); extern void dev_pm_clear_wake_irq(struct device *dev); extern void dev_pm_enable_wake_irq(struct device *dev); extern void dev_pm_disable_wake_irq(struct device *dev); @@ -35,6 +35,11 @@ static inline int dev_pm_set_dedicated_wake_irq(struct device *dev, int irq) return 0; } +static inline int dev_pm_set_dedicated_wake_irq_reverse(struct device *dev, int irq) +{ + return 0; +} + static inline void dev_pm_clear_wake_irq(struct device *dev) { } From 86e4e949ea815dbd517c8f96fe06a1f2b770085a Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Thu, 13 Jul 2023 16:57:39 +0200 Subject: [PATCH 192/203] PM: sleep: wakeirq: fix wake irq arming [ Upstream commit 8527beb12087238d4387607597b4020bc393c4b4 ] The decision whether to enable a wake irq during suspend can not be done based on the runtime PM state directly as a driver may use wake irqs without implementing runtime PM. Such drivers specifically leave the state set to the default 'suspended' and the wake irq is thus never enabled at suspend. Add a new wake irq flag to track whether a dedicated wake irq has been enabled at runtime suspend and therefore must not be enabled at system suspend. Note that pm_runtime_enabled() can not be used as runtime PM is always disabled during late suspend. Fixes: 69728051f5bf ("PM / wakeirq: Fix unbalanced IRQ enable for wakeirq") Cc: 4.16+ # 4.16+ Signed-off-by: Johan Hovold Reviewed-by: Tony Lindgren Tested-by: Tony Lindgren Signed-off-by: Rafael J. Wysocki Signed-off-by: Sasha Levin --- drivers/base/power/power.h | 1 + drivers/base/power/wakeirq.c | 12 ++++++++---- 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/drivers/base/power/power.h b/drivers/base/power/power.h index 0eb7f02b3ad5..922ed457db19 100644 --- a/drivers/base/power/power.h +++ b/drivers/base/power/power.h @@ -29,6 +29,7 @@ extern u64 pm_runtime_active_time(struct device *dev); #define WAKE_IRQ_DEDICATED_MASK (WAKE_IRQ_DEDICATED_ALLOCATED | \ WAKE_IRQ_DEDICATED_MANAGED | \ WAKE_IRQ_DEDICATED_REVERSE) +#define WAKE_IRQ_DEDICATED_ENABLED BIT(3) struct wake_irq { struct device *dev; diff --git a/drivers/base/power/wakeirq.c b/drivers/base/power/wakeirq.c index a6d53f0173d3..aea690c64e39 100644 --- a/drivers/base/power/wakeirq.c +++ b/drivers/base/power/wakeirq.c @@ -317,8 +317,10 @@ void dev_pm_enable_wake_irq_check(struct device *dev, return; enable: - if (!can_change_status || !(wirq->status & WAKE_IRQ_DEDICATED_REVERSE)) + if (!can_change_status || !(wirq->status & WAKE_IRQ_DEDICATED_REVERSE)) { enable_irq(wirq->irq); + wirq->status |= WAKE_IRQ_DEDICATED_ENABLED; + } } /** @@ -339,8 +341,10 @@ void dev_pm_disable_wake_irq_check(struct device *dev, bool cond_disable) if (cond_disable && (wirq->status & WAKE_IRQ_DEDICATED_REVERSE)) return; - if (wirq->status & WAKE_IRQ_DEDICATED_MANAGED) + if (wirq->status & WAKE_IRQ_DEDICATED_MANAGED) { + wirq->status &= ~WAKE_IRQ_DEDICATED_ENABLED; disable_irq_nosync(wirq->irq); + } } /** @@ -379,7 +383,7 @@ void dev_pm_arm_wake_irq(struct wake_irq *wirq) if (device_may_wakeup(wirq->dev)) { if (wirq->status & WAKE_IRQ_DEDICATED_ALLOCATED && - !pm_runtime_status_suspended(wirq->dev)) + !(wirq->status & WAKE_IRQ_DEDICATED_ENABLED)) enable_irq(wirq->irq); enable_irq_wake(wirq->irq); @@ -402,7 +406,7 @@ void dev_pm_disarm_wake_irq(struct wake_irq *wirq) disable_irq_wake(wirq->irq); if (wirq->status & WAKE_IRQ_DEDICATED_ALLOCATED && - !pm_runtime_status_suspended(wirq->dev)) + !(wirq->status & WAKE_IRQ_DEDICATED_ENABLED)) disable_irq_nosync(wirq->irq); } } From e0d192a4023ef6839f95e320fb1d9accc3ff66e5 Mon Sep 17 00:00:00 2001 From: Hyeongseok Kim Date: Mon, 22 Mar 2021 12:53:36 +0900 Subject: [PATCH 193/203] exfat: speed up iterate/lookup by fixing start point of traversing cluster chain [ Upstream commit c6e2f52e3051e8d898d38840104638ca8bbcdec2 ] When directory iterate and lookup is called, there's a buggy rewinding of start point for traversing cluster chain to the parent directory entry's first cluster. This caused repeated cluster chain traversing from the first entry of the parent directory that would show worse performance if huge amounts of files exist under the parent directory. Fix not to rewind, make continue from currently referenced cluster and dir entry. Tested with 50,000 files under single directory / 256GB sdcard, with command "time ls -l > /dev/null", Before : 0m08.69s real 0m00.27s user 0m05.91s system After : 0m07.01s real 0m00.25s user 0m04.34s system Signed-off-by: Hyeongseok Kim Reviewed-by: Sungjong Seo Signed-off-by: Namjae Jeon Stable-dep-of: d42334578eba ("exfat: check if filename entries exceeds max filename length") Signed-off-by: Sasha Levin --- fs/exfat/dir.c | 19 +++++++++++++------ fs/exfat/exfat_fs.h | 2 +- fs/exfat/namei.c | 9 ++++++++- 3 files changed, 22 insertions(+), 8 deletions(-) diff --git a/fs/exfat/dir.c b/fs/exfat/dir.c index 093f79ae3c67..0e1886f9a624 100644 --- a/fs/exfat/dir.c +++ b/fs/exfat/dir.c @@ -148,7 +148,7 @@ static int exfat_readdir(struct inode *inode, loff_t *cpos, struct exfat_dir_ent 0); *uni_name.name = 0x0; - exfat_get_uniname_from_ext_entry(sb, &dir, dentry, + exfat_get_uniname_from_ext_entry(sb, &clu, i, uni_name.name); exfat_utf16_to_nls(sb, &uni_name, dir_entry->namebuf.lfn, @@ -902,14 +902,19 @@ enum { }; /* - * return values: - * >= 0 : return dir entiry position with the name in dir - * -ENOENT : entry with the name does not exist - * -EIO : I/O error + * @ei: inode info of parent directory + * @p_dir: directory structure of parent directory + * @num_entries:entry size of p_uniname + * @hint_opt: If p_uniname is found, filled with optimized dir/entry + * for traversing cluster chain. + * @return: + * >= 0: file directory entry position where the name exists + * -ENOENT: entry with the name does not exist + * -EIO: I/O error */ int exfat_find_dir_entry(struct super_block *sb, struct exfat_inode_info *ei, struct exfat_chain *p_dir, struct exfat_uni_name *p_uniname, - int num_entries, unsigned int type) + int num_entries, unsigned int type, struct exfat_hint *hint_opt) { int i, rewind = 0, dentry = 0, end_eidx = 0, num_ext = 0, len; int order, step, name_len = 0; @@ -986,6 +991,8 @@ rewind: if (entry_type == TYPE_FILE || entry_type == TYPE_DIR) { step = DIRENT_STEP_FILE; + hint_opt->clu = clu.dir; + hint_opt->eidx = i; if (type == TYPE_ALL || type == entry_type) { num_ext = ep->dentry.file.num_ext; step = DIRENT_STEP_STRM; diff --git a/fs/exfat/exfat_fs.h b/fs/exfat/exfat_fs.h index 07b09af57436..436683da2515 100644 --- a/fs/exfat/exfat_fs.h +++ b/fs/exfat/exfat_fs.h @@ -458,7 +458,7 @@ void exfat_update_dir_chksum_with_entry_set(struct exfat_entry_set_cache *es); int exfat_calc_num_entries(struct exfat_uni_name *p_uniname); int exfat_find_dir_entry(struct super_block *sb, struct exfat_inode_info *ei, struct exfat_chain *p_dir, struct exfat_uni_name *p_uniname, - int num_entries, unsigned int type); + int num_entries, unsigned int type, struct exfat_hint *hint_opt); int exfat_alloc_new_dir(struct inode *inode, struct exfat_chain *clu); int exfat_find_location(struct super_block *sb, struct exfat_chain *p_dir, int entry, sector_t *sector, int *offset); diff --git a/fs/exfat/namei.c b/fs/exfat/namei.c index 1382d816912c..bd00afc5e4c1 100644 --- a/fs/exfat/namei.c +++ b/fs/exfat/namei.c @@ -596,6 +596,8 @@ static int exfat_find(struct inode *dir, struct qstr *qname, struct exfat_inode_info *ei = EXFAT_I(dir); struct exfat_dentry *ep, *ep2; struct exfat_entry_set_cache *es; + /* for optimized dir & entry to prevent long traverse of cluster chain */ + struct exfat_hint hint_opt; if (qname->len == 0) return -ENOENT; @@ -619,7 +621,7 @@ static int exfat_find(struct inode *dir, struct qstr *qname, /* search the file name for directories */ dentry = exfat_find_dir_entry(sb, ei, &cdir, &uni_name, - num_entries, TYPE_ALL); + num_entries, TYPE_ALL, &hint_opt); if (dentry < 0) return dentry; /* -error value */ @@ -628,6 +630,11 @@ static int exfat_find(struct inode *dir, struct qstr *qname, info->entry = dentry; info->num_subdirs = 0; + /* adjust cdir to the optimized value */ + cdir.dir = hint_opt.clu; + if (cdir.flags & ALLOC_NO_FAT_CHAIN) + cdir.size -= dentry / sbi->dentries_per_clu; + dentry = hint_opt.eidx; es = exfat_get_dentry_set(sb, &cdir, dentry, ES_2_ENTRIES); if (!es) return -EIO; From bd3bdb9e0d656f760b11d0c638d35d7f7068144d Mon Sep 17 00:00:00 2001 From: Yuezhang Mo Date: Wed, 9 Nov 2022 13:50:22 +0800 Subject: [PATCH 194/203] exfat: support dynamic allocate bh for exfat_entry_set_cache [ Upstream commit a3ff29a95fde16906304455aa8c0bd84eb770258 ] In special cases, a file or a directory may occupied more than 19 directory entries, pre-allocating 3 bh is not enough. Such as - Support vendor secondary directory entry in the future. - Since file directory entry is damaged, the SecondaryCount field is bigger than 18. So this commit supports dynamic allocation of bh. Signed-off-by: Yuezhang Mo Reviewed-by: Andy Wu Reviewed-by: Aoyama Wataru Reviewed-by: Sungjong Seo Signed-off-by: Namjae Jeon Stable-dep-of: d42334578eba ("exfat: check if filename entries exceeds max filename length") Signed-off-by: Sasha Levin --- fs/exfat/dir.c | 15 +++++++++++++++ fs/exfat/exfat_fs.h | 5 ++++- 2 files changed, 19 insertions(+), 1 deletion(-) diff --git a/fs/exfat/dir.c b/fs/exfat/dir.c index 0e1886f9a624..185aa13945d3 100644 --- a/fs/exfat/dir.c +++ b/fs/exfat/dir.c @@ -609,6 +609,10 @@ int exfat_free_dentry_set(struct exfat_entry_set_cache *es, int sync) bforget(es->bh[i]); else brelse(es->bh[i]); + + if (IS_DYNAMIC_ES(es)) + kfree(es->bh); + kfree(es); return err; } @@ -844,6 +848,7 @@ struct exfat_entry_set_cache *exfat_get_dentry_set(struct super_block *sb, /* byte offset in sector */ off = EXFAT_BLK_OFFSET(byte_offset, sb); es->start_off = off; + es->bh = es->__bh; /* sector offset in cluster */ sec = EXFAT_B_TO_BLK(byte_offset, sb); @@ -863,6 +868,16 @@ struct exfat_entry_set_cache *exfat_get_dentry_set(struct super_block *sb, es->num_entries = num_entries; num_bh = EXFAT_B_TO_BLK_ROUND_UP(off + num_entries * DENTRY_SIZE, sb); + if (num_bh > ARRAY_SIZE(es->__bh)) { + es->bh = kmalloc_array(num_bh, sizeof(*es->bh), GFP_KERNEL); + if (!es->bh) { + brelse(bh); + kfree(es); + return NULL; + } + es->bh[0] = bh; + } + for (i = 1; i < num_bh; i++) { /* get the next sector */ if (exfat_is_last_sector_in_cluster(sbi, sec)) { diff --git a/fs/exfat/exfat_fs.h b/fs/exfat/exfat_fs.h index 436683da2515..11e579a2598d 100644 --- a/fs/exfat/exfat_fs.h +++ b/fs/exfat/exfat_fs.h @@ -170,10 +170,13 @@ struct exfat_entry_set_cache { bool modified; unsigned int start_off; int num_bh; - struct buffer_head *bh[DIR_CACHE_SIZE]; + struct buffer_head *__bh[DIR_CACHE_SIZE]; + struct buffer_head **bh; unsigned int num_entries; }; +#define IS_DYNAMIC_ES(es) ((es)->__bh != (es)->bh) + struct exfat_dir_entry { struct exfat_chain dir; int entry; From 381f7df0f3c3bd7dceb3e2b2b64c2f6247e2ac19 Mon Sep 17 00:00:00 2001 From: Namjae Jeon Date: Thu, 13 Jul 2023 21:59:37 +0900 Subject: [PATCH 195/203] exfat: check if filename entries exceeds max filename length [ Upstream commit d42334578eba1390859012ebb91e1e556d51db49 ] exfat_extract_uni_name copies characters from a given file name entry into the 'uniname' variable. This variable is actually defined on the stack of the exfat_readdir() function. According to the definition of the 'exfat_uni_name' type, the file name should be limited 255 characters (+ null teminator space), but the exfat_get_uniname_from_ext_entry() function can write more characters because there is no check if filename entries exceeds max filename length. This patch add the check not to copy filename characters when exceeding max filename length. Cc: stable@vger.kernel.org Cc: Yuezhang Mo Reported-by: Maxim Suhanov Reviewed-by: Sungjong Seo Signed-off-by: Namjae Jeon Signed-off-by: Sasha Levin --- fs/exfat/dir.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/fs/exfat/dir.c b/fs/exfat/dir.c index 185aa13945d3..db735a0d32fc 100644 --- a/fs/exfat/dir.c +++ b/fs/exfat/dir.c @@ -33,6 +33,7 @@ static void exfat_get_uniname_from_ext_entry(struct super_block *sb, { int i; struct exfat_entry_set_cache *es; + unsigned int uni_len = 0, len; es = exfat_get_dentry_set(sb, p_dir, entry, ES_ALL_ENTRIES); if (!es) @@ -51,7 +52,10 @@ static void exfat_get_uniname_from_ext_entry(struct super_block *sb, if (exfat_get_entry_type(ep) != TYPE_EXTEND) break; - exfat_extract_uni_name(ep, uniname); + len = exfat_extract_uni_name(ep, uniname); + uni_len += len; + if (len != EXFAT_FILE_NAME_LEN || uni_len >= MAX_NAME_LENGTH) + break; uniname += EXFAT_FILE_NAME_LEN; } @@ -1042,7 +1046,8 @@ rewind: if (entry_type == TYPE_EXTEND) { unsigned short entry_uniname[16], unichar; - if (step != DIRENT_STEP_NAME) { + if (step != DIRENT_STEP_NAME || + name_len >= MAX_NAME_LENGTH) { step = DIRENT_STEP_FILE; continue; } From b85c7882fd3cc226a2fe26957a7a9fe27b75f753 Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Thu, 12 Nov 2020 10:56:56 +0100 Subject: [PATCH 196/203] mt76: move band capabilities in mt76_phy [ Upstream commit 48dbce5cb1ba3ce5b2ed3e997bcb1e4697d41b71 ] This is a preliminary patch to move properly support mt7915 dbdc Signed-off-by: Lorenzo Bianconi Signed-off-by: Felix Fietkau Stable-dep-of: 421033deb915 ("wifi: mt76: mt7615: do not advertise 5 GHz on first phy of MT7615D (DBDC)") Signed-off-by: Sasha Levin --- drivers/net/wireless/mediatek/mt76/mac80211.c | 8 ++++---- drivers/net/wireless/mediatek/mt76/mt76.h | 2 +- .../net/wireless/mediatek/mt76/mt7603/eeprom.c | 2 +- drivers/net/wireless/mediatek/mt76/mt7615/dma.c | 2 +- .../net/wireless/mediatek/mt76/mt7615/eeprom.c | 16 ++++++++-------- .../net/wireless/mediatek/mt76/mt76x0/eeprom.c | 6 +++--- drivers/net/wireless/mediatek/mt76/mt76x0/init.c | 4 ++-- drivers/net/wireless/mediatek/mt76/mt76x0/phy.c | 4 ++-- .../net/wireless/mediatek/mt76/mt76x02_eeprom.c | 8 ++++---- .../net/wireless/mediatek/mt76/mt7915/eeprom.c | 8 ++++---- drivers/net/wireless/mediatek/mt76/mt7915/init.c | 5 ++--- 11 files changed, 32 insertions(+), 33 deletions(-) diff --git a/drivers/net/wireless/mediatek/mt76/mac80211.c b/drivers/net/wireless/mediatek/mt76/mac80211.c index 81ff3b4c6c1b..dc1191aa0443 100644 --- a/drivers/net/wireless/mediatek/mt76/mac80211.c +++ b/drivers/net/wireless/mediatek/mt76/mac80211.c @@ -160,9 +160,9 @@ static void mt76_init_stream_cap(struct mt76_phy *phy, void mt76_set_stream_caps(struct mt76_phy *phy, bool vht) { - if (phy->dev->cap.has_2ghz) + if (phy->cap.has_2ghz) mt76_init_stream_cap(phy, &phy->sband_2g.sband, false); - if (phy->dev->cap.has_5ghz) + if (phy->cap.has_5ghz) mt76_init_stream_cap(phy, &phy->sband_5g.sband, vht); } EXPORT_SYMBOL_GPL(mt76_set_stream_caps); @@ -463,13 +463,13 @@ int mt76_register_device(struct mt76_dev *dev, bool vht, dev_set_drvdata(dev->dev, dev); mt76_phy_init(dev, hw); - if (dev->cap.has_2ghz) { + if (phy->cap.has_2ghz) { ret = mt76_init_sband_2g(dev, rates, n_rates); if (ret) return ret; } - if (dev->cap.has_5ghz) { + if (phy->cap.has_5ghz) { ret = mt76_init_sband_5g(dev, rates + 4, n_rates - 4, vht); if (ret) return ret; diff --git a/drivers/net/wireless/mediatek/mt76/mt76.h b/drivers/net/wireless/mediatek/mt76/mt76.h index 5a8060790a61..16e65020a242 100644 --- a/drivers/net/wireless/mediatek/mt76/mt76.h +++ b/drivers/net/wireless/mediatek/mt76/mt76.h @@ -561,6 +561,7 @@ struct mt76_phy { struct mt76_channel_state *chan_state; ktime_t survey_time; + struct mt76_hw_cap cap; struct mt76_sband sband_2g; struct mt76_sband sband_5g; @@ -630,7 +631,6 @@ struct mt76_dev { struct debugfs_blob_wrapper eeprom; struct debugfs_blob_wrapper otp; - struct mt76_hw_cap cap; struct mt76_rate_power rate_power; diff --git a/drivers/net/wireless/mediatek/mt76/mt7603/eeprom.c b/drivers/net/wireless/mediatek/mt76/mt7603/eeprom.c index 01f1e0da5ee1..a6df733aca49 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7603/eeprom.c +++ b/drivers/net/wireless/mediatek/mt76/mt7603/eeprom.c @@ -170,7 +170,7 @@ int mt7603_eeprom_init(struct mt7603_dev *dev) } eeprom = (u8 *)dev->mt76.eeprom.data; - dev->mt76.cap.has_2ghz = true; + dev->mphy.cap.has_2ghz = true; memcpy(dev->mt76.macaddr, eeprom + MT_EE_MAC_ADDR, ETH_ALEN); /* Check for 1SS devices */ diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/dma.c b/drivers/net/wireless/mediatek/mt76/mt7615/dma.c index bf8ae14121db..637ef0882436 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7615/dma.c +++ b/drivers/net/wireless/mediatek/mt76/mt7615/dma.c @@ -202,7 +202,7 @@ int mt7615_dma_init(struct mt7615_dev *dev) int ret; /* Increase buffer size to receive large VHT MPDUs */ - if (dev->mt76.cap.has_5ghz) + if (dev->mphy.cap.has_5ghz) rx_buf_size *= 2; mt76_dma_attach(&dev->mt76); diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/eeprom.c b/drivers/net/wireless/mediatek/mt76/mt7615/eeprom.c index e9cdcdc54d5c..714d1ff69c56 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7615/eeprom.c +++ b/drivers/net/wireless/mediatek/mt76/mt7615/eeprom.c @@ -100,20 +100,20 @@ mt7615_eeprom_parse_hw_band_cap(struct mt7615_dev *dev) if (is_mt7663(&dev->mt76)) { /* dual band */ - dev->mt76.cap.has_2ghz = true; - dev->mt76.cap.has_5ghz = true; + dev->mphy.cap.has_2ghz = true; + dev->mphy.cap.has_5ghz = true; return; } if (is_mt7622(&dev->mt76)) { /* 2GHz only */ - dev->mt76.cap.has_2ghz = true; + dev->mphy.cap.has_2ghz = true; return; } if (is_mt7611(&dev->mt76)) { /* 5GHz only */ - dev->mt76.cap.has_5ghz = true; + dev->mphy.cap.has_5ghz = true; return; } @@ -121,17 +121,17 @@ mt7615_eeprom_parse_hw_band_cap(struct mt7615_dev *dev) eeprom[MT_EE_WIFI_CONF]); switch (val) { case MT_EE_5GHZ: - dev->mt76.cap.has_5ghz = true; + dev->mphy.cap.has_5ghz = true; break; case MT_EE_2GHZ: - dev->mt76.cap.has_2ghz = true; + dev->mphy.cap.has_2ghz = true; break; case MT_EE_DBDC: dev->dbdc_support = true; /* fall through */ default: - dev->mt76.cap.has_2ghz = true; - dev->mt76.cap.has_5ghz = true; + dev->mphy.cap.has_2ghz = true; + dev->mphy.cap.has_5ghz = true; break; } } diff --git a/drivers/net/wireless/mediatek/mt76/mt76x0/eeprom.c b/drivers/net/wireless/mediatek/mt76/mt76x0/eeprom.c index 9087607b621e..ebf4c96532d3 100644 --- a/drivers/net/wireless/mediatek/mt76/mt76x0/eeprom.c +++ b/drivers/net/wireless/mediatek/mt76/mt76x0/eeprom.c @@ -52,15 +52,15 @@ static void mt76x0_set_chip_cap(struct mt76x02_dev *dev) mt76x02_eeprom_parse_hw_cap(dev); dev_dbg(dev->mt76.dev, "2GHz %d 5GHz %d\n", - dev->mt76.cap.has_2ghz, dev->mt76.cap.has_5ghz); + dev->mphy.cap.has_2ghz, dev->mphy.cap.has_5ghz); if (dev->no_2ghz) { - dev->mt76.cap.has_2ghz = false; + dev->mphy.cap.has_2ghz = false; dev_dbg(dev->mt76.dev, "mask out 2GHz support\n"); } if (is_mt7630(dev)) { - dev->mt76.cap.has_5ghz = false; + dev->mphy.cap.has_5ghz = false; dev_dbg(dev->mt76.dev, "mask out 5GHz support\n"); } diff --git a/drivers/net/wireless/mediatek/mt76/mt76x0/init.c b/drivers/net/wireless/mediatek/mt76/mt76x0/init.c index d78866bf41ba..0bac39bf3b66 100644 --- a/drivers/net/wireless/mediatek/mt76/mt76x0/init.c +++ b/drivers/net/wireless/mediatek/mt76/mt76x0/init.c @@ -245,7 +245,7 @@ int mt76x0_register_device(struct mt76x02_dev *dev) if (ret) return ret; - if (dev->mt76.cap.has_5ghz) { + if (dev->mphy.cap.has_5ghz) { struct ieee80211_supported_band *sband; sband = &dev->mphy.sband_5g.sband; @@ -253,7 +253,7 @@ int mt76x0_register_device(struct mt76x02_dev *dev) mt76x0_init_txpower(dev, sband); } - if (dev->mt76.cap.has_2ghz) + if (dev->mphy.cap.has_2ghz) mt76x0_init_txpower(dev, &dev->mphy.sband_2g.sband); mt76x02_init_debugfs(dev); diff --git a/drivers/net/wireless/mediatek/mt76/mt76x0/phy.c b/drivers/net/wireless/mediatek/mt76/mt76x0/phy.c index 3de33aadf794..e91c314cdfac 100644 --- a/drivers/net/wireless/mediatek/mt76/mt76x0/phy.c +++ b/drivers/net/wireless/mediatek/mt76/mt76x0/phy.c @@ -447,11 +447,11 @@ static void mt76x0_phy_ant_select(struct mt76x02_dev *dev) else coex3 |= BIT(4); coex3 |= BIT(3); - if (dev->mt76.cap.has_2ghz) + if (dev->mphy.cap.has_2ghz) wlan |= BIT(6); } else { /* sigle antenna mode */ - if (dev->mt76.cap.has_5ghz) { + if (dev->mphy.cap.has_5ghz) { coex3 |= BIT(3) | BIT(4); } else { wlan |= BIT(6); diff --git a/drivers/net/wireless/mediatek/mt76/mt76x02_eeprom.c b/drivers/net/wireless/mediatek/mt76/mt76x02_eeprom.c index c54c50fd639a..0acabba2d1a5 100644 --- a/drivers/net/wireless/mediatek/mt76/mt76x02_eeprom.c +++ b/drivers/net/wireless/mediatek/mt76/mt76x02_eeprom.c @@ -75,14 +75,14 @@ void mt76x02_eeprom_parse_hw_cap(struct mt76x02_dev *dev) switch (FIELD_GET(MT_EE_NIC_CONF_0_BOARD_TYPE, val)) { case BOARD_TYPE_5GHZ: - dev->mt76.cap.has_5ghz = true; + dev->mphy.cap.has_5ghz = true; break; case BOARD_TYPE_2GHZ: - dev->mt76.cap.has_2ghz = true; + dev->mphy.cap.has_2ghz = true; break; default: - dev->mt76.cap.has_2ghz = true; - dev->mt76.cap.has_5ghz = true; + dev->mphy.cap.has_2ghz = true; + dev->mphy.cap.has_5ghz = true; break; } } diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/eeprom.c b/drivers/net/wireless/mediatek/mt76/mt7915/eeprom.c index e4c5f968f706..5f6c527611f2 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7915/eeprom.c +++ b/drivers/net/wireless/mediatek/mt76/mt7915/eeprom.c @@ -57,14 +57,14 @@ static void mt7915_eeprom_parse_hw_cap(struct mt7915_dev *dev) val = FIELD_GET(MT_EE_WIFI_CONF_BAND_SEL, val); switch (val) { case MT_EE_5GHZ: - dev->mt76.cap.has_5ghz = true; + dev->mphy.cap.has_5ghz = true; break; case MT_EE_2GHZ: - dev->mt76.cap.has_2ghz = true; + dev->mphy.cap.has_2ghz = true; break; default: - dev->mt76.cap.has_2ghz = true; - dev->mt76.cap.has_5ghz = true; + dev->mphy.cap.has_2ghz = true; + dev->mphy.cap.has_5ghz = true; break; } diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/init.c b/drivers/net/wireless/mediatek/mt76/mt7915/init.c index 8f01ca1694bc..99683688a836 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7915/init.c +++ b/drivers/net/wireless/mediatek/mt76/mt7915/init.c @@ -528,10 +528,9 @@ void mt7915_set_stream_he_caps(struct mt7915_phy *phy) { struct ieee80211_sband_iftype_data *data; struct ieee80211_supported_band *band; - struct mt76_dev *mdev = &phy->dev->mt76; int n; - if (mdev->cap.has_2ghz) { + if (phy->mt76->cap.has_2ghz) { data = phy->iftype[NL80211_BAND_2GHZ]; n = mt7915_init_he_caps(phy, NL80211_BAND_2GHZ, data); @@ -540,7 +539,7 @@ void mt7915_set_stream_he_caps(struct mt7915_phy *phy) band->n_iftype_data = n; } - if (mdev->cap.has_5ghz) { + if (phy->mt76->cap.has_5ghz) { data = phy->iftype[NL80211_BAND_5GHZ]; n = mt7915_init_he_caps(phy, NL80211_BAND_5GHZ, data); From 37fad83ae5276b007b9e36456be68b80c9649d57 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Fri, 20 Nov 2020 12:37:18 -0600 Subject: [PATCH 197/203] mt76: mt7615: Fix fall-through warnings for Clang [ Upstream commit f12758f6f929dbcd37abdb1d91d245539eca48f8 ] In preparation to enable -Wimplicit-fallthrough for Clang, fix a warning by replacing a /* fall through */ comment with the new pseudo-keyword macro fallthrough; instead of letting the code fall through to the next case. Notice that Clang doesn't recognize /* fall through */ comments as implicit fall-through markings. Link: https://github.com/KSPP/linux/issues/115 Signed-off-by: Gustavo A. R. Silva Signed-off-by: Felix Fietkau Stable-dep-of: 421033deb915 ("wifi: mt76: mt7615: do not advertise 5 GHz on first phy of MT7615D (DBDC)") Signed-off-by: Sasha Levin --- drivers/net/wireless/mediatek/mt76/mt7615/eeprom.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/eeprom.c b/drivers/net/wireless/mediatek/mt76/mt7615/eeprom.c index 714d1ff69c56..48ce24f0f77b 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7615/eeprom.c +++ b/drivers/net/wireless/mediatek/mt76/mt7615/eeprom.c @@ -128,7 +128,7 @@ mt7615_eeprom_parse_hw_band_cap(struct mt7615_dev *dev) break; case MT_EE_DBDC: dev->dbdc_support = true; - /* fall through */ + fallthrough; default: dev->mphy.cap.has_2ghz = true; dev->mphy.cap.has_5ghz = true; From c0e7123e896acc122edbcb81ee16ea28ef09a492 Mon Sep 17 00:00:00 2001 From: Paul Fertser Date: Mon, 5 Jun 2023 10:34:07 +0300 Subject: [PATCH 198/203] wifi: mt76: mt7615: do not advertise 5 GHz on first phy of MT7615D (DBDC) [ Upstream commit 421033deb91521aa6a9255e495cb106741a52275 ] On DBDC devices the first (internal) phy is only capable of using 2.4 GHz band, and the 5 GHz band is exposed via a separate phy object, so avoid the false advertising. Reported-by: Rani Hod Closes: https://github.com/openwrt/openwrt/pull/12361 Fixes: 7660a1bd0c22 ("mt76: mt7615: register ext_phy if DBDC is detected") Cc: stable@vger.kernel.org Signed-off-by: Paul Fertser Reviewed-by: Simon Horman Acked-by: Felix Fietkau Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/20230605073408.8699-1-fercerpav@gmail.com Signed-off-by: Sasha Levin --- drivers/net/wireless/mediatek/mt76/mt7615/eeprom.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/eeprom.c b/drivers/net/wireless/mediatek/mt76/mt7615/eeprom.c index 48ce24f0f77b..85f56487feff 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7615/eeprom.c +++ b/drivers/net/wireless/mediatek/mt76/mt7615/eeprom.c @@ -123,12 +123,12 @@ mt7615_eeprom_parse_hw_band_cap(struct mt7615_dev *dev) case MT_EE_5GHZ: dev->mphy.cap.has_5ghz = true; break; - case MT_EE_2GHZ: - dev->mphy.cap.has_2ghz = true; - break; case MT_EE_DBDC: dev->dbdc_support = true; fallthrough; + case MT_EE_2GHZ: + dev->mphy.cap.has_2ghz = true; + break; default: dev->mphy.cap.has_2ghz = true; dev->mphy.cap.has_5ghz = true; From f1c928496d2a9e9a5567a6bff04d952e284d9e68 Mon Sep 17 00:00:00 2001 From: Peng Fan Date: Sun, 1 Nov 2020 19:29:53 +0800 Subject: [PATCH 199/203] ARM: dts: imx: add usb alias [ Upstream commit 5c8b3b8a182cbc1ccdfcdeea9b25dd2c12a8148f ] Add usb alias for bootloader searching the controller in correct order. Signed-off-by: Peng Fan Signed-off-by: Shawn Guo Stable-dep-of: ee70b908f77a ("ARM: dts: nxp/imx6sll: fix wrong property name in usbphy node") Signed-off-by: Sasha Levin --- arch/arm/boot/dts/imx6qdl.dtsi | 4 ++++ arch/arm/boot/dts/imx6sl.dtsi | 3 +++ arch/arm/boot/dts/imx6sll.dtsi | 2 ++ arch/arm/boot/dts/imx6sx.dtsi | 3 +++ arch/arm/boot/dts/imx6ul.dtsi | 2 ++ arch/arm/boot/dts/imx7d.dtsi | 6 ++++++ arch/arm/boot/dts/imx7s.dtsi | 2 ++ 7 files changed, 22 insertions(+) diff --git a/arch/arm/boot/dts/imx6qdl.dtsi b/arch/arm/boot/dts/imx6qdl.dtsi index 7858ae5d39df..7d81992dfafc 100644 --- a/arch/arm/boot/dts/imx6qdl.dtsi +++ b/arch/arm/boot/dts/imx6qdl.dtsi @@ -45,6 +45,10 @@ spi1 = &ecspi2; spi2 = &ecspi3; spi3 = &ecspi4; + usb0 = &usbotg; + usb1 = &usbh1; + usb2 = &usbh2; + usb3 = &usbh3; usbphy0 = &usbphy1; usbphy1 = &usbphy2; }; diff --git a/arch/arm/boot/dts/imx6sl.dtsi b/arch/arm/boot/dts/imx6sl.dtsi index c184a6d5bc42..5b4dfc62030e 100644 --- a/arch/arm/boot/dts/imx6sl.dtsi +++ b/arch/arm/boot/dts/imx6sl.dtsi @@ -39,6 +39,9 @@ spi1 = &ecspi2; spi2 = &ecspi3; spi3 = &ecspi4; + usb0 = &usbotg1; + usb1 = &usbotg2; + usb2 = &usbh; usbphy0 = &usbphy1; usbphy1 = &usbphy2; }; diff --git a/arch/arm/boot/dts/imx6sll.dtsi b/arch/arm/boot/dts/imx6sll.dtsi index bf5b262b91f9..eecb2f68a1c3 100644 --- a/arch/arm/boot/dts/imx6sll.dtsi +++ b/arch/arm/boot/dts/imx6sll.dtsi @@ -36,6 +36,8 @@ spi1 = &ecspi2; spi3 = &ecspi3; spi4 = &ecspi4; + usb0 = &usbotg1; + usb1 = &usbotg2; usbphy0 = &usbphy1; usbphy1 = &usbphy2; }; diff --git a/arch/arm/boot/dts/imx6sx.dtsi b/arch/arm/boot/dts/imx6sx.dtsi index c399919943c3..629c6a7432d9 100644 --- a/arch/arm/boot/dts/imx6sx.dtsi +++ b/arch/arm/boot/dts/imx6sx.dtsi @@ -49,6 +49,9 @@ spi2 = &ecspi3; spi3 = &ecspi4; spi4 = &ecspi5; + usb0 = &usbotg1; + usb1 = &usbotg2; + usb2 = &usbh; usbphy0 = &usbphy1; usbphy1 = &usbphy2; }; diff --git a/arch/arm/boot/dts/imx6ul.dtsi b/arch/arm/boot/dts/imx6ul.dtsi index c40684ad11b8..b40dd0c19802 100644 --- a/arch/arm/boot/dts/imx6ul.dtsi +++ b/arch/arm/boot/dts/imx6ul.dtsi @@ -47,6 +47,8 @@ spi1 = &ecspi2; spi2 = &ecspi3; spi3 = &ecspi4; + usb0 = &usbotg1; + usb1 = &usbotg2; usbphy0 = &usbphy1; usbphy1 = &usbphy2; }; diff --git a/arch/arm/boot/dts/imx7d.dtsi b/arch/arm/boot/dts/imx7d.dtsi index cff875b80b60..b0bcfa9094a3 100644 --- a/arch/arm/boot/dts/imx7d.dtsi +++ b/arch/arm/boot/dts/imx7d.dtsi @@ -7,6 +7,12 @@ #include / { + aliases { + usb0 = &usbotg1; + usb1 = &usbotg2; + usb2 = &usbh; + }; + cpus { cpu0: cpu@0 { clock-frequency = <996000000>; diff --git a/arch/arm/boot/dts/imx7s.dtsi b/arch/arm/boot/dts/imx7s.dtsi index 43b39ad9ddce..334e781663cc 100644 --- a/arch/arm/boot/dts/imx7s.dtsi +++ b/arch/arm/boot/dts/imx7s.dtsi @@ -47,6 +47,8 @@ spi1 = &ecspi2; spi2 = &ecspi3; spi3 = &ecspi4; + usb0 = &usbotg1; + usb1 = &usbh; }; cpus { From 14f2e2ac731b61895d3e335cc23ee0c192ec429f Mon Sep 17 00:00:00 2001 From: Andreas Kemnade Date: Fri, 24 Sep 2021 11:14:37 +0200 Subject: [PATCH 200/203] ARM: dts: imx6sll: fixup of operating points [ Upstream commit 1875903019ea6e32e6e544c1631b119e4fd60b20 ] Make operating point definitions comply with binding specifications. Signed-off-by: Andreas Kemnade Reviewed-by: Krzysztof Kozlowski Signed-off-by: Shawn Guo Stable-dep-of: ee70b908f77a ("ARM: dts: nxp/imx6sll: fix wrong property name in usbphy node") Signed-off-by: Sasha Levin --- arch/arm/boot/dts/imx6sll.dtsi | 22 ++++++++++------------ 1 file changed, 10 insertions(+), 12 deletions(-) diff --git a/arch/arm/boot/dts/imx6sll.dtsi b/arch/arm/boot/dts/imx6sll.dtsi index eecb2f68a1c3..2873369a57c0 100644 --- a/arch/arm/boot/dts/imx6sll.dtsi +++ b/arch/arm/boot/dts/imx6sll.dtsi @@ -51,20 +51,18 @@ device_type = "cpu"; reg = <0>; next-level-cache = <&L2>; - operating-points = < + operating-points = /* kHz uV */ - 996000 1275000 - 792000 1175000 - 396000 1075000 - 198000 975000 - >; - fsl,soc-operating-points = < + <996000 1275000>, + <792000 1175000>, + <396000 1075000>, + <198000 975000>; + fsl,soc-operating-points = /* ARM kHz SOC-PU uV */ - 996000 1175000 - 792000 1175000 - 396000 1175000 - 198000 1175000 - >; + <996000 1175000>, + <792000 1175000>, + <396000 1175000>, + <198000 1175000>; clock-latency = <61036>; /* two CLK32 periods */ #cooling-cells = <2>; clocks = <&clks IMX6SLL_CLK_ARM>, From 98b7ab5e8d88694c3407292a728dd3b53ad560cd Mon Sep 17 00:00:00 2001 From: Xu Yang Date: Mon, 17 Jul 2023 10:28:33 +0800 Subject: [PATCH 201/203] ARM: dts: nxp/imx6sll: fix wrong property name in usbphy node [ Upstream commit ee70b908f77a9d8f689dea986f09e6d7dc481934 ] Property name "phy-3p0-supply" is used instead of "phy-reg_3p0-supply". Fixes: 9f30b6b1a957 ("ARM: dts: imx: Add basic dtsi file for imx6sll") cc: Signed-off-by: Xu Yang Signed-off-by: Shawn Guo Signed-off-by: Sasha Levin --- arch/arm/boot/dts/imx6sll.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/boot/dts/imx6sll.dtsi b/arch/arm/boot/dts/imx6sll.dtsi index 2873369a57c0..3659fd5ecfa6 100644 --- a/arch/arm/boot/dts/imx6sll.dtsi +++ b/arch/arm/boot/dts/imx6sll.dtsi @@ -552,7 +552,7 @@ reg = <0x020ca000 0x1000>; interrupts = ; clocks = <&clks IMX6SLL_CLK_USBPHY2>; - phy-reg_3p0-supply = <®_3p0>; + phy-3p0-supply = <®_3p0>; fsl,anatop = <&anatop>; }; From b6fc2fbf89089ecfb8eb9a89a7fc91d444f4fec7 Mon Sep 17 00:00:00 2001 From: "Borislav Petkov (AMD)" Date: Sat, 5 Aug 2023 00:06:43 +0200 Subject: [PATCH 202/203] x86/CPU/AMD: Do not leak quotient data after a division by 0 commit 77245f1c3c6495521f6a3af082696ee2f8ce3921 upstream. Under certain circumstances, an integer division by 0 which faults, can leave stale quotient data from a previous division operation on Zen1 microarchitectures. Do a dummy division 0/1 before returning from the #DE exception handler in order to avoid any leaks of potentially sensitive data. Signed-off-by: Borislav Petkov (AMD) Cc: Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/cpufeatures.h | 1 + arch/x86/include/asm/processor.h | 2 ++ arch/x86/kernel/cpu/amd.c | 19 +++++++++++++++++++ arch/x86/kernel/traps.c | 2 ++ 4 files changed, 24 insertions(+) diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index 2c0c495b9cb6..5a54c3685a06 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -451,4 +451,5 @@ /* BUG word 2 */ #define X86_BUG_SRSO X86_BUG(1*32 + 0) /* AMD SRSO bug */ +#define X86_BUG_DIV0 X86_BUG(1*32 + 1) /* AMD DIV0 speculation bug */ #endif /* _ASM_X86_CPUFEATURES_H */ diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 6a9de5b1fe45..2dd9b661a5fd 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -809,10 +809,12 @@ DECLARE_PER_CPU(u64, msr_misc_features_shadow); extern u16 amd_get_nb_id(int cpu); extern u32 amd_get_nodes_per_socket(void); extern bool cpu_has_ibpb_brtype_microcode(void); +extern void amd_clear_divider(void); #else static inline u16 amd_get_nb_id(int cpu) { return 0; } static inline u32 amd_get_nodes_per_socket(void) { return 0; } static inline bool cpu_has_ibpb_brtype_microcode(void) { return false; } +static inline void amd_clear_divider(void) { } #endif static inline uint32_t hypervisor_cpuid_base(const char *sig, uint32_t leaves) diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index ffecf4e9444e..da38765aa74c 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -76,6 +76,10 @@ static const int amd_zenbleed[] = AMD_MODEL_RANGE(0x17, 0x60, 0x0, 0x7f, 0xf), AMD_MODEL_RANGE(0x17, 0xa0, 0x0, 0xaf, 0xf)); +static const int amd_div0[] = + AMD_LEGACY_ERRATUM(AMD_MODEL_RANGE(0x17, 0x00, 0x0, 0x2f, 0xf), + AMD_MODEL_RANGE(0x17, 0x50, 0x0, 0x5f, 0xf)); + static bool cpu_has_amd_erratum(struct cpuinfo_x86 *cpu, const int *erratum) { int osvw_id = *erratum++; @@ -1168,6 +1172,11 @@ static void init_amd(struct cpuinfo_x86 *c) check_null_seg_clears_base(c); zenbleed_check(c); + + if (cpu_has_amd_erratum(c, amd_div0)) { + pr_notice_once("AMD Zen1 DIV0 bug detected. Disable SMT for full protection.\n"); + setup_force_cpu_bug(X86_BUG_DIV0); + } } #ifdef CONFIG_X86_32 @@ -1312,3 +1321,13 @@ void amd_check_microcode(void) { on_each_cpu(zenbleed_check_cpu, NULL, 1); } + +/* + * Issue a DIV 0/1 insn to clear any division data from previous DIV + * operations. + */ +void noinstr amd_clear_divider(void) +{ + asm volatile(ALTERNATIVE("", "div %2\n\t", X86_BUG_DIV0) + :: "a" (0), "d" (0), "r" (1)); +} diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index 98838b784524..28f5cc0a9dec 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -198,6 +198,8 @@ DEFINE_IDTENTRY(exc_divide_error) { do_error_trap(regs, 0, "divide error", X86_TRAP_DE, SIGFPE, FPE_INTDIV, error_get_trap_addr(regs)); + + amd_clear_divider(); } DEFINE_IDTENTRY(exc_overflow) From ec585727b63d12f9683140fe4b8eb8e564dd3aa0 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Fri, 11 Aug 2023 11:57:56 +0200 Subject: [PATCH 203/203] Linux 5.10.190 Link: https://lore.kernel.org/r/20230809103643.799166053@linuxfoundation.org Tested-by: Joel Fernandes (Google) Tested-by: Florian Fainelli Tested-by: Guenter Roeck Tested-by: Linux Kernel Functional Testing Signed-off-by: Greg Kroah-Hartman --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 36047436fae3..bd2f45770363 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 VERSION = 5 PATCHLEVEL = 10 -SUBLEVEL = 189 +SUBLEVEL = 190 EXTRAVERSION = NAME = Dare mighty things