From 356801e2ffc71ae35573461e0559a6a62a27f939 Mon Sep 17 00:00:00 2001 From: Charan Teja Kalla Date: Thu, 27 Oct 2022 16:26:59 +0530 Subject: [PATCH 001/186] ANDROID: abi_gki_aarch64_qcom: whitelist some vm symbols Whitelist the below symbols: check_move_unevictable_pages __pagevec_release Leaf changes summary: 1 artifact changed Changed leaf types summary: 0 leaf type changed Removed/Changed/Added functions summary: 0 Removed, 0 Changed, 1 Added function Removed/Changed/Added variables summary: 0 Removed, 0 Changed, 0 Added variable 1 Added function: [A] 'function void check_move_unevictable_pages(pagevec*)' Bug: 255922561 Change-Id: Icdd54d0f0b155cc0617479ef58273020f1fd4e35 Signed-off-by: Charan Teja Kalla --- android/abi_gki_aarch64.xml | 5 +++++ android/abi_gki_aarch64_qcom | 2 ++ 2 files changed, 7 insertions(+) diff --git a/android/abi_gki_aarch64.xml b/android/abi_gki_aarch64.xml index 543e37374ea3..69fdfc8696c2 100644 --- a/android/abi_gki_aarch64.xml +++ b/android/abi_gki_aarch64.xml @@ -1126,6 +1126,7 @@ + @@ -122620,6 +122621,10 @@ + + + + diff --git a/android/abi_gki_aarch64_qcom b/android/abi_gki_aarch64_qcom index dc16110241bc..07b19dc38641 100644 --- a/android/abi_gki_aarch64_qcom +++ b/android/abi_gki_aarch64_qcom @@ -129,6 +129,7 @@ cgroup_path_ns cgroup_taskset_first cgroup_taskset_next + check_move_unevictable_pages __check_object_size check_preempt_curr check_zeroed_user @@ -1659,6 +1660,7 @@ page_endio page_mapping __page_pinner_migration_failed + __pagevec_release panic panic_notifier_list panic_timeout From af70ef5bfd0f388356db464f8d506c9761a7d01f Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Mon, 17 Oct 2022 17:52:05 -0700 Subject: [PATCH 002/186] FROMGIT: f2fs: let's avoid to get cp_rwsem twice by f2fs_evict_inode by d_invalidate f2fs_unlink -> f2fs_lock_op -> d_invalidate -> shrink_dentry_list -> iput_final -> f2fs_evict_inode -> f2fs_lock_op Bug: 253968159 Reviewed-by: Chao Yu Tested-by: Yangtao Li Signed-off-by: Jaegeuk Kim Change-Id: 
I281afd7ffa0c66509ec5984fd7774ccd4ddef1f4 (cherry picked from commit 14dc00a0e2dbea4b685ab9723ff511fcfd223c18 git://git.kernel.org/pub/scm/linux/kernel/git/jaegeuk/f2fs.git dev) --- fs/f2fs/namei.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c index fd4265c73b0d..c0ca487c6a16 100644 --- a/fs/f2fs/namei.c +++ b/fs/f2fs/namei.c @@ -618,6 +618,8 @@ static int f2fs_unlink(struct inode *dir, struct dentry *dentry) goto fail; } f2fs_delete_entry(de, page, dir, inode); + f2fs_unlock_op(sbi); + #ifdef CONFIG_UNICODE /* VFS negative dentries are incompatible with Encoding and * Case-insensitiveness. Eventually we'll want avoid @@ -628,8 +630,6 @@ static int f2fs_unlink(struct inode *dir, struct dentry *dentry) if (IS_CASEFOLDED(dir)) d_invalidate(dentry); #endif - f2fs_unlock_op(sbi); - if (IS_DIRSYNC(dir)) f2fs_sync_fs(sbi->sb, 1); fail: From 89fc7740586b8e25a99d7727123299b7cd9ed15f Mon Sep 17 00:00:00 2001 From: Jann Horn Date: Wed, 31 Aug 2022 21:13:48 +0200 Subject: [PATCH 003/186] UPSTREAM: mm: Force TLB flush for PFNMAP mappings before unlink_file_vma() commit b67fbebd4cf980aecbcc750e1462128bffe8ae15 upstream. Some drivers rely on having all VMAs through which a PFN might be accessible listed in the rmap for correctness. However, on X86, it was possible for a VMA with stale TLB entries to not be listed in the rmap. This was fixed in mainline with commit b67fbebd4cf9 ("mmu_gather: Force tlb-flush VM_PFNMAP vmas"), but that commit relies on preceding refactoring in commit 18ba064e42df3 ("mmu_gather: Let there be one tlb_{start,end}_vma() implementation") and commit 1e9fdf21a4339 ("mmu_gather: Remove per arch tlb_{start,end}_vma()"). This patch provides equivalent protection without needing that refactoring, by forcing a TLB flush between removing PTEs in unmap_vmas() and the call to unlink_file_vma() in free_pgtables(). Bug: 245812080 [This is a stable-specific rewrite of the upstream commit!] 
Signed-off-by: Jann Horn Signed-off-by: Greg Kroah-Hartman Signed-off-by: Lee Jones Change-Id: Ic29df5cfb76676aa87a14619dd19aba301580507 --- mm/mmap.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/mm/mmap.c b/mm/mmap.c index 1b21f03b59c6..89c6056fa654 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -2770,6 +2770,18 @@ static void unmap_region(struct mm_struct *mm, tlb_gather_mmu(&tlb, mm, start, end); update_hiwater_rss(mm); unmap_vmas(&tlb, vma, start, end); + + /* + * Ensure we have no stale TLB entries by the time this mapping is + * removed from the rmap. + * Note that we don't have to worry about nested flushes here because + * we're holding the mm semaphore for removing the mapping - so any + * concurrent flush in this region has to be coming through the rmap, + * and we synchronize against that using the rmap lock. + */ + if ((vma->vm_flags & (VM_PFNMAP|VM_MIXEDMAP)) != 0) + tlb_flush_mmu(&tlb); + free_pgtables(&tlb, vma, prev ? prev->vm_end : FIRST_USER_ADDRESS, next ? next->vm_start : USER_PGTABLES_CEILING); tlb_finish_mmu(&tlb, start, end); From 4e5c3aad569b412b102a24d24071854e647fbf5c Mon Sep 17 00:00:00 2001 From: Jann Horn Date: Thu, 15 Sep 2022 16:25:19 +0200 Subject: [PATCH 004/186] UPSTREAM: mm: Fix TLB flush for not-first PFNMAP mappings in unmap_region() This is a stable-specific patch. I botched the stable-specific rewrite of commit b67fbebd4cf98 ("mmu_gather: Force tlb-flush VM_PFNMAP vmas"): As Hugh pointed out, unmap_region() actually operates on a list of VMAs, and the variable "vma" merely points to the first VMA in that list. So if we want to check whether any of the VMAs we're operating on is PFNMAP or MIXEDMAP, we have to iterate through the list and check each VMA. 
Bug: 245812080 Signed-off-by: Jann Horn Signed-off-by: Greg Kroah-Hartman (cherry picked from commit 3998dc50ebdc127ae79b10992856fb76debc2005) Signed-off-by: Lee Jones Change-Id: Ib8ddb51815e53f42daec5d98a196866a078a7550 --- mm/mmap.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/mm/mmap.c b/mm/mmap.c index 89c6056fa654..8741f27e81a9 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -2765,6 +2765,7 @@ static void unmap_region(struct mm_struct *mm, { struct vm_area_struct *next = vma_next(mm, prev); struct mmu_gather tlb; + struct vm_area_struct *cur_vma; lru_add_drain(); tlb_gather_mmu(&tlb, mm, start, end); @@ -2779,8 +2780,12 @@ static void unmap_region(struct mm_struct *mm, * concurrent flush in this region has to be coming through the rmap, * and we synchronize against that using the rmap lock. */ - if ((vma->vm_flags & (VM_PFNMAP|VM_MIXEDMAP)) != 0) - tlb_flush_mmu(&tlb); + for (cur_vma = vma; cur_vma; cur_vma = cur_vma->vm_next) { + if ((cur_vma->vm_flags & (VM_PFNMAP|VM_MIXEDMAP)) != 0) { + tlb_flush_mmu(&tlb); + break; + } + } free_pgtables(&tlb, vma, prev ? prev->vm_end : FIRST_USER_ADDRESS, next ? next->vm_start : USER_PGTABLES_CEILING); From 3813ca3253df4e4f543a871eecaae218f6f3b8d9 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Thu, 4 Aug 2022 18:03:46 +0800 Subject: [PATCH 005/186] UPSTREAM: af_key: Do not call xfrm_probe_algs in parallel [ Upstream commit ba953a9d89a00c078b85f4b190bc1dde66fe16b5 ] When namespace support was added to xfrm/afkey, it caused the previously single-threaded call to xfrm_probe_algs to become multi-threaded. This is buggy and needs to be fixed with a mutex. 
Bug: 245674737 Reported-by: Abhishek Shah Fixes: 283bc9f35bbb ("xfrm: Namespacify xfrm state/policy locks") Signed-off-by: Herbert Xu Signed-off-by: Steffen Klassert Signed-off-by: Sasha Levin Change-Id: I71fb89a999447862a6c4b1ff754378bb0452ad3a Signed-off-by: Lee Jones --- net/key/af_key.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/key/af_key.c b/net/key/af_key.c index 2aa16a171285..05e271098888 100644 --- a/net/key/af_key.c +++ b/net/key/af_key.c @@ -1701,9 +1701,12 @@ static int pfkey_register(struct sock *sk, struct sk_buff *skb, const struct sad pfk->registered |= (1<<hdr->sadb_msg_satype); } + mutex_lock(&pfkey_mutex); xfrm_probe_algs(); supp_skb = compose_sadb_supported(hdr, GFP_KERNEL | __GFP_ZERO); + mutex_unlock(&pfkey_mutex); + if (!supp_skb) { if (hdr->sadb_msg_satype != SADB_SATYPE_UNSPEC) pfk->registered &= ~(1<<hdr->sadb_msg_satype); From b1f03eefb3883ac2d0285d88dbf00b4f8b018b91 Mon Sep 17 00:00:00 2001 From: Seiya Wang Date: Tue, 1 Nov 2022 11:29:21 +0800 Subject: [PATCH 006/186] ANDROID: GKI: Update symbol list for mtk tablet projects 1 Added function: [A] 'function power_supply* power_supply_register_no_ws(device*, const power_supply_desc*, const power_supply_config*)' Bug: 256723350 Signed-off-by: Seiya Wang Change-Id: Icd28b1be7ce7c85d40b05a4b8ef9786674d2063a --- android/abi_gki_aarch64.xml | 7 +++++++ android/abi_gki_aarch64_mtk | 1 + 2 files changed, 8 insertions(+) diff --git a/android/abi_gki_aarch64.xml b/android/abi_gki_aarch64.xml index 69fdfc8696c2..cf691b6dead3 100644 --- a/android/abi_gki_aarch64.xml +++ b/android/abi_gki_aarch64.xml @@ -4262,6 +4262,7 @@ + @@ -138882,6 +138883,12 @@ + + + + + + diff --git a/android/abi_gki_aarch64_mtk b/android/abi_gki_aarch64_mtk index 1a3cc8ff7d30..f910ca2d11aa 100644 --- a/android/abi_gki_aarch64_mtk +++ b/android/abi_gki_aarch64_mtk @@ -2973,6 +2973,7 @@ platform_find_device_by_driver pm_wq power_supply_is_system_supplied + power_supply_register_no_ws power_supply_unreg_notifier prepare_to_wait 
printk_deferred From 30f7ef6517994856188249a7c62093be7ab063ac Mon Sep 17 00:00:00 2001 From: Carlos Llamas Date: Fri, 4 Nov 2022 17:54:49 +0000 Subject: [PATCH 007/186] FROMLIST: binder: fix UAF of alloc->vma in race with munmap() In commit 720c24192404 ("ANDROID: binder: change down_write to down_read") binder assumed the mmap read lock is sufficient to protect alloc->vma inside binder_update_page_range(). This used to be accurate until commit dd2283f2605e ("mm: mmap: zap pages with read mmap_sem in munmap"), which now downgrades the mmap_lock after detaching the vma from the rbtree in munmap(). Then it proceeds to teardown and free the vma with only the read lock held. This means that accesses to alloc->vma in binder_update_page_range() now will race with vm_area_free() in munmap() and can cause a UAF as shown in the following KASAN trace: ================================================================== BUG: KASAN: use-after-free in vm_insert_page+0x7c/0x1f0 Read of size 8 at addr ffff16204ad00600 by task server/558 CPU: 3 PID: 558 Comm: server Not tainted 5.10.150-00001-gdc8dcf942daa #1 Hardware name: linux,dummy-virt (DT) Call trace: dump_backtrace+0x0/0x2a0 show_stack+0x18/0x2c dump_stack+0xf8/0x164 print_address_description.constprop.0+0x9c/0x538 kasan_report+0x120/0x200 __asan_load8+0xa0/0xc4 vm_insert_page+0x7c/0x1f0 binder_update_page_range+0x278/0x50c binder_alloc_new_buf+0x3f0/0xba0 binder_transaction+0x64c/0x3040 binder_thread_write+0x924/0x2020 binder_ioctl+0x1610/0x2e5c __arm64_sys_ioctl+0xd4/0x120 el0_svc_common.constprop.0+0xac/0x270 do_el0_svc+0x38/0xa0 el0_svc+0x1c/0x2c el0_sync_handler+0xe8/0x114 el0_sync+0x180/0x1c0 Allocated by task 559: kasan_save_stack+0x38/0x6c __kasan_kmalloc.constprop.0+0xe4/0xf0 kasan_slab_alloc+0x18/0x2c kmem_cache_alloc+0x1b0/0x2d0 vm_area_alloc+0x28/0x94 mmap_region+0x378/0x920 do_mmap+0x3f0/0x600 vm_mmap_pgoff+0x150/0x17c ksys_mmap_pgoff+0x284/0x2dc __arm64_sys_mmap+0x84/0xa4 el0_svc_common.constprop.0+0xac/0x270 
do_el0_svc+0x38/0xa0 el0_svc+0x1c/0x2c el0_sync_handler+0xe8/0x114 el0_sync+0x180/0x1c0 Freed by task 560: kasan_save_stack+0x38/0x6c kasan_set_track+0x28/0x40 kasan_set_free_info+0x24/0x4c __kasan_slab_free+0x100/0x164 kasan_slab_free+0x14/0x20 kmem_cache_free+0xc4/0x34c vm_area_free+0x1c/0x2c remove_vma+0x7c/0x94 __do_munmap+0x358/0x710 __vm_munmap+0xbc/0x130 __arm64_sys_munmap+0x4c/0x64 el0_svc_common.constprop.0+0xac/0x270 do_el0_svc+0x38/0xa0 el0_svc+0x1c/0x2c el0_sync_handler+0xe8/0x114 el0_sync+0x180/0x1c0 [...] ================================================================== To prevent the race above, revert back to taking the mmap write lock inside binder_update_page_range(). One might expect an increase of mmap lock contention. However, binder already serializes these calls via top level alloc->mutex. Also, there was no performance impact shown when running the binder benchmark tests. Note this patch is specific to stable branches 5.4 and 5.10, since in newer kernel releases binder no longer caches a pointer to the vma. Instead, it has been refactored to use vma_lookup() which avoids the issue described here. This switch was introduced in commit a43cfc87caaf ("android: binder: stop saving a pointer to the VMA"). 
Bug: 254837884 Link: https://lore.kernel.org/all/20221104175450.306810-1-cmllamas@google.com/ Fixes: dd2283f2605e ("mm: mmap: zap pages with read mmap_sem in munmap") Reported-by: Jann Horn Cc: # 5.10.x Cc: Minchan Kim Cc: Yang Shi Cc: Liam Howlett Signed-off-by: Carlos Llamas Change-Id: Ieabadbfa30f99812da9c226cf1ddd5e60f62c607 --- drivers/android/binder_alloc.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/android/binder_alloc.c b/drivers/android/binder_alloc.c index d30267e08536..447342a878ff 100644 --- a/drivers/android/binder_alloc.c +++ b/drivers/android/binder_alloc.c @@ -213,7 +213,7 @@ static int binder_update_page_range(struct binder_alloc *alloc, int allocate, mm = alloc->vma_vm_mm; if (mm) { - mmap_read_lock(mm); + mmap_write_lock(mm); vma = alloc->vma; } @@ -271,7 +271,7 @@ static int binder_update_page_range(struct binder_alloc *alloc, int allocate, trace_binder_alloc_page_end(alloc, index); } if (mm) { - mmap_read_unlock(mm); + mmap_write_unlock(mm); mmput(mm); } return 0; @@ -304,7 +304,7 @@ err_page_ptr_cleared: } err_no_vma: if (mm) { - mmap_read_unlock(mm); + mmap_write_unlock(mm); mmput(mm); } return vma ? 
-ENOMEM : -ESRCH; From bbba68efda123a6cccb2e7e1d08c211a97b3a737 Mon Sep 17 00:00:00 2001 From: Arvin Wang Date: Thu, 3 Nov 2022 20:40:51 +0800 Subject: [PATCH 008/186] ANDROID: Update symbol list for mtk Leaf changes summary: 6 artifacts changed Changed leaf types summary: 0 leaf type changed Removed/Changed/Added functions summary: 0 Removed, 0 Changed, 5 Added functions Removed/Changed/Added variables summary: 0 Removed, 0 Changed, 1 Added variable 5 Added functions: [A] 'function int devm_led_classdev_flash_register_ext(device*, led_classdev_flash*, led_init_data*)' [A] 'function void led_set_brightness(led_classdev*, led_brightness)' [A] 'function int linear_range_get_selector_high(const linear_range*, unsigned int, unsigned int*, bool*)' [A] 'function int regulator_get_bypass_regmap(regulator_dev*, bool*)' [A] 'function int regulator_set_bypass_regmap(regulator_dev*, bool)' 1 Added variable: [A] 'const char* const led_colors[10]' Bug: 257322202 Change-Id: Ia9c1a18fa41b20e1445492d951ceefee75162d0d Signed-off-by: Arvin Wang --- android/abi_gki_aarch64.xml | 38 +++++++++++++++++++++++++++++++++++++ android/abi_gki_aarch64_mtk | 6 ++++++ 2 files changed, 44 insertions(+) diff --git a/android/abi_gki_aarch64.xml b/android/abi_gki_aarch64.xml index cf691b6dead3..47faef88fd20 100644 --- a/android/abi_gki_aarch64.xml +++ b/android/abi_gki_aarch64.xml @@ -1709,6 +1709,7 @@ + @@ -3368,6 +3369,7 @@ + @@ -3383,6 +3385,7 @@ + @@ -4472,6 +4475,7 @@ + @@ -4497,6 +4501,7 @@ + @@ -6787,6 +6792,7 @@ + @@ -58444,6 +58450,9 @@ + + + @@ -125683,6 +125692,12 @@ + + + + + + @@ -134350,11 +134365,17 @@ + + + + + + @@ -134421,6 +134442,13 @@ + + + + + + + @@ -139909,6 +139937,11 @@ + + + + + @@ -140029,6 +140062,11 @@ + + + + + diff --git a/android/abi_gki_aarch64_mtk b/android/abi_gki_aarch64_mtk index f910ca2d11aa..8d2c95696079 100644 --- a/android/abi_gki_aarch64_mtk +++ b/android/abi_gki_aarch64_mtk @@ -412,6 +412,7 @@ devm_kasprintf devm_kfree devm_kmalloc + 
devm_led_classdev_flash_register_ext devm_led_classdev_register_ext devm_led_classdev_unregister devm_mbox_controller_register @@ -1149,7 +1150,9 @@ kvmalloc_node led_classdev_flash_register_ext led_classdev_flash_unregister + led_colors led_get_flash_fault + led_set_brightness led_set_brightness_sync led_set_flash_brightness led_set_flash_timeout @@ -1159,6 +1162,7 @@ led_update_brightness led_update_flash_brightness linear_range_get_max_value + linear_range_get_selector_high linear_range_get_value __list_add_valid __list_del_entry_valid @@ -1704,6 +1708,7 @@ regulator_enable regulator_enable_regmap regulator_get + regulator_get_bypass_regmap regulator_get_current_limit_regmap regulator_get_mode regulator_get_optional @@ -1721,6 +1726,7 @@ regulator_notifier_call_chain regulator_put regulator_set_active_discharge_regmap + regulator_set_bypass_regmap regulator_set_current_limit regulator_set_current_limit_regmap regulator_set_load From a2afe6cc0fd9be874477715f48679f49d939ac9d Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 20 Dec 2021 06:33:30 -0800 Subject: [PATCH 009/186] UPSTREAM: inet: fully convert sk->sk_rx_dst to RCU rules commit 8f905c0e7354ef261360fb7535ea079b1082c105 upstream. syzbot reported various issues around early demux, one being included in this changelog [1] sk->sk_rx_dst is using RCU protection without clearly documenting it. And following sequences in tcp_v4_do_rcv()/tcp_v6_do_rcv() are not following standard RCU rules. [a] dst_release(dst); [b] sk->sk_rx_dst = NULL; They look wrong because a delete operation of RCU protected pointer is supposed to clear the pointer before the call_rcu()/synchronize_rcu() guarding actual memory freeing. In some cases indeed, dst could be freed before [b] is done. We could cheat by clearing sk_rx_dst before calling dst_release(), but this seems the right time to stick to standard RCU annotations and debugging facilities. 
[1] BUG: KASAN: use-after-free in dst_check include/net/dst.h:470 [inline] BUG: KASAN: use-after-free in tcp_v4_early_demux+0x95b/0x960 net/ipv4/tcp_ipv4.c:1792 Read of size 2 at addr ffff88807f1cb73a by task syz-executor.5/9204 CPU: 0 PID: 9204 Comm: syz-executor.5 Not tainted 5.16.0-rc5-syzkaller #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:88 [inline] dump_stack_lvl+0xcd/0x134 lib/dump_stack.c:106 print_address_description.constprop.0.cold+0x8d/0x320 mm/kasan/report.c:247 __kasan_report mm/kasan/report.c:433 [inline] kasan_report.cold+0x83/0xdf mm/kasan/report.c:450 dst_check include/net/dst.h:470 [inline] tcp_v4_early_demux+0x95b/0x960 net/ipv4/tcp_ipv4.c:1792 ip_rcv_finish_core.constprop.0+0x15de/0x1e80 net/ipv4/ip_input.c:340 ip_list_rcv_finish.constprop.0+0x1b2/0x6e0 net/ipv4/ip_input.c:583 ip_sublist_rcv net/ipv4/ip_input.c:609 [inline] ip_list_rcv+0x34e/0x490 net/ipv4/ip_input.c:644 __netif_receive_skb_list_ptype net/core/dev.c:5508 [inline] __netif_receive_skb_list_core+0x549/0x8e0 net/core/dev.c:5556 __netif_receive_skb_list net/core/dev.c:5608 [inline] netif_receive_skb_list_internal+0x75e/0xd80 net/core/dev.c:5699 gro_normal_list net/core/dev.c:5853 [inline] gro_normal_list net/core/dev.c:5849 [inline] napi_complete_done+0x1f1/0x880 net/core/dev.c:6590 virtqueue_napi_complete drivers/net/virtio_net.c:339 [inline] virtnet_poll+0xca2/0x11b0 drivers/net/virtio_net.c:1557 __napi_poll+0xaf/0x440 net/core/dev.c:7023 napi_poll net/core/dev.c:7090 [inline] net_rx_action+0x801/0xb40 net/core/dev.c:7177 __do_softirq+0x29b/0x9c2 kernel/softirq.c:558 invoke_softirq kernel/softirq.c:432 [inline] __irq_exit_rcu+0x123/0x180 kernel/softirq.c:637 irq_exit_rcu+0x5/0x20 kernel/softirq.c:649 common_interrupt+0x52/0xc0 arch/x86/kernel/irq.c:240 asm_common_interrupt+0x1e/0x40 arch/x86/include/asm/idtentry.h:629 RIP: 0033:0x7f5e972bfd57 Code: 39 d1 73 14 0f 1f 80 00 00 00 00 48 8b 50 f8 
48 83 e8 08 48 39 ca 77 f3 48 39 c3 73 3e 48 89 13 48 8b 50 f8 48 89 38 49 8b 0e <48> 8b 3e 48 83 c3 08 48 83 c6 08 eb bc 48 39 d1 72 9e 48 39 d0 73 RSP: 002b:00007fff8a413210 EFLAGS: 00000283 RAX: 00007f5e97108990 RBX: 00007f5e97108338 RCX: ffffffff81d3aa45 RDX: ffffffff81d3aa45 RSI: 00007f5e97108340 RDI: ffffffff81d3aa45 RBP: 00007f5e97107eb8 R08: 00007f5e97108d88 R09: 0000000093c2e8d9 R10: 0000000000000000 R11: 0000000000000000 R12: 00007f5e97107eb0 R13: 00007f5e97108338 R14: 00007f5e97107ea8 R15: 0000000000000019 Allocated by task 13: kasan_save_stack+0x1e/0x50 mm/kasan/common.c:38 kasan_set_track mm/kasan/common.c:46 [inline] set_alloc_info mm/kasan/common.c:434 [inline] __kasan_slab_alloc+0x90/0xc0 mm/kasan/common.c:467 kasan_slab_alloc include/linux/kasan.h:259 [inline] slab_post_alloc_hook mm/slab.h:519 [inline] slab_alloc_node mm/slub.c:3234 [inline] slab_alloc mm/slub.c:3242 [inline] kmem_cache_alloc+0x202/0x3a0 mm/slub.c:3247 dst_alloc+0x146/0x1f0 net/core/dst.c:92 rt_dst_alloc+0x73/0x430 net/ipv4/route.c:1613 ip_route_input_slow+0x1817/0x3a20 net/ipv4/route.c:2340 ip_route_input_rcu net/ipv4/route.c:2470 [inline] ip_route_input_noref+0x116/0x2a0 net/ipv4/route.c:2415 ip_rcv_finish_core.constprop.0+0x288/0x1e80 net/ipv4/ip_input.c:354 ip_list_rcv_finish.constprop.0+0x1b2/0x6e0 net/ipv4/ip_input.c:583 ip_sublist_rcv net/ipv4/ip_input.c:609 [inline] ip_list_rcv+0x34e/0x490 net/ipv4/ip_input.c:644 __netif_receive_skb_list_ptype net/core/dev.c:5508 [inline] __netif_receive_skb_list_core+0x549/0x8e0 net/core/dev.c:5556 __netif_receive_skb_list net/core/dev.c:5608 [inline] netif_receive_skb_list_internal+0x75e/0xd80 net/core/dev.c:5699 gro_normal_list net/core/dev.c:5853 [inline] gro_normal_list net/core/dev.c:5849 [inline] napi_complete_done+0x1f1/0x880 net/core/dev.c:6590 virtqueue_napi_complete drivers/net/virtio_net.c:339 [inline] virtnet_poll+0xca2/0x11b0 drivers/net/virtio_net.c:1557 __napi_poll+0xaf/0x440 net/core/dev.c:7023 napi_poll 
net/core/dev.c:7090 [inline] net_rx_action+0x801/0xb40 net/core/dev.c:7177 __do_softirq+0x29b/0x9c2 kernel/softirq.c:558 Freed by task 13: kasan_save_stack+0x1e/0x50 mm/kasan/common.c:38 kasan_set_track+0x21/0x30 mm/kasan/common.c:46 kasan_set_free_info+0x20/0x30 mm/kasan/generic.c:370 ____kasan_slab_free mm/kasan/common.c:366 [inline] ____kasan_slab_free mm/kasan/common.c:328 [inline] __kasan_slab_free+0xff/0x130 mm/kasan/common.c:374 kasan_slab_free include/linux/kasan.h:235 [inline] slab_free_hook mm/slub.c:1723 [inline] slab_free_freelist_hook+0x8b/0x1c0 mm/slub.c:1749 slab_free mm/slub.c:3513 [inline] kmem_cache_free+0xbd/0x5d0 mm/slub.c:3530 dst_destroy+0x2d6/0x3f0 net/core/dst.c:127 rcu_do_batch kernel/rcu/tree.c:2506 [inline] rcu_core+0x7ab/0x1470 kernel/rcu/tree.c:2741 __do_softirq+0x29b/0x9c2 kernel/softirq.c:558 Last potentially related work creation: kasan_save_stack+0x1e/0x50 mm/kasan/common.c:38 __kasan_record_aux_stack+0xf5/0x120 mm/kasan/generic.c:348 __call_rcu kernel/rcu/tree.c:2985 [inline] call_rcu+0xb1/0x740 kernel/rcu/tree.c:3065 dst_release net/core/dst.c:177 [inline] dst_release+0x79/0xe0 net/core/dst.c:167 tcp_v4_do_rcv+0x612/0x8d0 net/ipv4/tcp_ipv4.c:1712 sk_backlog_rcv include/net/sock.h:1030 [inline] __release_sock+0x134/0x3b0 net/core/sock.c:2768 release_sock+0x54/0x1b0 net/core/sock.c:3300 tcp_sendmsg+0x36/0x40 net/ipv4/tcp.c:1441 inet_sendmsg+0x99/0xe0 net/ipv4/af_inet.c:819 sock_sendmsg_nosec net/socket.c:704 [inline] sock_sendmsg+0xcf/0x120 net/socket.c:724 sock_write_iter+0x289/0x3c0 net/socket.c:1057 call_write_iter include/linux/fs.h:2162 [inline] new_sync_write+0x429/0x660 fs/read_write.c:503 vfs_write+0x7cd/0xae0 fs/read_write.c:590 ksys_write+0x1ee/0x250 fs/read_write.c:643 do_syscall_x64 arch/x86/entry/common.c:50 [inline] do_syscall_64+0x35/0xb0 arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x44/0xae The buggy address belongs to the object at ffff88807f1cb700 which belongs to the cache ip_dst_cache of size 176 
The buggy address is located 58 bytes inside of 176-byte region [ffff88807f1cb700, ffff88807f1cb7b0) The buggy address belongs to the page: page:ffffea0001fc72c0 refcount:1 mapcount:0 mapping:0000000000000000 index:0x0 pfn:0x7f1cb flags: 0xfff00000000200(slab|node=0|zone=1|lastcpupid=0x7ff) raw: 00fff00000000200 dead000000000100 dead000000000122 ffff8881413bb780 raw: 0000000000000000 0000000000100010 00000001ffffffff 0000000000000000 page dumped because: kasan: bad access detected page_owner tracks the page as allocated page last allocated via order 0, migratetype Unmovable, gfp_mask 0x112a20(GFP_ATOMIC|__GFP_NOWARN|__GFP_NORETRY|__GFP_HARDWALL), pid 5, ts 108466983062, free_ts 108048976062 prep_new_page mm/page_alloc.c:2418 [inline] get_page_from_freelist+0xa72/0x2f50 mm/page_alloc.c:4149 __alloc_pages+0x1b2/0x500 mm/page_alloc.c:5369 alloc_pages+0x1a7/0x300 mm/mempolicy.c:2191 alloc_slab_page mm/slub.c:1793 [inline] allocate_slab mm/slub.c:1930 [inline] new_slab+0x32d/0x4a0 mm/slub.c:1993 ___slab_alloc+0x918/0xfe0 mm/slub.c:3022 __slab_alloc.constprop.0+0x4d/0xa0 mm/slub.c:3109 slab_alloc_node mm/slub.c:3200 [inline] slab_alloc mm/slub.c:3242 [inline] kmem_cache_alloc+0x35c/0x3a0 mm/slub.c:3247 dst_alloc+0x146/0x1f0 net/core/dst.c:92 rt_dst_alloc+0x73/0x430 net/ipv4/route.c:1613 __mkroute_output net/ipv4/route.c:2564 [inline] ip_route_output_key_hash_rcu+0x921/0x2d00 net/ipv4/route.c:2791 ip_route_output_key_hash+0x18b/0x300 net/ipv4/route.c:2619 __ip_route_output_key include/net/route.h:126 [inline] ip_route_output_flow+0x23/0x150 net/ipv4/route.c:2850 ip_route_output_key include/net/route.h:142 [inline] geneve_get_v4_rt+0x3a6/0x830 drivers/net/geneve.c:809 geneve_xmit_skb drivers/net/geneve.c:899 [inline] geneve_xmit+0xc4a/0x3540 drivers/net/geneve.c:1082 __netdev_start_xmit include/linux/netdevice.h:4994 [inline] netdev_start_xmit include/linux/netdevice.h:5008 [inline] xmit_one net/core/dev.c:3590 [inline] dev_hard_start_xmit+0x1eb/0x920 net/core/dev.c:3606 
__dev_queue_xmit+0x299a/0x3650 net/core/dev.c:4229 page last free stack trace: reset_page_owner include/linux/page_owner.h:24 [inline] free_pages_prepare mm/page_alloc.c:1338 [inline] free_pcp_prepare+0x374/0x870 mm/page_alloc.c:1389 free_unref_page_prepare mm/page_alloc.c:3309 [inline] free_unref_page+0x19/0x690 mm/page_alloc.c:3388 qlink_free mm/kasan/quarantine.c:146 [inline] qlist_free_all+0x5a/0xc0 mm/kasan/quarantine.c:165 kasan_quarantine_reduce+0x180/0x200 mm/kasan/quarantine.c:272 __kasan_slab_alloc+0xa2/0xc0 mm/kasan/common.c:444 kasan_slab_alloc include/linux/kasan.h:259 [inline] slab_post_alloc_hook mm/slab.h:519 [inline] slab_alloc_node mm/slub.c:3234 [inline] kmem_cache_alloc_node+0x255/0x3f0 mm/slub.c:3270 __alloc_skb+0x215/0x340 net/core/skbuff.c:414 alloc_skb include/linux/skbuff.h:1126 [inline] alloc_skb_with_frags+0x93/0x620 net/core/skbuff.c:6078 sock_alloc_send_pskb+0x783/0x910 net/core/sock.c:2575 mld_newpack+0x1df/0x770 net/ipv6/mcast.c:1754 add_grhead+0x265/0x330 net/ipv6/mcast.c:1857 add_grec+0x1053/0x14e0 net/ipv6/mcast.c:1995 mld_send_initial_cr.part.0+0xf6/0x230 net/ipv6/mcast.c:2242 mld_send_initial_cr net/ipv6/mcast.c:1232 [inline] mld_dad_work+0x1d3/0x690 net/ipv6/mcast.c:2268 process_one_work+0x9b2/0x1690 kernel/workqueue.c:2298 worker_thread+0x658/0x11f0 kernel/workqueue.c:2445 Memory state around the buggy address: ffff88807f1cb600: fa fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ffff88807f1cb680: fb fb fb fb fb fb fc fc fc fc fc fc fc fc fc fc >ffff88807f1cb700: fa fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ^ ffff88807f1cb780: fb fb fb fb fb fb fc fc fc fc fc fc fc fc fc fc ffff88807f1cb800: fa fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb Fixes: 41063e9dd119 ("ipv4: Early TCP socket demux.") Signed-off-by: Eric Dumazet Link: https://lore.kernel.org/r/20211220143330.680945-1-eric.dumazet@gmail.com Signed-off-by: Jakub Kicinski [cmllamas: fixed trivial merge conflict] Signed-off-by: Carlos Llamas Signed-off-by: Greg Kroah-Hartman 
(cherry picked from commit f039b43cbaea5e0700980c2f0052da05a70782e0) Bug: 252686090 Signed-off-by: Wei Liu Change-Id: If7fbbe7ef4e6423617aadb8bd1ef06f525a571b7 --- include/net/sock.h | 2 +- net/ipv4/af_inet.c | 2 +- net/ipv4/tcp.c | 3 +-- net/ipv4/tcp_input.c | 2 +- net/ipv4/tcp_ipv4.c | 11 +++++++---- net/ipv4/udp.c | 6 +++--- net/ipv6/tcp_ipv6.c | 11 +++++++---- net/ipv6/udp.c | 4 ++-- 8 files changed, 23 insertions(+), 18 deletions(-) diff --git a/include/net/sock.h b/include/net/sock.h index 0be6e328af74..c4d5c291e0a4 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -426,7 +426,7 @@ struct sock { #ifdef CONFIG_XFRM struct xfrm_policy __rcu *sk_policy[2]; #endif - struct dst_entry *sk_rx_dst; + struct dst_entry __rcu *sk_rx_dst; struct dst_entry __rcu *sk_dst_cache; atomic_t sk_omem_alloc; int sk_sndbuf; diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 0494caae8d59..0088c1c9e242 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -158,7 +158,7 @@ void inet_sock_destruct(struct sock *sk) kfree(rcu_dereference_protected(inet->inet_opt, 1)); dst_release(rcu_dereference_protected(sk->sk_dst_cache, 1)); - dst_release(sk->sk_rx_dst); + dst_release(rcu_dereference_protected(sk->sk_rx_dst, 1)); sk_refcnt_debug_dec(sk); } EXPORT_SYMBOL(inet_sock_destruct); diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index e31eaa70edbb..634f64c5e67b 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -2793,8 +2793,7 @@ int tcp_disconnect(struct sock *sk, int flags) icsk->icsk_ack.rcv_mss = TCP_MIN_MSS; memset(&tp->rx_opt, 0, sizeof(tp->rx_opt)); __sk_dst_reset(sk); - dst_release(sk->sk_rx_dst); - sk->sk_rx_dst = NULL; + dst_release(xchg((__force struct dst_entry **)&sk->sk_rx_dst, NULL)); tcp_saved_syn_free(tp); tp->compressed_ack = 0; tp->segs_in = 0; diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index ff2ab936b571..71d1512b490d 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -5745,7 +5745,7 @@ void tcp_rcv_established(struct sock 
*sk, struct sk_buff *skb) trace_tcp_probe(sk, skb); tcp_mstamp_refresh(tp); - if (unlikely(!sk->sk_rx_dst)) + if (unlikely(!rcu_access_pointer(sk->sk_rx_dst))) inet_csk(sk)->icsk_af_ops->sk_rx_dst_set(sk, skb); /* * Header prediction. diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 0d165ce2d80a..5c1e6b0687e2 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1670,15 +1670,18 @@ int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb) struct sock *rsk; if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */ - struct dst_entry *dst = sk->sk_rx_dst; + struct dst_entry *dst; + + dst = rcu_dereference_protected(sk->sk_rx_dst, + lockdep_sock_is_held(sk)); sock_rps_save_rxhash(sk, skb); sk_mark_napi_id(sk, skb); if (dst) { if (inet_sk(sk)->rx_dst_ifindex != skb->skb_iif || !dst->ops->check(dst, 0)) { + RCU_INIT_POINTER(sk->sk_rx_dst, NULL); dst_release(dst); - sk->sk_rx_dst = NULL; } } tcp_rcv_established(sk, skb); @@ -1753,7 +1756,7 @@ int tcp_v4_early_demux(struct sk_buff *skb) skb->sk = sk; skb->destructor = sock_edemux; if (sk_fullsock(sk)) { - struct dst_entry *dst = READ_ONCE(sk->sk_rx_dst); + struct dst_entry *dst = rcu_dereference(sk->sk_rx_dst); if (dst) dst = dst_check(dst, 0); @@ -2162,7 +2165,7 @@ void inet_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb) struct dst_entry *dst = skb_dst(skb); if (dst && dst_hold_safe(dst)) { - sk->sk_rx_dst = dst; + rcu_assign_pointer(sk->sk_rx_dst, dst); inet_sk(sk)->rx_dst_ifindex = skb->skb_iif; } } diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index adc02652ef67..73adaba61d28 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -2196,7 +2196,7 @@ bool udp_sk_rx_dst_set(struct sock *sk, struct dst_entry *dst) struct dst_entry *old; if (dst_hold_safe(dst)) { - old = xchg(&sk->sk_rx_dst, dst); + old = xchg((__force struct dst_entry **)&sk->sk_rx_dst, dst); dst_release(old); return old != dst; } @@ -2386,7 +2386,7 @@ int __udp4_lib_rcv(struct sk_buff *skb, struct udp_table *udptable, struct 
dst_entry *dst = skb_dst(skb); int ret; - if (unlikely(sk->sk_rx_dst != dst)) + if (unlikely(rcu_dereference(sk->sk_rx_dst) != dst)) udp_sk_rx_dst_set(sk, dst); ret = udp_unicast_rcv_skb(sk, skb, uh); @@ -2545,7 +2545,7 @@ int udp_v4_early_demux(struct sk_buff *skb) skb->sk = sk; skb->destructor = sock_efree; - dst = READ_ONCE(sk->sk_rx_dst); + dst = rcu_dereference(sk->sk_rx_dst); if (dst) dst = dst_check(dst, 0); diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 8d91f36cb11b..c14eaec64a0b 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -107,7 +107,7 @@ static void inet6_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb) if (dst && dst_hold_safe(dst)) { const struct rt6_info *rt = (const struct rt6_info *)dst; - sk->sk_rx_dst = dst; + rcu_assign_pointer(sk->sk_rx_dst, dst); inet_sk(sk)->rx_dst_ifindex = skb->skb_iif; tcp_inet6_sk(sk)->rx_dst_cookie = rt6_get_cookie(rt); } @@ -1482,15 +1482,18 @@ static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb) opt_skb = skb_clone(skb, sk_gfp_mask(sk, GFP_ATOMIC)); if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */ - struct dst_entry *dst = sk->sk_rx_dst; + struct dst_entry *dst; + + dst = rcu_dereference_protected(sk->sk_rx_dst, + lockdep_sock_is_held(sk)); sock_rps_save_rxhash(sk, skb); sk_mark_napi_id(sk, skb); if (dst) { if (inet_sk(sk)->rx_dst_ifindex != skb->skb_iif || dst->ops->check(dst, np->rx_dst_cookie) == NULL) { + RCU_INIT_POINTER(sk->sk_rx_dst, NULL); dst_release(dst); - sk->sk_rx_dst = NULL; } } @@ -1842,7 +1845,7 @@ INDIRECT_CALLABLE_SCOPE void tcp_v6_early_demux(struct sk_buff *skb) skb->sk = sk; skb->destructor = sock_edemux; if (sk_fullsock(sk)) { - struct dst_entry *dst = READ_ONCE(sk->sk_rx_dst); + struct dst_entry *dst = rcu_dereference(sk->sk_rx_dst); if (dst) dst = dst_check(dst, tcp_inet6_sk(sk)->rx_dst_cookie); diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 7745d8a40209..2c8353509c4d 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -941,7 +941,7 @@ 
int __udp6_lib_rcv(struct sk_buff *skb, struct udp_table *udptable, struct dst_entry *dst = skb_dst(skb); int ret; - if (unlikely(sk->sk_rx_dst != dst)) + if (unlikely(rcu_dereference(sk->sk_rx_dst) != dst)) udp6_sk_rx_dst_set(sk, dst); if (!uh->check && !udp_sk(sk)->no_check6_rx) { @@ -1055,7 +1055,7 @@ INDIRECT_CALLABLE_SCOPE void udp_v6_early_demux(struct sk_buff *skb) skb->sk = sk; skb->destructor = sock_efree; - dst = READ_ONCE(sk->sk_rx_dst); + dst = rcu_dereference(sk->sk_rx_dst); if (dst) dst = dst_check(dst, inet6_sk(sk)->rx_dst_cookie); From 0e74b64011f6e37587b1fba2e2ba4370bb8a83de Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Fri, 26 Feb 2021 15:36:15 +0900 Subject: [PATCH 010/186] UPSTREAM: kbuild: do not include include/config/auto.conf from adjust_autoksyms.sh Commit cd195bc4775a ("kbuild: split adjust_autoksyms.sh in two parts") split out the code that needs include/config/auto.conf. This script no longer needs to include include/config/auto.conf. Bug: 254441685 Fixes: cd195bc4775a ("kbuild: split adjust_autoksyms.sh in two parts") Signed-off-by: Masahiro Yamada (cherry picked from commit 12e9dea6c9766c7403417d00193940cea33ee81a) Signed-off-by: Lee Jones Change-Id: I54234c36e36b496bb26b2ba38b679ce3fb91e447 --- scripts/adjust_autoksyms.sh | 3 --- 1 file changed, 3 deletions(-) diff --git a/scripts/adjust_autoksyms.sh b/scripts/adjust_autoksyms.sh index 2b366d945ccb..d8f6f9c63043 100755 --- a/scripts/adjust_autoksyms.sh +++ b/scripts/adjust_autoksyms.sh @@ -34,9 +34,6 @@ case "$KBUILD_VERBOSE" in ;; esac -# We need access to CONFIG_ symbols -. include/config/auto.conf - # Generate a new symbol list file $CONFIG_SHELL $srctree/scripts/gen_autoksyms.sh "$new_ksyms_file" From 7a81ba1e08dbad1fb48aeb17c79313a77babda45 Mon Sep 17 00:00:00 2001 From: Jerome Brunet Date: Thu, 29 Apr 2021 10:38:23 +0200 Subject: [PATCH 011/186] BACKPORT: arm64: meson: select COMMON_CLK This fix the recent removal of clock drivers selection. 
While it is not necessary to select the clock drivers themselves, we need to select a proper implementation of the clock API, which for the meson, is CCF Bug: 254441685 Fixes: ba66a25536dd ("arm64: meson: ship only the necessary clock controllers") Reviewed-by: Neil Armstrong Signed-off-by: Jerome Brunet Reviewed-by: Martin Blumenstingl Signed-off-by: Kevin Hilman Signed-off-by: Neil Armstrong Link: https://lore.kernel.org/r/20210429083823.59546-1-jbrunet@baylibre.com (cherry picked from commit 4cce442ffe5448ef572adc8b3abe7001b398e709) Signed-off-by: Lee Jones Change-Id: I1a784ecea76c978599bb0e9c735bf39e802adcdf --- arch/arm64/Kconfig.platforms | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm64/Kconfig.platforms b/arch/arm64/Kconfig.platforms index e6a2f21ba3f8..6e6198fbf17b 100644 --- a/arch/arm64/Kconfig.platforms +++ b/arch/arm64/Kconfig.platforms @@ -152,6 +152,7 @@ config ARCH_MEDIATEK config ARCH_MESON bool "Amlogic Platforms" + select COMMON_CLK help This enables support for the arm64 based Amlogic SoCs such as the s905, S905X/D, S912, A113X/D or S905X/D2 From fcc95f9aa87fd956a58f0d18f5bb49bebe671d0a Mon Sep 17 00:00:00 2001 From: Stephen Boyd Date: Fri, 4 Jun 2021 12:20:22 -0700 Subject: [PATCH 012/186] UPSTREAM: clk: versatile: Depend on HAS_IOMEM kbuild robot reports that s390 fails to build this driver with COMPILE_TEST. Let's depend on HAS_IOMEM so that s390 doesn't try to build it. 
Bug: 254441685 Cc: Lee Jones Acked-by: Linus Walleij Reported-by: kernel test robot Fixes: 419b3ab6987f ("clk: versatile: remove dependency on ARCH_*") Link: https://lore.kernel.org/r/20210604192321.2594519-1-sboyd@kernel.org Signed-off-by: Stephen Boyd (cherry picked from commit 51c8b49762e497b6f72185af951b192e7678c61d) Signed-off-by: Lee Jones Change-Id: I7ca8d5d08894b5f18a6007b11152c498c6037162 --- drivers/clk/versatile/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/clk/versatile/Kconfig b/drivers/clk/versatile/Kconfig index 792315d893db..481de5657d85 100644 --- a/drivers/clk/versatile/Kconfig +++ b/drivers/clk/versatile/Kconfig @@ -1,6 +1,7 @@ # SPDX-License-Identifier: GPL-2.0-only menu "Clock driver for ARM Reference designs" + depends on HAS_IOMEM config ICST bool "Clock driver for ARM Reference designs ICST" From ab992b1bb8992f47639685752d3f657618edc3b9 Mon Sep 17 00:00:00 2001 From: Lecopzer Chen Date: Thu, 15 Jul 2021 15:37:16 +0800 Subject: [PATCH 013/186] UPSTREAM: Kbuild: lto: fix module versionings mismatch in GNU make 3.X When building modules(CONFIG_...=m), I found some of module versions are incorrect and set to 0. This can be found in build log for first clean build which shows WARNING: EXPORT symbol "XXXX" [drivers/XXX/XXX.ko] version generation failed, symbol will not be versioned. But in second build(incremental build), the WARNING disappeared and the module version becomes valid CRC and make someone who want to change modules without updating kernel image can't insert their modules. The problematic code is + $(foreach n, $(filter-out FORCE,$^), \ + $(if $(wildcard $(n).symversions), \ + ; cat $(n).symversions >> $@.symversions)) For example: rm -f fs/notify/built-in.a.symversions ; rm -f fs/notify/built-in.a; \ llvm-ar cDPrST fs/notify/built-in.a fs/notify/fsnotify.o \ fs/notify/notification.o fs/notify/group.o ... 
`foreach n` shows nothing to `cat` into $(n).symversions because `if $(wildcard $(n).symversions)` returns nothing, even though the files do exist when this line is executed. -rw-r--r-- 1 root root 168580 Jun 13 19:10 fs/notify/fsnotify.o -rw-r--r-- 1 root root 111 Jun 13 19:10 fs/notify/fsnotify.o.symversions The reason is that the $(n).symversions files are generated at runtime, but the Makefile wildcard function expands and checks whether the file exists while parsing the Makefile. Thus fix this by using the `test` shell command to check for the file's existence at runtime. Rebase from both: 1. [https://lore.kernel.org/lkml/20210616080252.32046-1-lecopzer.chen@mediatek.com/] 2. [https://lore.kernel.org/lkml/20210702032943.7865-1-lecopzer.chen@mediatek.com/] Bug: 254441685 Fixes: 38e891849003 ("kbuild: lto: fix module versioning") Co-developed-by: Sami Tolvanen Signed-off-by: Lecopzer Chen Signed-off-by: Masahiro Yamada (cherry picked from commit 1d11053dc63094075bf9e4809fffd3bb5e72f9a6) Signed-off-by: Lee Jones Change-Id: I8f4c2c1f27d47bf14c7a0150c00ac32b8df05980 --- scripts/Makefile.build | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/Makefile.build b/scripts/Makefile.build index 4963ee9f218b..1a4aa5edfd1e 100644 --- a/scripts/Makefile.build +++ b/scripts/Makefile.build @@ -388,7 +388,7 @@ ifeq ($(CONFIG_LTO_CLANG) $(CONFIG_MODVERSIONS),y y) cmd_update_lto_symversions = \ rm -f $@.symversions \ $(foreach n, $(filter-out FORCE,$^), \ - $(if $(wildcard $(n).symversions), \ + $(if $(shell test -s $(n).symversions && echo y), \ ; cat $(n).symversions >> $@.symversions)) else cmd_update_lto_symversions = echo >/dev/null From 3586ade72bf40b04171c91897e6df22b91e25bb3 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Wed, 21 Jul 2021 20:34:33 -0700 Subject: [PATCH 014/186] BACKPORT: scsi: ufs: Optimize serialization of setup_xfer_req() calls Reduce the number of times the host lock is taken in the hot path.
Additionally, inline ufshcd_vops_setup_xfer_req() because that function is too short to keep it. Bug: 254441685 Link: https://lore.kernel.org/r/20210722033439.26550-13-bvanassche@acm.org Fixes: a45f937110fa ("scsi: ufs: Optimize host lock on transfer requests send/compl paths") Cc: Jaegeuk Kim Cc: Stanley Chu Cc: Can Guo Cc: Bean Huo Cc: Asutosh Das Reviewed-by: Daejun Park Reviewed-by: Bean Huo Signed-off-by: Bart Van Assche Signed-off-by: Martin K. Petersen (cherry picked from commit a024ad0d49550e4ea1156e677cd9382e1eefbdd7) Signed-off-by: Lee Jones Change-Id: I84662337751a1930c34c32b1aba963652b54347c --- drivers/scsi/ufs/ufshcd.c | 3 ++- drivers/scsi/ufs/ufshcd.h | 12 ------------ 2 files changed, 2 insertions(+), 13 deletions(-) diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c index a611d05b644a..ea90dec13695 100644 --- a/drivers/scsi/ufs/ufshcd.c +++ b/drivers/scsi/ufs/ufshcd.c @@ -2093,12 +2093,13 @@ void ufshcd_send_command(struct ufs_hba *hba, unsigned int task_tag) lrbp->issue_time_stamp = ktime_get(); lrbp->compl_time_stamp = ktime_set(0, 0); - ufshcd_vops_setup_xfer_req(hba, task_tag, (lrbp->cmd ? 
true : false)); trace_android_vh_ufs_send_command(hba, lrbp); ufshcd_add_command_trace(hba, task_tag, "send"); ufshcd_clk_scaling_start_busy(hba); if (unlikely(ufshcd_should_inform_monitor(hba, lrbp))) ufshcd_start_monitor(hba, lrbp); + if (hba->vops && hba->vops->setup_xfer_req) + hba->vops->setup_xfer_req(hba, task_tag, !!lrbp->cmd); if (ufshcd_has_utrlcnr(hba)) { set_bit(task_tag, &hba->outstanding_reqs); ufshcd_writel(hba, 1 << task_tag, diff --git a/drivers/scsi/ufs/ufshcd.h b/drivers/scsi/ufs/ufshcd.h index 53678a8b80e5..6b80ac49908c 100644 --- a/drivers/scsi/ufs/ufshcd.h +++ b/drivers/scsi/ufs/ufshcd.h @@ -1304,18 +1304,6 @@ static inline int ufshcd_vops_pwr_change_notify(struct ufs_hba *hba, return -ENOTSUPP; } -static inline void ufshcd_vops_setup_xfer_req(struct ufs_hba *hba, int tag, - bool is_scsi_cmd) -{ - if (hba->vops && hba->vops->setup_xfer_req) { - unsigned long flags; - - spin_lock_irqsave(hba->host->host_lock, flags); - hba->vops->setup_xfer_req(hba, tag, is_scsi_cmd); - spin_unlock_irqrestore(hba->host->host_lock, flags); - } -} - static inline void ufshcd_vops_setup_task_mgmt(struct ufs_hba *hba, int tag, u8 tm_function) { From b85a4aed4e821866a4e0a871ad5cfea0c08569a9 Mon Sep 17 00:00:00 2001 From: Mike Rapoport Date: Thu, 19 Aug 2021 19:04:15 -0700 Subject: [PATCH 015/186] UPSTREAM: mmflags.h: add missing __GFP_ZEROTAGS and __GFP_SKIP_KASAN_POISON names printk("%pGg") outputs these two flags as hexadecimal number, rather than as a string, e.g: GFP_KERNEL|0x1800000 Fix this by adding missing names of __GFP_ZEROTAGS and __GFP_SKIP_KASAN_POISON flags to __def_gfpflag_names. 
Bug: 254441685 Link: https://lkml.kernel.org/r/20210816133502.590-1-rppt@kernel.org Fixes: 013bb59dbb7c ("arm64: mte: handle tags zeroing at page allocation time") Fixes: c275c5c6d50a ("kasan: disable freed user page poisoning with HW tags") Signed-off-by: Mike Rapoport Cc: Peter Collingbourne Cc: Steven Rostedt Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds (cherry picked from commit b16ee0f9ed79fca2f2c31b13cac2ab9cf543525a) Signed-off-by: Lee Jones Change-Id: I889128b56d075cbbcc3c8da0f9e5c847f500c1fd --- include/trace/events/mmflags.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/include/trace/events/mmflags.h b/include/trace/events/mmflags.h index d428f0137c49..a26dbefdf294 100644 --- a/include/trace/events/mmflags.h +++ b/include/trace/events/mmflags.h @@ -48,7 +48,9 @@ {(unsigned long)__GFP_WRITE, "__GFP_WRITE"}, \ {(unsigned long)__GFP_RECLAIM, "__GFP_RECLAIM"}, \ {(unsigned long)__GFP_DIRECT_RECLAIM, "__GFP_DIRECT_RECLAIM"},\ - {(unsigned long)__GFP_KSWAPD_RECLAIM, "__GFP_KSWAPD_RECLAIM"}\ + {(unsigned long)__GFP_KSWAPD_RECLAIM, "__GFP_KSWAPD_RECLAIM"},\ + {(unsigned long)__GFP_ZEROTAGS, "__GFP_ZEROTAGS"}, \ + {(unsigned long)__GFP_SKIP_KASAN_POISON,"__GFP_SKIP_KASAN_POISON"}\ #define show_gfp_flags(flags) \ (flags) ? __print_flags(flags, "|", \ From a9370f7ce90f6632c920804dc0aaba295c333622 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Thu, 2 Sep 2021 14:49:13 +0200 Subject: [PATCH 016/186] UPSTREAM: dma-buf: DMABUF_SYSFS_STATS should depend on DMA_SHARED_BUFFER DMA-BUF sysfs statistics are an option of DMA-BUF. It does not make much sense to bother the user with a question about DMA-BUF sysfs statistics if DMA-BUF itself is not enabled. Worse, enabling the statistics enables the feature. 
Bug: 254441685 Fixes: bdb8d06dfefd666d ("dmabuf: Add the capability to expose DMA-BUF stats in sysfs") Signed-off-by: Geert Uytterhoeven Signed-off-by: Sumit Semwal Link: https://patchwork.freedesktop.org/patch/msgid/20210902124913.2698760-4-geert@linux-m68k.org (cherry picked from commit 87fd9ef47597b2fcee3264eb5f288410b9f376d5) Signed-off-by: Lee Jones Change-Id: Ifb4470f6b013c573d7fb18a40ee8d428c3de94f4 --- drivers/dma-buf/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/dma-buf/Kconfig b/drivers/dma-buf/Kconfig index 8634c09036ac..3280ca972502 100644 --- a/drivers/dma-buf/Kconfig +++ b/drivers/dma-buf/Kconfig @@ -67,7 +67,7 @@ menuconfig DMABUF_HEAPS menuconfig DMABUF_SYSFS_STATS bool "DMA-BUF sysfs statistics" - select DMA_SHARED_BUFFER + depends on DMA_SHARED_BUFFER help Choose this option to enable DMA-BUF sysfs statistics in location /sys/kernel/dmabuf/buffers. From 8599a67e73ac2c4ab6aba2418885eb8ee613adac Mon Sep 17 00:00:00 2001 From: Marco Elver Date: Fri, 24 Sep 2021 15:43:23 -0700 Subject: [PATCH 017/186] UPSTREAM: kasan: fix Kconfig check of CC_HAS_WORKING_NOSANITIZE_ADDRESS In the main KASAN config option CC_HAS_WORKING_NOSANITIZE_ADDRESS is checked for instrumentation-based modes. However, if HAVE_ARCH_KASAN_HW_TAGS is true all modes may still be selected. To fix, also make the software modes depend on CC_HAS_WORKING_NOSANITIZE_ADDRESS. 
Bug: 254441685 Link: https://lkml.kernel.org/r/20210910084240.1215803-1-elver@google.com Fixes: 6a63a63ff1ac ("kasan: introduce CONFIG_KASAN_HW_TAGS") Signed-off-by: Marco Elver Cc: Andrey Ryabinin Cc: Alexander Potapenko Cc: Andrey Konovalov Cc: Dmitry Vyukov Cc: Aleksandr Nogikh Cc: Taras Madan Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds (cherry picked from commit fa360beac4b62d54879a88b182afef4b369c9700) Signed-off-by: Lee Jones Change-Id: Ic343e59c3d02a2e231202ad4362b46dbad44b44c --- lib/Kconfig.kasan | 2 ++ 1 file changed, 2 insertions(+) diff --git a/lib/Kconfig.kasan b/lib/Kconfig.kasan index a5ba06a86358..4e999807b6c7 100644 --- a/lib/Kconfig.kasan +++ b/lib/Kconfig.kasan @@ -59,6 +59,7 @@ choice config KASAN_GENERIC bool "Generic mode" depends on HAVE_ARCH_KASAN && CC_HAS_KASAN_GENERIC + depends on CC_HAS_WORKING_NOSANITIZE_ADDRESS select SLUB_DEBUG if SLUB select CONSTRUCTORS help @@ -79,6 +80,7 @@ config KASAN_GENERIC config KASAN_SW_TAGS bool "Software tag-based mode" depends on HAVE_ARCH_KASAN_SW_TAGS && CC_HAS_KASAN_SW_TAGS + depends on CC_HAS_WORKING_NOSANITIZE_ADDRESS select SLUB_DEBUG if SLUB select CONSTRUCTORS help From b3609875ce4937c5cd3e639c3ea41ed8d1e9ffb9 Mon Sep 17 00:00:00 2001 From: Daeho Jeong Date: Wed, 6 Oct 2021 10:49:10 -0700 Subject: [PATCH 018/186] UPSTREAM: f2fs: include non-compressed blocks in compr_written_block Need to include non-compressed blocks in compr_written_block to estimate average compression ratio more accurately. 
Bug: 254441685 Fixes: 5ac443e26a09 ("f2fs: add sysfs nodes to get runtime compression stat") Cc: stable@vger.kernel.org Signed-off-by: Daeho Jeong Signed-off-by: Jaegeuk Kim (cherry picked from commit 09631cf3234d32156e7cae32275f5a4144c683c5) Signed-off-by: Lee Jones Change-Id: I6d241fbac3b03f4083457da8c4f079a59c80b42e --- fs/f2fs/compress.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/f2fs/compress.c b/fs/f2fs/compress.c index 7a4abd303fa0..12c789f879a1 100644 --- a/fs/f2fs/compress.c +++ b/fs/f2fs/compress.c @@ -1469,6 +1469,7 @@ int f2fs_write_multi_pages(struct compress_ctx *cc, if (cluster_may_compress(cc)) { err = f2fs_compress_pages(cc); if (err == -EAGAIN) { + add_compr_block_stat(cc->inode, cc->cluster_size); goto write; } else if (err) { f2fs_put_rpages_wbc(cc, wbc, true, 1); From 8d34761975211bfabe783a432ec4da03a19df30b Mon Sep 17 00:00:00 2001 From: Miaoqian Lin Date: Wed, 22 Dec 2021 07:09:30 +0000 Subject: [PATCH 019/186] UPSTREAM: scsi: ufs: ufs-mediatek: Fix error checking in ufs_mtk_init_va09_pwr_ctrl() The function regulator_get() returns an error pointer. Use IS_ERR() to validate the return value. Bug: 254441685 Link: https://lore.kernel.org/r/20211222070930.9449-1-linmq006@gmail.com Fixes: cf137b3ea49a ("scsi: ufs-mediatek: Support VA09 regulator operations") Signed-off-by: Miaoqian Lin Signed-off-by: Martin K. 
Petersen (cherry picked from commit 3ba880a12df5aa4488c18281701b5b1bc3d4531a) Signed-off-by: Lee Jones Change-Id: Iccf8b09bb8f837139cb4bc7bfc21b29d3423d5b9 --- drivers/scsi/ufs/ufs-mediatek.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/ufs/ufs-mediatek.c b/drivers/scsi/ufs/ufs-mediatek.c index ca0ebb2c9b96..99eec4d06dde 100644 --- a/drivers/scsi/ufs/ufs-mediatek.c +++ b/drivers/scsi/ufs/ufs-mediatek.c @@ -501,7 +501,7 @@ static void ufs_mtk_init_va09_pwr_ctrl(struct ufs_hba *hba) struct ufs_mtk_host *host = ufshcd_get_variant(hba); host->reg_va09 = regulator_get(hba->dev, "va09"); - if (!host->reg_va09) + if (IS_ERR(host->reg_va09)) dev_info(hba->dev, "failed to get va09"); else host->caps |= UFS_MTK_CAP_VA09_PWR_CTRL; From 01a7ade4e50c683d4e8b8eb6a044541e7dca0e93 Mon Sep 17 00:00:00 2001 From: Weizhao Ouyang Date: Tue, 4 Jan 2022 15:35:45 +0800 Subject: [PATCH 020/186] UPSTREAM: dma-buf: cma_heap: Fix mutex locking section Fix cma_heap_buffer mutex locking critical section to protect vmap_cnt and vaddr. 
Bug: 254441685 Fixes: a5d2d29e24be ("dma-buf: heaps: Move heap-helper logic into the cma_heap implementation") Signed-off-by: Weizhao Ouyang Acked-by: John Stultz Signed-off-by: Sumit Semwal Link: https://patchwork.freedesktop.org/patch/msgid/20220104073545.124244-1-o451686892@gmail.com (cherry picked from commit 54329e6f7beea6af56c1230da293acc97d6a6ee7) Signed-off-by: Lee Jones Change-Id: Ie83cf515450576799364d2e7476a4ff7286ff271 --- drivers/dma-buf/heaps/cma_heap.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/dma-buf/heaps/cma_heap.c b/drivers/dma-buf/heaps/cma_heap.c index fd564aa70ee9..511d678ecbfc 100644 --- a/drivers/dma-buf/heaps/cma_heap.c +++ b/drivers/dma-buf/heaps/cma_heap.c @@ -126,10 +126,11 @@ static int cma_heap_dma_buf_begin_cpu_access(struct dma_buf *dmabuf, struct cma_heap_buffer *buffer = dmabuf->priv; struct dma_heap_attachment *a; + mutex_lock(&buffer->lock); + if (buffer->vmap_cnt) invalidate_kernel_vmap_range(buffer->vaddr, buffer->len); - mutex_lock(&buffer->lock); list_for_each_entry(a, &buffer->attachments, list) { if (!a->mapped) continue; @@ -146,10 +147,11 @@ static int cma_heap_dma_buf_end_cpu_access(struct dma_buf *dmabuf, struct cma_heap_buffer *buffer = dmabuf->priv; struct dma_heap_attachment *a; + mutex_lock(&buffer->lock); + if (buffer->vmap_cnt) flush_kernel_vmap_range(buffer->vaddr, buffer->len); - mutex_lock(&buffer->lock); list_for_each_entry(a, &buffer->attachments, list) { if (!a->mapped) continue; From 6c2c1020b4bf7fcef1aeda23090b899df2bf36fa Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Thu, 20 Jan 2022 10:28:13 -0700 Subject: [PATCH 021/186] UPSTREAM: block: fix async_depth sysfs interface for mq-deadline A previous commit added this feature, but it inadvertently used the wrong variable to show/store the setting from/to, victimized by copy/paste. Fix it up so that the async_depth sysfs interface reads and writes from the right setting. 
Bug: 254441685 Fixes: 07757588e507 ("block/mq-deadline: Reserve 25% of scheduler tags for synchronous requests") Link: https://bugzilla.kernel.org/show_bug.cgi?id=215485 Reviewed-by: Bart Van Assche Signed-off-by: Jens Axboe (cherry picked from commit 46cdc45acb089c811d9a54fd50af33b96e5fae9d) Signed-off-by: Lee Jones Change-Id: Ied032438e73915832175fdc9a5d985f118115523 --- block/mq-deadline-main.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/block/mq-deadline-main.c b/block/mq-deadline-main.c index f441bf5929be..5f1f75bddcf0 100644 --- a/block/mq-deadline-main.c +++ b/block/mq-deadline-main.c @@ -866,7 +866,7 @@ SHOW_JIFFIES(deadline_write_expire_show, dd->fifo_expire[DD_WRITE]); SHOW_JIFFIES(deadline_aging_expire_show, dd->aging_expire); SHOW_INT(deadline_writes_starved_show, dd->writes_starved); SHOW_INT(deadline_front_merges_show, dd->front_merges); -SHOW_INT(deadline_async_depth_show, dd->front_merges); +SHOW_INT(deadline_async_depth_show, dd->async_depth); SHOW_INT(deadline_fifo_batch_show, dd->fifo_batch); #undef SHOW_INT #undef SHOW_JIFFIES @@ -896,7 +896,7 @@ STORE_JIFFIES(deadline_write_expire_store, &dd->fifo_expire[DD_WRITE], 0, INT_MA STORE_JIFFIES(deadline_aging_expire_store, &dd->aging_expire, 0, INT_MAX); STORE_INT(deadline_writes_starved_store, &dd->writes_starved, INT_MIN, INT_MAX); STORE_INT(deadline_front_merges_store, &dd->front_merges, 0, 1); -STORE_INT(deadline_async_depth_store, &dd->front_merges, 1, INT_MAX); +STORE_INT(deadline_async_depth_store, &dd->async_depth, 1, INT_MAX); STORE_INT(deadline_fifo_batch_store, &dd->fifo_batch, 0, INT_MAX); #undef STORE_FUNCTION #undef STORE_INT From 64eedcefd44d40e524f49f12acdb4fa2e7043805 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Fri, 21 Jan 2022 18:42:07 +0000 Subject: [PATCH 022/186] UPSTREAM: KVM: arm64: Use shadow SPSR_EL1 when injecting exceptions on !VHE Injecting an exception into a guest with non-VHE is risky business. 
Instead of writing in the shadow register for the switch code to restore it, we override the CPU register instead. Which gets overridden a few instructions later by said restore code. The result is that although the guest correctly gets the exception, it will return to the original context in some random state, depending on what was there in the first place... Boo. Fix the issue by writing to the shadow register. The original code is absolutely fine on VHE, as the state is already loaded, and writing to the shadow register in that case would actually be a bug. Bug: 254441685 Fixes: bb666c472ca2 ("KVM: arm64: Inject AArch64 exceptions from HYP") Cc: stable@vger.kernel.org Signed-off-by: Marc Zyngier Reviewed-by: Fuad Tabba Link: https://lore.kernel.org/r/20220121184207.423426-1-maz@kernel.org (cherry picked from commit 278583055a237270fac70518275ba877bf9e4013) Signed-off-by: Lee Jones Change-Id: I5337f3d80d29b798b3cd078fe93fac666c4ae00c --- arch/arm64/kvm/hyp/exception.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/arch/arm64/kvm/hyp/exception.c b/arch/arm64/kvm/hyp/exception.c index 73629094f903..0a99cb4e0c38 100644 --- a/arch/arm64/kvm/hyp/exception.c +++ b/arch/arm64/kvm/hyp/exception.c @@ -38,7 +38,10 @@ static inline void __vcpu_write_sys_reg(struct kvm_vcpu *vcpu, u64 val, int reg) static void __vcpu_write_spsr(struct kvm_vcpu *vcpu, u64 val) { - write_sysreg_el1(val, SYS_SPSR); + if (has_vhe()) + write_sysreg_el1(val, SYS_SPSR); + else + __vcpu_sys_reg(vcpu, SPSR_EL1) = val; } static void __vcpu_write_spsr_abt(struct kvm_vcpu *vcpu, u64 val) From f120d14123dfe0c654e33e1f93707156c1f67165 Mon Sep 17 00:00:00 2001 From: Yong Wu Date: Thu, 13 Jan 2022 19:10:51 +0800 Subject: [PATCH 023/186] BACKPORT: dt-bindings: memory: mtk-smi: Rename clock to clocks The property "clock" should be renamed to "clocks", and the "items" level should be deleted, since minItems/maxItems should not be put under "items".
Bug: 254441685 Fixes: 27bb0e42855a ("dt-bindings: memory: mediatek: Convert SMI to DT schema") Signed-off-by: Yong Wu Acked-by: AngeloGioacchino Del Regno Link: https://lore.kernel.org/r/20220113111057.29918-2-yong.wu@mediatek.com Signed-off-by: Krzysztof Kozlowski (cherry picked from commit 5bf7fa48374eafe29dbb30448a0b0c083853583f) Signed-off-by: Lee Jones Change-Id: I15dec8f5870fe225de78862cbff5eb10633839fb --- .../mediatek,smi-common.yaml | 21 ++++++++----------- .../memory-controllers/mediatek,smi-larb.yaml | 14 ++++++------- 2 files changed, 15 insertions(+), 20 deletions(-) diff --git a/Documentation/devicetree/bindings/memory-controllers/mediatek,smi-common.yaml b/Documentation/devicetree/bindings/memory-controllers/mediatek,smi-common.yaml index 56c78317f9b7..57b9d2344b8a 100644 --- a/Documentation/devicetree/bindings/memory-controllers/mediatek,smi-common.yaml +++ b/Documentation/devicetree/bindings/memory-controllers/mediatek,smi-common.yaml @@ -82,10 +82,9 @@ allOf: - mediatek,mt2701-smi-common then: properties: - clock: - items: - minItems: 3 - maxItems: 3 + clocks: + minItems: 3 + maxItems: 3 clock-names: items: - const: apb @@ -101,10 +100,9 @@ allOf: then: properties: - clock: - items: - minItems: 4 - maxItems: 4 + clocks: + minItems: 4 + maxItems: 4 clock-names: items: - const: apb @@ -114,10 +112,9 @@ allOf: else: # for gen2 HW that don't have gals properties: - clock: - items: - minItems: 2 - maxItems: 2 + clocks: + minItems: 2 + maxItems: 2 clock-names: items: - const: apb diff --git a/Documentation/devicetree/bindings/memory-controllers/mediatek,smi-larb.yaml b/Documentation/devicetree/bindings/memory-controllers/mediatek,smi-larb.yaml index 06b623b34f48..22323db1f925 100644 --- a/Documentation/devicetree/bindings/memory-controllers/mediatek,smi-larb.yaml +++ b/Documentation/devicetree/bindings/memory-controllers/mediatek,smi-larb.yaml @@ -77,10 +77,9 @@ allOf: then: properties: - clock: - items: - minItems: 3 - maxItems: 3 + clocks: + minItems: 
3 + maxItems: 3 clock-names: items: - const: apb @@ -89,10 +88,9 @@ allOf: else: properties: - clock: - items: - minItems: 2 - maxItems: 2 + clocks: + minItems: 2 + maxItems: 2 clock-names: items: - const: apb From 451971e07f90f1400da0324e618d75a342b32d9f Mon Sep 17 00:00:00 2001 From: Yong Wu Date: Thu, 13 Jan 2022 19:10:52 +0800 Subject: [PATCH 024/186] BACKPORT: dt-bindings: memory: mtk-smi: No need mediatek,larb-id for mt8167 Mute the warning from "make dtbs_check": larb@14016000: 'mediatek,larb-id' is a required property arch/arm64/boot/dts/mediatek/mt8167-pumpkin.dt.yaml larb@15001000: 'mediatek,larb-id' is a required property arch/arm64/boot/dts/mediatek/mt8167-pumpkin.dt.yaml larb@16010000: 'mediatek,larb-id' is a required property arch/arm64/boot/dts/mediatek/mt8167-pumpkin.dt.yaml As the description of mediatek,larb-id, the property is only required when the larbid is not consecutive from its IOMMU point of view. Also, from the description of mediatek,larbs in Documentation/devicetree/bindings/iommu/mediatek,iommu.yaml, all the larbs must sort by the larb index. In mt8167, there is only one IOMMU HW and three larbs. The drivers already know its larb index from the mediatek,larbs property of IOMMU, thus no need this property. 
Bug: 254441685 Fixes: 27bb0e42855a ("dt-bindings: memory: mediatek: Convert SMI to DT schema") Signed-off-by: Yong Wu Acked-by: AngeloGioacchino Del Regno Link: https://lore.kernel.org/r/20220113111057.29918-3-yong.wu@mediatek.com Signed-off-by: Krzysztof Kozlowski (cherry picked from commit ddc3a324889686ec9b358de20fdeec0d2668c7a8) Signed-off-by: Lee Jones Change-Id: I5e738d744f4bddb4f5aba34519117d0ee7f65d36 --- .../bindings/memory-controllers/mediatek,smi-larb.yaml | 1 - 1 file changed, 1 deletion(-) diff --git a/Documentation/devicetree/bindings/memory-controllers/mediatek,smi-larb.yaml b/Documentation/devicetree/bindings/memory-controllers/mediatek,smi-larb.yaml index 22323db1f925..957a6c3d3e3c 100644 --- a/Documentation/devicetree/bindings/memory-controllers/mediatek,smi-larb.yaml +++ b/Documentation/devicetree/bindings/memory-controllers/mediatek,smi-larb.yaml @@ -104,7 +104,6 @@ allOf: - mediatek,mt2701-smi-larb - mediatek,mt2712-smi-larb - mediatek,mt6779-smi-larb - - mediatek,mt8167-smi-larb then: required: From 3630e052b5e172fdcb59c45f0366e8dae3e7bb4f Mon Sep 17 00:00:00 2001 From: Yong Wu Date: Thu, 13 Jan 2022 19:10:53 +0800 Subject: [PATCH 025/186] UPSTREAM: dt-bindings: memory: mtk-smi: Correct minItems to 2 for the gals clocks Mute the warning from "make dtbs_check": larb@14017000: clock-names: ['apb', 'smi'] is too short arch/arm64/boot/dts/mediatek/mt8183-evb.dt.yaml arch/arm64/boot/dts/mediatek/mt8183-kukui-jacuzzi-burnet.dt.yaml ... 
larb@16010000: clock-names: ['apb', 'smi'] is too short arch/arm64/boot/dts/mediatek/mt8183-evb.dt.yaml arch/arm64/boot/dts/mediatek/mt8183-kukui-jacuzzi-burnet.dt.yaml larb@17010000: clock-names: ['apb', 'smi'] is too short arch/arm64/boot/dts/mediatek/mt8183-evb.dt.yaml arch/arm64/boot/dts/mediatek/mt8183-kukui-jacuzzi-burnet.dt.yaml If a platform's larb supports gals, there will be some larbs have one more "gals" clock while the others still only need "apb"/"smi" clocks, then the minItems for clocks and clock-names are 2. Bug: 254441685 Fixes: 27bb0e42855a ("dt-bindings: memory: mediatek: Convert SMI to DT schema") Signed-off-by: Yong Wu Reviewed-by: AngeloGioacchino Del Regno Link: https://lore.kernel.org/r/20220113111057.29918-4-yong.wu@mediatek.com Signed-off-by: Krzysztof Kozlowski (cherry picked from commit 996ebc0e332bfb3091395f9bd286d8349a57be62) Signed-off-by: Lee Jones Change-Id: Ied296442213e9a63eb77e7fc09303d09f32d5938 --- .../bindings/memory-controllers/mediatek,smi-larb.yaml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Documentation/devicetree/bindings/memory-controllers/mediatek,smi-larb.yaml b/Documentation/devicetree/bindings/memory-controllers/mediatek,smi-larb.yaml index 957a6c3d3e3c..2335a52f6f5a 100644 --- a/Documentation/devicetree/bindings/memory-controllers/mediatek,smi-larb.yaml +++ b/Documentation/devicetree/bindings/memory-controllers/mediatek,smi-larb.yaml @@ -78,9 +78,10 @@ allOf: then: properties: clocks: - minItems: 3 + minItems: 2 maxItems: 3 clock-names: + minItems: 2 items: - const: apb - const: smi From 044793f5af6291a402e7aee1c111f8546d610442 Mon Sep 17 00:00:00 2001 From: Xu Yang Date: Thu, 13 Jan 2022 17:29:43 +0800 Subject: [PATCH 026/186] UPSTREAM: usb: typec: tcpci: don't touch CC line if it's Vconn source With the AMS and Collision Avoidance, tcpm often needs to change the CC's termination. 
When one CC line is sourcing Vconn, if we still change its termination, the voltage of the other CC line is likely to be fluctuant and unstable. Therefore, we should verify whether a CC line is sourcing Vconn before changing its termination and only change the termination that is not a Vconn line. This can be done by reading the Vconn Present bit of POWER_STATUS register. To determine the polarity, we can read the Plug Orientation bit of TCPC_CONTROL register, since Vconn can only be sourced if Plug Orientation is set. Bug: 254441685 Fixes: 0908c5aca31e ("usb: typec: tcpm: AMS and Collision Avoidance") cc: Reviewed-by: Guenter Roeck Acked-by: Heikki Krogerus Signed-off-by: Xu Yang Link: https://lore.kernel.org/r/20220113092943.752372-1-xu.yang_2@nxp.com Signed-off-by: Greg Kroah-Hartman (cherry picked from commit 5638b0dfb6921f69943c705383ff40fb64b987f2) Signed-off-by: Lee Jones Change-Id: I7f9f7f22007e9270da3d90e360de793d78027f1d --- drivers/usb/typec/tcpm/tcpci.c | 26 ++++++++++++++++++++++++++ drivers/usb/typec/tcpm/tcpci.h | 1 + 2 files changed, 27 insertions(+) diff --git a/drivers/usb/typec/tcpm/tcpci.c b/drivers/usb/typec/tcpm/tcpci.c index fb9c14d0d584..17cea37c73fd 100644 --- a/drivers/usb/typec/tcpm/tcpci.c +++ b/drivers/usb/typec/tcpm/tcpci.c @@ -85,9 +85,25 @@ static int tcpci_write16(struct tcpci *tcpci, unsigned int reg, u16 val) static int tcpci_set_cc(struct tcpc_dev *tcpc, enum typec_cc_status cc) { struct tcpci *tcpci = tcpc_to_tcpci(tcpc); + bool vconn_pres; + enum typec_cc_polarity polarity = TYPEC_POLARITY_CC1; unsigned int reg; int ret; + ret = regmap_read(tcpci->regmap, TCPC_POWER_STATUS, &reg); + if (ret < 0) + return ret; + + vconn_pres = !!(reg & TCPC_POWER_STATUS_VCONN_PRES); + if (vconn_pres) { + ret = regmap_read(tcpci->regmap, TCPC_TCPC_CTRL, &reg); + if (ret < 0) + return ret; + + if (reg & TCPC_TCPC_CTRL_ORIENTATION) + polarity = TYPEC_POLARITY_CC2; + } + switch (cc) { case TYPEC_CC_RA: reg = (TCPC_ROLE_CTRL_CC_RA <<
TCPC_ROLE_CTRL_CC1_SHIFT) | @@ -122,6 +138,16 @@ static int tcpci_set_cc(struct tcpc_dev *tcpc, enum typec_cc_status cc) break; } + if (vconn_pres) { + if (polarity == TYPEC_POLARITY_CC2) { + reg &= ~(TCPC_ROLE_CTRL_CC1_MASK << TCPC_ROLE_CTRL_CC1_SHIFT); + reg |= (TCPC_ROLE_CTRL_CC_OPEN << TCPC_ROLE_CTRL_CC1_SHIFT); + } else { + reg &= ~(TCPC_ROLE_CTRL_CC2_MASK << TCPC_ROLE_CTRL_CC2_SHIFT); + reg |= (TCPC_ROLE_CTRL_CC_OPEN << TCPC_ROLE_CTRL_CC2_SHIFT); + } + } + ret = regmap_write(tcpci->regmap, TCPC_ROLE_CTRL, reg); if (ret < 0) return ret; diff --git a/drivers/usb/typec/tcpm/tcpci.h b/drivers/usb/typec/tcpm/tcpci.h index 2be7a77d400e..b2edd45f13c6 100644 --- a/drivers/usb/typec/tcpm/tcpci.h +++ b/drivers/usb/typec/tcpm/tcpci.h @@ -98,6 +98,7 @@ #define TCPC_POWER_STATUS_SOURCING_VBUS BIT(4) #define TCPC_POWER_STATUS_VBUS_DET BIT(3) #define TCPC_POWER_STATUS_VBUS_PRES BIT(2) +#define TCPC_POWER_STATUS_VCONN_PRES BIT(1) #define TCPC_POWER_STATUS_SINKING_VBUS BIT(0) #define TCPC_FAULT_STATUS 0x1f From c5027380a1179775ea26ac1f109fa03b066d21b9 Mon Sep 17 00:00:00 2001 From: Badhri Jagan Sridharan Date: Fri, 21 Jan 2022 17:55:20 -0800 Subject: [PATCH 027/186] UPSTREAM: usb: typec: tcpm: Do not disconnect when receiving VSAFE0V With some chargers, vbus might momentarily rise above VSAFE5V and fall back to 0V causing VSAFE0V to be triggered. This will report a VBUS off event causing TCPM to transition to SNK_UNATTACHED state where it should be waiting in either SNK_ATTACH_WAIT or SNK_DEBOUNCED state. This patch makes TCPM avoid VSAFE0V events while in SNK_ATTACH_WAIT or SNK_DEBOUNCED state. Stub from the spec: "4.5.2.2.4.2 Exiting from AttachWait.SNK State A Sink shall transition to Unattached.SNK when the state of both the CC1 and CC2 pins is SNK.Open for at least tPDDebounce. A DRP shall transition to Unattached.SRC when the state of both the CC1 and CC2 pins is SNK.Open for at least tPDDebounce."
[23.194131] CC1: 0 -> 0, CC2: 0 -> 5 [state SNK_UNATTACHED, polarity 0, connected] [23.201777] state change SNK_UNATTACHED -> SNK_ATTACH_WAIT [rev3 NONE_AMS] [23.209949] pending state change SNK_ATTACH_WAIT -> SNK_DEBOUNCED @ 170 ms [rev3 NONE_AMS] [23.300579] VBUS off [23.300668] state change SNK_ATTACH_WAIT -> SNK_UNATTACHED [rev3 NONE_AMS] [23.301014] VBUS VSAFE0V [23.301111] Start toggling Bug: 254441685 Fixes: 28b43d3d746b8 ("usb: typec: tcpm: Introduce vsafe0v for vbus") Cc: stable@vger.kernel.org Acked-by: Heikki Krogerus Signed-off-by: Badhri Jagan Sridharan Link: https://lore.kernel.org/r/20220122015520.332507-2-badhri@google.com Signed-off-by: Greg Kroah-Hartman (cherry picked from commit 746f96e7d6f7a276726860f696671766bfb24cf0) Signed-off-by: Lee Jones Change-Id: Id5661d224fbd49acd39b646cca2a5c328128eb4b --- drivers/usb/typec/tcpm/tcpm.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/usb/typec/tcpm/tcpm.c b/drivers/usb/typec/tcpm/tcpm.c index 9724311c5fa2..dc0d3a48894a 100644 --- a/drivers/usb/typec/tcpm/tcpm.c +++ b/drivers/usb/typec/tcpm/tcpm.c @@ -5354,6 +5354,10 @@ static void _tcpm_pd_vbus_vsafe0v(struct tcpm_port *port) case PR_SWAP_SNK_SRC_SOURCE_ON: /* Do nothing, vsafe0v is expected during transition */ break; + case SNK_ATTACH_WAIT: + case SNK_DEBOUNCED: + /*Do nothing, still waiting for VSAFE5V for connect */ + break; default: if (port->pwr_role == TYPEC_SINK && port->auto_vbus_discharge_enabled) tcpm_set_state(port, SNK_UNATTACHED, 0); From a5642af23478dbf814b5a93e79d361d6debd4fb0 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Tue, 1 Feb 2022 14:48:38 +0000 Subject: [PATCH 028/186] BACKPORT: arm64: Enable Cortex-A510 erratum 2051678 by default The recently added configuration option for Cortex A510 erratum 2051678 does not have a "default y" unlike other errata fixes. 
This appears to simply be an oversight since the help text suggests enabling the option if unsure and there's nothing in the commit log to suggest it is intentional. Bug: 254441685 Fixes: 297ae1eb23b0 ("arm64: cpufeature: List early Cortex-A510 parts as having broken dbm") Signed-off-by: Mark Brown Link: https://lore.kernel.org/r/20220201144838.20037-1-broonie@kernel.org Signed-off-by: Catalin Marinas (cherry picked from commit a4b92cebc31d49b7e6ef0ce584c7f2a2e112877d) Signed-off-by: Lee Jones Change-Id: I332b36ee28820c08b03474a38c3a2a5532fe3818 --- arch/arm64/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index c94121e06222..1196fe7686a7 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -671,6 +671,7 @@ config ARM64_ERRATUM_1508412 config ARM64_ERRATUM_2051678 bool "Cortex-A510: 2051678: disable Hardware Update of the page table's dirty bit" + default y help This options adds the workaround for ARM Cortex-A510 erratum ARM64_ERRATUM_2051678. Affected Coretex-A510 might not respect the ordering rules for From 62e7b5a95c881357944cbc97d7e3bbc54a29e8cd Mon Sep 17 00:00:00 2001 From: James Morse Date: Thu, 27 Jan 2022 12:20:50 +0000 Subject: [PATCH 029/186] UPSTREAM: KVM: arm64: Avoid consuming a stale esr value when SError occur When any exception other than an IRQ occurs, the CPU updates the ESR_EL2 register with the exception syndrome. An SError may also become pending, and will be synchronised by KVM. KVM notes the exception type, and whether an SError was synchronised in exit_code. When an exception other than an IRQ occurs, fixup_guest_exit() updates vcpu->arch.fault.esr_el2 from the hardware register. When an SError was synchronised, the vcpu esr value is used to determine if the exception was due to an HVC. If so, ELR_EL2 is moved back one instruction. This is so that KVM can process the SError first, and re-execute the HVC if the guest survives the SError. 
But if an IRQ synchronises an SError, the vcpu's esr value is stale. If the previous non-IRQ exception was an HVC, KVM will corrupt ELR_EL2, causing an unrelated guest instruction to be executed twice. Check ARM_EXCEPTION_CODE() before messing with ELR_EL2, IRQs don't update this register so don't need to check. Bug: 254441685 Fixes: defe21f49bc9 ("KVM: arm64: Move PC rollback on SError to HYP") Cc: stable@vger.kernel.org Reported-by: Steven Price Signed-off-by: James Morse Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20220127122052.1584324-3-james.morse@arm.com (cherry picked from commit 1c71dbc8a179d99dd9bb7e7fc1888db613cf85de) Signed-off-by: Lee Jones Change-Id: I6bdda1456cbcaf7697f09839f9ba8b4b5f2672bd --- arch/arm64/kvm/hyp/include/hyp/switch.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/arm64/kvm/hyp/include/hyp/switch.h b/arch/arm64/kvm/hyp/include/hyp/switch.h index 16807a46ab9f..a22555212681 100644 --- a/arch/arm64/kvm/hyp/include/hyp/switch.h +++ b/arch/arm64/kvm/hyp/include/hyp/switch.h @@ -423,7 +423,8 @@ static inline bool fixup_guest_exit(struct kvm_vcpu *vcpu, u64 *exit_code) if (ARM_EXCEPTION_CODE(*exit_code) != ARM_EXCEPTION_IRQ) vcpu->arch.fault.esr_el2 = read_sysreg_el2(SYS_ESR); - if (ARM_SERROR_PENDING(*exit_code)) { + if (ARM_SERROR_PENDING(*exit_code) && + ARM_EXCEPTION_CODE(*exit_code) != ARM_EXCEPTION_IRQ) { u8 esr_ec = kvm_vcpu_trap_get_class(vcpu); /* From 831851070882b76bf182424c12f7f49617c0c577 Mon Sep 17 00:00:00 2001 From: James Morse Date: Thu, 27 Jan 2022 12:20:51 +0000 Subject: [PATCH 030/186] UPSTREAM: KVM: arm64: Stop handle_exit() from handling HVC twice when an SError occurs Prior to commit defe21f49bc9 ("KVM: arm64: Move PC rollback on SError to HYP"), when an SError is synchronised due to another exception, KVM handles the SError first. If the guest survives, the instruction that triggered the original exception is re-executed to handle the first exception.
HVC is treated as a special case as the instruction wouldn't normally be re-executed, as it's not a trap. Commit defe21f49bc9 didn't preserve the behaviour of the 'return 1' that skips the rest of handle_exit(). Since commit defe21f49bc9, KVM will try to handle the SError and the original exception at the same time. When the exception was an HVC, fixup_guest_exit() has already rolled back ELR_EL2, meaning if the guest has virtual SError masked, it will execute and handle the HVC twice. Restore the original behaviour. Bug: 254441685 Fixes: defe21f49bc9 ("KVM: arm64: Move PC rollback on SError to HYP") Cc: stable@vger.kernel.org Signed-off-by: James Morse Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20220127122052.1584324-4-james.morse@arm.com (cherry picked from commit 1229630af88620f6e3a621a1ebd1ca14d9340df7) Signed-off-by: Lee Jones Change-Id: Ie3ba170f6aa453c8e1ae2941d1baf8e9340b300b --- arch/arm64/kvm/handle_exit.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/arch/arm64/kvm/handle_exit.c b/arch/arm64/kvm/handle_exit.c index cebe39f3b1b6..cb0054ea7332 100644 --- a/arch/arm64/kvm/handle_exit.c +++ b/arch/arm64/kvm/handle_exit.c @@ -240,6 +240,14 @@ int handle_exit(struct kvm_vcpu *vcpu, int exception_index) { struct kvm_run *run = vcpu->run; + if (ARM_SERROR_PENDING(exception_index)) { + /* + * The SError is handled by handle_exit_early(). If the guest + * survives it will re-execute the original instruction. + */ + return 1; + } + exception_index = ARM_EXCEPTION_CODE(exception_index); switch (exception_index) { From bbb0dfa3773f263c5f99e0ea0684ad2074383ce9 Mon Sep 17 00:00:00 2001 From: Joakim Tjernlund Date: Mon, 14 Feb 2022 18:56:43 +0100 Subject: [PATCH 031/186] UPSTREAM: arm64: Correct wrong label in macro __init_el2_gicv3 In commit: 114945d84a30a5fe ("arm64: Fix labels in el2_setup macros") We renamed a label from '1' to '.Lskip_gicv3_\@', but failed to update a branch to it, which now targets a later label also called '1'.
The branch is taken rarely, when GICv3 is present but SRE is disabled at EL3, causing a boot-time crash. Update the caller to the new label name. Bug: 254441685 Fixes: 114945d84a30 ("arm64: Fix labels in el2_setup macros") Cc: # 5.12.x Signed-off-by: Joakim Tjernlund Link: https://lore.kernel.org/r/20220214175643.21931-1-joakim.tjernlund@infinera.com Reviewed-by: Mark Rutland Reviewed-by: Marc Zyngier Signed-off-by: Catalin Marinas (cherry picked from commit 4f6de676d94ee8ddfc2e7e7cd935fc7cb2feff3a) Signed-off-by: Lee Jones Change-Id: I0df2028be4bee2d31a5f1eb0bde473e5c048cc6b --- arch/arm64/include/asm/el2_setup.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/include/asm/el2_setup.h b/arch/arm64/include/asm/el2_setup.h index bda918948471..692b234255e5 100644 --- a/arch/arm64/include/asm/el2_setup.h +++ b/arch/arm64/include/asm/el2_setup.h @@ -107,7 +107,7 @@ msr_s SYS_ICC_SRE_EL2, x0 isb // Make sure SRE is now set mrs_s x0, SYS_ICC_SRE_EL2 // Read SRE back, - tbz x0, #0, 1f // and check that it sticks + tbz x0, #0, .Lskip_gicv3_\@ // and check that it sticks msr_s SYS_ICH_HCR_EL2, xzr // Reset ICC_HCR_EL2 to defaults .Lskip_gicv3_\@: .endm From 045574171642fe8b5611b8c04f643a057b346aa5 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 21 Feb 2022 19:21:12 -0800 Subject: [PATCH 032/186] BACKPORT: net: add skb_set_end_offset() helper We have multiple places where this helper is convenient, and plan using it in the following patch. 
Bug: 254441685 Signed-off-by: Eric Dumazet Signed-off-by: Jakub Kicinski (cherry picked from commit 763087dab97547230a6807c865a6a5ae53a59247) [Lee: Solves a dependency for the next Fixes: patch] Signed-off-by: Lee Jones Change-Id: I044e7020f9d0b85073253fbd4629cf97bbd38736 --- include/linux/skbuff.h | 10 ++++++++++ net/core/skbuff.c | 17 ++++------------- 2 files changed, 14 insertions(+), 13 deletions(-) diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 3a02503b3637..2114fc0303f3 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -1417,6 +1417,11 @@ static inline unsigned int skb_end_offset(const struct sk_buff *skb) { return skb->end; } + +static inline void skb_set_end_offset(struct sk_buff *skb, unsigned int offset) +{ + skb->end = offset; +} #else static inline unsigned char *skb_end_pointer(const struct sk_buff *skb) { @@ -1427,6 +1432,11 @@ static inline unsigned int skb_end_offset(const struct sk_buff *skb) { return skb->end - skb->head; } + +static inline void skb_set_end_offset(struct sk_buff *skb, unsigned int offset) +{ + skb->end = skb->head + offset; +} #endif /* Internal */ diff --git a/net/core/skbuff.c b/net/core/skbuff.c index fc36c8eddbf7..b0f4a79f36ee 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -1676,11 +1676,10 @@ int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail, skb->head = data; skb->head_frag = 0; skb->data += off; + + skb_set_end_offset(skb, size); #ifdef NET_SKBUFF_DATA_USES_OFFSET - skb->end = size; off = nhead; -#else - skb->end = skb->head + size; #endif skb->tail += off; skb_headers_offset_update(skb, nhead); @@ -5975,11 +5974,7 @@ static int pskb_carve_inside_header(struct sk_buff *skb, const u32 off, skb->head = data; skb->data = data; skb->head_frag = 0; -#ifdef NET_SKBUFF_DATA_USES_OFFSET - skb->end = size; -#else - skb->end = skb->head + size; -#endif + skb_set_end_offset(skb, size); skb_set_tail_pointer(skb, skb_headlen(skb)); skb_headers_offset_update(skb, 0); 
skb->cloned = 0; @@ -6117,11 +6112,7 @@ static int pskb_carve_inside_nonlinear(struct sk_buff *skb, const u32 off, skb->head = data; skb->head_frag = 0; skb->data = data; -#ifdef NET_SKBUFF_DATA_USES_OFFSET - skb->end = size; -#else - skb->end = skb->head + size; -#endif + skb_set_end_offset(skb, size); skb_reset_tail_pointer(skb); skb_headers_offset_update(skb, 0); skb->cloned = 0; From 712eaeb09ca8f37fe6a43d25775631122097198f Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 21 Feb 2022 19:21:13 -0800 Subject: [PATCH 033/186] UPSTREAM: net: preserve skb_end_offset() in skb_unclone_keeptruesize() syzbot found another way to trigger the infamous WARN_ON_ONCE(delta < len) in skb_try_coalesce() [1] I was able to root cause the issue to kfence. When kfence is in action, the following assertion is no longer true: int size = xxxx; void *ptr1 = kmalloc(size, gfp); void *ptr2 = kmalloc(size, gfp); if (ptr1 && ptr2) ASSERT(ksize(ptr1) == ksize(ptr2)); We attempted to fix these issues in the blamed commits, but forgot that TCP was possibly shifting data after skb_unclone_keeptruesize() has been used, notably from tcp_retrans_try_collapse(). So we not only need to keep same skb->truesize value, we also need to make sure TCP wont fill new tailroom that pskb_expand_head() was able to get from a addr = kmalloc(...) followed by ksize(addr) Split skb_unclone_keeptruesize() into two parts: 1) Inline skb_unclone_keeptruesize() for the common case, when skb is not cloned. 2) Out of line __skb_unclone_keeptruesize() for the 'slow path'. 
WARNING: CPU: 1 PID: 6490 at net/core/skbuff.c:5295 skb_try_coalesce+0x1235/0x1560 net/core/skbuff.c:5295 Modules linked in: CPU: 1 PID: 6490 Comm: syz-executor161 Not tainted 5.17.0-rc4-syzkaller-00229-g4f12b742eb2b #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 RIP: 0010:skb_try_coalesce+0x1235/0x1560 net/core/skbuff.c:5295 Code: bf 01 00 00 00 0f b7 c0 89 c6 89 44 24 20 e8 62 24 4e fa 8b 44 24 20 83 e8 01 0f 85 e5 f0 ff ff e9 87 f4 ff ff e8 cb 20 4e fa <0f> 0b e9 06 f9 ff ff e8 af b2 95 fa e9 69 f0 ff ff e8 95 b2 95 fa RSP: 0018:ffffc900063af268 EFLAGS: 00010293 RAX: 0000000000000000 RBX: 00000000ffffffd5 RCX: 0000000000000000 RDX: ffff88806fc05700 RSI: ffffffff872abd55 RDI: 0000000000000003 RBP: ffff88806e675500 R08: 00000000ffffffd5 R09: 0000000000000000 R10: ffffffff872ab659 R11: 0000000000000000 R12: ffff88806dd554e8 R13: ffff88806dd9bac0 R14: ffff88806dd9a2c0 R15: 0000000000000155 FS: 00007f18014f9700(0000) GS:ffff8880b9c00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000020002000 CR3: 000000006be7a000 CR4: 00000000003506f0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: tcp_try_coalesce net/ipv4/tcp_input.c:4651 [inline] tcp_try_coalesce+0x393/0x920 net/ipv4/tcp_input.c:4630 tcp_queue_rcv+0x8a/0x6e0 net/ipv4/tcp_input.c:4914 tcp_data_queue+0x11fd/0x4bb0 net/ipv4/tcp_input.c:5025 tcp_rcv_established+0x81e/0x1ff0 net/ipv4/tcp_input.c:5947 tcp_v4_do_rcv+0x65e/0x980 net/ipv4/tcp_ipv4.c:1719 sk_backlog_rcv include/net/sock.h:1037 [inline] __release_sock+0x134/0x3b0 net/core/sock.c:2779 release_sock+0x54/0x1b0 net/core/sock.c:3311 sk_wait_data+0x177/0x450 net/core/sock.c:2821 tcp_recvmsg_locked+0xe28/0x1fd0 net/ipv4/tcp.c:2457 tcp_recvmsg+0x137/0x610 net/ipv4/tcp.c:2572 inet_recvmsg+0x11b/0x5e0 net/ipv4/af_inet.c:850 sock_recvmsg_nosec net/socket.c:948 [inline] sock_recvmsg 
net/socket.c:966 [inline] sock_recvmsg net/socket.c:962 [inline] ____sys_recvmsg+0x2c4/0x600 net/socket.c:2632 ___sys_recvmsg+0x127/0x200 net/socket.c:2674 __sys_recvmsg+0xe2/0x1a0 net/socket.c:2704 do_syscall_x64 arch/x86/entry/common.c:50 [inline] do_syscall_64+0x35/0xb0 arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x44/0xae Bug: 254441685 Fixes: c4777efa751d ("net: add and use skb_unclone_keeptruesize() helper") Fixes: 097b9146c0e2 ("net: fix up truesize of cloned skb in skb_prepare_for_shift()") Reported-by: syzbot Signed-off-by: Eric Dumazet Cc: Marco Elver Signed-off-by: Jakub Kicinski (cherry picked from commit 2b88cba55883eaafbc9b7cbff0b2c7cdba71ed01) Signed-off-by: Lee Jones Change-Id: I9c25fea9153c553d7105ea73f7aaf486d00804db --- include/linux/skbuff.h | 18 +++++++++--------- net/core/skbuff.c | 32 ++++++++++++++++++++++++++++++++ 2 files changed, 41 insertions(+), 9 deletions(-) diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 2114fc0303f3..2b9dc0041c22 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -1656,19 +1656,19 @@ static inline int skb_unclone(struct sk_buff *skb, gfp_t pri) return 0; } -/* This variant of skb_unclone() makes sure skb->truesize is not changed */ +/* This variant of skb_unclone() makes sure skb->truesize + * and skb_end_offset() are not changed, whenever a new skb->head is needed. + * + * Indeed there is no guarantee that ksize(kmalloc(X)) == ksize(kmalloc(X)) + * when various debugging features are in place. 
+ */ +int __skb_unclone_keeptruesize(struct sk_buff *skb, gfp_t pri); static inline int skb_unclone_keeptruesize(struct sk_buff *skb, gfp_t pri) { might_sleep_if(gfpflags_allow_blocking(pri)); - if (skb_cloned(skb)) { - unsigned int save = skb->truesize; - int res; - - res = pskb_expand_head(skb, 0, 0, pri); - skb->truesize = save; - return res; - } + if (skb_cloned(skb)) + return __skb_unclone_keeptruesize(skb, pri); return 0; } diff --git a/net/core/skbuff.c b/net/core/skbuff.c index b0f4a79f36ee..6d8f347f98e5 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -1727,6 +1727,38 @@ struct sk_buff *skb_realloc_headroom(struct sk_buff *skb, unsigned int headroom) } EXPORT_SYMBOL(skb_realloc_headroom); +int __skb_unclone_keeptruesize(struct sk_buff *skb, gfp_t pri) +{ + unsigned int saved_end_offset, saved_truesize; + struct skb_shared_info *shinfo; + int res; + + saved_end_offset = skb_end_offset(skb); + saved_truesize = skb->truesize; + + res = pskb_expand_head(skb, 0, 0, pri); + if (res) + return res; + + skb->truesize = saved_truesize; + + if (likely(skb_end_offset(skb) == saved_end_offset)) + return 0; + + shinfo = skb_shinfo(skb); + + /* We are about to change back skb->end, + * we need to move skb_shinfo() to its new location. + */ + memmove(skb->head + saved_end_offset, + shinfo, + offsetof(struct skb_shared_info, frags[shinfo->nr_frags])); + + skb_set_end_offset(skb, saved_end_offset); + + return 0; +} + /** * skb_copy_expand - copy and expand sk_buff * @skb: buffer to copy From 83ed3e2c4a2095d2f49efc7b4e192c0bb1c90b96 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Thu, 17 Feb 2022 09:49:59 +0100 Subject: [PATCH 034/186] BACKPORT: vsprintf: Fix %pK with kptr_restrict == 0 Although kptr_restrict is set to 0 and the kernel is booted with no_hash_pointers parameter, the content of /proc/vmallocinfo is lacking the real addresses. 
/ # cat /proc/vmallocinfo 0x(ptrval)-0x(ptrval) 8192 load_module+0xc0c/0x2c0c pages=1 vmalloc 0x(ptrval)-0x(ptrval) 12288 start_kernel+0x4e0/0x690 pages=2 vmalloc 0x(ptrval)-0x(ptrval) 12288 start_kernel+0x4e0/0x690 pages=2 vmalloc 0x(ptrval)-0x(ptrval) 8192 _mpic_map_mmio.constprop.0+0x20/0x44 phys=0x80041000 ioremap 0x(ptrval)-0x(ptrval) 12288 _mpic_map_mmio.constprop.0+0x20/0x44 phys=0x80041000 ioremap ... According to the documentation for /proc/sys/kernel/, %pK is equivalent to %p when kptr_restrict is set to 0. Bug: 254441685 Fixes: 5ead723a20e0 ("lib/vsprintf: no_hash_pointers prints all addresses as unhashed") Signed-off-by: Christophe Leroy Reviewed-by: Petr Mladek Signed-off-by: Petr Mladek Link: https://lore.kernel.org/r/107476128e59bff11a309b5bf7579a1753a41aca.1645087605.git.christophe.leroy@csgroup.eu (cherry picked from commit 84842911322fc6a02a03ab9e728a48c691fe3efd) Signed-off-by: Lee Jones Change-Id: I5ac3ae8796559613cedf9a259d39b99765d7165a --- .../admin-guide/kernel-parameters.txt | 3 +- lib/vsprintf.c | 36 +++++++++++-------- 2 files changed, 22 insertions(+), 17 deletions(-) diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index f7c921b029f6..97f84a6cf3fb 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -3416,8 +3416,7 @@ difficult since unequal pointers can no longer be compared. However, if this command-line option is specified, then all normal pointers will have their true - value printed. Pointers printed via %pK may still be - hashed. This option should only be specified when + value printed. This option should only be specified when debugging the kernel. Please do not use on production kernels. 
diff --git a/lib/vsprintf.c b/lib/vsprintf.c index 93293eb04df9..5a1bf311b913 100644 --- a/lib/vsprintf.c +++ b/lib/vsprintf.c @@ -53,6 +53,10 @@ #include #include "kstrtox.h" +/* Disable pointer hashing if requested */ +bool no_hash_pointers __ro_after_init; +EXPORT_SYMBOL_GPL(no_hash_pointers); + static unsigned long long simple_strntoull(const char *startp, size_t max_chars, char **endp, unsigned int base) { @@ -849,6 +853,19 @@ static char *ptr_to_id(char *buf, char *end, const void *ptr, return pointer_string(buf, end, (const void *)hashval, spec); } +static char *default_pointer(char *buf, char *end, const void *ptr, + struct printf_spec spec) +{ + /* + * default is to _not_ leak addresses, so hash before printing, + * unless no_hash_pointers is specified on the command line. + */ + if (unlikely(no_hash_pointers)) + return pointer_string(buf, end, ptr, spec); + + return ptr_to_id(buf, end, ptr, spec); +} + int kptr_restrict __read_mostly; static noinline_for_stack @@ -858,7 +875,7 @@ char *restricted_pointer(char *buf, char *end, const void *ptr, switch (kptr_restrict) { case 0: /* Handle as %p, hash and do _not_ leak addresses. 
*/ - return ptr_to_id(buf, end, ptr, spec); + return default_pointer(buf, end, ptr, spec); case 1: { const struct cred *cred; @@ -2118,10 +2135,6 @@ char *fwnode_string(char *buf, char *end, struct fwnode_handle *fwnode, return widen_string(buf, buf - buf_start, end, spec); } -/* Disable pointer hashing if requested */ -bool no_hash_pointers __ro_after_init; -EXPORT_SYMBOL_GPL(no_hash_pointers); - static int __init no_hash_pointers_enable(char *str) { no_hash_pointers = true; @@ -2339,7 +2352,7 @@ char *pointer(const char *fmt, char *buf, char *end, void *ptr, case 'e': /* %pe with a non-ERR_PTR gets treated as plain %p */ if (!IS_ERR(ptr)) - break; + return default_pointer(buf, end, ptr, spec); return err_ptr(buf, end, ptr, spec); case 'u': case 'k': @@ -2349,16 +2362,9 @@ char *pointer(const char *fmt, char *buf, char *end, void *ptr, default: return error_string(buf, end, "(einval)", spec); } + default: + return default_pointer(buf, end, ptr, spec); } - - /* - * default is to _not_ leak addresses, so hash before printing, - * unless no_hash_pointers is specified on the command line. - */ - if (unlikely(no_hash_pointers)) - return pointer_string(buf, end, ptr, spec); - else - return ptr_to_id(buf, end, ptr, spec); } /* From f55885db0cb5a887cceed753486c6a73dea95016 Mon Sep 17 00:00:00 2001 From: Suren Baghdasaryan Date: Fri, 25 Feb 2022 19:11:05 -0800 Subject: [PATCH 035/186] UPSTREAM: mm: fix use-after-free bug when mm->mmap is reused after being freed oom reaping (__oom_reap_task_mm) relies on a 2 way synchronization with exit_mmap. First it relies on the mmap_lock to exclude from unlock path[1], page tables tear down (free_pgtables) and vma destruction. This alone is not sufficient because mm->mmap is never reset. For historical reasons[2] the lock is taken there is also MMF_OOM_SKIP set for oom victims before. 
The oom reaper only ever looks at oom victims so the whole scheme works properly but process_mrelease can operate on any task (with fatal signals pending) which doesn't really imply oom victims. That means that the MMF_OOM_SKIP part of the synchronization doesn't work and it can see a task after the whole address space has been demolished and traverse an already released mm->mmap list. This leads to use after free as properly caught up by KASAN report. Fix the issue by resetting mm->mmap so that MMF_OOM_SKIP synchronization is not needed anymore. The MMF_OOM_SKIP is not removed from exit_mmap yet but it acts mostly as an optimization now. [1] 27ae357fa82b ("mm, oom: fix concurrent munlock and oom reaper unmap, v3") [2] 212925802454 ("mm: oom: let oom_reap_task and exit_mmap run concurrently") [mhocko@suse.com: changelog rewrite] Bug: 254441685 Link: https://lore.kernel.org/all/00000000000072ef2c05d7f81950@google.com/ Link: https://lkml.kernel.org/r/20220215201922.1908156-1-surenb@google.com Fixes: 64591e8605d6 ("mm: protect free_pgtables with mmap_lock write lock in exit_mmap") Signed-off-by: Suren Baghdasaryan Reported-by: syzbot+2ccf63a4bd07cf39cab0@syzkaller.appspotmail.com Suggested-by: Michal Hocko Reviewed-by: Rik van Riel Reviewed-by: Yang Shi Acked-by: Michal Hocko Cc: David Rientjes Cc: Matthew Wilcox Cc: Johannes Weiner Cc: Roman Gushchin Cc: Rik van Riel Cc: Minchan Kim Cc: Kirill A.
Shutemov Cc: Andrea Arcangeli Cc: Christian Brauner Cc: Christoph Hellwig Cc: Oleg Nesterov Cc: David Hildenbrand Cc: Jann Horn Cc: Shakeel Butt Cc: Andy Lutomirski Cc: Christian Brauner Cc: Florian Weimer Cc: Jan Engelhardt Cc: Tim Murray Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds (cherry picked from commit f798a1d4f94de9510e060d37b9b47721065a957c) Signed-off-by: Lee Jones Change-Id: Ief9a3aa104ff0aff20062eb4a874bd5495a124e5 --- mm/mmap.c | 1 + 1 file changed, 1 insertion(+) diff --git a/mm/mmap.c b/mm/mmap.c index 8741f27e81a9..1ec4cea3cac9 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -3343,6 +3343,7 @@ void exit_mmap(struct mm_struct *mm) vma = remove_vma(vma); cond_resched(); } + mm->mmap = NULL; mmap_write_unlock(mm); vm_unacct_memory(nr_accounted); } From ae9da95de3ae8fed6c3cc19b4fe9fd34e704081b Mon Sep 17 00:00:00 2001 From: Shawn Guo Date: Fri, 25 Feb 2022 14:48:15 +0800 Subject: [PATCH 036/186] UPSTREAM: PM: domains: Fix sleep-in-atomic bug caused by genpd_debug_remove() When a genpd with GENPD_FLAG_IRQ_SAFE gets removed, the following sleep-in-atomic bug will be seen, as genpd_debug_remove() will be called with a spinlock being held. 
[ 0.029183] BUG: sleeping function called from invalid context at kernel/locking/rwsem.c:1460 [ 0.029204] in_atomic(): 1, irqs_disabled(): 128, non_block: 0, pid: 1, name: swapper/0 [ 0.029219] preempt_count: 1, expected: 0 [ 0.029230] CPU: 1 PID: 1 Comm: swapper/0 Not tainted 5.17.0-rc4+ #489 [ 0.029245] Hardware name: Thundercomm TurboX CM2290 (DT) [ 0.029256] Call trace: [ 0.029265] dump_backtrace.part.0+0xbc/0xd0 [ 0.029285] show_stack+0x3c/0xa0 [ 0.029298] dump_stack_lvl+0x7c/0xa0 [ 0.029311] dump_stack+0x18/0x34 [ 0.029323] __might_resched+0x10c/0x13c [ 0.029338] __might_sleep+0x4c/0x80 [ 0.029351] down_read+0x24/0xd0 [ 0.029363] lookup_one_len_unlocked+0x9c/0xcc [ 0.029379] lookup_positive_unlocked+0x10/0x50 [ 0.029392] debugfs_lookup+0x68/0xac [ 0.029406] genpd_remove.part.0+0x12c/0x1b4 [ 0.029419] of_genpd_remove_last+0xa8/0xd4 [ 0.029434] psci_cpuidle_domain_probe+0x174/0x53c [ 0.029449] platform_probe+0x68/0xe0 [ 0.029462] really_probe+0x190/0x430 [ 0.029473] __driver_probe_device+0x90/0x18c [ 0.029485] driver_probe_device+0x40/0xe0 [ 0.029497] __driver_attach+0xf4/0x1d0 [ 0.029508] bus_for_each_dev+0x70/0xd0 [ 0.029523] driver_attach+0x24/0x30 [ 0.029534] bus_add_driver+0x164/0x22c [ 0.029545] driver_register+0x78/0x130 [ 0.029556] __platform_driver_register+0x28/0x34 [ 0.029569] psci_idle_init_domains+0x1c/0x28 [ 0.029583] do_one_initcall+0x50/0x1b0 [ 0.029595] kernel_init_freeable+0x214/0x280 [ 0.029609] kernel_init+0x2c/0x13c [ 0.029622] ret_from_fork+0x10/0x20 It doesn't seem necessary to call genpd_debug_remove() with the lock, so move it out from locking to fix the problem. Bug: 254441685 Fixes: 718072ceb211 ("PM: domains: create debugfs nodes when adding power domains") Signed-off-by: Shawn Guo Reviewed-by: Ulf Hansson Cc: 5.11+ # 5.11+ Signed-off-by: Rafael J. 
Wysocki (cherry picked from commit f6bfe8b5b2c2a5ac8bd2fc7bca3706e6c3fc26d8) Signed-off-by: Lee Jones Change-Id: If863989ededdf0d3368921fe66d1eaf6bb65c09a --- drivers/base/power/domain.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/base/power/domain.c b/drivers/base/power/domain.c index b1974d3e54a6..bc695f880a22 100644 --- a/drivers/base/power/domain.c +++ b/drivers/base/power/domain.c @@ -2052,9 +2052,9 @@ static int genpd_remove(struct generic_pm_domain *genpd) kfree(link); } - genpd_debug_remove(genpd); list_del(&genpd->gpd_list_node); genpd_unlock(genpd); + genpd_debug_remove(genpd); cancel_work_sync(&genpd->power_off_work); if (genpd_is_cpu_domain(genpd)) free_cpumask_var(genpd->cpus); From 9bb85986aabfec573f9a607179d863901f3c8f3f Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Thu, 24 Feb 2022 18:10:28 +0000 Subject: [PATCH 037/186] UPSTREAM: arm64: prevent instrumentation of bp hardening callbacks We may call arm64_apply_bp_hardening() early during entry (e.g. in el0_ia()) before it is safe to run instrumented code. Unfortunately this may result in running instrumented code in two cases: * The hardening callbacks called by arm64_apply_bp_hardening() are not marked as `noinstr`, and have been observed to be instrumented when compiled with either GCC or LLVM. * Since arm64_apply_bp_hardening() itself is only marked as `inline` rather than `__always_inline`, it is possible that the compiler decides to place it out-of-line, whereupon it may be instrumented. For example, with defconfig built with clang 13.0.0, call_hvc_arch_workaround_1() is compiled as: | : | d503233f paciasp | f81f0ffe str x30, [sp, #-16]! | 320183e0 mov w0, #0x80008000 | d503201f nop | d4000002 hvc #0x0 | f84107fe ldr x30, [sp], #16 | d50323bf autiasp | d65f03c0 ret ... but when CONFIG_FTRACE=y and CONFIG_KCOV=y this is compiled as: | : | d503245f bti c | d503201f nop | d503201f nop | d503233f paciasp | a9bf7bfd stp x29, x30, [sp, #-16]! 
| 910003fd mov x29, sp | 94000000 bl 0 <__sanitizer_cov_trace_pc> | 320183e0 mov w0, #0x80008000 | d503201f nop | d4000002 hvc #0x0 | a8c17bfd ldp x29, x30, [sp], #16 | d50323bf autiasp | d65f03c0 ret ... with a patchable function entry registered with ftrace, and a direct call to __sanitizer_cov_trace_pc(). Neither of these are safe early during entry sequences. This patch avoids the unsafe instrumentation by marking arm64_apply_bp_hardening() as `__always_inline` and by marking the hardening functions as `noinstr`. This avoids the potential for instrumentation, and causes clang to consistently generate the function as with the defconfig sample. Note: in the defconfig compilation, when CONFIG_SVE=y, x30 is spilled to the stack without being placed in a frame record, which will result in a missing entry if call_hvc_arch_workaround_1() is backtraced. Similar is true of qcom_link_stack_sanitisation(), where inline asm spills the LR to a GPR prior to corrupting it. This is not a significant issue presently as we will only backtrace here if an exception is taken, and in such cases we may omit entries for other reasons today. The relevant hardening functions were introduced in commits: ec82b567a74fbdff ("arm64: Implement branch predictor hardening for Falkor") b092201e00206141 ("arm64: Add ARM_SMCCC_ARCH_WORKAROUND_1 BP hardening support") ... and these were subsequently moved in commit: d4647f0a2ad71110 ("arm64: Rewrite Spectre-v2 mitigation code") The arm64_apply_bp_hardening() function was introduced in commit: 0f15adbb2861ce6f ("arm64: Add skeleton to harden the branch predictor against aliasing attacks") ... 
and was subsequently moved and reworked in commit: 6279017e807708a0 ("KVM: arm64: Move BP hardening helpers into spectre.h") Bug: 254441685 Fixes: ec82b567a74fbdff ("arm64: Implement branch predictor hardening for Falkor") Fixes: b092201e00206141 ("arm64: Add ARM_SMCCC_ARCH_WORKAROUND_1 BP hardening support") Fixes: d4647f0a2ad71110 ("arm64: Rewrite Spectre-v2 mitigation code") Fixes: 0f15adbb2861ce6f ("arm64: Add skeleton to harden the branch predictor against aliasing attacks") Fixes: 6279017e807708a0 ("KVM: arm64: Move BP hardening helpers into spectre.h") Signed-off-by: Mark Rutland Cc: Ard Biesheuvel Cc: Catalin Marinas Cc: James Morse Cc: Marc Zyngier Cc: Mark Brown Cc: Will Deacon Acked-by: Marc Zyngier Reviewed-by: Mark Brown Link: https://lore.kernel.org/r/20220224181028.512873-1-mark.rutland@arm.com Signed-off-by: Will Deacon (cherry picked from commit 614c0b9fee711dd89b1dd65c88ba83612a373fdc) Signed-off-by: Lee Jones Change-Id: I0515fc8f9e2c5f0406fd66ce6d9b1c82aab2c37e --- arch/arm64/include/asm/spectre.h | 3 ++- arch/arm64/kernel/proton-pack.c | 9 ++++++--- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/arch/arm64/include/asm/spectre.h b/arch/arm64/include/asm/spectre.h index c04d01dd4457..71060c1c23fe 100644 --- a/arch/arm64/include/asm/spectre.h +++ b/arch/arm64/include/asm/spectre.h @@ -67,7 +67,8 @@ struct bp_hardening_data { DECLARE_PER_CPU_READ_MOSTLY(struct bp_hardening_data, bp_hardening_data); -static inline void arm64_apply_bp_hardening(void) +/* Called during entry so must be __always_inline */ +static __always_inline void arm64_apply_bp_hardening(void) { struct bp_hardening_data *d; diff --git a/arch/arm64/kernel/proton-pack.c b/arch/arm64/kernel/proton-pack.c index 3c35f9376d3d..40be3a7c2c53 100644 --- a/arch/arm64/kernel/proton-pack.c +++ b/arch/arm64/kernel/proton-pack.c @@ -233,17 +233,20 @@ static void install_bp_hardening_cb(bp_hardening_cb_t fn) __this_cpu_write(bp_hardening_data.slot, HYP_VECTOR_SPECTRE_DIRECT); } 
-static void call_smc_arch_workaround_1(void) +/* Called during entry so must be noinstr */ +static noinstr void call_smc_arch_workaround_1(void) { arm_smccc_1_1_smc(ARM_SMCCC_ARCH_WORKAROUND_1, NULL); } -static void call_hvc_arch_workaround_1(void) +/* Called during entry so must be noinstr */ +static noinstr void call_hvc_arch_workaround_1(void) { arm_smccc_1_1_hvc(ARM_SMCCC_ARCH_WORKAROUND_1, NULL); } -static void qcom_link_stack_sanitisation(void) +/* Called during entry so must be noinstr */ +static noinstr void qcom_link_stack_sanitisation(void) { u64 tmp; From 3c6f11550b7ab49bdf53b6f73f82a0aea08a72dd Mon Sep 17 00:00:00 2001 From: Paul Semel Date: Tue, 8 Mar 2022 10:30:58 +0100 Subject: [PATCH 038/186] UPSTREAM: arm64: kasan: fix include error in MTE functions Fix `error: expected string literal in 'asm'`. This happens when compiling an ebpf object file that includes `net/net_namespace.h` from linux kernel headers. Include trace: include/net/net_namespace.h:10 include/linux/workqueue.h:9 include/linux/timer.h:8 include/linux/debugobjects.h:6 include/linux/spinlock.h:90 include/linux/workqueue.h:9 arch/arm64/include/asm/spinlock.h:9 arch/arm64/include/generated/asm/qrwlock.h:1 include/asm-generic/qrwlock.h:14 arch/arm64/include/asm/processor.h:33 arch/arm64/include/asm/kasan.h:9 arch/arm64/include/asm/mte-kasan.h:45 arch/arm64/include/asm/mte-def.h:14 Bug: 254441685 Signed-off-by: Paul Semel Fixes: 2cb34276427a ("arm64: kasan: simplify and inline MTE functions") Cc: # 5.12.x Link: https://lore.kernel.org/r/bacb5387-2992-97e4-0c48-1ed925905bee@gmail.com Signed-off-by: Catalin Marinas (cherry picked from commit b859ebedd1e730bbda69142fca87af4e712649a1) Signed-off-by: Lee Jones Change-Id: I5556726935fd0cafc83a038a4a5415f0c7eb8b45 --- arch/arm64/include/asm/mte-kasan.h | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm64/include/asm/mte-kasan.h b/arch/arm64/include/asm/mte-kasan.h index 41c48c1ba610..30ef96cc168e 100644 --- 
a/arch/arm64/include/asm/mte-kasan.h +++ b/arch/arm64/include/asm/mte-kasan.h @@ -5,6 +5,7 @@ #ifndef __ASM_MTE_KASAN_H #define __ASM_MTE_KASAN_H +#include #include #ifndef __ASSEMBLY__ From 329b0783c603ebde603c0f83f6029c76a684825e Mon Sep 17 00:00:00 2001 From: Maxime Ripard Date: Fri, 25 Feb 2022 15:35:23 +0100 Subject: [PATCH 039/186] UPSTREAM: clk: Fix clk_hw_get_clk() when dev is NULL Any registered clk_core structure can have a NULL pointer in its dev field. While never actually documented, this is evidenced by the wide usage of clk_register and clk_hw_register with a NULL device pointer, and the fact that the core of_clk_hw_register() function also passes a NULL device pointer. A call to clk_hw_get_clk() on a clk_hw struct whose clk_core is in that case will result in a NULL pointer dereference when it calls dev_name() on that NULL device pointer. Add a test for this case and use NULL as the dev_id if the device pointer is NULL. Bug: 254441685 Fixes: 30d6f8c15d2c ("clk: add api to get clk consumer from clk_hw") Signed-off-by: Maxime Ripard Link: https://lore.kernel.org/r/20220225143534.405820-2-maxime@cerno.tech Signed-off-by: Stephen Boyd (cherry picked from commit 0c1b56df451716ba207bbf59f303473643eee4fd) Signed-off-by: Lee Jones Change-Id: Ia05e58195c0b524752a099b6d0b41694a02685f1 --- drivers/clk/clk.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/clk/clk.c b/drivers/clk/clk.c index 77774a8c701b..4f228d0a201c 100644 --- a/drivers/clk/clk.c +++ b/drivers/clk/clk.c @@ -3828,8 +3828,9 @@ struct clk *clk_hw_create_clk(struct device *dev, struct clk_hw *hw, struct clk *clk_hw_get_clk(struct clk_hw *hw, const char *con_id) { struct device *dev = hw->core->dev; + const char *name = dev ? 
dev_name(dev) : NULL; - return clk_hw_create_clk(dev, hw, dev_name(dev), con_id); + return clk_hw_create_clk(dev, hw, name, con_id); } EXPORT_SYMBOL(clk_hw_get_clk); From 2357d700f8c03261de4c4204370121d6eb15a6cb Mon Sep 17 00:00:00 2001 From: Muchun Song Date: Sun, 27 Mar 2022 13:18:52 +0800 Subject: [PATCH 040/186] UPSTREAM: mm: kfence: fix missing objcg housekeeping for SLAB The objcg is not cleared and put for kfence object when it is freed, which could lead to memory leak for struct obj_cgroup and wrong statistics of NR_SLAB_RECLAIMABLE_B or NR_SLAB_UNRECLAIMABLE_B. Since the last freed object's objcg is not cleared, mem_cgroup_from_obj() could return the wrong memcg when this kfence object, which is not charged to any objcgs, is reallocated to other users. A real world issue [1] is caused by this bug. Bug: 254441685 Link: https://lore.kernel.org/all/000000000000cabcb505dae9e577@google.com/ [1] Reported-by: syzbot+f8c45ccc7d5d45fc5965@syzkaller.appspotmail.com Fixes: d3fb45f370d9 ("mm, kfence: insert KFENCE hooks for SLAB") Signed-off-by: Muchun Song Cc: Dmitry Vyukov Cc: Marco Elver Cc: Andrew Morton Signed-off-by: Linus Torvalds (cherry picked from commit ae085d7f9365de7da27ab5c0d16b12d51ea7fca9) Signed-off-by: Lee Jones Change-Id: If17f6048e312e0cf78d01f7c122b84b3fb4a58d8 --- mm/slab.c | 1 + 1 file changed, 1 insertion(+) diff --git a/mm/slab.c b/mm/slab.c index eced754636ad..731b868a65c8 100644 --- a/mm/slab.c +++ b/mm/slab.c @@ -3428,6 +3428,7 @@ static __always_inline void __cache_free(struct kmem_cache *cachep, void *objp, if (is_kfence_address(objp)) { kmemleak_free_recursive(objp, cachep->flags); + memcg_slab_free_hook(cachep, &objp, 1); __kfence_free(objp); return; } From 34055de440cd683c269c0895fba2f8b7eb2e168a Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Thu, 17 Mar 2022 09:49:02 +0000 Subject: [PATCH 041/186] UPSTREAM: irqchip/gic-v4: Wait for GICR_VPENDBASER.Dirty to clear before descheduling The way KVM drives GICv4.{0,1} is as follows: - 
vcpu_load() makes the VPE resident, instructing the RD to start scanning for interrupts - just before entering the guest, we check that the RD has finished scanning and that we can start running the vcpu - on preemption, we deschedule the VPE by making it invalid on the RD However, we are preemptible between the first two steps. If it so happens *and* that the RD was still scanning, we nonetheless write to the GICR_VPENDBASER register while Dirty is set, and bad things happen (we're in UNPRED land). This affects both the 4.0 and 4.1 implementations. Make sure Dirty is cleared before performing the deschedule, meaning that its_clear_vpend_valid() becomes a sort of full VPE residency barrier. Bug: 254441685 Reported-by: Jingyi Wang Tested-by: Nianyao Tang Signed-off-by: Marc Zyngier Fixes: 57e3cebd022f ("KVM: arm64: Delay the polling of the GICR_VPENDBASER.Dirty bit") Link: https://lore.kernel.org/r/4aae10ba-b39a-5f84-754b-69c2eb0a2c03@huawei.com (cherry picked from commit af27e41612ec7e5b4783f589b753a7c31a37aac8) Signed-off-by: Lee Jones Change-Id: I5762305aa5fcc3bfc6fb2b77a459d6e122ad368f --- drivers/irqchip/irq-gic-v3-its.c | 28 +++++++++++++++++++--------- 1 file changed, 19 insertions(+), 9 deletions(-) diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c index 6c5a9c04ebe1..9e2332efcc0e 100644 --- a/drivers/irqchip/irq-gic-v3-its.c +++ b/drivers/irqchip/irq-gic-v3-its.c @@ -3002,18 +3002,12 @@ static int __init allocate_lpi_tables(void) return 0; } -static u64 its_clear_vpend_valid(void __iomem *vlpi_base, u64 clr, u64 set) +static u64 read_vpend_dirty_clear(void __iomem *vlpi_base) { u32 count = 1000000; /* 1s! 
*/ bool clean; u64 val; - val = gicr_read_vpendbaser(vlpi_base + GICR_VPENDBASER); - val &= ~GICR_VPENDBASER_Valid; - val &= ~clr; - val |= set; - gicr_write_vpendbaser(val, vlpi_base + GICR_VPENDBASER); - do { val = gicr_read_vpendbaser(vlpi_base + GICR_VPENDBASER); clean = !(val & GICR_VPENDBASER_Dirty); @@ -3024,10 +3018,26 @@ static u64 its_clear_vpend_valid(void __iomem *vlpi_base, u64 clr, u64 set) } } while (!clean && count); - if (unlikely(val & GICR_VPENDBASER_Dirty)) { + if (unlikely(!clean)) pr_err_ratelimited("ITS virtual pending table not cleaning\n"); + + return val; +} + +static u64 its_clear_vpend_valid(void __iomem *vlpi_base, u64 clr, u64 set) +{ + u64 val; + + /* Make sure we wait until the RD is done with the initial scan */ + val = read_vpend_dirty_clear(vlpi_base); + val &= ~GICR_VPENDBASER_Valid; + val &= ~clr; + val |= set; + gicr_write_vpendbaser(val, vlpi_base + GICR_VPENDBASER); + + val = read_vpend_dirty_clear(vlpi_base); + if (unlikely(val & GICR_VPENDBASER_Dirty)) val |= GICR_VPENDBASER_PendingLast; - } return val; } From 3d129ea9b5b1bab2b76c6a769fa9d2279d868c24 Mon Sep 17 00:00:00 2001 From: Hyunwoo Kim Date: Mon, 20 Jun 2022 07:17:46 -0700 Subject: [PATCH 042/186] UPSTREAM: video: fbdev: pxa3xx-gcu: Fix integer overflow in pxa3xx_gcu_write [ Upstream commit a09d2d00af53b43c6f11e6ab3cb58443c2cac8a7 ] In pxa3xx_gcu_write, a count parameter of type size_t is passed to words of type int. Then, copy_from_user() may cause a heap overflow because it is used as the third argument of copy_from_user(). 
Bug: 245928838 Signed-off-by: Hyunwoo Kim Signed-off-by: Helge Deller Signed-off-by: Sasha Levin Signed-off-by: Lee Jones Change-Id: I9e21917a52e2cb78cc640a77a6eba21838aa8655 --- drivers/video/fbdev/pxa3xx-gcu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/video/fbdev/pxa3xx-gcu.c b/drivers/video/fbdev/pxa3xx-gcu.c index 9421d14d0eb0..9e9888e40c57 100644 --- a/drivers/video/fbdev/pxa3xx-gcu.c +++ b/drivers/video/fbdev/pxa3xx-gcu.c @@ -381,7 +381,7 @@ pxa3xx_gcu_write(struct file *file, const char *buff, struct pxa3xx_gcu_batch *buffer; struct pxa3xx_gcu_priv *priv = to_pxa3xx_gcu_priv(file); - int words = count / 4; + size_t words = count / 4; /* Does not need to be atomic. There's a lock in user space, * but anyhow, this is just for statistics. */ From af8a0d5f2fa03be28bd5dc813b1a411b9b4bacff Mon Sep 17 00:00:00 2001 From: Sai Prakash Ranjan Date: Wed, 18 May 2022 22:14:12 +0530 Subject: [PATCH 043/186] UPSTREAM: irqchip/tegra: Fix overflow implicit truncation warnings MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 443685992bda9bb4f8b17fc02c9f6c60e62b1461 ] Fix -Woverflow warnings for tegra irqchip driver which is a result of moving arm64 custom MMIO accessor macros to asm-generic function implementations giving a bonus type-checking now and uncovering these overflow warnings. 
drivers/irqchip/irq-tegra.c: In function ‘tegra_ictlr_suspend’: drivers/irqchip/irq-tegra.c:151:18: warning: large integer implicitly truncated to unsigned type [-Woverflow] writel_relaxed(~0ul, ictlr + ICTLR_COP_IER_CLR); ^ Suggested-by: Marc Zyngier Signed-off-by: Sai Prakash Ranjan Reviewed-by: Arnd Bergmann Cc: Marc Zyngier Signed-off-by: Arnd Bergmann Signed-off-by: Sasha Levin Fixes: de3ce0804916a ("irqchip: tegra: Add DT-based support for legacy interrupt controller") Signed-off-by: Lee Jones Change-Id: Iaee226d0220c9774635cd51953d577ab7e2ebe77 --- drivers/irqchip/irq-tegra.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/irqchip/irq-tegra.c b/drivers/irqchip/irq-tegra.c index e1f771c72fc4..ad3e2c1b3c87 100644 --- a/drivers/irqchip/irq-tegra.c +++ b/drivers/irqchip/irq-tegra.c @@ -148,10 +148,10 @@ static int tegra_ictlr_suspend(void) lic->cop_iep[i] = readl_relaxed(ictlr + ICTLR_COP_IEP_CLASS); /* Disable COP interrupts */ - writel_relaxed(~0ul, ictlr + ICTLR_COP_IER_CLR); + writel_relaxed(GENMASK(31, 0), ictlr + ICTLR_COP_IER_CLR); /* Disable CPU interrupts */ - writel_relaxed(~0ul, ictlr + ICTLR_CPU_IER_CLR); + writel_relaxed(GENMASK(31, 0), ictlr + ICTLR_CPU_IER_CLR); /* Enable the wakeup sources of ictlr */ writel_relaxed(lic->ictlr_wake_mask[i], ictlr + ICTLR_CPU_IER_SET); @@ -172,12 +172,12 @@ static void tegra_ictlr_resume(void) writel_relaxed(lic->cpu_iep[i], ictlr + ICTLR_CPU_IEP_CLASS); - writel_relaxed(~0ul, ictlr + ICTLR_CPU_IER_CLR); + writel_relaxed(GENMASK(31, 0), ictlr + ICTLR_CPU_IER_CLR); writel_relaxed(lic->cpu_ier[i], ictlr + ICTLR_CPU_IER_SET); writel_relaxed(lic->cop_iep[i], ictlr + ICTLR_COP_IEP_CLASS); - writel_relaxed(~0ul, ictlr + ICTLR_COP_IER_CLR); + writel_relaxed(GENMASK(31, 0), ictlr + ICTLR_COP_IER_CLR); writel_relaxed(lic->cop_ier[i], ictlr + ICTLR_COP_IER_SET); } @@ -312,7 +312,7 @@ static int __init tegra_ictlr_init(struct device_node *node, lic->base[i] = base; /* Disable all 
interrupts */ - writel_relaxed(~0UL, base + ICTLR_CPU_IER_CLR); + writel_relaxed(GENMASK(31, 0), base + ICTLR_CPU_IER_CLR); /* All interrupts target IRQ */ writel_relaxed(0, base + ICTLR_CPU_IEP_CLASS); From 506e6890deb0a7c8effc9a0b6fa91ef5de7c36bb Mon Sep 17 00:00:00 2001 From: Sai Prakash Ranjan Date: Wed, 18 May 2022 22:14:13 +0530 Subject: [PATCH 044/186] UPSTREAM: drm/meson: Fix overflow implicit truncation warnings MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 98692f52c588225034cbff458622c2c06dfcb544 ] Fix -Woverflow warnings for drm/meson driver which is a result of moving arm64 custom MMIO accessor macros to asm-generic function implementations giving a bonus type-checking now and uncovering these overflow warnings. drivers/gpu/drm/meson/meson_viu.c: In function ‘meson_viu_init’: drivers/gpu/drm/meson/meson_registers.h:1826:48: error: large integer implicitly truncated to unsigned type [-Werror=overflow] #define VIU_OSD_BLEND_REORDER(dest, src) ((src) << (dest * 4)) ^ drivers/gpu/drm/meson/meson_viu.c:472:18: note: in expansion of macro ‘VIU_OSD_BLEND_REORDER’ writel_relaxed(VIU_OSD_BLEND_REORDER(0, 1) | ^~~~~~~~~~~~~~~~~~~~~ Reported-by: kernel test robot Signed-off-by: Sai Prakash Ranjan Reviewed-by: Arnd Bergmann Cc: Arnd Bergmann Cc: Neil Armstrong Signed-off-by: Arnd Bergmann Signed-off-by: Sasha Levin Fixes: 147ae1cbaa184 ("drm: meson: viu: use proper macros instead of magic constants") Signed-off-by: Lee Jones Change-Id: Id3502967ec9df74ea9420a34549bc0ac3c49dfa8 Signed-off-by: Lee Jones --- drivers/gpu/drm/meson/meson_viu.c | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/drivers/gpu/drm/meson/meson_viu.c b/drivers/gpu/drm/meson/meson_viu.c index 259f3e6bec90..bb7e109534de 100644 --- a/drivers/gpu/drm/meson/meson_viu.c +++ b/drivers/gpu/drm/meson/meson_viu.c @@ -469,17 +469,17 @@ void meson_viu_init(struct meson_drm *priv) priv->io_base + 
_REG(VD2_IF0_LUMA_FIFO_SIZE)); if (meson_vpu_is_compatible(priv, VPU_COMPATIBLE_G12A)) { - writel_relaxed(VIU_OSD_BLEND_REORDER(0, 1) | - VIU_OSD_BLEND_REORDER(1, 0) | - VIU_OSD_BLEND_REORDER(2, 0) | - VIU_OSD_BLEND_REORDER(3, 0) | - VIU_OSD_BLEND_DIN_EN(1) | - VIU_OSD_BLEND1_DIN3_BYPASS_TO_DOUT1 | - VIU_OSD_BLEND1_DOUT_BYPASS_TO_BLEND2 | - VIU_OSD_BLEND_DIN0_BYPASS_TO_DOUT0 | - VIU_OSD_BLEND_BLEN2_PREMULT_EN(1) | - VIU_OSD_BLEND_HOLD_LINES(4), - priv->io_base + _REG(VIU_OSD_BLEND_CTRL)); + u32 val = (u32)VIU_OSD_BLEND_REORDER(0, 1) | + (u32)VIU_OSD_BLEND_REORDER(1, 0) | + (u32)VIU_OSD_BLEND_REORDER(2, 0) | + (u32)VIU_OSD_BLEND_REORDER(3, 0) | + (u32)VIU_OSD_BLEND_DIN_EN(1) | + (u32)VIU_OSD_BLEND1_DIN3_BYPASS_TO_DOUT1 | + (u32)VIU_OSD_BLEND1_DOUT_BYPASS_TO_BLEND2 | + (u32)VIU_OSD_BLEND_DIN0_BYPASS_TO_DOUT0 | + (u32)VIU_OSD_BLEND_BLEN2_PREMULT_EN(1) | + (u32)VIU_OSD_BLEND_HOLD_LINES(4); + writel_relaxed(val, priv->io_base + _REG(VIU_OSD_BLEND_CTRL)); writel_relaxed(OSD_BLEND_PATH_SEL_ENABLE, priv->io_base + _REG(OSD1_BLEND_SRC_CTRL)); From 2f29ec11e3f79f3929653c895e1de7655681df5b Mon Sep 17 00:00:00 2001 From: "Sivasri Kumar, Vanka" Date: Thu, 10 Nov 2022 00:03:05 +0530 Subject: [PATCH 045/186] ANDROID: abi_gki_aarch64_qcom: Add wait_on_page_bit In commit fae05b2314b1 ("zsmalloc: fix races between asynchronous zspage free and page migration"), wait_on_page_bit symbol was required to fix the build of that target platform. 
Functions changes summary: 0 Removed, 0 Changed , 1 Added functions Variables changes summary: 0 Removed, 0 Changed , 0 Added variables 1 Added function: [A] 'function void wait_on_page_bit(page*, int)' Bug: 258412729 Change-Id: Ic392d6789788e1e2a46f95726fb0a0cce05896e1 Signed-off-by: Sivasri Kumar, Vanka Signed-off-by: Bibek Kumar Patro --- android/abi_gki_aarch64.xml | 6 +++--- android/abi_gki_aarch64_qcom | 3 ++- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/android/abi_gki_aarch64.xml b/android/abi_gki_aarch64.xml index 47faef88fd20..1b91e08e38ce 100644 --- a/android/abi_gki_aarch64.xml +++ b/android/abi_gki_aarch64.xml @@ -148306,9 +148306,9 @@ - - - + + + diff --git a/android/abi_gki_aarch64_qcom b/android/abi_gki_aarch64_qcom index 07b19dc38641..f3e65159f870 100644 --- a/android/abi_gki_aarch64_qcom +++ b/android/abi_gki_aarch64_qcom @@ -2583,7 +2583,6 @@ __traceiter_android_vh_jiffies_update __traceiter_android_vh_logbuf __traceiter_android_vh_logbuf_pr_cont - __tracepoint_android_vh_madvise_cold_or_pageout __traceiter_android_vh_printk_hotplug __traceiter_android_vh_rproc_recovery __traceiter_android_vh_rproc_recovery_set @@ -2705,6 +2704,7 @@ __tracepoint_android_vh_jiffies_update __tracepoint_android_vh_logbuf __tracepoint_android_vh_logbuf_pr_cont + __tracepoint_android_vh_madvise_cold_or_pageout __tracepoint_android_vh_oom_check_panic __tracepoint_android_vh_printk_hotplug __tracepoint_android_vh_process_killed @@ -3028,6 +3028,7 @@ wait_for_completion_interruptible_timeout wait_for_completion_killable wait_for_completion_timeout + wait_on_page_bit __wait_rcu_gp wait_woken __wake_up From 425e7c7e0e20a868034e71b909bf6fefd49b75ac Mon Sep 17 00:00:00 2001 From: Akilesh Kailash Date: Sun, 6 Nov 2022 20:52:29 +0000 Subject: [PATCH 046/186] ANDROID: dm-user: Remove bio recount in I/O path When I/O is submitted to dm-user target, bio already has a reference. Additional reference is not needed in the I/O path. 
Bug: 229696117 Test: OTA on Pixel Change-Id: I8db6802e751336d7a10c6de0bc7a247a6d7f6b37 Signed-off-by: Akilesh Kailash --- drivers/md/dm-user.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/md/dm-user.c b/drivers/md/dm-user.c index e5a85202d8a0..a8d771855e79 100644 --- a/drivers/md/dm-user.c +++ b/drivers/md/dm-user.c @@ -188,7 +188,6 @@ static void message_kill(struct message *m, mempool_t *pool) { m->bio->bi_status = BLK_STS_IOERR; bio_endio(m->bio); - bio_put(m->bio); mempool_free(m, pool); } @@ -989,7 +988,6 @@ finish_bio: */ WARN_ON(bio_size(c->cur_from_user->bio) != 0); bio_endio(c->cur_from_user->bio); - bio_put(c->cur_from_user->bio); /* * We don't actually need to take the target lock here, as all @@ -1227,7 +1225,6 @@ static int user_map(struct dm_target *ti, struct bio *bio) return DM_MAPIO_REQUEUE; } - bio_get(bio); entry->msg.type = bio_type_to_user_type(bio); entry->msg.flags = bio_flags_to_user_flags(bio); entry->msg.sector = bio->bi_iter.bi_sector; From c5589c7eec414305c3afc44adbbf79b63a9a7120 Mon Sep 17 00:00:00 2001 From: "T.J. Mercier" Date: Fri, 4 Nov 2022 19:16:04 +0000 Subject: [PATCH 047/186] ANDROID: dma-buf: Add vendor hook for deferred dmabuf sysfs stats release This vendor hook allows for the sysfs activity associated with dma-buf-sysfs-stats teardowns to be performed asynchronously similar to how the initialization works. Bug: 254192604 Signed-off-by: T.J. 
Mercier Change-Id: Ie076d0f8f67b96a97c71d9e6bf90539ebd9807bf --- drivers/android/vendor_hooks.c | 3 ++- drivers/dma-buf/dma-buf-sysfs-stats.c | 10 ++++++++-- include/trace/hooks/dmabuf.h | 27 +++++++++++++++++++++++++++ 3 files changed, 37 insertions(+), 3 deletions(-) create mode 100644 include/trace/hooks/dmabuf.h diff --git a/drivers/android/vendor_hooks.c b/drivers/android/vendor_hooks.c index 7ac6fd6d69c8..12066949e275 100644 --- a/drivers/android/vendor_hooks.c +++ b/drivers/android/vendor_hooks.c @@ -74,6 +74,7 @@ #include #include #include +#include /* * Export tracepoints that act as a bare tracehook (ie: have no trace event @@ -467,7 +468,7 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(android_vh_check_page_look_around_ref); EXPORT_TRACEPOINT_SYMBOL_GPL(android_vh_look_around); EXPORT_TRACEPOINT_SYMBOL_GPL(android_vh_look_around_migrate_page); EXPORT_TRACEPOINT_SYMBOL_GPL(android_vh_test_clear_look_around_ref); - +EXPORT_TRACEPOINT_SYMBOL_GPL(android_rvh_dma_buf_stats_teardown); /* * For type visibility */ diff --git a/drivers/dma-buf/dma-buf-sysfs-stats.c b/drivers/dma-buf/dma-buf-sysfs-stats.c index 3c7bb2baf8da..7ae64cd3dbb8 100644 --- a/drivers/dma-buf/dma-buf-sysfs-stats.c +++ b/drivers/dma-buf/dma-buf-sysfs-stats.c @@ -13,6 +13,8 @@ #include #include +#include + #include "dma-buf-sysfs-stats.h" #define to_dma_buf_entry_from_kobj(x) container_of(x, struct dma_buf_sysfs_entry, kobj) @@ -88,13 +90,17 @@ static struct kobj_type dma_buf_ktype = { void dma_buf_stats_teardown(struct dma_buf *dmabuf) { struct dma_buf_sysfs_entry *sysfs_entry; + bool skip_sysfs_release = false; sysfs_entry = dmabuf->sysfs_entry; if (!sysfs_entry) return; - kobject_del(&sysfs_entry->kobj); - kobject_put(&sysfs_entry->kobj); + trace_android_rvh_dma_buf_stats_teardown(sysfs_entry, &skip_sysfs_release); + if (!skip_sysfs_release) { + kobject_del(&sysfs_entry->kobj); + kobject_put(&sysfs_entry->kobj); + } } /* diff --git a/include/trace/hooks/dmabuf.h b/include/trace/hooks/dmabuf.h new file 
mode 100644 index 000000000000..8963742273bc --- /dev/null +++ b/include/trace/hooks/dmabuf.h @@ -0,0 +1,27 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#undef TRACE_SYSTEM +#define TRACE_SYSTEM dmabuf + +#undef TRACE_INCLUDE_PATH +#define TRACE_INCLUDE_PATH trace/hooks + +#if !defined(_TRACE_HOOK_DMABUF_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_HOOK_DMABUF_H + +#include + +#ifdef __GENKSYMS__ +struct dma_buf_sysfs_entry; +#else +/* struct dma_buf_sysfs_entry */ +#include +#endif + +DECLARE_RESTRICTED_HOOK(android_rvh_dma_buf_stats_teardown, + TP_PROTO(struct dma_buf_sysfs_entry *sysfs_entry, bool *skip_sysfs_release), + TP_ARGS(sysfs_entry, skip_sysfs_release), 1); +#endif /* _TRACE_HOOK_DMABUF_H */ +/* This part must be outside protection */ +#include + From 34c9208c28c37069b8b9151264e2ffc683addd37 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Wed, 12 Oct 2022 20:34:19 +0100 Subject: [PATCH 048/186] BACKPORT: ext4,f2fs: fix readahead of verity data The recent change of page_cache_ra_unbounded() arguments was buggy in the two callers, causing us to readahead the wrong pages. Move the definition of ractl down to after the index is set correctly. This affected performance on configurations that use fs-verity. 
Link: https://lkml.kernel.org/r/20221012193419.1453558-1-willy@infradead.org Fixes: 73bb49da50cd ("mm/readahead: make page_cache_ra_unbounded take a readahead_control") Signed-off-by: Matthew Wilcox (Oracle) Reported-by: Jintao Yin Signed-off-by: Andrew Morton Bug: 258554362 (cherry picked from commit 4fa0e3ff217f775cb58d2d6d51820ec519243fb9) Change-Id: Ib5160c5c53629be328c370f5d5d464956d6a6312 Signed-off-by: Eric Biggers --- fs/ext4/verity.c | 3 ++- fs/f2fs/verity.c | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/fs/ext4/verity.c b/fs/ext4/verity.c index 00e3cbde472e..35be8e7ec2a0 100644 --- a/fs/ext4/verity.c +++ b/fs/ext4/verity.c @@ -370,13 +370,14 @@ static struct page *ext4_read_merkle_tree_page(struct inode *inode, pgoff_t index, unsigned long num_ra_pages) { - DEFINE_READAHEAD(ractl, NULL, inode->i_mapping, index); struct page *page; index += ext4_verity_metadata_pos(inode) >> PAGE_SHIFT; page = find_get_page_flags(inode->i_mapping, index, FGP_ACCESSED); if (!page || !PageUptodate(page)) { + DEFINE_READAHEAD(ractl, NULL, inode->i_mapping, index); + if (page) put_page(page); else if (num_ra_pages > 1) diff --git a/fs/f2fs/verity.c b/fs/f2fs/verity.c index c700fb47e895..4a9db2d89d0f 100644 --- a/fs/f2fs/verity.c +++ b/fs/f2fs/verity.c @@ -261,13 +261,14 @@ static struct page *f2fs_read_merkle_tree_page(struct inode *inode, pgoff_t index, unsigned long num_ra_pages) { - DEFINE_READAHEAD(ractl, NULL, inode->i_mapping, index); struct page *page; index += f2fs_verity_metadata_pos(inode) >> PAGE_SHIFT; page = find_get_page_flags(inode->i_mapping, index, FGP_ACCESSED); if (!page || !PageUptodate(page)) { + DEFINE_READAHEAD(ractl, NULL, inode->i_mapping, index); + if (page) put_page(page); else if (num_ra_pages > 1) From da05d408ee2e1c105287cf56cdaf027d4c7bba9a Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Mon, 26 Jul 2021 09:12:15 -0700 Subject: [PATCH 049/186] BACKPORT: f2fs: do not submit NEW_ADDR to read node block After the below patch, 
given cp is errored, we drop dirty node pages. This can give NEW_ADDR to read node pages. Don't do WARN_ON() which gives generic/475 failure. Fixes: 28607bf3aa6f ("f2fs: drop dirty node pages when cp is in error status") Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim (cherry picked from commit b7ec2061737f12c33e45beeb967d17f31abc1ada) Change-Id: If991a70331933a64899864411d2b5d63ea5b3abf --- fs/f2fs/node.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 93fc1116f4a7..e11c693f28b9 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -1335,7 +1335,8 @@ static int read_node_page(struct page *page, int op_flags) if (err) return err; - if (unlikely(ni.blk_addr == NULL_ADDR) || + /* NEW_ADDR can be seen, after cp_error drops some dirty node pages */ + if (unlikely(ni.blk_addr == NULL_ADDR || ni.blk_addr == NEW_ADDR) || is_sbi_flag_set(sbi, SBI_IS_SHUTDOWN)) { ClearPageUptodate(page); return -ENOENT; From 250f35d78bcc0e9291690ab3dc26917d0dc144a3 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Tue, 8 Nov 2022 17:59:34 -0800 Subject: [PATCH 050/186] FROMGIT: f2fs: allow to read node block after shutdown If block address is still alive, we should give a valid node block even after shutdown. Otherwise, we can see zero data when reading out a file. 
Bug: 257271565 Cc: stable@vger.kernel.org Fixes: 83a3bfdb5a8 ("f2fs: indicate shutdown f2fs to allow unmount successfully") Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim (cherry picked from commit 6953bf65286d git://git.kernel.org/pub/scm/linux/kernel/git/jaegeuk/f2fs.git dev) Change-Id: Ifb70f6c73bd67d5112ee9fa1a5e4ad8e10ae8517 --- fs/f2fs/node.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index e11c693f28b9..6f8bd29923c1 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -1336,8 +1336,7 @@ static int read_node_page(struct page *page, int op_flags) return err; /* NEW_ADDR can be seen, after cp_error drops some dirty node pages */ - if (unlikely(ni.blk_addr == NULL_ADDR || ni.blk_addr == NEW_ADDR) || - is_sbi_flag_set(sbi, SBI_IS_SHUTDOWN)) { + if (unlikely(ni.blk_addr == NULL_ADDR || ni.blk_addr == NEW_ADDR)) { ClearPageUptodate(page); return -ENOENT; } From b54d7c828a87c2d6f6a140e4df43b805712f299b Mon Sep 17 00:00:00 2001 From: Yuanchu Xie Date: Mon, 18 Apr 2022 20:20:17 +0000 Subject: [PATCH 051/186] UPSTREAM: selftests/damon: add damon to selftests root Makefile Currently the damon selftests are not built with the rest of the selftests. We add damon to the list of targets. 
Bug: 254441685 Fixes: b348eb7abd09 ("mm/damon: add user space selftests") Reviewed-by: SeongJae Park Signed-off-by: Yuanchu Xie Acked-by: David Rientjes Signed-off-by: Shuah Khan (cherry picked from commit 678f0cdc572c5fda940cb038d70eebb8d818adc8) Signed-off-by: Lee Jones Change-Id: Ifa83410336c6213ccc7e2731768eeaaf9a4e372b --- tools/testing/selftests/Makefile | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/testing/selftests/Makefile b/tools/testing/selftests/Makefile index d9c283503159..5074c8b82367 100644 --- a/tools/testing/selftests/Makefile +++ b/tools/testing/selftests/Makefile @@ -9,6 +9,7 @@ TARGETS += clone3 TARGETS += core TARGETS += cpufreq TARGETS += cpu-hotplug +TARGETS += damon TARGETS += drivers/dma-buf TARGETS += efivarfs TARGETS += exec From ec3e1bddded709b86ac54983d745e6d6a80e32da Mon Sep 17 00:00:00 2001 From: Baolin Wang Date: Fri, 29 Apr 2022 14:36:58 -0700 Subject: [PATCH 052/186] UPSTREAM: mm: hugetlb: add missing cache flushing in hugetlb_unshare_all_pmds() Missed calling flush_cache_range() before removing the sharing PMD entries, otherwise a data consistency issue may occur on some architectures whose caches are strict and require a virtual>physical translation to exist for a virtual address. Thus add it. Now no architectures enabling PMD sharing will be affected, since they do not have a VIVT cache. That means this issue cannot happen in practice so far. 
Bug: 254441685 Link: https://lkml.kernel.org/r/47441086affcabb6ecbe403173e9283b0d904b38.1650956489.git.baolin.wang@linux.alibaba.com Link: https://lkml.kernel.org/r/419b0e777c9e6d1454dcd906e0f5b752a736d335.1650781755.git.baolin.wang@linux.alibaba.com Fixes: 6dfeaff93be1 ("hugetlb/userfaultfd: unshare all pmds for hugetlbfs when register wp") Signed-off-by: Baolin Wang Reviewed-by: Muchun Song Reviewed-by: Peter Xu Cc: Mike Kravetz Signed-off-by: Andrew Morton (cherry picked from commit 9c8bbfaca1bce84664403fd7dddbef6b3ff0a05a) Signed-off-by: Lee Jones Change-Id: Ifb14e96429c339520083ff8ddb2bf58f4e6fa899 --- mm/hugetlb.c | 1 + 1 file changed, 1 insertion(+) diff --git a/mm/hugetlb.c b/mm/hugetlb.c index bf7efe630e76..e1c7c9c189f9 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -5783,6 +5783,7 @@ void hugetlb_unshare_all_pmds(struct vm_area_struct *vma) if (start >= end) return; + flush_cache_range(vma, start, end); /* * No need to call adjust_range_if_pmd_sharing_possible(), because * we have already done the PUD_SIZE alignment. From f70c5827f95362e9e1681caaa4647a8153acc099 Mon Sep 17 00:00:00 2001 From: Yong Wu Date: Tue, 3 May 2022 15:13:55 +0800 Subject: [PATCH 053/186] UPSTREAM: iommu/mediatek: Fix 2 HW sharing pgtable issue In the commit 4f956c97d26b ("iommu/mediatek: Move domain_finalise into attach_device"), I overlooked the sharing pgtable case. After that commit, the "data" in the mtk_iommu_domain_finalise always is the data of the current IOMMU HW. Fix this for the sharing pgtable case. Only affect mt2712 which is the only SoC that share pgtable currently. 
Bug: 254441685 Fixes: 4f956c97d26b ("iommu/mediatek: Move domain_finalise into attach_device") Signed-off-by: Yong Wu Reviewed-by: AngeloGioacchino Del Regno Reviewed-by: Matthias Brugger Link: https://lore.kernel.org/r/20220503071427.2285-5-yong.wu@mediatek.com Signed-off-by: Joerg Roedel (cherry picked from commit 645b87c190c959e9bb4f216b8c4add4ee880451a) Signed-off-by: Lee Jones Change-Id: I5755277fe6ac8bc33ecd3fbfbded142e09f130a3 --- drivers/iommu/mtk_iommu.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/iommu/mtk_iommu.c b/drivers/iommu/mtk_iommu.c index e864288a6e0e..49cb90e4ffdb 100644 --- a/drivers/iommu/mtk_iommu.c +++ b/drivers/iommu/mtk_iommu.c @@ -458,7 +458,7 @@ static void mtk_iommu_domain_free(struct iommu_domain *domain) static int mtk_iommu_attach_device(struct iommu_domain *domain, struct device *dev) { - struct mtk_iommu_data *data = dev_iommu_priv_get(dev); + struct mtk_iommu_data *data = dev_iommu_priv_get(dev), *frstdata; struct mtk_iommu_domain *dom = to_mtk_domain(domain); struct device *m4udev = data->dev; int ret, domid; @@ -468,7 +468,10 @@ static int mtk_iommu_attach_device(struct iommu_domain *domain, return domid; if (!dom->data) { - if (mtk_iommu_domain_finalise(dom, data, domid)) + /* Data is in the frstdata in sharing pgtable case. */ + frstdata = mtk_iommu_get_m4u_data(); + + if (mtk_iommu_domain_finalise(dom, frstdata, domid)) return -ENODEV; dom->data = data; } From adec2c4a36d3edb9187124cf804278f48b0a1542 Mon Sep 17 00:00:00 2001 From: Yong Wu Date: Tue, 3 May 2022 15:13:57 +0800 Subject: [PATCH 054/186] UPSTREAM: iommu/mediatek: Remove clk_disable in mtk_iommu_remove After the commit b34ea31fe013 ("iommu/mediatek: Always enable the clk on resume"), the iommu clock is controlled by the runtime callback. thus remove the clk control in the mtk_iommu_remove. 
Otherwise, it will produce a warning like: echo 14018000.iommu > /sys/bus/platform/drivers/mtk-iommu/unbind [ 51.413044] ------------[ cut here ]------------ [ 51.413648] vpp0_smi_iommu already disabled [ 51.414233] WARNING: CPU: 2 PID: 157 at */v5.15-rc1/kernel/mediatek/ drivers/clk/clk.c:952 clk_core_disable+0xb0/0xb8 [ 51.417174] Hardware name: MT8195V/C(ENG) (DT) [ 51.418635] pc : clk_core_disable+0xb0/0xb8 [ 51.419177] lr : clk_core_disable+0xb0/0xb8 ... [ 51.429375] Call trace: [ 51.429694] clk_core_disable+0xb0/0xb8 [ 51.430193] clk_core_disable_lock+0x24/0x40 [ 51.430745] clk_disable+0x20/0x30 [ 51.431189] mtk_iommu_remove+0x58/0x118 [ 51.431705] platform_remove+0x28/0x60 [ 51.432197] device_release_driver_internal+0x110/0x1f0 [ 51.432873] device_driver_detach+0x18/0x28 [ 51.433418] unbind_store+0xd4/0x108 [ 51.433886] drv_attr_store+0x24/0x38 [ 51.434363] sysfs_kf_write+0x40/0x58 [ 51.434843] kernfs_fop_write_iter+0x164/0x1e0 Bug: 254441685 Fixes: b34ea31fe013 ("iommu/mediatek: Always enable the clk on resume") Reported-by: Hsin-Yi Wang Signed-off-by: Yong Wu Reviewed-by: AngeloGioacchino Del Regno Reviewed-by: Matthias Brugger Link: https://lore.kernel.org/r/20220503071427.2285-7-yong.wu@mediatek.com Signed-off-by: Joerg Roedel (cherry picked from commit 98df772bdd1c4ce717a26289efea15cbbe4b64ed) Signed-off-by: Lee Jones Change-Id: I1bf8fd0cc328daac8c55a6aa793aa01d8506ee92 --- drivers/iommu/mtk_iommu.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/iommu/mtk_iommu.c b/drivers/iommu/mtk_iommu.c index 49cb90e4ffdb..849a2655fde1 100644 --- a/drivers/iommu/mtk_iommu.c +++ b/drivers/iommu/mtk_iommu.c @@ -937,7 +937,6 @@ static int mtk_iommu_remove(struct platform_device *pdev) list_del(&data->list); - clk_disable_unprepare(data->bclk); device_link_remove(data->smicomm_dev, &pdev->dev); pm_runtime_disable(&pdev->dev); devm_free_irq(&pdev->dev, data->irq, data); From 18e0efa7c0785b0847934cfef39faec95bb489d4 Mon Sep 17 00:00:00 2001 From: Yong Wu Date: Tue, 3 May 
2022 15:13:58 +0800 Subject: [PATCH 055/186] UPSTREAM: iommu/mediatek: Add mutex for m4u_group and m4u_dom in data Add a mutex to protect the data in the structure mtk_iommu_data, like ->"m4u_group" ->"m4u_dom". For the internal data, we should protect it in ourselves driver. Add a mutex for this. This could be a fix for the multi-groups support. Bug: 254441685 Fixes: c3045f39244e ("iommu/mediatek: Support for multi domains") Signed-off-by: Yunfei Wang Signed-off-by: Yong Wu Reviewed-by: AngeloGioacchino Del Regno Reviewed-by: Matthias Brugger Link: https://lore.kernel.org/r/20220503071427.2285-8-yong.wu@mediatek.com Signed-off-by: Joerg Roedel (cherry picked from commit 0e5a3f2e630b28e88e018655548212ef8eb4dfcb) Signed-off-by: Lee Jones Change-Id: Ic2945a6ad8d5aff6a03490597624f6ea9eec7fd5 --- drivers/iommu/mtk_iommu.c | 13 +++++++++++-- drivers/iommu/mtk_iommu.h | 2 ++ 2 files changed, 13 insertions(+), 2 deletions(-) diff --git a/drivers/iommu/mtk_iommu.c b/drivers/iommu/mtk_iommu.c index 849a2655fde1..46f641481ac9 100644 --- a/drivers/iommu/mtk_iommu.c +++ b/drivers/iommu/mtk_iommu.c @@ -476,15 +476,16 @@ static int mtk_iommu_attach_device(struct iommu_domain *domain, dom->data = data; } + mutex_lock(&data->mutex); if (!data->m4u_dom) { /* Initialize the M4U HW */ ret = pm_runtime_resume_and_get(m4udev); if (ret < 0) - return ret; + goto err_unlock; ret = mtk_iommu_hw_init(data); if (ret) { pm_runtime_put(m4udev); - return ret; + goto err_unlock; } data->m4u_dom = dom; writel(dom->cfg.arm_v7s_cfg.ttbr & MMU_PT_ADDR_MASK, @@ -492,9 +493,14 @@ static int mtk_iommu_attach_device(struct iommu_domain *domain, pm_runtime_put(m4udev); } + mutex_unlock(&data->mutex); mtk_iommu_config(data, dev, true, domid); return 0; + +err_unlock: + mutex_unlock(&data->mutex); + return ret; } static void mtk_iommu_detach_device(struct iommu_domain *domain, @@ -606,6 +612,7 @@ static struct iommu_group *mtk_iommu_device_group(struct device *dev) if (domid < 0) return ERR_PTR(domid); + 
mutex_lock(&data->mutex); group = data->m4u_group[domid]; if (!group) { group = iommu_group_alloc(); @@ -614,6 +621,7 @@ static struct iommu_group *mtk_iommu_device_group(struct device *dev) } else { iommu_group_ref_get(group); } + mutex_unlock(&data->mutex); return group; } @@ -887,6 +895,7 @@ static int mtk_iommu_probe(struct platform_device *pdev) } platform_set_drvdata(pdev, data); + mutex_init(&data->mutex); ret = iommu_device_sysfs_add(&data->iommu, dev, NULL, "mtk-iommu.%pa", &ioaddr); diff --git a/drivers/iommu/mtk_iommu.h b/drivers/iommu/mtk_iommu.h index f81fa8862ed0..f413546ac6e5 100644 --- a/drivers/iommu/mtk_iommu.h +++ b/drivers/iommu/mtk_iommu.h @@ -80,6 +80,8 @@ struct mtk_iommu_data { struct dma_iommu_mapping *mapping; /* For mtk_iommu_v1.c */ + struct mutex mutex; /* Protect m4u_group/m4u_dom above */ + struct list_head list; struct mtk_smi_larb_iommu larb_imu[MTK_LARB_NR_MAX]; }; From 1199a5f80ddb313cc15374a343a590b08a4846eb Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Wed, 27 Apr 2022 11:03:21 +0200 Subject: [PATCH 056/186] UPSTREAM: module.h: simplify MODULE_IMPORT_NS In commit ca321ec74322 ("module.h: allow #define strings to work with MODULE_IMPORT_NS") I fixed up the MODULE_IMPORT_NS() macro to allow defined strings to work with it. Unfortunately I did it in a two-stage process, when it could just be done with the __stringify() macro as pointed out by Masahiro Yamada. Clean this up to only be one macro instead of two steps to achieve the same end result.
Bug: 254441685 Fixes: ca321ec74322 ("module.h: allow #define strings to work with MODULE_IMPORT_NS") Reported-by: Masahiro Yamada Cc: Luis Chamberlain Cc: Jessica Yu Cc: Matthias Maennich Signed-off-by: Greg Kroah-Hartman Signed-off-by: Luis Chamberlain (cherry picked from commit 80140a81f7f833998d732102eea0fea230b88067) Signed-off-by: Lee Jones Change-Id: I9d66b487211e8b09a8e547c8396923be91c780a0 --- include/linux/module.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/include/linux/module.h b/include/linux/module.h index c5c3ce08f646..4cd6d889d5ba 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -293,8 +293,7 @@ extern typeof(name) __mod_##type##__##name##_device_table \ * files require multiple MODULE_FIRMWARE() specifiers */ #define MODULE_FIRMWARE(_firmware) MODULE_INFO(firmware, _firmware) -#define _MODULE_IMPORT_NS(ns) MODULE_INFO(import_ns, #ns) -#define MODULE_IMPORT_NS(ns) _MODULE_IMPORT_NS(ns) +#define MODULE_IMPORT_NS(ns) MODULE_INFO(import_ns, __stringify(ns)) struct notifier_block; From e3b4c69c57c9e5abf5bb607033350e71731f6c21 Mon Sep 17 00:00:00 2001 From: Dong Aisheng Date: Fri, 13 May 2022 15:11:26 -0700 Subject: [PATCH 057/186] BACKPORT: Revert "mm/cma.c: remove redundant cma_mutex lock" This reverts commit a4efc174b382fcdb which introduced a regression issue that when there're multiple processes allocating dma memory in parallel by calling dma_alloc_coherent(), it may fail sometimes as follows: Error log: cma: cma_alloc: linux,cma: alloc failed, req-size: 148 pages, ret: -16 cma: number of available pages: 3@125+20@172+12@236+4@380+32@736+17@2287+23@2473+20@36076+99@40477+108@40852+44@41108+20@41196+108@41364+108@41620+ 108@42900+108@43156+483@44061+1763@45341+1440@47712+20@49324+20@49388+5076@49452+2304@55040+35@58141+20@58220+20@58284+ 7188@58348+84@66220+7276@66452+227@74525+6371@75549=> 33161 free of 81920 total pages When issue happened, we saw there were still 33161 pages (129M) free CMA memory and a 
lot available free slots for 148 pages in CMA bitmap that we want to allocate. When dumping memory info, we found that there was also ~342M normal memory, but only 1352K CMA memory left in buddy system while a lot of pageblocks were isolated. Memory info log: Normal free:351096kB min:30000kB low:37500kB high:45000kB reserved_highatomic:0KB active_anon:98060kB inactive_anon:98948kB active_file:60864kB inactive_file:31776kB unevictable:0kB writepending:0kB present:1048576kB managed:1018328kB mlocked:0kB bounce:0kB free_pcp:220kB local_pcp:192kB free_cma:1352kB lowmem_reserve[]: 0 0 0 Normal: 78*4kB (UECI) 1772*8kB (UMECI) 1335*16kB (UMECI) 360*32kB (UMECI) 65*64kB (UMCI) 36*128kB (UMECI) 16*256kB (UMCI) 6*512kB (EI) 8*1024kB (UEI) 4*2048kB (MI) 8*4096kB (EI) 8*8192kB (UI) 3*16384kB (EI) 8*32768kB (M) = 489288kB The root cause of this issue is that since commit a4efc174b382 ("mm/cma.c: remove redundant cma_mutex lock"), CMA supports concurrent memory allocation. It's possible that the memory range process A trying to alloc has already been isolated by the allocation of process B during memory migration. The problem here is that the memory range isolated during one allocation by start_isolate_page_range() could be much bigger than the real size we want to alloc due to the range is aligned to MAX_ORDER_NR_PAGES. Taking an ARMv7 platform with 1G memory as an example, when MAX_ORDER_NR_PAGES is big (e.g. 32M with max_order 14) and CMA memory is relatively small (e.g. 128M), there're only 4 MAX_ORDER slot, then it's very easy that all CMA memory may have already been isolated by other processes when one trying to allocate memory using dma_alloc_coherent(). Since current CMA code will only scan one time of whole available CMA memory, then dma_alloc_coherent() may easy fail due to contention with other processes. This patch simply falls back to the original method that using cma_mutex to make alloc_contig_range() run sequentially to avoid the issue. 
Bug: 254441685 Link: https://lkml.kernel.org/r/20220509094551.3596244-1-aisheng.dong@nxp.com Link: https://lore.kernel.org/all/20220315144521.3810298-2-aisheng.dong@nxp.com/ Fixes: a4efc174b382 ("mm/cma.c: remove redundant cma_mutex lock") Signed-off-by: Dong Aisheng Acked-by: Minchan Kim Acked-by: David Hildenbrand Cc: Marek Szyprowski Cc: Lecopzer Chen Cc: Vlastimil Babka Cc: [5.11+] Signed-off-by: Andrew Morton (cherry picked from commit 60a60e32cf91169840abcb4a80f0b0df31708ba7) Signed-off-by: Lee Jones Change-Id: I4f94b43316d2fae1481eefa038bb4d6fbf5946cd --- mm/cma.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/mm/cma.c b/mm/cma.c index a7638d7487c4..44ca2b65b427 100644 --- a/mm/cma.c +++ b/mm/cma.c @@ -47,6 +47,7 @@ extern void lru_cache_enable(void); struct cma cma_areas[MAX_CMA_AREAS]; unsigned cma_area_count; +static DEFINE_MUTEX(cma_mutex); phys_addr_t cma_get_base(const struct cma *cma) { @@ -511,7 +512,9 @@ struct page *cma_alloc(struct cma *cma, size_t count, unsigned int align, mutex_unlock(&cma->lock); pfn = cma->base_pfn + (bitmap_no << cma->order_per_bit); + mutex_lock(&cma_mutex); ret = alloc_contig_range(pfn, pfn + count, MIGRATE_CMA, gfp_mask, &info); + mutex_unlock(&cma_mutex); cma_info.nr_migrated += info.nr_migrated; cma_info.nr_reclaimed += info.nr_reclaimed; cma_info.nr_mapped += info.nr_mapped; From 521b2c566145441419b4c80cdda2b7101073818e Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Fri, 13 May 2022 10:13:07 -0700 Subject: [PATCH 058/186] UPSTREAM: block/mq-deadline: Set the fifo_time member also if inserting at head Before commit 322cff70d46c the fifo_time member of requests on a dispatch list was not used. Commit 322cff70d46c introduces code that reads the fifo_time member of requests on dispatch lists. Hence this patch that sets the fifo_time member when adding a request to a dispatch list. 
Bug: 254441685 Cc: Christoph Hellwig Cc: Ming Lei Cc: Damien Le Moal Fixes: 322cff70d46c ("block/mq-deadline: Prioritize high-priority requests") Signed-off-by: Bart Van Assche Link: https://lore.kernel.org/r/20220513171307.32564-1-bvanassche@acm.org Signed-off-by: Jens Axboe (cherry picked from commit 725f22a1477c9c15aa67ad3af96fe28ec4fe72d2) Signed-off-by: Lee Jones Change-Id: I967ac8ce1c740c29615edb6dd8fc7fb0db88ef12 --- block/mq-deadline-main.c | 1 + 1 file changed, 1 insertion(+) diff --git a/block/mq-deadline-main.c b/block/mq-deadline-main.c index 5f1f75bddcf0..cdcf72e72d98 100644 --- a/block/mq-deadline-main.c +++ b/block/mq-deadline-main.c @@ -749,6 +749,7 @@ static void dd_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq, per_prio = &dd->per_prio[prio]; if (at_head) { list_add(&rq->queuelist, &per_prio->dispatch); + rq->fifo_time = jiffies; } else { deadline_add_rq_rb(per_prio, rq); From afa5490fbafa66972eedbf22c4bc7283a2c3b0f1 Mon Sep 17 00:00:00 2001 From: Miaoqian Lin Date: Mon, 16 May 2022 11:44:33 +0400 Subject: [PATCH 059/186] UPSTREAM: regulator: scmi: Fix refcount leak in scmi_regulator_probe of_find_node_by_name() returns a node pointer with refcount incremented, we should use of_node_put() on it when done. Add missing of_node_put() to avoid refcount leak. 
Bug: 254441685 Fixes: 0fbeae70ee7c ("regulator: add SCMI driver") Signed-off-by: Miaoqian Lin Link: https://lore.kernel.org/r/20220516074433.32433-1-linmq006@gmail.com Signed-off-by: Mark Brown (cherry picked from commit 68d6c8476fd4f448e70e0ab31ff972838ac41dae) Signed-off-by: Lee Jones Change-Id: I3c314e5af0a026bf691e18fa2f937651d1175534 --- drivers/regulator/scmi-regulator.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/regulator/scmi-regulator.c b/drivers/regulator/scmi-regulator.c index 14d846bbf0bd..c48f7e134465 100644 --- a/drivers/regulator/scmi-regulator.c +++ b/drivers/regulator/scmi-regulator.c @@ -350,7 +350,7 @@ static int scmi_regulator_probe(struct scmi_device *sdev) if (ret == -ENOMEM) return ret; } - + of_node_put(np); /* * Register a regulator for each valid regulator-DT-entry that we * can successfully reach via SCMI and has a valid associated voltage From 4e1bf78ac09837da13ffeb5d273fe318bd17f510 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Sat, 14 May 2022 10:59:29 -0700 Subject: [PATCH 060/186] BACKPORT: f2fs: don't use casefolded comparison for "." and ".." Trying to rename a directory that has all following properties fails with EINVAL and triggers the 'WARN_ON_ONCE(!fscrypt_has_encryption_key(dir))' in f2fs_match_ci_name(): - The directory is casefolded - The directory is encrypted - The directory's encryption key is not yet set up - The parent directory is *not* encrypted The problem is incorrect handling of the lookup of ".." to get the parent reference to update. fscrypt_setup_filename() treats ".." (and ".") specially, as it's never encrypted. It's passed through as-is, and setting up the directory's key is not attempted. As the name isn't a no-key name, f2fs treats it as a "normal" name and attempts a casefolded comparison.
That breaks the assumption of the WARN_ON_ONCE() in f2fs_match_ci_name() which assumes that for encrypted directories, casefolded comparisons only happen when the directory's key is set up. We could just remove this WARN_ON_ONCE(). However, since casefolding is always a no-op on "." and ".." anyway, let's instead just not casefold these names. This results in the standard bytewise comparison. Bug: 254441685 Fixes: 7ad08a58bf67 ("f2fs: Handle casefolding with Encryption") Cc: # v5.11+ Signed-off-by: Eric Biggers Reviewed-by: Gabriel Krisman Bertazi Signed-off-by: Jaegeuk Kim (cherry picked from commit b5639bb4313b9d455fc9fc4768d23a5e4ca8cb9d) Signed-off-by: Lee Jones Change-Id: Id53cfda129b034aa1ebefba8e9e3135e3def62d7 --- fs/f2fs/dir.c | 3 ++- fs/f2fs/f2fs.h | 10 +++++----- fs/f2fs/hash.c | 11 ++++++----- 3 files changed, 13 insertions(+), 11 deletions(-) diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c index 9ee895af75e4..9936d5466b6c 100644 --- a/fs/f2fs/dir.c +++ b/fs/f2fs/dir.c @@ -82,7 +82,8 @@ int f2fs_init_casefolded_name(const struct inode *dir, #ifdef CONFIG_UNICODE struct super_block *sb = dir->i_sb; - if (IS_CASEFOLDED(dir)) { + if (IS_CASEFOLDED(dir) && + !is_dot_dotdot(fname->usr_fname->name, fname->usr_fname->len)) { fname->cf_name.name = kmem_cache_alloc(f2fs_cf_name_slab, GFP_NOFS); if (!fname->cf_name.name) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 19d7d1f4c4fa..d1af6cc71581 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -490,11 +490,11 @@ struct f2fs_filename { #ifdef CONFIG_UNICODE /* * For casefolded directories: the casefolded name, but it's left NULL - * if the original name is not valid Unicode, if the directory is both - * casefolded and encrypted and its encryption key is unavailable, or if - * the filesystem is doing an internal operation where usr_fname is also - * NULL. In all these cases we fall back to treating the name as an - * opaque byte sequence. 
+ * if the original name is not valid Unicode, if the original name is + * "." or "..", if the directory is both casefolded and encrypted and + * its encryption key is unavailable, or if the filesystem is doing an + * internal operation where usr_fname is also NULL. In all these cases + * we fall back to treating the name as an opaque byte sequence. */ struct fscrypt_str cf_name; #endif diff --git a/fs/f2fs/hash.c b/fs/f2fs/hash.c index e3beac546c63..2788ceeaf5c2 100644 --- a/fs/f2fs/hash.c +++ b/fs/f2fs/hash.c @@ -91,7 +91,7 @@ static u32 TEA_hash_name(const u8 *p, size_t len) /* * Compute @fname->hash. For all directories, @fname->disk_name must be set. * For casefolded directories, @fname->usr_fname must be set, and also - * @fname->cf_name if the filename is valid Unicode. + * @fname->cf_name if the filename is valid Unicode and is not "." or "..". */ void f2fs_hash_filename(const struct inode *dir, struct f2fs_filename *fname) { @@ -110,10 +110,11 @@ void f2fs_hash_filename(const struct inode *dir, struct f2fs_filename *fname) /* * If the casefolded name is provided, hash it instead of the * on-disk name. If the casefolded name is *not* provided, that - * should only be because the name wasn't valid Unicode, so fall - * back to treating the name as an opaque byte sequence. Note - * that to handle encrypted directories, the fallback must use - * usr_fname (plaintext) rather than disk_name (ciphertext). + * should only be because the name wasn't valid Unicode or was + * "." or "..", so fall back to treating the name as an opaque + * byte sequence. Note that to handle encrypted directories, + * the fallback must use usr_fname (plaintext) rather than + * disk_name (ciphertext). 
*/ WARN_ON_ONCE(!fname->usr_fname->name); if (fname->cf_name.name) { From eb8869415a6c716c172591d60d499c0ca1dd2c4a Mon Sep 17 00:00:00 2001 From: Ulf Hansson Date: Wed, 11 May 2022 16:57:00 +0200 Subject: [PATCH 061/186] UPSTREAM: PM: domains: Fix initialization of genpd's next_wakeup In the genpd governor we walk the list of child-domains to take into account their next_wakeup. If the child-domain itself, doesn't have a governor assigned to it, we can end up using the next_wakeup value before it has been properly initialized. To prevent a possible incorrect behaviour in the governor, let's initialize next_wakeup to KTIME_MAX. Bug: 254441685 Fixes: c79aa080fb0f ("PM: domains: use device's next wakeup to determine domain idle state") Signed-off-by: Ulf Hansson Signed-off-by: Rafael J. Wysocki (cherry picked from commit 622d9b5577f19a6472db21df042fea8f5fefe244) Signed-off-by: Lee Jones Change-Id: Iee5350e44c89dc566c2058f55103985928347a23 --- drivers/base/power/domain.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/base/power/domain.c b/drivers/base/power/domain.c index bc695f880a22..75154dad3822 100644 --- a/drivers/base/power/domain.c +++ b/drivers/base/power/domain.c @@ -1972,6 +1972,7 @@ int pm_genpd_init(struct generic_pm_domain *genpd, genpd->device_count = 0; genpd->max_off_time_ns = -1; genpd->max_off_time_changed = true; + genpd->next_wakeup = KTIME_MAX; genpd->provider = NULL; genpd->has_provider = false; genpd->accounting_time = ktime_get(); From 7878f98355cd88e6161875c98331c6d12bbefab3 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Sun, 22 May 2022 20:23:50 +0800 Subject: [PATCH 062/186] UPSTREAM: blk-mq: don't touch ->tagset in blk_mq_get_sq_hctx blk_mq_run_hw_queues() could be run when there isn't queued request and after queue is cleaned up, at that time tagset is freed, because tagset lifetime is covered by driver, and often freed after blk_cleanup_queue() returns. 
So don't touch ->tagset for figuring out current default hctx by the mapping built in request queue, so use-after-free on tagset can be avoided. Meanwhile, this way should be faster than retrieving mapping from tagset. Bug: 254441685 Cc: "yukuai (C)" Cc: Jan Kara Fixes: b6e68ee82585 ("blk-mq: Improve performance of non-mq IO schedulers with multiple HW queues") Signed-off-by: Ming Lei Reviewed-by: Jan Kara Link: https://lore.kernel.org/r/20220522122350.743103-1-ming.lei@redhat.com Signed-off-by: Jens Axboe (cherry picked from commit 5d05426e2d5fd7df8afc866b78c36b37b00188b7) Signed-off-by: Lee Jones Change-Id: Ifebb3d15ddfab0b41d8f30b556969ac68058ca8b --- block/blk-mq.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/block/blk-mq.c b/block/blk-mq.c index 5368379c96d2..0c0f5cffa580 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -1674,8 +1674,7 @@ static bool blk_mq_has_sqsched(struct request_queue *q) */ static struct blk_mq_hw_ctx *blk_mq_get_sq_hctx(struct request_queue *q) { - struct blk_mq_hw_ctx *hctx; - + struct blk_mq_ctx *ctx = blk_mq_get_ctx(q); /* * If the IO scheduler does not respect hardware queues when * dispatching, we just don't bother with multiple HW queues and @@ -1683,8 +1682,8 @@ static struct blk_mq_hw_ctx *blk_mq_get_sq_hctx(struct request_queue *q) * just causes lock contention inside the scheduler and pointless cache * bouncing. */ - hctx = blk_mq_map_queue_type(q, HCTX_TYPE_DEFAULT, - raw_smp_processor_id()); + struct blk_mq_hw_ctx *hctx = blk_mq_map_queue(q, 0, ctx); + if (!blk_mq_hctx_stopped(hctx)) return hctx; return NULL; From 5084953c40d197d408b586185152e3f0332d3c15 Mon Sep 17 00:00:00 2001 From: Neal Liu Date: Tue, 28 Jun 2022 10:14:36 +0800 Subject: [PATCH 063/186] UPSTREAM: usb: gadget: f_mass_storage: Make CD-ROM emulation works with Windows OS Add read TOC with format 1 to support CD-ROM emulation with Windows OS. This patch is tested on Windows OS Server 2019.
Bug: 254441685 Fixes: 89ada0fe669a ("usb: gadget: f_mass_storage: Make CD-ROM emulation work with Mac OS-X") Reviewed-by: Alan Stern Signed-off-by: Neal Liu Link: https://lore.kernel.org/r/20220628021436.3252262-1-neal_liu@aspeedtech.com Signed-off-by: Greg Kroah-Hartman (cherry picked from commit 3b91edd624ab1ab694deef513a45eb9e9d49d75f) Signed-off-by: Lee Jones Change-Id: Iaff2f9a4e8698f0f744150169a4bc78409885bec --- drivers/usb/gadget/function/f_mass_storage.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/drivers/usb/gadget/function/f_mass_storage.c b/drivers/usb/gadget/function/f_mass_storage.c index 12fe0be35528..7790fbed443c 100644 --- a/drivers/usb/gadget/function/f_mass_storage.c +++ b/drivers/usb/gadget/function/f_mass_storage.c @@ -1191,13 +1191,14 @@ static int do_read_toc(struct fsg_common *common, struct fsg_buffhd *bh) u8 format; int i, len; + format = common->cmnd[2] & 0xf; + if ((common->cmnd[1] & ~0x02) != 0 || /* Mask away MSF */ - start_track > 1) { + (start_track > 1 && format != 0x1)) { curlun->sense_data = SS_INVALID_FIELD_IN_CDB; return -EINVAL; } - format = common->cmnd[2] & 0xf; /* * Check if CDB is old style SFF-8020i * i.e. 
format is in 2 MSBs of byte 9 @@ -1207,8 +1208,8 @@ static int do_read_toc(struct fsg_common *common, struct fsg_buffhd *bh) format = (common->cmnd[9] >> 6) & 0x3; switch (format) { - case 0: - /* Formatted TOC */ + case 0: /* Formatted TOC */ + case 1: /* Multi-session info */ len = 4 + 2*8; /* 4 byte header + 2 descriptors */ memset(buf, 0, len); buf[1] = len - 2; /* TOC Length excludes length field */ @@ -1249,7 +1250,7 @@ static int do_read_toc(struct fsg_common *common, struct fsg_buffhd *bh) return len; default: - /* Multi-session, PMA, ATIP, CD-TEXT not supported/required */ + /* PMA, ATIP, CD-TEXT not supported/required */ curlun->sense_data = SS_INVALID_FIELD_IN_CDB; return -EINVAL; } From 0b21c99c831e2ad43d0da5f6b266999696983b37 Mon Sep 17 00:00:00 2001 From: Baolin Wang Date: Mon, 20 Jun 2022 10:34:42 +0800 Subject: [PATCH 064/186] UPSTREAM: mm/damon: use set_huge_pte_at() to make huge pte old The huge_ptep_set_access_flags() can not make the huge pte old according to the discussion [1], that means we will always monitor the young state of the hugetlb though we stopped accessing the hugetlb, as a result DAMON will get inaccurate accessing statistics. So changing to use set_huge_pte_at() to make the huge pte old to fix this issue.
[1] https://lore.kernel.org/all/Yqy97gXI4Nqb7dYo@arm.com/ Bug: 254441685 Link: https://lkml.kernel.org/r/1655692482-28797-1-git-send-email-baolin.wang@linux.alibaba.com Fixes: 49f4203aae06 ("mm/damon: add access checking for hugetlb pages") Signed-off-by: Baolin Wang Reviewed-by: SeongJae Park Acked-by: Mike Kravetz Reviewed-by: Muchun Song Cc: Signed-off-by: Andrew Morton (cherry picked from commit ed1523a895ffdabcab6e067af18685ed00f5ce15) Signed-off-by: Lee Jones Change-Id: I4bdab0c5baace75e01c64e0d10fd8fc4d4ea93b8 --- mm/damon/vaddr.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/mm/damon/vaddr.c b/mm/damon/vaddr.c index 89b6468da2b9..382520f1e92d 100644 --- a/mm/damon/vaddr.c +++ b/mm/damon/vaddr.c @@ -410,8 +410,7 @@ static void damon_hugetlb_mkold(pte_t *pte, struct mm_struct *mm, if (pte_young(entry)) { referenced = true; entry = pte_mkold(entry); - huge_ptep_set_access_flags(vma, addr, pte, entry, - vma->vm_flags & VM_WRITE); + set_huge_pte_at(mm, addr, pte, entry); } #ifdef CONFIG_MMU_NOTIFIER From c3b65245e6ad9f26d6506597e4db0fa17654e2ef Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?St=C3=A9phane=20Graber?= Date: Thu, 23 Jun 2022 16:45:52 -0500 Subject: [PATCH 065/186] UPSTREAM: tools/vm/slabinfo: Handle files in debugfs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit 64dd68497be76 relocated and renamed the alloc_calls and free_calls files from /sys/kernel/slab/NAME/*_calls over to /sys/kernel/debug/slab/NAME/*_calls but didn't update the slabinfo tool with the new location. This change will now have slabinfo look at the new location (and filenames) with a fallback to the prior files. 
Bug: 254441685 Fixes: 64dd68497be76 ("mm: slub: move sysfs slab alloc/free interfaces to debugfs") Cc: stable@vger.kernel.org Signed-off-by: Stéphane Graber Tested-by: Stéphane Graber Signed-off-by: Vlastimil Babka (cherry picked from commit 0c7e0d699ef1430d7f4cf12b4b1d097af58b5515) Signed-off-by: Lee Jones Change-Id: I7312aa9e86213bd37916e14c8e0e430e9c3cd2d4 --- tools/vm/slabinfo.c | 26 ++++++++++++++++++++++++-- 1 file changed, 24 insertions(+), 2 deletions(-) diff --git a/tools/vm/slabinfo.c b/tools/vm/slabinfo.c index 9b68658b6bb8..5b98f3ee58a5 100644 --- a/tools/vm/slabinfo.c +++ b/tools/vm/slabinfo.c @@ -233,6 +233,24 @@ static unsigned long read_slab_obj(struct slabinfo *s, const char *name) return l; } +static unsigned long read_debug_slab_obj(struct slabinfo *s, const char *name) +{ + char x[128]; + FILE *f; + size_t l; + + snprintf(x, 128, "/sys/kernel/debug/slab/%s/%s", s->name, name); + f = fopen(x, "r"); + if (!f) { + buffer[0] = 0; + l = 0; + } else { + l = fread(buffer, 1, sizeof(buffer), f); + buffer[l] = 0; + fclose(f); + } + return l; +} /* * Put a size string together @@ -409,14 +427,18 @@ static void show_tracking(struct slabinfo *s) { printf("\n%s: Kernel object allocation\n", s->name); printf("-----------------------------------------------------------------------\n"); - if (read_slab_obj(s, "alloc_calls")) + if (read_debug_slab_obj(s, "alloc_traces")) + printf("%s", buffer); + else if (read_slab_obj(s, "alloc_calls")) printf("%s", buffer); else printf("No Data\n"); printf("\n%s: Kernel object freeing\n", s->name); printf("------------------------------------------------------------------------\n"); - if (read_slab_obj(s, "free_calls")) + if (read_debug_slab_obj(s, "free_traces")) + printf("%s", buffer); + else if (read_slab_obj(s, "free_calls")) printf("%s", buffer); else printf("No Data\n"); From a7ee7101fbc3dd52309622af8cea9aa1ac5faa0c Mon Sep 17 00:00:00 2001 From: Ren Zhijie Date: Thu, 30 Jun 2022 20:35:28 +0800 Subject: [PATCH 066/186] 
BACKPORT: dma-mapping: Fix build error unused-value MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If CONFIG_DMA_DECLARE_COHERENT is not set, make ARCH=x86_64 CROSS_COMPILE=x86_64-linux-gnu- will be failed, like this: drivers/remoteproc/remoteproc_core.c: In function ‘rproc_rvdev_release’: ./include/linux/dma-map-ops.h:182:42: error: statement with no effect [-Werror=unused-value] #define dma_release_coherent_memory(dev) (0) ^ drivers/remoteproc/remoteproc_core.c:464:2: note: in expansion of macro ‘dma_release_coherent_memory’ dma_release_coherent_memory(dev); ^~~~~~~~~~~~~~~~~~~~~~~~~~~ cc1: all warnings being treated as errors The return type of function dma_release_coherent_memory in CONFIG_DMA_DECLARE_COHERENT area is void, so in !CONFIG_DMA_DECLARE_COHERENT area it should neither return any value nor be defined as zero. Bug: 254441685 Reported-by: Hulk Robot Fixes: e61c451476e6 ("dma-mapping: Add dma_release_coherent_memory to DMA API") Signed-off-by: Ren Zhijie Reviewed-by: Christoph Hellwig Link: https://lore.kernel.org/r/20220630123528.251181-1-renzhijie2@huawei.com Signed-off-by: Mathieu Poirier (cherry picked from commit 50d6281ce9b8412f7ef02d1bc9d23aa62ae0cf98) Signed-off-by: Lee Jones Change-Id: I2af85ae87d77721972a3c3b01288da43d8fb16bb --- include/linux/dma-map-ops.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/dma-map-ops.h b/include/linux/dma-map-ops.h index b9a8549f9a7f..c25005764d44 100644 --- a/include/linux/dma-map-ops.h +++ b/include/linux/dma-map-ops.h @@ -191,10 +191,10 @@ static inline int dma_declare_coherent_memory(struct device *dev, return -ENOSYS; } -#define dma_release_coherent_memory(dev) (0) #define dma_alloc_from_dev_coherent(dev, size, handle, ret) (0) #define dma_release_from_dev_coherent(dev, order, vaddr) (0) #define dma_mmap_from_dev_coherent(dev, vma, vaddr, order, ret) (0) +static inline void dma_release_coherent_memory(struct device *dev) { } static 
inline void *dma_alloc_from_global_coherent(struct device *dev, ssize_t size, dma_addr_t *dma_handle) From 0805a01852c9e95eb20381a65eba548d98e1ac92 Mon Sep 17 00:00:00 2001 From: Michael Grzeschik Date: Thu, 7 Jul 2022 13:56:12 +0200 Subject: [PATCH 067/186] UPSTREAM: usb: gadget: uvc: fix changing interface name via configfs When setting the function name, it is always truncated by one char since snprintf is always including the null-termination in the len parameter. We use strscpy and fix the size setting to use len + 1 instead. Bug: 254441685 Fixes: 324e4f85070f ("usb: gadget: uvc: allow changing interface name via configfs") Signed-off-by: Michael Grzeschik Link: https://lore.kernel.org/r/20220707115612.2760569-1-m.grzeschik@pengutronix.de Signed-off-by: Greg Kroah-Hartman (cherry picked from commit 3d0dc539029b09fbd125444c16b11a8ed10b9d0f) Signed-off-by: Lee Jones Change-Id: Id38f7f27d02c711ca3f1ad5303894912713ef57f --- drivers/usb/gadget/function/uvc_configfs.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/usb/gadget/function/uvc_configfs.c b/drivers/usb/gadget/function/uvc_configfs.c index 9fafa8001d33..3f1e03ef40c9 100644 --- a/drivers/usb/gadget/function/uvc_configfs.c +++ b/drivers/usb/gadget/function/uvc_configfs.c @@ -2448,6 +2448,7 @@ static ssize_t f_uvc_opts_string_##cname##_store(struct config_item *item,\ const char *page, size_t len) \ { \ struct f_uvc_opts *opts = to_f_uvc_opts(item); \ + int size = min(sizeof(opts->aname), len + 1); \ int ret = 0; \ \ mutex_lock(&opts->lock); \ @@ -2456,8 +2457,9 @@ static ssize_t f_uvc_opts_string_##cname##_store(struct config_item *item,\ goto end; \ } \ \ - ret = snprintf(opts->aname, min(sizeof(opts->aname), len), \ - "%s", page); \ + ret = strscpy(opts->aname, page, size); \ + if (ret == -E2BIG) \ + ret = size - 1; \ \ end: \ mutex_unlock(&opts->lock); \ From 5b71c43f5c59d5df3da704aaa530af0afd53a7a3 Mon Sep 17 00:00:00 2001 From: Hsin-Yi Wang Date: Wed, 6 Jul 2022 01:16:49 
+0800 Subject: [PATCH 068/186] UPSTREAM: PM: domains: Ensure genpd_debugfs_dir exists before remove Both genpd_debug_add() and genpd_debug_remove() may be called indirectly by other drivers while genpd_debugfs_dir is not yet set. For example, drivers can call pm_genpd_init() in probe or pm_genpd_remove() in probe fail/cleanup path: pm_genpd_init() --> genpd_debug_add() pm_genpd_remove() --> genpd_remove() --> genpd_debug_remove() At this time, genpd_debug_init() may not yet be called. genpd_debug_add() checks that if genpd_debugfs_dir is NULL, it will return directly. Make sure this is also checked in pm_genpd_remove(), otherwise components under debugfs root which has the same name as other components under pm_genpd may be accidentally removed, since NULL represents debugfs root. Bug: 254441685 Fixes: 718072ceb211 ("PM: domains: create debugfs nodes when adding power domains") Signed-off-by: Hsin-Yi Wang Reviewed-by: Greg Kroah-Hartman Reviewed-by: AngeloGioacchino Del Regno Reviewed-by: Ulf Hansson Signed-off-by: Rafael J.
Wysocki (cherry picked from commit 37101d3c719386040ded735a5ec06974f1d94d1f) Signed-off-by: Lee Jones Change-Id: I0b7e93f4bacf5d537f7f44cbb51237165d859054 --- drivers/base/power/domain.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/base/power/domain.c b/drivers/base/power/domain.c index 75154dad3822..b03cc51f1e5c 100644 --- a/drivers/base/power/domain.c +++ b/drivers/base/power/domain.c @@ -219,6 +219,9 @@ static void genpd_debug_remove(struct generic_pm_domain *genpd) { struct dentry *d; + if (!genpd_debugfs_dir) + return; + d = debugfs_lookup(genpd->name, genpd_debugfs_dir); debugfs_remove(d); } From fdc033d445b2bc7ecd0db49fb6d6c3ff49c8c8ba Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Tue, 5 Jul 2022 16:00:36 -0400 Subject: [PATCH 069/186] UPSTREAM: mm: fix page leak with multiple threads mapping the same page We have an application with a lot of threads that use a shared mmap backed by tmpfs mounted with -o huge=within_size. This application started leaking loads of huge pages when we upgraded to a recent kernel. Using the page ref tracepoints and a BPF program written by Tejun Heo we were able to determine that these pages would have multiple refcounts from the page fault path, but when it came to unmap time we wouldn't drop the number of refs we had added from the faults. I wrote a reproducer that mmap'ed a file backed by tmpfs with -o huge=always, and then spawned 20 threads all looping faulting random offsets in this map, while using madvise(MADV_DONTNEED) randomly for huge page aligned ranges. This very quickly reproduced the problem. The problem here is that we check for the case that we have multiple threads faulting in a range that was previously unmapped. One thread maps the PMD, the other thread loses the race and then returns 0. However at this point we already have the page, and we are no longer putting this page into the processes address space, and so we leak the page. 
We actually did the correct thing prior to f9ce0be71d1f, however it looks like Kirill copied what we do in the anonymous page case. In the anonymous page case we don't yet have a page, so we don't have to drop a reference on anything. Previously we did the correct thing for file based faults by returning VM_FAULT_NOPAGE so we correctly drop the reference on the page we faulted in. Fix this by returning VM_FAULT_NOPAGE in the pmd_devmap_trans_unstable() case, this makes us drop the ref on the page properly, and now my reproducer no longer leaks the huge pages. Bug: 254441685 [josef@toxicpanda.com: v2] Link: https://lkml.kernel.org/r/e90c8f0dbae836632b669c2afc434006a00d4a67.1657721478.git.josef@toxicpanda.com Link: https://lkml.kernel.org/r/2b798acfd95c9ab9395fe85e8d5a835e2e10a920.1657051137.git.josef@toxicpanda.com Fixes: f9ce0be71d1f ("mm: Cleanup faultaround and finish_fault() codepaths") Signed-off-by: Josef Bacik Signed-off-by: Rik van Riel Signed-off-by: Chris Mason Acked-by: Kirill A. Shutemov Cc: Matthew Wilcox (Oracle) Cc: Signed-off-by: Andrew Morton (cherry picked from commit 3fe2895cfecd03ac74977f32102b966b6589f481) Signed-off-by: Lee Jones Change-Id: I982509aab4bcbf22d66aff5e1d3dfce927426f51 --- mm/memory.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/mm/memory.c b/mm/memory.c index 70384a99bfcf..1b768d29f9d2 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -4199,9 +4199,12 @@ vm_fault_t finish_fault(struct vm_fault *vmf) } } - /* See comment in handle_pte_fault() */ + /* + * See comment in handle_pte_fault() for how this scenario happens, we + * need to return NOPAGE so that we drop this page. 
+ */ if (pmd_devmap_trans_unstable(vmf->pmd)) - return 0; + return VM_FAULT_NOPAGE; if (!pte_map_lock(vmf)) return VM_FAULT_RETRY; From f1bf5340cdce46fe382952eb008a0458ba8e3f18 Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Mon, 1 Aug 2022 13:52:07 -0700 Subject: [PATCH 070/186] UPSTREAM: Bluetooth: L2CAP: Fix l2cap_global_chan_by_psm regression The patch d0be8347c623: "Bluetooth: L2CAP: Fix use-after-free caused by l2cap_chan_put" from Jul 21, 2022, leads to the following Smatch static checker warning: net/bluetooth/l2cap_core.c:1977 l2cap_global_chan_by_psm() error: we previously assumed 'c' could be null (see line 1996) Bug: 254441685 Fixes: d0be8347c623 ("Bluetooth: L2CAP: Fix use-after-free caused by l2cap_chan_put") Reported-by: Dan Carpenter Signed-off-by: Luiz Augusto von Dentz (cherry picked from commit 332f1795ca202489c665a75e62e18ff6284de077) Signed-off-by: Lee Jones Change-Id: I81c57064d558d8304d889fa3448a8aff45c7a408 --- net/bluetooth/l2cap_core.c | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index 7b4752587363..892957d0f6df 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c @@ -1966,11 +1966,11 @@ static struct l2cap_chan *l2cap_global_chan_by_psm(int state, __le16 psm, bdaddr_t *dst, u8 link_type) { - struct l2cap_chan *c, *c1 = NULL; + struct l2cap_chan *c, *tmp, *c1 = NULL; read_lock(&chan_list_lock); - list_for_each_entry(c, &chan_list, global_l) { + list_for_each_entry_safe(c, tmp, &chan_list, global_l) { if (state && c->state != state) continue; @@ -1989,11 +1989,10 @@ static struct l2cap_chan *l2cap_global_chan_by_psm(int state, __le16 psm, dst_match = !bacmp(&c->dst, dst); if (src_match && dst_match) { c = l2cap_chan_hold_unless_zero(c); - if (!c) - continue; - - read_unlock(&chan_list_lock); - return c; + if (c) { + read_unlock(&chan_list_lock); + return c; + } } /* Closest match */ From 
8407a5ca142212b8502c5d396b2a854a2f2c9bdd Mon Sep 17 00:00:00 2001 From: Oliver Upton Date: Tue, 16 Aug 2022 19:25:53 +0000 Subject: [PATCH 071/186] UPSTREAM: KVM: arm64: Treat PMCR_EL1.LC as RES1 on asymmetric systems KVM does not support AArch32 on asymmetric systems. To that end, enforce AArch64-only behavior on PMCR_EL1.LC when on an asymmetric system. Bug: 254441685 Fixes: 2122a833316f ("arm64: Allow mismatched 32-bit EL0 support") Signed-off-by: Oliver Upton Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20220816192554.1455559-2-oliver.upton@linux.dev (cherry picked from commit f3c6efc72f3b20ec23566e768979802f0a398f04) Signed-off-by: Lee Jones Change-Id: I982a10aee328eec10b7d75b7fa580748540ddf47 --- arch/arm64/include/asm/kvm_host.h | 4 ++++ arch/arm64/kvm/arm.c | 3 +-- arch/arm64/kvm/sys_regs.c | 4 ++-- 3 files changed, 7 insertions(+), 4 deletions(-) diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 4e9ee0676b53..b7fa62950b78 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -780,6 +780,10 @@ bool kvm_arm_vcpu_is_finalized(struct kvm_vcpu *vcpu); #define kvm_vcpu_has_pmu(vcpu) \ (test_bit(KVM_ARM_VCPU_PMU_V3, (vcpu)->arch.features)) +#define kvm_supports_32bit_el0() \ + (system_supports_32bit_el0() && \ + !static_branch_unlikely(&arm64_mismatched_32bit_el0)) + int kvm_trng_call(struct kvm_vcpu *vcpu); #ifdef CONFIG_KVM extern phys_addr_t hyp_mem_base; diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index 67749fd3604d..78550c856c24 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -695,8 +695,7 @@ static bool vcpu_mode_is_bad_32bit(struct kvm_vcpu *vcpu) if (likely(!vcpu_mode_is_32bit(vcpu))) return false; - return !system_supports_32bit_el0() || - static_branch_unlikely(&arm64_mismatched_32bit_el0); + return !kvm_supports_32bit_el0(); } /** diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index 187576986aef..b9bb810991c5 100644 
--- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -618,7 +618,7 @@ static void reset_pmcr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r) */ val = ((pmcr & ~ARMV8_PMU_PMCR_MASK) | (ARMV8_PMU_PMCR_MASK & 0xdecafbad)) & (~ARMV8_PMU_PMCR_E); - if (!system_supports_32bit_el0()) + if (!kvm_supports_32bit_el0()) val |= ARMV8_PMU_PMCR_LC; __vcpu_sys_reg(vcpu, r->reg) = val; } @@ -667,7 +667,7 @@ static bool access_pmcr(struct kvm_vcpu *vcpu, struct sys_reg_params *p, val = __vcpu_sys_reg(vcpu, PMCR_EL0); val &= ~ARMV8_PMU_PMCR_MASK; val |= p->regval & ARMV8_PMU_PMCR_MASK; - if (!system_supports_32bit_el0()) + if (!kvm_supports_32bit_el0()) val |= ARMV8_PMU_PMCR_LC; __vcpu_sys_reg(vcpu, PMCR_EL0) = val; kvm_pmu_handle_pmcr(vcpu, val); From af723545ed34851ad500b4e92395dc7b2c58b8fd Mon Sep 17 00:00:00 2001 From: Oliver Upton Date: Tue, 16 Aug 2022 19:25:54 +0000 Subject: [PATCH 072/186] UPSTREAM: KVM: arm64: Reject 32bit user PSTATE on asymmetric systems KVM does not support AArch32 EL0 on asymmetric systems. To that end, prevent userspace from configuring a vCPU in such a state through setting PSTATE. It is already ABI that KVM rejects such a write on a system where AArch32 EL0 is unsupported. Though the kernel's definition of a 32bit system changed in commit 2122a833316f ("arm64: Allow mismatched 32-bit EL0 support"), KVM's did not. 
Bug: 254441685 Fixes: 2122a833316f ("arm64: Allow mismatched 32-bit EL0 support") Signed-off-by: Oliver Upton Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20220816192554.1455559-3-oliver.upton@linux.dev (cherry picked from commit b10d86fb8e46cc812171728bcd326df2f34e9ed5) Signed-off-by: Lee Jones Change-Id: I73b63bf79bfbade51dc417fe2c76fd0057eb21b8 --- arch/arm64/kvm/guest.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/kvm/guest.c b/arch/arm64/kvm/guest.c index c763808cacdf..27b783a711bb 100644 --- a/arch/arm64/kvm/guest.c +++ b/arch/arm64/kvm/guest.c @@ -225,7 +225,7 @@ static int set_core_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) u64 mode = (*(u64 *)valp) & PSR_AA32_MODE_MASK; switch (mode) { case PSR_AA32_MODE_USR: - if (!system_supports_32bit_el0()) + if (!kvm_supports_32bit_el0()) return -EINVAL; break; case PSR_AA32_MODE_FIQ: From 347d7198a6fba913068ff03e87bab8468cc02695 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Thu, 4 Aug 2022 17:09:57 +0200 Subject: [PATCH 073/186] BACKPORT: usb: dwc3: qcom: fix runtime PM wakeup A device must enable wakeups during runtime suspend regardless of whether it is capable and allowed to wake the system up from system suspend. 
Bug: 254441685 Fixes: 2664deb09306 ("usb: dwc3: qcom: Honor wakeup enabled/disabled state") Tested-by: Matthias Kaehlcke Reviewed-by: Matthias Kaehlcke Reviewed-by: Manivannan Sadhasivam Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20220804151001.23612-6-johan+linaro@kernel.org Signed-off-by: Greg Kroah-Hartman (cherry picked from commit 6498a96c8c9ce8ae4078e586a607851491e29a33) Signed-off-by: Lee Jones Change-Id: If23278f148db9f1f5f6ac283ede5ebb5b658f8c8 --- drivers/usb/dwc3/dwc3-qcom.c | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/drivers/usb/dwc3/dwc3-qcom.c b/drivers/usb/dwc3/dwc3-qcom.c index 504f8af4d0f8..b903106bfb90 100644 --- a/drivers/usb/dwc3/dwc3-qcom.c +++ b/drivers/usb/dwc3/dwc3-qcom.c @@ -342,7 +342,7 @@ static void dwc3_qcom_enable_interrupts(struct dwc3_qcom *qcom) } } -static int dwc3_qcom_suspend(struct dwc3_qcom *qcom) +static int dwc3_qcom_suspend(struct dwc3_qcom *qcom, bool wakeup) { u32 val; int i, ret; @@ -361,7 +361,7 @@ static int dwc3_qcom_suspend(struct dwc3_qcom *qcom) if (ret) dev_warn(qcom->dev, "failed to disable interconnect: %d\n", ret); - if (device_may_wakeup(qcom->dev)) + if (wakeup) dwc3_qcom_enable_interrupts(qcom); qcom->is_suspended = true; @@ -369,7 +369,7 @@ static int dwc3_qcom_suspend(struct dwc3_qcom *qcom) return 0; } -static int dwc3_qcom_resume(struct dwc3_qcom *qcom) +static int dwc3_qcom_resume(struct dwc3_qcom *qcom, bool wakeup) { int ret; int i; @@ -377,7 +377,7 @@ static int dwc3_qcom_resume(struct dwc3_qcom *qcom) if (!qcom->is_suspended) return 0; - if (device_may_wakeup(qcom->dev)) + if (wakeup) dwc3_qcom_disable_interrupts(qcom); for (i = 0; i < qcom->num_clocks; i++) { @@ -873,9 +873,11 @@ static int dwc3_qcom_remove(struct platform_device *pdev) static int __maybe_unused dwc3_qcom_pm_suspend(struct device *dev) { struct dwc3_qcom *qcom = dev_get_drvdata(dev); + bool wakeup = device_may_wakeup(dev); int ret = 0; - ret = dwc3_qcom_suspend(qcom); + + 
ret = dwc3_qcom_suspend(qcom, wakeup); if (!ret) qcom->pm_suspended = true; @@ -885,9 +887,10 @@ static int __maybe_unused dwc3_qcom_pm_suspend(struct device *dev) static int __maybe_unused dwc3_qcom_pm_resume(struct device *dev) { struct dwc3_qcom *qcom = dev_get_drvdata(dev); + bool wakeup = device_may_wakeup(dev); int ret; - ret = dwc3_qcom_resume(qcom); + ret = dwc3_qcom_resume(qcom, wakeup); if (!ret) qcom->pm_suspended = false; @@ -898,14 +901,14 @@ static int __maybe_unused dwc3_qcom_runtime_suspend(struct device *dev) { struct dwc3_qcom *qcom = dev_get_drvdata(dev); - return dwc3_qcom_suspend(qcom); + return dwc3_qcom_suspend(qcom, true); } static int __maybe_unused dwc3_qcom_runtime_resume(struct device *dev) { struct dwc3_qcom *qcom = dev_get_drvdata(dev); - return dwc3_qcom_resume(qcom); + return dwc3_qcom_resume(qcom, true); } static const struct dev_pm_ops dwc3_qcom_dev_pm_ops = { From 2a11f678f6544e6ec024738eb8c6141a63538ff8 Mon Sep 17 00:00:00 2001 From: Jing Leng Date: Wed, 20 Jul 2022 18:48:15 -0700 Subject: [PATCH 074/186] BACKPORT: usb: gadget: f_uac2: fix superspeed transfer On page 362 of the USB3.2 specification ( https://usb.org/sites/default/files/usb_32_20210125.zip), The 'SuperSpeed Endpoint Companion Descriptor' shall only be returned by Enhanced SuperSpeed devices that are operating at Gen X speed. Each endpoint described in an interface is followed by a 'SuperSpeed Endpoint Companion Descriptor'. If users use SuperSpeed UDC, host can't recognize the device if endpoint doesn't have 'SuperSpeed Endpoint Companion Descriptor' followed. Currently in the uac2 driver code: 1. ss_epout_desc_comp follows ss_epout_desc; 2. ss_epin_fback_desc_comp follows ss_epin_fback_desc; 3. ss_epin_desc_comp follows ss_epin_desc; 4. Only ss_ep_int_desc endpoint doesn't have 'SuperSpeed Endpoint Companion Descriptor' followed, so we should add it. 
Bug: 254441685 Fixes: eaf6cbe09920 ("usb: gadget: f_uac2: add volume and mute support") Cc: stable Signed-off-by: Jing Leng Signed-off-by: Jack Pham Link: https://lore.kernel.org/r/20220721014815.14453-1-quic_jackp@quicinc.com Signed-off-by: Greg Kroah-Hartman (cherry picked from commit f511aef2ebe5377d4c263842f2e0c0b8e274e8e5) Signed-off-by: Lee Jones Change-Id: I7e4a0ce5482f44df32cfa13cc011281c2bc6393d --- drivers/usb/gadget/function/f_uac2.c | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/drivers/usb/gadget/function/f_uac2.c b/drivers/usb/gadget/function/f_uac2.c index da5250a415d5..91affe8babb8 100644 --- a/drivers/usb/gadget/function/f_uac2.c +++ b/drivers/usb/gadget/function/f_uac2.c @@ -281,6 +281,12 @@ static struct usb_endpoint_descriptor ss_ep_int_desc = { .bInterval = 4, }; +static struct usb_ss_ep_comp_descriptor ss_ep_int_desc_comp = { + .bLength = sizeof(ss_ep_int_desc_comp), + .bDescriptorType = USB_DT_SS_ENDPOINT_COMP, + .wBytesPerInterval = cpu_to_le16(6), +}; + /* Audio Streaming OUT Interface - Alt0 */ static struct usb_interface_descriptor std_as_out_if0_desc = { .bLength = sizeof std_as_out_if0_desc, @@ -594,7 +600,8 @@ static struct usb_descriptor_header *ss_audio_desc[] = { (struct usb_descriptor_header *)&in_feature_unit_desc, (struct usb_descriptor_header *)&io_out_ot_desc, - (struct usb_descriptor_header *)&ss_ep_int_desc, + (struct usb_descriptor_header *)&ss_ep_int_desc, + (struct usb_descriptor_header *)&ss_ep_int_desc_comp, (struct usb_descriptor_header *)&std_as_out_if0_desc, (struct usb_descriptor_header *)&std_as_out_if1_desc, @@ -746,6 +753,7 @@ static void setup_headers(struct f_uac2_opts *opts, struct usb_ss_ep_comp_descriptor *epout_desc_comp = NULL; struct usb_ss_ep_comp_descriptor *epin_desc_comp = NULL; struct usb_ss_ep_comp_descriptor *epin_fback_desc_comp = NULL; + struct usb_ss_ep_comp_descriptor *ep_int_desc_comp = NULL; struct usb_endpoint_descriptor *epout_desc; struct 
usb_endpoint_descriptor *epin_desc; struct usb_endpoint_descriptor *epin_fback_desc; @@ -773,6 +781,7 @@ static void setup_headers(struct f_uac2_opts *opts, epin_fback_desc = &ss_epin_fback_desc; epin_fback_desc_comp = &ss_epin_fback_desc_comp; ep_int_desc = &ss_ep_int_desc; + ep_int_desc_comp = &ss_ep_int_desc_comp; } i = 0; @@ -801,8 +810,11 @@ static void setup_headers(struct f_uac2_opts *opts, if (EPOUT_EN(opts)) headers[i++] = USBDHDR(&io_out_ot_desc); - if (FUOUT_EN(opts) || FUIN_EN(opts)) - headers[i++] = USBDHDR(ep_int_desc); + if (FUOUT_EN(opts) || FUIN_EN(opts)) { + headers[i++] = USBDHDR(ep_int_desc); + if (ep_int_desc_comp) + headers[i++] = USBDHDR(ep_int_desc_comp); + } if (EPOUT_EN(opts)) { headers[i++] = USBDHDR(&std_as_out_if0_desc); From a0630a05362f4b5b42a59115462b30f33cc631a9 Mon Sep 17 00:00:00 2001 From: Tetsuo Handa Date: Thu, 25 Aug 2022 17:38:38 +0900 Subject: [PATCH 075/186] UPSTREAM: cgroup: Add missing cpus_read_lock() to cgroup_attach_task_all() syzbot is hitting percpu_rwsem_assert_held(&cpu_hotplug_lock) warning at cpuset_attach() [1], for commit 4f7e7236435ca0ab ("cgroup: Fix threadgroup_rwsem <-> cpus_read_lock() deadlock") missed that cpuset_attach() is also called from cgroup_attach_task_all(). Add cpus_read_lock() like what cgroup_procs_write_start() does. 
Bug: 254441685 Link: https://syzkaller.appspot.com/bug?extid=29d3a3b4d86c8136ad9e [1] Reported-by: syzbot Signed-off-by: Tetsuo Handa Fixes: 4f7e7236435ca0ab ("cgroup: Fix threadgroup_rwsem <-> cpus_read_lock() deadlock") Signed-off-by: Tejun Heo (cherry picked from commit 43626dade36fa74d3329046f4ae2d7fdefe401c6) Signed-off-by: Lee Jones Change-Id: Ifd2f0fb1e4e8c3d474dc4a1d943ae92ff3b0d145 --- kernel/cgroup/cgroup-v1.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/kernel/cgroup/cgroup-v1.c b/kernel/cgroup/cgroup-v1.c index ccaffa5a936a..ffcdf33deb92 100644 --- a/kernel/cgroup/cgroup-v1.c +++ b/kernel/cgroup/cgroup-v1.c @@ -58,6 +58,7 @@ int cgroup_attach_task_all(struct task_struct *from, struct task_struct *tsk) int retval = 0; mutex_lock(&cgroup_mutex); + cpus_read_lock(); percpu_down_write(&cgroup_threadgroup_rwsem); for_each_root(root) { struct cgroup *from_cgrp; @@ -74,6 +75,7 @@ int cgroup_attach_task_all(struct task_struct *from, struct task_struct *tsk) break; } percpu_up_write(&cgroup_threadgroup_rwsem); + cpus_read_unlock(); mutex_unlock(&cgroup_mutex); return retval; From bf4603f49514445494309ee5604faf8d33493491 Mon Sep 17 00:00:00 2001 From: Michael Wu Date: Fri, 19 Aug 2022 10:45:41 +0800 Subject: [PATCH 076/186] UPSTREAM: pinctrl: sunxi: Fix name for A100 R_PIO The name of A100 R_PIO driver should be sun50i-a100-r-pinctrl, not sun50iw10p1-r-pinctrl. 
Bug: 254441685 Fixes: 473436e7647d6 ("pinctrl: sunxi: add support for the Allwinner A100 pin controller") Signed-off-by: Michael Wu Acked-by: Samuel Holland Link: https://lore.kernel.org/r/20220819024541.74191-1-michael@allwinnertech.com Signed-off-by: Linus Walleij (cherry picked from commit 76648c867c6c03b8a468d9c9222025873ecc613d) Signed-off-by: Lee Jones Change-Id: I950bddf7f50616714a5bd7a8438eb5f518ea70fc --- drivers/pinctrl/sunxi/pinctrl-sun50i-a100-r.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/pinctrl/sunxi/pinctrl-sun50i-a100-r.c b/drivers/pinctrl/sunxi/pinctrl-sun50i-a100-r.c index 21054fcacd34..18088f6f44b2 100644 --- a/drivers/pinctrl/sunxi/pinctrl-sun50i-a100-r.c +++ b/drivers/pinctrl/sunxi/pinctrl-sun50i-a100-r.c @@ -98,7 +98,7 @@ MODULE_DEVICE_TABLE(of, a100_r_pinctrl_match); static struct platform_driver a100_r_pinctrl_driver = { .probe = a100_r_pinctrl_probe, .driver = { - .name = "sun50iw10p1-r-pinctrl", + .name = "sun50i-a100-r-pinctrl", .of_match_table = a100_r_pinctrl_match, }, }; From 3f23bb225556e69411cec64d4823216fd65e2d55 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Thu, 25 Aug 2022 22:04:56 -0700 Subject: [PATCH 077/186] UPSTREAM: crypto: lib - remove unneeded selection of XOR_BLOCKS CRYPTO_LIB_CHACHA_GENERIC doesn't need to select XOR_BLOCKS. It perhaps was thought that it's needed for __crypto_xor, but that's not the case. Enabling XOR_BLOCKS is problematic because the XOR_BLOCKS code runs a benchmark when it is initialized. That causes a boot time regression on systems that didn't have it enabled before. Therefore, remove this unnecessary and problematic selection. 
Bug: 254441685 Fixes: e56e18985596 ("lib/crypto: add prompts back to crypto libraries") Cc: stable@vger.kernel.org Signed-off-by: Eric Biggers Signed-off-by: Herbert Xu (cherry picked from commit 874b301985ef2f89b8b592ad255e03fb6fbfe605) Signed-off-by: Lee Jones Change-Id: I89d552f31062ad677407107280874bc7eafe60bf --- lib/crypto/Kconfig | 1 - 1 file changed, 1 deletion(-) diff --git a/lib/crypto/Kconfig b/lib/crypto/Kconfig index 2082af43d51f..0717a0dcefed 100644 --- a/lib/crypto/Kconfig +++ b/lib/crypto/Kconfig @@ -33,7 +33,6 @@ config CRYPTO_ARCH_HAVE_LIB_CHACHA config CRYPTO_LIB_CHACHA_GENERIC tristate - select XOR_BLOCKS help This symbol can be depended upon by arch implementations of the ChaCha library interface that require the generic code as a From 3093f8b52c38ddbf93b96eb592168eec2cd53fb9 Mon Sep 17 00:00:00 2001 From: Badari Pulavarty Date: Sun, 21 Aug 2022 18:08:53 +0000 Subject: [PATCH 078/186] UPSTREAM: mm/damon/dbgfs: avoid duplicate context directory creation When user tries to create a DAMON context via the DAMON debugfs interface with a name of an already existing context, the context directory creation fails but a new context is created and added in the internal data structure, due to absence of the directory creation success check. As a result, memory could leak and DAMON cannot be turned on. An example test case is as below: # cd /sys/kernel/debug/damon/ # echo "off" > monitor_on # echo paddr > target_ids # echo "abc" > mk_context # echo "abc" > mk_context # echo $$ > abc/target_ids # echo "on" > monitor_on <<< fails Return value of 'debugfs_create_dir()' is expected to be ignored in general, but this is an exceptional case as DAMON feature is depending on the debugfs functionality and it has the potential duplicate name issue. This commit therefore fixes the issue by checking the directory creation failure and immediately return the error in the case. 
Bug: 254441685 Link: https://lkml.kernel.org/r/20220821180853.2400-1-sj@kernel.org Fixes: 75c1c2b53c78 ("mm/damon/dbgfs: support multiple contexts") Signed-off-by: Badari Pulavarty Signed-off-by: SeongJae Park Cc: [ 5.15.x] Signed-off-by: Andrew Morton (cherry picked from commit d26f60703606ab425eee9882b32a1781a8bed74d) Signed-off-by: Lee Jones Change-Id: I8951b95f41306818ef1b4a5789369a84d8ca2cf2 --- mm/damon/dbgfs.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/mm/damon/dbgfs.c b/mm/damon/dbgfs.c index 5b899601e56c..6a3121dca040 100644 --- a/mm/damon/dbgfs.c +++ b/mm/damon/dbgfs.c @@ -721,6 +721,9 @@ static int dbgfs_mk_context(char *name) return -ENOENT; new_dir = debugfs_create_dir(name, root); + /* Below check is required for a potential duplicated name case */ + if (IS_ERR(new_dir)) + return PTR_ERR(new_dir); dbgfs_dirs[dbgfs_nr_ctxs] = new_dir; new_ctx = dbgfs_new_ctx(); From ed91943b486f218cd6611b7520160dc2ac3dc015 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Fri, 2 Sep 2022 19:11:49 +0000 Subject: [PATCH 079/186] UPSTREAM: mm/damon/dbgfs: fix memory leak when using debugfs_lookup() When calling debugfs_lookup() the result must have dput() called on it, otherwise the memory will leak over time. Fix this up by properly calling dput(). 
Bug: 254441685 Link: https://lkml.kernel.org/r/20220902191149.112434-1-sj@kernel.org Fixes: 75c1c2b53c78b ("mm/damon/dbgfs: support multiple contexts") Signed-off-by: Greg Kroah-Hartman Signed-off-by: SeongJae Park Cc: Signed-off-by: Andrew Morton (cherry picked from commit 1552fd3ef7dbe07208b8ae84a0a6566adf7dfc9d) Signed-off-by: Lee Jones Change-Id: I7f2e8fa0167b3e44c4c251a51df341a6a7d98fd2 --- mm/damon/dbgfs.c | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/mm/damon/dbgfs.c b/mm/damon/dbgfs.c index 6a3121dca040..f2772fc740c0 100644 --- a/mm/damon/dbgfs.c +++ b/mm/damon/dbgfs.c @@ -787,6 +787,7 @@ static int dbgfs_rm_context(char *name) struct dentry *root, *dir, **new_dirs; struct damon_ctx **new_ctxs; int i, j; + int ret = 0; if (damon_nr_running_ctxs()) return -EBUSY; @@ -801,14 +802,16 @@ static int dbgfs_rm_context(char *name) new_dirs = kmalloc_array(dbgfs_nr_ctxs - 1, sizeof(*dbgfs_dirs), GFP_KERNEL); - if (!new_dirs) - return -ENOMEM; + if (!new_dirs) { + ret = -ENOMEM; + goto out_dput; + } new_ctxs = kmalloc_array(dbgfs_nr_ctxs - 1, sizeof(*dbgfs_ctxs), GFP_KERNEL); if (!new_ctxs) { - kfree(new_dirs); - return -ENOMEM; + ret = -ENOMEM; + goto out_new_dirs; } for (i = 0, j = 0; i < dbgfs_nr_ctxs; i++) { @@ -828,7 +831,13 @@ static int dbgfs_rm_context(char *name) dbgfs_ctxs = new_ctxs; dbgfs_nr_ctxs--; - return 0; + goto out_dput; + +out_new_dirs: + kfree(new_dirs); +out_dput: + dput(dir); + return ret; } static ssize_t dbgfs_rm_context_write(struct file *file, From 2df2e2c0846a8164edd63519d131ecbd7871483b Mon Sep 17 00:00:00 2001 From: Baolin Wang Date: Thu, 18 Aug 2022 15:37:43 +0800 Subject: [PATCH 080/186] UPSTREAM: mm/damon: validate if the pmd entry is present before accessing pmd_huge() is used to validate if the pmd entry is mapped by a huge page, also including the case of non-present (migration or hwpoisoned) pmd entry on arm64 or x86 architectures. 
This means that pmd_pfn() can not get the correct pfn number for a non-present pmd entry, which will cause damon_get_page() to get an incorrect page struct (also may be NULL by pfn_to_online_page()), making the access statistics incorrect. This means that the DAMON may make incorrect decision according to the incorrect statistics, for example, DAMON may can not reclaim cold page in time due to this cold page was regarded as accessed mistakenly if DAMOS_PAGEOUT operation is specified. Moreover it does not make sense that we still waste time to get the page of the non-present entry. Just treat it as not-accessed and skip it, which maintains consistency with non-present pte level entries. So add pmd entry present validation to fix the above issues. Bug: 254441685 Link: https://lkml.kernel.org/r/58b1d1f5fbda7db49ca886d9ef6783e3dcbbbc98.1660805030.git.baolin.wang@linux.alibaba.com Fixes: 3f49584b262c ("mm/damon: implement primitives for the virtual memory address spaces") Signed-off-by: Baolin Wang Reviewed-by: SeongJae Park Reviewed-by: Muchun Song Cc: Mike Kravetz Cc: Signed-off-by: Andrew Morton (cherry picked from commit c8b9aff419303e4d4219b5ff64b1c7e062dee48e) Signed-off-by: Lee Jones Change-Id: Idda1765dcbc93a28ad38ccc53688d69b64202330 --- mm/damon/vaddr.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/mm/damon/vaddr.c b/mm/damon/vaddr.c index 382520f1e92d..c0dec53b2330 100644 --- a/mm/damon/vaddr.c +++ b/mm/damon/vaddr.c @@ -375,6 +375,11 @@ static int damon_mkold_pmd_entry(pmd_t *pmd, unsigned long addr, if (pmd_huge(*pmd)) { ptl = pmd_lock(walk->mm, pmd); + if (!pmd_present(*pmd)) { + spin_unlock(ptl); + return 0; + } + if (pmd_huge(*pmd)) { damon_pmdp_mkold(pmd, walk->mm, addr); spin_unlock(ptl); @@ -505,6 +510,11 @@ static int damon_young_pmd_entry(pmd_t *pmd, unsigned long addr, #ifdef CONFIG_TRANSPARENT_HUGEPAGE if (pmd_huge(*pmd)) { ptl = pmd_lock(walk->mm, pmd); + if (!pmd_present(*pmd)) { + spin_unlock(ptl); + return 0; + } + if 
(!pmd_huge(*pmd)) { spin_unlock(ptl); goto regular_page; From 5f1257b5fffd9e919e7f12920621568f98862294 Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Wed, 21 Sep 2022 15:34:47 +0300 Subject: [PATCH 081/186] UPSTREAM: xhci: Don't show warning for reinit on known broken suspend commit 8b328f8002bc ("xhci: re-initialize the HC during resume if HCE was set") introduced a new warning message when the host controller error was set and re-initializing. This is expected behavior on some designs which already set `xhci->broken_suspend` so the new warning is alarming to some users. Modify the code to only show the warning if this was a surprising behavior to the XHCI driver. Bug: 254441685 Link: https://bugzilla.kernel.org/show_bug.cgi?id=216470 Fixes: 8b328f8002bc ("xhci: re-initialize the HC during resume if HCE was set") Reported-by: Artem S. Tashkinov Signed-off-by: Mario Limonciello Signed-off-by: Mathias Nyman Link: https://lore.kernel.org/r/20220921123450.671459-4-mathias.nyman@linux.intel.com Signed-off-by: Greg Kroah-Hartman (cherry picked from commit 484d6f7aa3283d082c87654b7fe7a7f725423dfb) Signed-off-by: Lee Jones Change-Id: Icf85934a304d1fa18c68b9b4ed08dce60f85a403 --- drivers/usb/host/xhci.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/usb/host/xhci.c b/drivers/usb/host/xhci.c index 82eb4a65e068..9f755314649d 100644 --- a/drivers/usb/host/xhci.c +++ b/drivers/usb/host/xhci.c @@ -1172,7 +1172,8 @@ int xhci_resume(struct xhci_hcd *xhci, bool hibernated) /* re-initialize the HC on Restore Error, or Host Controller Error */ if (temp & (STS_SRE | STS_HCE)) { reinit_xhc = true; - xhci_warn(xhci, "xHC error in resume, USBSTS 0x%x, Reinit\n", temp); + if (!xhci->broken_suspend) + xhci_warn(xhci, "xHC error in resume, USBSTS 0x%x, Reinit\n", temp); } if (reinit_xhc) { From 82b904105a0214676209657521bf79acbd74eb18 Mon Sep 17 00:00:00 2001 From: Heikki Krogerus Date: Thu, 22 Sep 2022 17:59:24 +0300 Subject: [PATCH 082/186] UPSTREAM: 
usb: typec: ucsi: Remove incorrect warning Sink only devices do not have any source capabilities, so the driver should not warn about that. Also DRP (Dual Role Power) capable devices, such as USB Type-C docking stations, do not return any source capabilities unless they are plugged to a power supply themselves. Bug: 254441685 Fixes: 1f4642b72be7 ("usb: typec: ucsi: Retrieve all the PDOs instead of just the first 4") Reported-by: Paul Menzel Cc: Signed-off-by: Heikki Krogerus Link: https://lore.kernel.org/r/20220922145924.80667-1-heikki.krogerus@linux.intel.com Signed-off-by: Greg Kroah-Hartman (cherry picked from commit 415ba26cb73f7d22a892043301b91b57ae54db02) Signed-off-by: Lee Jones Change-Id: I3e10fb2c610a753e19deab7dcceda10599f56c03 --- drivers/usb/typec/ucsi/ucsi.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/usb/typec/ucsi/ucsi.c b/drivers/usb/typec/ucsi/ucsi.c index 18be14d51e02..f253310a92b4 100644 --- a/drivers/usb/typec/ucsi/ucsi.c +++ b/drivers/usb/typec/ucsi/ucsi.c @@ -515,8 +515,6 @@ static int ucsi_get_pdos(struct ucsi_connector *con, int is_partner, num_pdos * sizeof(u32)); if (ret < 0) dev_err(ucsi->dev, "UCSI_GET_PDOS failed (%d)\n", ret); - if (ret == 0 && offset == 0) - dev_warn(ucsi->dev, "UCSI_GET_PDOS returned 0 bytes\n"); return ret; } From e0243d1991deae27d8c2fe171eac1321bb7a0703 Mon Sep 17 00:00:00 2001 From: SeongJae Park Date: Sun, 2 Oct 2022 19:31:30 +0000 Subject: [PATCH 083/186] UPSTREAM: mm/damon/core: initialize damon_target->list in damon_new_target() 'struct damon_target' creation function, 'damon_new_target()' is not initializing its '->list' field, unlike other DAMON structs creator functions such as 'damon_new_region()'. Normal users of 'damon_new_target()' initializes the field by adding the target to DAMON context's targets list, but some code could access the uninitialized field. This commit avoids the case by initializing the field in 'damon_new_target()'. 
Bug: 254441685 Link: https://lkml.kernel.org/r/20221002193130.8227-1-sj@kernel.org Fixes: f23b8eee1871 ("mm/damon/core: implement region-based sampling") Signed-off-by: SeongJae Park Reported-by: Hyeonggon Yoo <42.hyeyoo@gmail.com> Tested-by: Hyeonggon Yoo <42.hyeyoo@gmail.com> Signed-off-by: Andrew Morton (cherry picked from commit b1f44cdabad8c50cd72d6b6731e9fdf3730a8f4f) Signed-off-by: Lee Jones Change-Id: Ie500358e0cc7d5bf82225e6e2b5229f6629736f4 --- mm/damon/core.c | 1 + 1 file changed, 1 insertion(+) diff --git a/mm/damon/core.c b/mm/damon/core.c index 1dd153c31c9e..b194846f809b 100644 --- a/mm/damon/core.c +++ b/mm/damon/core.c @@ -155,6 +155,7 @@ struct damon_target *damon_new_target(unsigned long id) t->id = id; t->nr_regions = 0; INIT_LIST_HEAD(&t->regions_list); + INIT_LIST_HEAD(&t->list); return t; } From 910e9e60492a29ed40d604e8ae509cae7742ee75 Mon Sep 17 00:00:00 2001 From: Mayank Rana Date: Wed, 4 May 2022 12:36:41 -0700 Subject: [PATCH 084/186] UPSTREAM: usb: dwc3: Fix ep0 handling when getting reset while doing control transfer According to the databook ep0 should be in setup phase during reset. If host issues reset between control transfers, ep0 will be in an invalid state. Fix this by issuing stall and restart on ep0 if it is not in setup phase. Also SW needs to complete pending control transfer and setup core for next setup stage as per data book. Hence check ep0 state during reset interrupt handling and make sure active transfers on ep0 out/in endpoint are stopped by queuing ENDXFER command for that endpoint and restart ep0 out again to receive next setup packet. 
Signed-off-by: Mayank Rana Link: https://lore.kernel.org/r/1651693001-29891-1-git-send-email-quic_mrana@quicinc.com Signed-off-by: Greg Kroah-Hartman (cherry picked from commit 9d778f0c5f95ca5aa2ff628ea281978697e8d89b) Bug: 258997352 Change-Id: Ie7482ba08d4f77ad65f404b3014ac880f5a5a75e Signed-off-by: Krishna Kurapati --- drivers/usb/dwc3/ep0.c | 11 ++++++++--- drivers/usb/dwc3/gadget.c | 27 +++++++++++++++++++++++++-- drivers/usb/dwc3/gadget.h | 2 ++ 3 files changed, 35 insertions(+), 5 deletions(-) diff --git a/drivers/usb/dwc3/ep0.c b/drivers/usb/dwc3/ep0.c index 1064be5518f6..9b6ebc3c902d 100644 --- a/drivers/usb/dwc3/ep0.c +++ b/drivers/usb/dwc3/ep0.c @@ -218,7 +218,7 @@ out: return ret; } -static void dwc3_ep0_stall_and_restart(struct dwc3 *dwc) +void dwc3_ep0_stall_and_restart(struct dwc3 *dwc) { struct dwc3_ep *dep; @@ -1087,13 +1087,18 @@ void dwc3_ep0_send_delayed_status(struct dwc3 *dwc) __dwc3_ep0_do_control_status(dwc, dwc->eps[direction]); } -static void dwc3_ep0_end_control_data(struct dwc3 *dwc, struct dwc3_ep *dep) +void dwc3_ep0_end_control_data(struct dwc3 *dwc, struct dwc3_ep *dep) { struct dwc3_gadget_ep_cmd_params params; u32 cmd; int ret; - if (!dep->resource_index) + /* + * For status/DATA OUT stage, TRB will be queued on ep0 out + * endpoint for which resource index is zero. Hence allow + * queuing ENDXFER command for ep0 out endpoint. 
+ */ + if (!dep->resource_index && dep->number) return; cmd = DWC3_DEPCMD_ENDTRANSFER; diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c index d769454ab482..19dbf9897df7 100644 --- a/drivers/usb/dwc3/gadget.c +++ b/drivers/usb/dwc3/gadget.c @@ -869,12 +869,13 @@ static int __dwc3_gadget_ep_enable(struct dwc3_ep *dep, unsigned int action) reg |= DWC3_DALEPENA_EP(dep->number); dwc3_writel(dwc->regs, DWC3_DALEPENA, reg); + dep->trb_dequeue = 0; + dep->trb_enqueue = 0; + if (usb_endpoint_xfer_control(desc)) goto out; /* Initialize the TRB ring */ - dep->trb_dequeue = 0; - dep->trb_enqueue = 0; memset(dep->trb_pool, 0, sizeof(struct dwc3_trb) * DWC3_TRB_NUM); @@ -2711,6 +2712,7 @@ static int __dwc3_gadget_start(struct dwc3 *dwc) /* begin to receive SETUP packets */ dwc->ep0state = EP0_SETUP_PHASE; + dwc->ep0_bounced = false; dwc->link_state = DWC3_LINK_STATE_SS_DIS; dwc->delayed_status = false; dwc3_ep0_out_start(dwc); @@ -3796,6 +3798,27 @@ static void dwc3_gadget_reset_interrupt(struct dwc3 *dwc) } dwc3_reset_gadget(dwc); + + /* + * From SNPS databook section 8.1.2, the EP0 should be in setup + * phase. So ensure that EP0 is in setup phase by issuing a stall + * and restart if EP0 is not in setup phase. + */ + if (dwc->ep0state != EP0_SETUP_PHASE) { + unsigned int dir; + + dir = !!dwc->ep0_expect_in; + if (dwc->ep0state == EP0_DATA_PHASE) + dwc3_ep0_end_control_data(dwc, dwc->eps[dir]); + else + dwc3_ep0_end_control_data(dwc, dwc->eps[!dir]); + + dwc->eps[0]->trb_enqueue = 0; + dwc->eps[1]->trb_enqueue = 0; + + dwc3_ep0_stall_and_restart(dwc); + } + /* * In the Synopsis DesignWare Cores USB3 Databook Rev. 
3.30a * Section 4.1.2 Table 4-2, it states that during a USB reset, the SW diff --git a/drivers/usb/dwc3/gadget.h b/drivers/usb/dwc3/gadget.h index f763380e672e..55a56cf67d73 100644 --- a/drivers/usb/dwc3/gadget.h +++ b/drivers/usb/dwc3/gadget.h @@ -110,6 +110,8 @@ void dwc3_gadget_giveback(struct dwc3_ep *dep, struct dwc3_request *req, void dwc3_ep0_interrupt(struct dwc3 *dwc, const struct dwc3_event_depevt *event); void dwc3_ep0_out_start(struct dwc3 *dwc); +void dwc3_ep0_end_control_data(struct dwc3 *dwc, struct dwc3_ep *dep); +void dwc3_ep0_stall_and_restart(struct dwc3 *dwc); int __dwc3_gadget_ep0_set_halt(struct usb_ep *ep, int value); int dwc3_gadget_ep0_set_halt(struct usb_ep *ep, int value); int dwc3_gadget_ep0_queue(struct usb_ep *ep, struct usb_request *request, From a8997cb1858d943fd50311023ec8f3b2c0b5346d Mon Sep 17 00:00:00 2001 From: Krishna Kurapati Date: Mon, 14 Nov 2022 11:18:30 +0530 Subject: [PATCH 085/186] UPSTREAM: usb: dwc3: gadget: Submit endxfer command if delayed during disconnect During a cable disconnect sequence, if ep0state is not in the SETUP phase, then nothing will trigger any pending end transfer commands. Force stopping of any pending SETUP transaction, and move back to the SETUP phase. 
Reviewed-by: Thinh Nguyen Signed-off-by: Wesley Cheng Link: https://lore.kernel.org/r/20220901193625.8727-6-quic_wcheng@quicinc.com Signed-off-by: Greg Kroah-Hartman (cherry picked from commit 8422b769fa46bd429dc0f324012629a4691f0dd9) Bug: 258997352 Change-Id: I39f41c42d3c5aec76d4f65175e31e2e10a0825be Signed-off-by: Krishna Kurapati --- drivers/usb/dwc3/gadget.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c index 19dbf9897df7..d77866fdc16d 100644 --- a/drivers/usb/dwc3/gadget.c +++ b/drivers/usb/dwc3/gadget.c @@ -3744,13 +3744,24 @@ static void dwc3_gadget_disconnect_interrupt(struct dwc3 *dwc) reg &= ~DWC3_DCTL_INITU2ENA; dwc3_gadget_dctl_write_safe(dwc, reg); + dwc->connected = false; + dwc3_disconnect_gadget(dwc); dwc->gadget->speed = USB_SPEED_UNKNOWN; dwc->setup_packet_pending = false; usb_gadget_set_state(dwc->gadget, USB_STATE_NOTATTACHED); - dwc->connected = false; + if (dwc->ep0state != EP0_SETUP_PHASE) { + unsigned int dir; + + dir = !!dwc->ep0_expect_in; + if (dwc->ep0state == EP0_DATA_PHASE) + dwc3_ep0_end_control_data(dwc, dwc->eps[dir]); + else + dwc3_ep0_end_control_data(dwc, dwc->eps[!dir]); + dwc3_ep0_stall_and_restart(dwc); + } } static void dwc3_gadget_reset_interrupt(struct dwc3 *dwc) From 68d4b5fef26d81bc80226c1747df00b691c88b86 Mon Sep 17 00:00:00 2001 From: Mike Rapoport Date: Fri, 11 Nov 2022 13:45:21 +0800 Subject: [PATCH 086/186] BACKPORT: mm/page_alloc: always initialize memory map for the holes Patch series "mm: ensure consistency of memory map poisoning". Currently memory map allocation for the FLATMEM case does not poison the struct pages regardless of the CONFIG_PAGE_POISON setting. This happens because allocation of the memory map for FLATMEM and SPARSEMEM uses different memblock functions, and those that are used for the SPARSEMEM case (namely memblock_alloc_try_nid_raw() and memblock_alloc_exact_nid_raw()) implicitly poison the allocated memory.
Another side effect of this implicit poisoning is that early setup code that uses the same functions to allocate memory burns cycles for the memory poisoning even if it was not intended. These patches introduce a memmap_alloc() wrapper that ensures that the memory map allocation is consistent for different memory models. This patch (of 4): Currently the memory map for the holes is initialized only when the SPARSEMEM memory model is used. Yet, even with FLATMEM there could be holes in the physical memory layout that have memory map entries. For instance, the memory reserved using the e820 API on i386 or "reserved-memory" nodes in the device tree would not appear in memblock.memory and hence the struct pages for such holes will be skipped during memory map initialization. These struct pages will be zeroed because the memory map for FLATMEM systems is allocated with memblock_alloc_node() that clears the allocated memory. While zeroed struct pages do not cause immediate problems, the correct behaviour is to initialize every page using __init_single_page(). Besides, enabling page poison for the FLATMEM case will trigger PF_POISONED_CHECK() unless the memory map is properly initialized. Make sure init_unavailable_range() is called for both SPARSEMEM and FLATMEM so that struct pages representing memory holes appear as PG_reserved with any memory layout.
[rppt@kernel.org: fix microblaze] Link: https://lkml.kernel.org/r/YQWW3RCE4eWBuMu/@kernel.org (cherry picked from commit c3ab6baf6a004eab7344a1d8880a971f2414e1b6) Bug: 258556132 Link: https://lkml.kernel.org/r/20210714123739.16493-1-rppt@kernel.org Link: https://lkml.kernel.org/r/20210714123739.16493-2-rppt@kernel.org Change-Id: Ib60682288ba76e65384de91b70a08662ead12934 Signed-off-by: Mike Rapoport Acked-by: David Hildenbrand Tested-by: Guenter Roeck Cc: Michal Simek Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/microblaze/include/asm/page.h | 3 ++- mm/page_alloc.c | 8 -------- 2 files changed, 2 insertions(+), 9 deletions(-) diff --git a/arch/microblaze/include/asm/page.h b/arch/microblaze/include/asm/page.h index b13463d39b38..3c837abbebf0 100644 --- a/arch/microblaze/include/asm/page.h +++ b/arch/microblaze/include/asm/page.h @@ -162,7 +162,8 @@ extern int page_is_ram(unsigned long pfn); # define ARCH_PFN_OFFSET (PAGE_OFFSET >> PAGE_SHIFT) # else /* CONFIG_MMU */ # define ARCH_PFN_OFFSET (memory_start >> PAGE_SHIFT) -# define pfn_valid(pfn) ((pfn) < (max_mapnr + ARCH_PFN_OFFSET)) +# define pfn_valid(pfn) ((pfn) >= ARCH_PFN_OFFSET && \ + (pfn) < (max_mapnr + ARCH_PFN_OFFSET)) # endif /* CONFIG_MMU */ # endif /* __ASSEMBLY__ */ diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 222afa645639..9b2dd98ab4e8 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -6367,7 +6367,6 @@ static void __meminit zone_init_free_lists(struct zone *zone) } } -#if !defined(CONFIG_FLAT_NODE_MEM_MAP) /* * Only struct pages that correspond to ranges defined by memblock.memory * are zeroed and initialized by going through __init_single_page() during @@ -6412,13 +6411,6 @@ static void __init init_unavailable_range(unsigned long spfn, pr_info("On node %d, zone %s: %lld pages in unavailable ranges", node, zone_names[zone], pgcnt); } -#else -static inline void init_unavailable_range(unsigned long spfn, - unsigned long epfn, - int zone, int node) -{ -} -#endif static 
void __init memmap_init_zone_range(struct zone *zone, unsigned long start_pfn, From abb277d9f11f6e6ccf688a28f71c23c94026a4ef Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Fri, 19 Aug 2022 13:54:25 +0100 Subject: [PATCH 087/186] FROMGIT: mm/vmalloc: Add override for lazy vunmap Add an interface for arch code to disable lazy vunmap by forcing the threshold to zero. This might be interesting for debugging/testing in general, but primarily helps a horrible situation which needs to guarantee that vmalloc aliases are up-to-date from atomic context, wherein the only practical solution is to never let them get stale in the first place. Bug: 223346425 (cherry picked from commit 2a34c1503b85f49dd472dfd932dfcd16cab8ee8a https://git.gitlab.arm.com/linux-arm/linux-rm.git arm64/2454944) Change-Id: I694523564357b4c43d30c129af1e89fd803824d3 Signed-off-by: Robin Murphy Signed-off-by: Beata Michalska --- include/linux/vmalloc.h | 4 ++++ mm/vmalloc.c | 3 +++ 2 files changed, 7 insertions(+) diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h index 0549ca17ba6f..12c55bbec8c0 100644 --- a/include/linux/vmalloc.h +++ b/include/linux/vmalloc.h @@ -245,4 +245,8 @@ pcpu_free_vm_areas(struct vm_struct **vms, int nr_vms) int register_vmap_purge_notifier(struct notifier_block *nb); int unregister_vmap_purge_notifier(struct notifier_block *nb); +#ifndef arch_disable_lazy_vunmap +#define arch_disable_lazy_vunmap false +#endif + #endif /* _LINUX_VMALLOC_H */ diff --git a/mm/vmalloc.c b/mm/vmalloc.c index 817a472ee30f..50d49ca35885 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -1297,6 +1297,9 @@ static unsigned long lazy_max_pages(void) { unsigned int log; + if (arch_disable_lazy_vunmap) + return 0; + log = fls(num_online_cpus()); return log * (32UL * 1024 * 1024 / PAGE_SIZE); From 02e1387801b70fb53b3a96f95f5cb58a8f271c1d Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Mon, 25 Apr 2022 17:53:55 +0100 Subject: [PATCH 088/186] FROMGIT: arm64: Work around Cortex-A510 erratum 
2454944 Cortex-A510 erratum 2454944 may cause clean cache lines to be erroneously written back to memory, breaking the assumptions we rely on for non-coherent DMA. Try to mitigate this by implementing special DMA ops that do their best to avoid cacheable aliases via a combination of bounce-buffering and manipulating the linear map directly, to minimise the chance of DMA-mapped pages being speculated back into caches. The other main concern is initial entry, where cache lines covering the kernel image might potentially become affected between being cleaned by the bootloader and the kernel being called, so perform some additional maintenance to be safe in that regard too. Cortex-A510 supports S2FWB, so KVM should be unaffected. Bug: 223346425 (cherry picked from commit 5bb88dd8ed70973eeb15722710a46d60951c8255 https://git.gitlab.arm.com/linux-arm/linux-rm.git arm64/2454944) Change-Id: Iffd38bf97114f7151f01c70750b465fc991c89c8 Signed-off-by: Robin Murphy Signed-off-by: Beata Michalska --- Documentation/arm64/silicon-errata.rst | 2 + arch/arm64/Kconfig | 30 ++ arch/arm64/include/asm/cpucaps.h | 3 +- arch/arm64/include/asm/page.h | 13 + arch/arm64/include/asm/vmalloc.h | 4 + arch/arm64/kernel/cpu_errata.c | 8 + arch/arm64/kernel/head.S | 23 +- arch/arm64/mm/dma-mapping.c | 604 +++++++++++++++++++++++++ arch/arm64/mm/init.c | 3 +- arch/arm64/mm/mmu.c | 2 +- 10 files changed, 686 insertions(+), 6 deletions(-) diff --git a/Documentation/arm64/silicon-errata.rst b/Documentation/arm64/silicon-errata.rst index 4ff8a9379ba9..cb04b3baaa33 100644 --- a/Documentation/arm64/silicon-errata.rst +++ b/Documentation/arm64/silicon-errata.rst @@ -94,6 +94,8 @@ stable kernels. 
+----------------+-----------------+-----------------+-----------------------------+ | ARM | Cortex-A510 | #2051678 | ARM64_ERRATUM_2051678 | +----------------+-----------------+-----------------+-----------------------------+ +| ARM | Cortex-A510 | #2454944 | ARM64_ERRATUM_2454944 | ++----------------+-----------------+-----------------+-----------------------------+ | ARM | Cortex-A710 | #2054223 | ARM64_ERRATUM_2054223 | +----------------+-----------------+-----------------+-----------------------------+ | ARM | Neoverse-N1 | #1188873,1418040| ARM64_ERRATUM_1418040 | diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 1196fe7686a7..90a8f9925ce8 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -713,6 +713,36 @@ config ARM64_ERRATUM_2067961 If unsure, say Y. +config ARM64_ERRATUM_2454944 + bool "Cortex-A510: 2454944: Unmodified cache line might be written back to memory" + select ARCH_HAS_TEARDOWN_DMA_OPS + default y + help + This option adds the workaround for ARM Cortex-A510 erratum 2454944. + + Affected Cortex-A510 core might write unmodified cache lines back to + memory, which breaks the assumptions upon which software coherency + management for non-coherent DMA relies. If a cache line is + speculatively fetched while a non-coherent device is writing directly + to DRAM, and subsequently written back by natural eviction, data + written by the device in the intervening period can be lost. + + The workaround is to enforce as far as reasonably possible that all + non-coherent DMA transfers are bounced and/or remapped to minimise + the chance that any Cacheable alias exists through which speculative + cache fills could occur. + + This is quite involved and has unavoidable performance impact on + affected systems. 
+ +config ARM64_ERRATUM_2454944_DEBUG + bool "Extra debug checks for Cortex-A510 2454944" + depends on ARM64_ERRATUM_2454944 + default y + help + Enable additional checks and warnings to detect and mitigate driver + bugs breaking the remapping workaround. + config CAVIUM_ERRATUM_22375 bool "Cavium erratum 22375, 24313" default y diff --git a/arch/arm64/include/asm/cpucaps.h b/arch/arm64/include/asm/cpucaps.h index ed39d674de5f..cbe87b1bc4a8 100644 --- a/arch/arm64/include/asm/cpucaps.h +++ b/arch/arm64/include/asm/cpucaps.h @@ -72,7 +72,8 @@ #define ARM64_WORKAROUND_TSB_FLUSH_FAILURE 61 #define ARM64_SPECTRE_BHB 62 -/* kabi: reserve 63 - 76 for future cpu capabilities */ +/* kabi: reserve 63 - 74 for future cpu capabilities */ +#define ARM64_WORKAROUND_NO_DMA_ALIAS 75 #define ARM64_NCAPS 76 #endif /* __ASM_CPUCAPS_H */ diff --git a/arch/arm64/include/asm/page.h b/arch/arm64/include/asm/page.h index ed1b9dcf12b2..ced097af888d 100644 --- a/arch/arm64/include/asm/page.h +++ b/arch/arm64/include/asm/page.h @@ -43,6 +43,19 @@ typedef struct page *pgtable_t; extern int pfn_valid(unsigned long); +#ifdef CONFIG_ARM64_ERRATUM_2454944_DEBUG +#include + +void page_check_nc(struct page *page, int order); + +static inline void arch_free_page(struct page *page, int order) +{ + if (cpus_have_const_cap(ARM64_WORKAROUND_NO_DMA_ALIAS)) + page_check_nc(page, order); +} +#define HAVE_ARCH_FREE_PAGE +#endif + #include #endif /* !__ASSEMBLY__ */ diff --git a/arch/arm64/include/asm/vmalloc.h b/arch/arm64/include/asm/vmalloc.h index 2ca708ab9b20..b5a0614e48c0 100644 --- a/arch/arm64/include/asm/vmalloc.h +++ b/arch/arm64/include/asm/vmalloc.h @@ -1,4 +1,8 @@ #ifndef _ASM_ARM64_VMALLOC_H #define _ASM_ARM64_VMALLOC_H +#include + +#define arch_disable_lazy_vunmap cpus_have_const_cap(ARM64_WORKAROUND_NO_DMA_ALIAS) + #endif /* _ASM_ARM64_VMALLOC_H */ diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c index 57ece7965bd6..d4848a3f88c4 100644 --- 
a/arch/arm64/kernel/cpu_errata.c +++ b/arch/arm64/kernel/cpu_errata.c @@ -555,6 +555,14 @@ const struct arm64_cpu_capabilities arm64_errata[] = { .capability = ARM64_WORKAROUND_TSB_FLUSH_FAILURE, ERRATA_MIDR_RANGE_LIST(tsb_flush_fail_cpus), }, +#endif +#ifdef CONFIG_ARM64_ERRATUM_2454944 + { + .desc = "ARM erratum 2454944", + .capability = ARM64_WORKAROUND_NO_DMA_ALIAS, + ERRATA_MIDR_RANGE(MIDR_CORTEX_A510, 0, 0, 1, 1), + MIDR_FIXED(MIDR_CPU_VAR_REV(1, 1), BIT(25)), + }, #endif { } diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S index 047eac4046e0..1431d840d119 100644 --- a/arch/arm64/kernel/head.S +++ b/arch/arm64/kernel/head.S @@ -125,9 +125,13 @@ SYM_CODE_END(primary_entry) SYM_CODE_START_LOCAL(preserve_boot_args) mov x21, x0 // x21=FDT - adr_l x0, boot_args // record the contents of - stp x21, x1, [x0] // x0 .. x3 at kernel entry - stp x2, x3, [x0, #16] + adr_l x0, boot_args +#ifdef CONFIG_ARM64_ERRATUM_2454944 + dc ivac, x0 // Cortex-A510 CWG is 64 bytes, so plenty + dsb sy +#endif + stp x21, x1, [x0] // record the contents of + stp x2, x3, [x0, #16] // x0 .. x3 at kernel entry dmb sy // needed before dc ivac with // MMU off @@ -282,8 +286,17 @@ SYM_FUNC_START_LOCAL(__create_page_tables) * the kernel image, and thus are clean to the PoC per the boot * protocol. */ +#ifndef CONFIG_ARM64_ERRATUM_2454944 adrp x0, init_pg_dir adrp x1, init_pg_end +#else + /* + * However if we can't even trust "clean" cache lines shadowing rodata, + * then nuke the entire image. It's the only way to be sure. 
+ */ + adrp x0, _text + adrp x1, _end +#endif sub x1, x1, x0 bl __inval_dcache_area @@ -531,6 +544,10 @@ SYM_FUNC_END(init_kernel_el) */ SYM_FUNC_START_LOCAL(set_cpu_boot_mode_flag) adr_l x1, __boot_cpu_mode +#ifdef CONFIG_ARM64_ERRATUM_2454944 + dc ivac, x1 + dsb sy +#endif cmp w0, #BOOT_CPU_MODE_EL2 b.ne 1f add x1, x1, #4 diff --git a/arch/arm64/mm/dma-mapping.c b/arch/arm64/mm/dma-mapping.c index e75e5e75b192..bd46ad81d061 100644 --- a/arch/arm64/mm/dma-mapping.c +++ b/arch/arm64/mm/dma-mapping.c @@ -31,7 +31,599 @@ void arch_dma_prep_coherent(struct page *page, size_t size) __dma_flush_area(page_address(page), size); } +#ifdef CONFIG_ARM64_ERRATUM_2454944 +#include +#include +#include + +#ifdef CONFIG_ARM64_ERRATUM_2454944_DEBUG +#include + +static atomic_t pages_remapped; + +static int __init remap_debugfs_register(void) +{ + debugfs_create_atomic_t("pages_remapped_nc", 0444, NULL, &pages_remapped); + return 0; +} +arch_initcall(remap_debugfs_register); +#endif + +/* + * Nobody should be using these software bits on linear map addresses, right? + * This is categorically the worst, but oh well, needs must... 
+ */ +#define REFCOUNT_INC (1UL << 55) +#define PTE_REFCOUNT(pte) (((pte) >> 55) & 0xf) + +static int pte_set_nc(pte_t *ptep, unsigned long addr, void *data) +{ + pteval_t old_pte, new_pte, pte; + unsigned int refcount; + + pte = pte_val(READ_ONCE(*ptep)); + do { + /* Avoid racing against the transient invalid state */ + old_pte = pte | PTE_VALID; + new_pte = old_pte + REFCOUNT_INC; + refcount = PTE_REFCOUNT(pte); + if (WARN_ON(refcount == 15)) + return -EINVAL; + if (refcount == 0) { + new_pte &= ~(PTE_ATTRINDX_MASK | PTE_VALID); + new_pte |= PTE_ATTRINDX(MT_NORMAL_NC); + } + pte = cmpxchg_relaxed(&pte_val(*ptep), old_pte, new_pte); + } while (pte != old_pte); + + *(unsigned int *)data = refcount; + if (refcount) + return 0; + +#ifdef CONFIG_ARM64_ERRATUM_2454944_DEBUG + atomic_inc(&pages_remapped); +#endif + flush_tlb_kernel_range(addr, addr + PAGE_SIZE); + WRITE_ONCE(*ptep, __pte(new_pte | PTE_VALID)); + return 0; +} + +static int pte_clear_nc(pte_t *ptep, unsigned long addr, void *data) +{ + pteval_t old_pte, new_pte, pte; + unsigned int refcount; + + pte = pte_val(READ_ONCE(*ptep)); + do { + old_pte = pte | PTE_VALID; + new_pte = old_pte - REFCOUNT_INC; + refcount = PTE_REFCOUNT(pte); + if (WARN_ON(refcount == 0)) + return -EINVAL; + if (refcount == 1) { + new_pte &= ~(PTE_ATTRINDX_MASK | PTE_VALID); + new_pte |= PTE_ATTRINDX(MT_NORMAL_TAGGED); + } + pte = cmpxchg_relaxed(&pte_val(*ptep), old_pte, new_pte); + } while (pte != old_pte); + + if (refcount > 1) + return 0; + +#ifdef CONFIG_ARM64_ERRATUM_2454944_DEBUG + atomic_dec(&pages_remapped); +#endif + flush_tlb_kernel_range(addr, addr + PAGE_SIZE); + WRITE_ONCE(*ptep, __pte(new_pte | PTE_VALID)); + return 0; +} + +static int set_nc(void *addr, size_t size) +{ + unsigned int count; + int ret = apply_to_existing_page_range(&init_mm, (unsigned long)addr, + size, pte_set_nc, &count); + WARN_RATELIMIT(IS_ENABLED(CONFIG_ARM64_ERRATUM_2454944_DEBUG) && + count == 0 && page_mapped(virt_to_page(addr)), + "changing 
linear mapping but cacheable aliases may still exist\n"); + dsb(ishst); + isb(); + __flush_dcache_area(addr, size); + return ret; +} + +static int clear_nc(void *addr, size_t size) +{ + int ret = apply_to_existing_page_range(&init_mm, (unsigned long)addr, + size, pte_clear_nc, NULL); + dsb(ishst); + isb(); + __inval_dcache_area(addr, size); + return ret; +} + +#ifdef CONFIG_ARM64_ERRATUM_2454944_DEBUG +void page_check_nc(struct page *page, int order) +{ + pgd_t *pgdp, pgd; + p4d_t *p4dp, p4d; + pud_t *pudp, pud; + pmd_t *pmdp, pmd; + pte_t *ptep, pte; + unsigned long addr = (unsigned long)page_address(page); + int i, j; + + pgdp = pgd_offset(&init_mm, addr); + pgd = READ_ONCE(*pgdp); + + p4dp = p4d_offset(pgdp, addr); + p4d = READ_ONCE(*p4dp); + + pudp = pud_offset(p4dp, addr); + pud = READ_ONCE(*pudp); + + pmdp = pmd_offset(pudp, addr); + pmd = READ_ONCE(*pmdp); + + for (i = 0; i < (1 << order); i++) { + ptep = pte_offset_map(pmdp, addr); + pte = READ_ONCE(*ptep); + j = PTE_REFCOUNT(pte_val(pte)); + WARN(j, "Non-Cacheable page leaked! 
I'm fixing it up but it means you have a bug elsewhere"); + while (j--) + pte_clear_nc(ptep, addr, NULL); + addr += PAGE_SIZE; + } +} +#endif /* CONFIG_ARM64_ERRATUM_2454944_DEBUG */ + +static phys_addr_t __arm64_noalias_map(struct device *dev, phys_addr_t phys, + size_t size, enum dma_data_direction dir, + unsigned long attrs, bool bounce) +{ + bounce = bounce || (phys | size) & ~PAGE_MASK; + if (bounce) { + phys = swiotlb_tbl_map_single(dev, phys, size, PAGE_ALIGN(size), + dir, attrs); + if (phys == DMA_MAPPING_ERROR) + return DMA_MAPPING_ERROR; + } + if (set_nc(phys_to_virt(phys & PAGE_MASK), PAGE_ALIGN(size))) + goto out_unmap; + + return phys; +out_unmap: + if (bounce) + swiotlb_tbl_unmap_single(dev, phys, size, PAGE_ALIGN(size), dir, + attrs | DMA_ATTR_SKIP_CPU_SYNC); + return DMA_MAPPING_ERROR; +} + +static void __arm64_noalias_unmap(struct device *dev, phys_addr_t phys, size_t size, + enum dma_data_direction dir, unsigned long attrs) +{ + clear_nc(phys_to_virt(phys & PAGE_MASK), PAGE_ALIGN(size)); + if (is_swiotlb_buffer(phys)) + swiotlb_tbl_unmap_single(dev, phys, size, PAGE_ALIGN(size), dir, attrs); +} + +static void __arm64_noalias_sync_for_device(struct device *dev, phys_addr_t phys, + size_t size, enum dma_data_direction dir) +{ + if (is_swiotlb_buffer(phys)) + swiotlb_tbl_sync_single(dev, phys, size, dir, SYNC_FOR_DEVICE); + else + arch_sync_dma_for_device(phys, size, dir); +} + +static void __arm64_noalias_sync_for_cpu(struct device *dev, phys_addr_t phys, + size_t size, enum dma_data_direction dir) +{ + if (is_swiotlb_buffer(phys)) + swiotlb_tbl_sync_single(dev, phys, size, dir, SYNC_FOR_CPU); + else + arch_sync_dma_for_cpu(phys, size, dir); +} + +static void *arm64_noalias_alloc(struct device *dev, size_t size, + dma_addr_t *dma_addr, gfp_t gfp, unsigned long attrs) +{ + struct page *page; + void *ret; + + if (attrs & DMA_ATTR_NO_WARN) + gfp |= __GFP_NOWARN; + + size = PAGE_ALIGN(size); + page = dma_direct_alloc_pages(dev, size, dma_addr, 0, gfp & 
~__GFP_ZERO); + if (!page) + return NULL; + + ret = page_address(page); + if (set_nc(ret, size)) { + dma_direct_free_pages(dev, size, page, *dma_addr, 0); + return NULL; + } + return ret; +} + +static void arm64_noalias_free(struct device *dev, size_t size, void *cpu_addr, + dma_addr_t dma_addr, unsigned long attrs) +{ + size = PAGE_ALIGN(size); + clear_nc(cpu_addr, size); + dma_direct_free_pages(dev, size, virt_to_page(cpu_addr), dma_addr, 0); +} + +static dma_addr_t arm64_noalias_map_page(struct device *dev, struct page *page, + unsigned long offset, size_t size, + enum dma_data_direction dir, unsigned long attrs) +{ + phys_addr_t phys = page_to_phys(page) + offset; + bool bounce = !dma_capable(dev, phys_to_dma(dev, phys), size, true); + + if (!bounce && dir == DMA_TO_DEVICE) { + arch_sync_dma_for_device(phys, size, dir); + return phys_to_dma(dev, phys); + } + + bounce = bounce || page_mapped(page); + phys = __arm64_noalias_map(dev, phys, size, dir, attrs, bounce); + if (phys == DMA_MAPPING_ERROR) + return DMA_MAPPING_ERROR; + + return phys_to_dma(dev, phys); +} + +static void arm64_noalias_unmap_page(struct device *dev, dma_addr_t dma_addr, + size_t size, enum dma_data_direction dir, + unsigned long attrs) +{ + if (dir == DMA_TO_DEVICE) + return; + __arm64_noalias_unmap(dev, dma_to_phys(dev, dma_addr), size, dir, attrs); +} + +static void arm64_noalias_unmap_sg(struct device *dev, struct scatterlist *sgl, int nents, + enum dma_data_direction dir, unsigned long attrs) +{ + struct scatterlist *sg; + int i; + + if (dir == DMA_TO_DEVICE) + return; + for_each_sg (sgl, sg, nents, i) + __arm64_noalias_unmap(dev, dma_to_phys(dev, sg->dma_address), + sg->length, dir, attrs); +} + +static int arm64_noalias_map_sg(struct device *dev, struct scatterlist *sgl, int nents, + enum dma_data_direction dir, unsigned long attrs) +{ + int i; + struct scatterlist *sg; + + for_each_sg(sgl, sg, nents, i) { + sg->dma_address = arm64_noalias_map_page(dev, sg_page(sg), sg->offset, + 
sg->length, dir, attrs); + if (sg->dma_address == DMA_MAPPING_ERROR) + goto out_unmap; + sg->dma_length = sg->length; + } + + return nents; + +out_unmap: + arm64_noalias_unmap_sg(dev, sgl, i, dir, attrs | DMA_ATTR_SKIP_CPU_SYNC); + return 0; +} + +static void arm64_noalias_sync_single_for_device(struct device *dev, dma_addr_t addr, + size_t size, enum dma_data_direction dir) +{ + __arm64_noalias_sync_for_device(dev, dma_to_phys(dev, addr), size, dir); +} + +static void arm64_noalias_sync_single_for_cpu(struct device *dev, dma_addr_t addr, + size_t size, enum dma_data_direction dir) +{ + __arm64_noalias_sync_for_cpu(dev, dma_to_phys(dev, addr), size, dir); +} + +static void arm64_noalias_sync_sg_for_device(struct device *dev, struct scatterlist *sgl, + int nents, enum dma_data_direction dir) +{ + struct scatterlist *sg; + int i; + + for_each_sg(sgl, sg, nents, i) + arm64_noalias_sync_single_for_device(dev, sg->dma_address, sg->length, dir); +} + +static void arm64_noalias_sync_sg_for_cpu(struct device *dev, struct scatterlist *sgl, + int nents, enum dma_data_direction dir) +{ + struct scatterlist *sg; + int i; + + for_each_sg(sgl, sg, nents, i) + arm64_noalias_sync_single_for_cpu(dev, sg->dma_address, sg->length, dir); +} + +static const struct dma_map_ops arm64_noalias_ops = { + .alloc = arm64_noalias_alloc, + .free = arm64_noalias_free, + .alloc_pages = dma_common_alloc_pages, + .free_pages = dma_common_free_pages, + .mmap = dma_common_mmap, + .get_sgtable = dma_common_get_sgtable, + .map_page = arm64_noalias_map_page, + .unmap_page = arm64_noalias_unmap_page, + .map_sg = arm64_noalias_map_sg, + .unmap_sg = arm64_noalias_unmap_sg, + .sync_single_for_cpu = arm64_noalias_sync_single_for_cpu, + .sync_single_for_device = arm64_noalias_sync_single_for_device, + .sync_sg_for_cpu = arm64_noalias_sync_sg_for_cpu, + .sync_sg_for_device = arm64_noalias_sync_sg_for_device, + .dma_supported = dma_direct_supported, + .get_required_mask = dma_direct_get_required_mask, + 
.max_mapping_size = swiotlb_max_mapping_size, +}; + #ifdef CONFIG_IOMMU_DMA +static const struct dma_map_ops *iommu_dma_ops; + +static void *arm64_iommu_alloc(struct device *dev, size_t size, + dma_addr_t *dma_addr, gfp_t gfp, unsigned long attrs) +{ + struct page **pages; + void *ret; + int i; + + size = PAGE_ALIGN(size); + if (!gfpflags_allow_blocking(gfp) || (attrs & DMA_ATTR_FORCE_CONTIGUOUS)) { + ret = dma_common_alloc_pages(dev, size, dma_addr, 0, gfp); + return ret ? page_address(ret) : NULL; + } + + ret = iommu_dma_ops->alloc(dev, size, dma_addr, gfp, attrs); + if (ret) { + pages = dma_common_find_pages(ret); + for (i = 0; i < size / PAGE_SIZE; i++) + if (set_nc(page_address(pages[i]), PAGE_SIZE)) + goto err; + } + return ret; + +err: + while (i--) + clear_nc(page_address(pages[i]), PAGE_SIZE); + iommu_dma_ops->free(dev, size, ret, *dma_addr, attrs); + return NULL; +} + +static void arm64_iommu_free(struct device *dev, size_t size, void *cpu_addr, + dma_addr_t dma_addr, unsigned long attrs) +{ + struct page **pages = dma_common_find_pages(cpu_addr); + int i; + + size = PAGE_ALIGN(size); + if (!pages) + return dma_common_free_pages(dev, size, virt_to_page(cpu_addr), dma_addr, 0); + + for (i = 0; i < size / PAGE_SIZE; i++) + clear_nc(page_address(pages[i]), PAGE_SIZE); + iommu_dma_ops->free(dev, size, cpu_addr, dma_addr, attrs); +} + +static dma_addr_t arm64_iommu_map_page(struct device *dev, struct page *page, + unsigned long offset, size_t size, + enum dma_data_direction dir, unsigned long attrs) +{ + phys_addr_t phys = page_to_phys(page) + offset; + dma_addr_t ret; + + if (dir == DMA_TO_DEVICE) + return iommu_dma_ops->map_page(dev, page, offset, size, dir, attrs); + + phys = __arm64_noalias_map(dev, phys, size, dir, attrs, page_mapped(page)); + if (phys == DMA_MAPPING_ERROR) + return DMA_MAPPING_ERROR; + + attrs |= DMA_ATTR_SKIP_CPU_SYNC; + ret = iommu_dma_ops->map_page(dev, phys_to_page(phys), offset_in_page(phys), + size, dir, attrs); + if (ret == 
DMA_MAPPING_ERROR) + __arm64_noalias_unmap(dev, phys, size, dir, attrs); + return ret; +} + +static void arm64_iommu_unmap_page(struct device *dev, dma_addr_t addr, size_t size, + enum dma_data_direction dir, unsigned long attrs) +{ + phys_addr_t phys; + + if (dir == DMA_TO_DEVICE) + return iommu_dma_ops->unmap_page(dev, addr, size, dir, attrs); + + phys = iommu_iova_to_phys(iommu_get_dma_domain(dev), addr); + iommu_dma_ops->unmap_page(dev, addr, size, dir, attrs | DMA_ATTR_SKIP_CPU_SYNC); + __arm64_noalias_unmap(dev, phys, size, dir, attrs); +} + +static int arm64_iommu_map_sg(struct device *dev, struct scatterlist *sgl, int nents, + enum dma_data_direction dir, unsigned long attrs) +{ + int i, ret; + struct scatterlist *sg; + phys_addr_t *orig_phys; + + if (dir == DMA_TO_DEVICE) + return iommu_dma_ops->map_sg(dev, sgl, nents, dir, attrs); + + orig_phys = kmalloc_array(nents, sizeof(*orig_phys), GFP_ATOMIC); + if (!orig_phys) + return 0; + + for_each_sg(sgl, sg, nents, i) { + phys_addr_t phys = sg_phys(sg); + /* + * Note we do not have the page_mapped() check here, since + * bouncing plays complete havoc with dma-buf imports. Those + * may well be mapped in userspace, but we hope and pray that + * it's via dma_mmap_attrs() so any such mappings are safely + * non-cacheable. DO NOT allow a block device or other similar + * scatterlist user to get here (disable IOMMUs if necessary), + * since we can't mitigate for both conflicting use-cases. 
+ */ + phys = __arm64_noalias_map(dev, phys, sg->length, dir, attrs, false); + if (phys == DMA_MAPPING_ERROR) + goto out_unmap; + + orig_phys[i] = sg_phys(sg); + sg_assign_page(sg, phys_to_page(phys)); + sg->offset = offset_in_page(phys); + } + ret = iommu_dma_ops->map_sg(dev, sgl, nents, dir, attrs | DMA_ATTR_SKIP_CPU_SYNC); + if (ret <= 0) + goto out_unmap; + + for_each_sg(sgl, sg, nents, i) { + sg_assign_page(sg, phys_to_page(orig_phys[i])); + sg->offset = offset_in_page(orig_phys[i]); + } + + kfree(orig_phys); + return ret; + +out_unmap: + for_each_sg(sgl, sg, nents, i) { + __arm64_noalias_unmap(dev, sg_phys(sg), sg->length, dir, attrs); + sg_assign_page(sg, phys_to_page(orig_phys[i])); + sg->offset = offset_in_page(orig_phys[i]); + } + kfree(orig_phys); + return 0; +} + +static void arm64_iommu_unmap_sg(struct device *dev, struct scatterlist *sgl, int nents, + enum dma_data_direction dir, unsigned long attrs) +{ + struct iommu_domain *domain; + struct scatterlist *sg, *tmp; + dma_addr_t iova; + int i; + + if (dir == DMA_TO_DEVICE) + return iommu_dma_ops->unmap_sg(dev, sgl, nents, dir, attrs); + + domain = iommu_get_dma_domain(dev); + iova = sgl->dma_address; + tmp = sgl; + for_each_sg(sgl, sg, nents, i) { + phys_addr_t phys = iommu_iova_to_phys(domain, iova); + + __arm64_noalias_unmap(dev, phys, sg->length, dir, attrs); + iova += sg->length; + if (iova == tmp->dma_address + tmp->dma_length && !sg_is_last(tmp)) { + tmp = sg_next(tmp); + iova = tmp->dma_address; + } + } + iommu_dma_ops->unmap_sg(dev, sgl, nents, dir, attrs | DMA_ATTR_SKIP_CPU_SYNC); +} + +static void arm64_iommu_sync_single_for_device(struct device *dev, dma_addr_t addr, + size_t size, enum dma_data_direction dir) +{ + phys_addr_t phys = iommu_iova_to_phys(iommu_get_dma_domain(dev), addr); + + __arm64_noalias_sync_for_device(dev, phys, size, dir); +} + +static void arm64_iommu_sync_single_for_cpu(struct device *dev, dma_addr_t addr, + size_t size, enum dma_data_direction dir) +{ + phys_addr_t 
phys = iommu_iova_to_phys(iommu_get_dma_domain(dev), addr); + + __arm64_noalias_sync_for_cpu(dev, phys, size, dir); +} + +static void arm64_iommu_sync_sg_for_device(struct device *dev, struct scatterlist *sgl, + int nents, enum dma_data_direction dir) +{ + struct iommu_domain *domain = iommu_get_dma_domain(dev); + struct scatterlist *sg, *tmp = sgl; + dma_addr_t iova = sgl->dma_address; + int i; + + for_each_sg(sgl, sg, nents, i) { + phys_addr_t phys = iommu_iova_to_phys(domain, iova); + + __arm64_noalias_sync_for_device(dev, phys, sg->length, dir); + iova += sg->length; + if (iova == tmp->dma_address + tmp->dma_length && !sg_is_last(tmp)) { + tmp = sg_next(tmp); + iova = tmp->dma_address; + } + } +} + +static void arm64_iommu_sync_sg_for_cpu(struct device *dev, struct scatterlist *sgl, + int nents, enum dma_data_direction dir) +{ + struct iommu_domain *domain = iommu_get_dma_domain(dev); + struct scatterlist *sg, *tmp = sgl; + dma_addr_t iova = sgl->dma_address; + int i; + + for_each_sg(sgl, sg, nents, i) { + phys_addr_t phys = iommu_iova_to_phys(domain, iova); + + __arm64_noalias_sync_for_cpu(dev, phys, sg->length, dir); + iova += sg->length; + if (iova == tmp->dma_address + tmp->dma_length && !sg_is_last(tmp)) { + tmp = sg_next(tmp); + iova = tmp->dma_address; + } + } +} + +static struct dma_map_ops arm64_iommu_ops = { + .alloc = arm64_iommu_alloc, + .free = arm64_iommu_free, + .alloc_pages = dma_common_alloc_pages, + .free_pages = dma_common_free_pages, + .map_page = arm64_iommu_map_page, + .unmap_page = arm64_iommu_unmap_page, + .map_sg = arm64_iommu_map_sg, + .unmap_sg = arm64_iommu_unmap_sg, + .sync_single_for_cpu = arm64_iommu_sync_single_for_cpu, + .sync_single_for_device = arm64_iommu_sync_single_for_device, + .sync_sg_for_cpu = arm64_iommu_sync_sg_for_cpu, + .sync_sg_for_device = arm64_iommu_sync_sg_for_device, +}; + +static void arm64_init_iommu_ops(struct device *dev) +{ + const struct dma_map_ops *ops = dev->dma_ops; + + dev->dma_ops = 
&arm64_iommu_ops; + if (iommu_dma_ops) + return; + + iommu_dma_ops = ops; + arm64_iommu_ops.mmap = ops->mmap; + arm64_iommu_ops.get_sgtable = ops->get_sgtable; + arm64_iommu_ops.map_resource = ops->map_resource; + arm64_iommu_ops.unmap_resource = ops->unmap_resource; + arm64_iommu_ops.get_merge_boundary = ops->get_merge_boundary; +} +#endif /* CONFIG_IOMMU_DMA */ +#endif /* CONFIG_ARM64_ERRATUM_2454944*/ + +#ifdef CONFIG_ARCH_HAS_TEARDOWN_DMA_OPS void arch_teardown_dma_ops(struct device *dev) { dev->dma_ops = NULL; @@ -42,6 +634,14 @@ void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size, const struct iommu_ops *iommu, bool coherent) { int cls = cache_line_size_of_cpu(); +#ifdef CONFIG_ARM64_ERRATUM_2454944 + bool noalias = !coherent && cpus_have_cap(ARM64_WORKAROUND_NO_DMA_ALIAS); + + if (noalias) { + dev_info(dev, "applying no-alias DMA workaround\n"); + dev->dma_ops = &arm64_noalias_ops; + } +#endif WARN_TAINT(!coherent && cls > ARCH_DMA_MINALIGN, TAINT_CPU_OUT_OF_SPEC, @@ -55,6 +655,10 @@ void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size, trace_android_vh_iommu_setup_dma_ops(dev, dma_base, size); trace_android_rvh_iommu_setup_dma_ops(dev, dma_base, size); } +#if defined(CONFIG_ARM64_ERRATUM_2454944) && defined(CONFIG_IOMMU_DMA) + if (noalias && dev->dma_ops != &arm64_noalias_ops) + arm64_init_iommu_ops(dev); +#endif #ifdef CONFIG_XEN if (xen_initial_domain()) diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c index 0cbb63cf955f..275f190f0c3d 100644 --- a/arch/arm64/mm/init.c +++ b/arch/arm64/mm/init.c @@ -581,7 +581,8 @@ static void __init free_unused_memmap(void) void __init mem_init(void) { if (swiotlb_force == SWIOTLB_FORCE || - max_pfn > PFN_DOWN(arm64_dma_phys_limit)) + max_pfn > PFN_DOWN(arm64_dma_phys_limit) || + cpus_have_cap(ARM64_WORKAROUND_NO_DMA_ALIAS)) swiotlb_init(1); else swiotlb_force = SWIOTLB_NO_FORCE; diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index cce252206962..57383bcbb6cb 100644 --- 
a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -502,7 +502,7 @@ static void __init map_mem(pgd_t *pgdp) u64 i; if (rodata_full || debug_pagealloc_enabled() || - IS_ENABLED(CONFIG_KFENCE)) + IS_ENABLED(CONFIG_KFENCE) || cpus_have_cap(ARM64_WORKAROUND_NO_DMA_ALIAS)) flags = NO_BLOCK_MAPPINGS | NO_CONT_MAPPINGS; /* From 015859081a80be2454238d215e2cf41c2d9ecc87 Mon Sep 17 00:00:00 2001 From: Hyunwoo Kim Date: Wed, 7 Sep 2022 09:07:14 -0700 Subject: [PATCH 089/186] UPSTREAM: efi: capsule-loader: Fix use-after-free in efi_capsule_write commit 9cb636b5f6a8cc6d1b50809ec8f8d33ae0c84c95 upstream. A race condition may occur if the user calls close() on another thread during a write() operation on the device node of the efi capsule. This is a race condition that occurs between the efi_capsule_write() and efi_capsule_flush() functions of efi_capsule_fops, which ultimately results in UAF. So, the page freeing process is modified to be done in efi_capsule_release() instead of efi_capsule_flush(). Bug: 246690517 Cc: # v4.9+ Signed-off-by: Hyunwoo Kim Link: https://lore.kernel.org/all/20220907102920.GA88602@ubuntu/ Signed-off-by: Ard Biesheuvel Signed-off-by: Greg Kroah-Hartman Signed-off-by: Lee Jones Change-Id: I6b11df91a87c027ebed4a7b239610a9b9e28cec0 --- drivers/firmware/efi/capsule-loader.c | 31 ++++++--------------------- 1 file changed, 7 insertions(+), 24 deletions(-) diff --git a/drivers/firmware/efi/capsule-loader.c b/drivers/firmware/efi/capsule-loader.c index 4dde8edd53b6..3e8d4b51a814 100644 --- a/drivers/firmware/efi/capsule-loader.c +++ b/drivers/firmware/efi/capsule-loader.c @@ -242,29 +242,6 @@ failed: return ret; } -/** - * efi_capsule_flush - called by file close or file flush - * @file: file pointer - * @id: not used - * - * If a capsule is being partially uploaded then calling this function - * will be treated as upload termination and will free those completed - * buffer pages and -ECANCELED will be returned. 
- **/ -static int efi_capsule_flush(struct file *file, fl_owner_t id) -{ - int ret = 0; - struct capsule_info *cap_info = file->private_data; - - if (cap_info->index > 0) { - pr_err("capsule upload not complete\n"); - efi_free_all_buff_pages(cap_info); - ret = -ECANCELED; - } - - return ret; -} - /** * efi_capsule_release - called by file close * @inode: not used @@ -277,6 +254,13 @@ static int efi_capsule_release(struct inode *inode, struct file *file) { struct capsule_info *cap_info = file->private_data; + if (cap_info->index > 0 && + (cap_info->header.headersize == 0 || + cap_info->count < cap_info->total_size)) { + pr_err("capsule upload not complete\n"); + efi_free_all_buff_pages(cap_info); + } + kfree(cap_info->pages); kfree(cap_info->phys); kfree(file->private_data); @@ -324,7 +308,6 @@ static const struct file_operations efi_capsule_fops = { .owner = THIS_MODULE, .open = efi_capsule_open, .write = efi_capsule_write, - .flush = efi_capsule_flush, .release = efi_capsule_release, .llseek = no_llseek, }; From 450a37133d502be2877308e836941db037a25fad Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Mon, 21 Nov 2022 12:21:05 +0000 Subject: [PATCH 090/186] Revert "FROMGIT: arm64: Work around Cortex-A510 erratum 2454944" Revert submission 2302443 Reason for revert: Series is not queued in a maintainer tree and has not been posted to a public mailing list. Reverted Changes: Iffd38bf97:FROMGIT: arm64: Work around Cortex-A510 erratum 24... 
I694523564:FROMGIT: mm/vmalloc: Add override for lazy vunmap Change-Id: I254d427b9dad0791ca8df4dc51be92e458c58728 Signed-off-by: Will Deacon --- Documentation/arm64/silicon-errata.rst | 2 - arch/arm64/Kconfig | 30 -- arch/arm64/include/asm/cpucaps.h | 3 +- arch/arm64/include/asm/page.h | 13 - arch/arm64/include/asm/vmalloc.h | 4 - arch/arm64/kernel/cpu_errata.c | 8 - arch/arm64/kernel/head.S | 23 +- arch/arm64/mm/dma-mapping.c | 604 ------------------------- arch/arm64/mm/init.c | 3 +- arch/arm64/mm/mmu.c | 2 +- 10 files changed, 6 insertions(+), 686 deletions(-) diff --git a/Documentation/arm64/silicon-errata.rst b/Documentation/arm64/silicon-errata.rst index cb04b3baaa33..4ff8a9379ba9 100644 --- a/Documentation/arm64/silicon-errata.rst +++ b/Documentation/arm64/silicon-errata.rst @@ -94,8 +94,6 @@ stable kernels. +----------------+-----------------+-----------------+-----------------------------+ | ARM | Cortex-A510 | #2051678 | ARM64_ERRATUM_2051678 | +----------------+-----------------+-----------------+-----------------------------+ -| ARM | Cortex-A510 | #2454944 | ARM64_ERRATUM_2454944 | -+----------------+-----------------+-----------------+-----------------------------+ | ARM | Cortex-A710 | #2054223 | ARM64_ERRATUM_2054223 | +----------------+-----------------+-----------------+-----------------------------+ | ARM | Neoverse-N1 | #1188873,1418040| ARM64_ERRATUM_1418040 | diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 90a8f9925ce8..1196fe7686a7 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -713,36 +713,6 @@ config ARM64_ERRATUM_2067961 If unsure, say Y. -config ARM64_ERRATUM_2454944 - bool "Cortex-A510: 2454944: Unmodified cache line might be written back to memory" - select ARCH_HAS_TEARDOWN_DMA_OPS - default y - help - This option adds the workaround for ARM Cortex-A510 erratum 2454944. 
- - Affected Cortex-A510 core might write unmodified cache lines back to - memory, which breaks the assumptions upon which software coherency - management for non-coherent DMA relies. If a cache line is - speculatively fetched while a non-coherent device is writing directly - to DRAM, and subsequently written back by natural eviction, data - written by the device in the intervening period can be lost. - - The workaround is to enforce as far as reasonably possible that all - non-coherent DMA transfers are bounced and/or remapped to minimise - the chance that any Cacheable alias exists through which speculative - cache fills could occur. - - This is quite involved and has unavoidable performance impact on - affected systems. - -config ARM64_ERRATUM_2454944_DEBUG - bool "Extra debug checks for Cortex-A510 2454944" - depends on ARM64_ERRATUM_2454944 - default y - help - Enable additional checks and warnings to detect and mitigate driver - bugs breaking the remapping workaround. - config CAVIUM_ERRATUM_22375 bool "Cavium erratum 22375, 24313" default y diff --git a/arch/arm64/include/asm/cpucaps.h b/arch/arm64/include/asm/cpucaps.h index cbe87b1bc4a8..ed39d674de5f 100644 --- a/arch/arm64/include/asm/cpucaps.h +++ b/arch/arm64/include/asm/cpucaps.h @@ -72,8 +72,7 @@ #define ARM64_WORKAROUND_TSB_FLUSH_FAILURE 61 #define ARM64_SPECTRE_BHB 62 -/* kabi: reserve 63 - 74 for future cpu capabilities */ -#define ARM64_WORKAROUND_NO_DMA_ALIAS 75 +/* kabi: reserve 63 - 76 for future cpu capabilities */ #define ARM64_NCAPS 76 #endif /* __ASM_CPUCAPS_H */ diff --git a/arch/arm64/include/asm/page.h b/arch/arm64/include/asm/page.h index ced097af888d..ed1b9dcf12b2 100644 --- a/arch/arm64/include/asm/page.h +++ b/arch/arm64/include/asm/page.h @@ -43,19 +43,6 @@ typedef struct page *pgtable_t; extern int pfn_valid(unsigned long); -#ifdef CONFIG_ARM64_ERRATUM_2454944_DEBUG -#include - -void page_check_nc(struct page *page, int order); - -static inline void arch_free_page(struct page 
*page, int order) -{ - if (cpus_have_const_cap(ARM64_WORKAROUND_NO_DMA_ALIAS)) - page_check_nc(page, order); -} -#define HAVE_ARCH_FREE_PAGE -#endif - #include #endif /* !__ASSEMBLY__ */ diff --git a/arch/arm64/include/asm/vmalloc.h b/arch/arm64/include/asm/vmalloc.h index b5a0614e48c0..2ca708ab9b20 100644 --- a/arch/arm64/include/asm/vmalloc.h +++ b/arch/arm64/include/asm/vmalloc.h @@ -1,8 +1,4 @@ #ifndef _ASM_ARM64_VMALLOC_H #define _ASM_ARM64_VMALLOC_H -#include - -#define arch_disable_lazy_vunmap cpus_have_const_cap(ARM64_WORKAROUND_NO_DMA_ALIAS) - #endif /* _ASM_ARM64_VMALLOC_H */ diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c index d4848a3f88c4..57ece7965bd6 100644 --- a/arch/arm64/kernel/cpu_errata.c +++ b/arch/arm64/kernel/cpu_errata.c @@ -555,14 +555,6 @@ const struct arm64_cpu_capabilities arm64_errata[] = { .capability = ARM64_WORKAROUND_TSB_FLUSH_FAILURE, ERRATA_MIDR_RANGE_LIST(tsb_flush_fail_cpus), }, -#endif -#ifdef CONFIG_ARM64_ERRATUM_2454944 - { - .desc = "ARM erratum 2454944", - .capability = ARM64_WORKAROUND_NO_DMA_ALIAS, - ERRATA_MIDR_RANGE(MIDR_CORTEX_A510, 0, 0, 1, 1), - MIDR_FIXED(MIDR_CPU_VAR_REV(1, 1), BIT(25)), - }, #endif { } diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S index 1431d840d119..047eac4046e0 100644 --- a/arch/arm64/kernel/head.S +++ b/arch/arm64/kernel/head.S @@ -125,13 +125,9 @@ SYM_CODE_END(primary_entry) SYM_CODE_START_LOCAL(preserve_boot_args) mov x21, x0 // x21=FDT - adr_l x0, boot_args -#ifdef CONFIG_ARM64_ERRATUM_2454944 - dc ivac, x0 // Cortex-A510 CWG is 64 bytes, so plenty - dsb sy -#endif - stp x21, x1, [x0] // record the contents of - stp x2, x3, [x0, #16] // x0 .. x3 at kernel entry + adr_l x0, boot_args // record the contents of + stp x21, x1, [x0] // x0 .. 
x3 at kernel entry + stp x2, x3, [x0, #16] dmb sy // needed before dc ivac with // MMU off @@ -286,17 +282,8 @@ SYM_FUNC_START_LOCAL(__create_page_tables) * the kernel image, and thus are clean to the PoC per the boot * protocol. */ -#ifndef CONFIG_ARM64_ERRATUM_2454944 adrp x0, init_pg_dir adrp x1, init_pg_end -#else - /* - * However if we can't even trust "clean" cache lines shadowing rodata, - * then nuke the entire image. It's the only way to be sure. - */ - adrp x0, _text - adrp x1, _end -#endif sub x1, x1, x0 bl __inval_dcache_area @@ -544,10 +531,6 @@ SYM_FUNC_END(init_kernel_el) */ SYM_FUNC_START_LOCAL(set_cpu_boot_mode_flag) adr_l x1, __boot_cpu_mode -#ifdef CONFIG_ARM64_ERRATUM_2454944 - dc ivac, x1 - dsb sy -#endif cmp w0, #BOOT_CPU_MODE_EL2 b.ne 1f add x1, x1, #4 diff --git a/arch/arm64/mm/dma-mapping.c b/arch/arm64/mm/dma-mapping.c index bd46ad81d061..e75e5e75b192 100644 --- a/arch/arm64/mm/dma-mapping.c +++ b/arch/arm64/mm/dma-mapping.c @@ -31,599 +31,7 @@ void arch_dma_prep_coherent(struct page *page, size_t size) __dma_flush_area(page_address(page), size); } -#ifdef CONFIG_ARM64_ERRATUM_2454944 -#include -#include -#include - -#ifdef CONFIG_ARM64_ERRATUM_2454944_DEBUG -#include - -static atomic_t pages_remapped; - -static int __init remap_debugfs_register(void) -{ - debugfs_create_atomic_t("pages_remapped_nc", 0444, NULL, &pages_remapped); - return 0; -} -arch_initcall(remap_debugfs_register); -#endif - -/* - * Nobody should be using these software bits on linear map addresses, right? - * This is categorically the worst, but oh well, needs must... 
- */ -#define REFCOUNT_INC (1UL << 55) -#define PTE_REFCOUNT(pte) (((pte) >> 55) & 0xf) - -static int pte_set_nc(pte_t *ptep, unsigned long addr, void *data) -{ - pteval_t old_pte, new_pte, pte; - unsigned int refcount; - - pte = pte_val(READ_ONCE(*ptep)); - do { - /* Avoid racing against the transient invalid state */ - old_pte = pte | PTE_VALID; - new_pte = old_pte + REFCOUNT_INC; - refcount = PTE_REFCOUNT(pte); - if (WARN_ON(refcount == 15)) - return -EINVAL; - if (refcount == 0) { - new_pte &= ~(PTE_ATTRINDX_MASK | PTE_VALID); - new_pte |= PTE_ATTRINDX(MT_NORMAL_NC); - } - pte = cmpxchg_relaxed(&pte_val(*ptep), old_pte, new_pte); - } while (pte != old_pte); - - *(unsigned int *)data = refcount; - if (refcount) - return 0; - -#ifdef CONFIG_ARM64_ERRATUM_2454944_DEBUG - atomic_inc(&pages_remapped); -#endif - flush_tlb_kernel_range(addr, addr + PAGE_SIZE); - WRITE_ONCE(*ptep, __pte(new_pte | PTE_VALID)); - return 0; -} - -static int pte_clear_nc(pte_t *ptep, unsigned long addr, void *data) -{ - pteval_t old_pte, new_pte, pte; - unsigned int refcount; - - pte = pte_val(READ_ONCE(*ptep)); - do { - old_pte = pte | PTE_VALID; - new_pte = old_pte - REFCOUNT_INC; - refcount = PTE_REFCOUNT(pte); - if (WARN_ON(refcount == 0)) - return -EINVAL; - if (refcount == 1) { - new_pte &= ~(PTE_ATTRINDX_MASK | PTE_VALID); - new_pte |= PTE_ATTRINDX(MT_NORMAL_TAGGED); - } - pte = cmpxchg_relaxed(&pte_val(*ptep), old_pte, new_pte); - } while (pte != old_pte); - - if (refcount > 1) - return 0; - -#ifdef CONFIG_ARM64_ERRATUM_2454944_DEBUG - atomic_dec(&pages_remapped); -#endif - flush_tlb_kernel_range(addr, addr + PAGE_SIZE); - WRITE_ONCE(*ptep, __pte(new_pte | PTE_VALID)); - return 0; -} - -static int set_nc(void *addr, size_t size) -{ - unsigned int count; - int ret = apply_to_existing_page_range(&init_mm, (unsigned long)addr, - size, pte_set_nc, &count); - WARN_RATELIMIT(IS_ENABLED(CONFIG_ARM64_ERRATUM_2454944_DEBUG) && - count == 0 && page_mapped(virt_to_page(addr)), - "changing 
linear mapping but cacheable aliases may still exist\n"); - dsb(ishst); - isb(); - __flush_dcache_area(addr, size); - return ret; -} - -static int clear_nc(void *addr, size_t size) -{ - int ret = apply_to_existing_page_range(&init_mm, (unsigned long)addr, - size, pte_clear_nc, NULL); - dsb(ishst); - isb(); - __inval_dcache_area(addr, size); - return ret; -} - -#ifdef CONFIG_ARM64_ERRATUM_2454944_DEBUG -void page_check_nc(struct page *page, int order) -{ - pgd_t *pgdp, pgd; - p4d_t *p4dp, p4d; - pud_t *pudp, pud; - pmd_t *pmdp, pmd; - pte_t *ptep, pte; - unsigned long addr = (unsigned long)page_address(page); - int i, j; - - pgdp = pgd_offset(&init_mm, addr); - pgd = READ_ONCE(*pgdp); - - p4dp = p4d_offset(pgdp, addr); - p4d = READ_ONCE(*p4dp); - - pudp = pud_offset(p4dp, addr); - pud = READ_ONCE(*pudp); - - pmdp = pmd_offset(pudp, addr); - pmd = READ_ONCE(*pmdp); - - for (i = 0; i < (1 << order); i++) { - ptep = pte_offset_map(pmdp, addr); - pte = READ_ONCE(*ptep); - j = PTE_REFCOUNT(pte_val(pte)); - WARN(j, "Non-Cacheable page leaked! 
I'm fixing it up but it means you have a bug elsewhere"); - while (j--) - pte_clear_nc(ptep, addr, NULL); - addr += PAGE_SIZE; - } -} -#endif /* CONFIG_ARM64_ERRATUM_2454944_DEBUG */ - -static phys_addr_t __arm64_noalias_map(struct device *dev, phys_addr_t phys, - size_t size, enum dma_data_direction dir, - unsigned long attrs, bool bounce) -{ - bounce = bounce || (phys | size) & ~PAGE_MASK; - if (bounce) { - phys = swiotlb_tbl_map_single(dev, phys, size, PAGE_ALIGN(size), - dir, attrs); - if (phys == DMA_MAPPING_ERROR) - return DMA_MAPPING_ERROR; - } - if (set_nc(phys_to_virt(phys & PAGE_MASK), PAGE_ALIGN(size))) - goto out_unmap; - - return phys; -out_unmap: - if (bounce) - swiotlb_tbl_unmap_single(dev, phys, size, PAGE_ALIGN(size), dir, - attrs | DMA_ATTR_SKIP_CPU_SYNC); - return DMA_MAPPING_ERROR; -} - -static void __arm64_noalias_unmap(struct device *dev, phys_addr_t phys, size_t size, - enum dma_data_direction dir, unsigned long attrs) -{ - clear_nc(phys_to_virt(phys & PAGE_MASK), PAGE_ALIGN(size)); - if (is_swiotlb_buffer(phys)) - swiotlb_tbl_unmap_single(dev, phys, size, PAGE_ALIGN(size), dir, attrs); -} - -static void __arm64_noalias_sync_for_device(struct device *dev, phys_addr_t phys, - size_t size, enum dma_data_direction dir) -{ - if (is_swiotlb_buffer(phys)) - swiotlb_tbl_sync_single(dev, phys, size, dir, SYNC_FOR_DEVICE); - else - arch_sync_dma_for_device(phys, size, dir); -} - -static void __arm64_noalias_sync_for_cpu(struct device *dev, phys_addr_t phys, - size_t size, enum dma_data_direction dir) -{ - if (is_swiotlb_buffer(phys)) - swiotlb_tbl_sync_single(dev, phys, size, dir, SYNC_FOR_CPU); - else - arch_sync_dma_for_cpu(phys, size, dir); -} - -static void *arm64_noalias_alloc(struct device *dev, size_t size, - dma_addr_t *dma_addr, gfp_t gfp, unsigned long attrs) -{ - struct page *page; - void *ret; - - if (attrs & DMA_ATTR_NO_WARN) - gfp |= __GFP_NOWARN; - - size = PAGE_ALIGN(size); - page = dma_direct_alloc_pages(dev, size, dma_addr, 0, gfp & 
~__GFP_ZERO); - if (!page) - return NULL; - - ret = page_address(page); - if (set_nc(ret, size)) { - dma_direct_free_pages(dev, size, page, *dma_addr, 0); - return NULL; - } - return ret; -} - -static void arm64_noalias_free(struct device *dev, size_t size, void *cpu_addr, - dma_addr_t dma_addr, unsigned long attrs) -{ - size = PAGE_ALIGN(size); - clear_nc(cpu_addr, size); - dma_direct_free_pages(dev, size, virt_to_page(cpu_addr), dma_addr, 0); -} - -static dma_addr_t arm64_noalias_map_page(struct device *dev, struct page *page, - unsigned long offset, size_t size, - enum dma_data_direction dir, unsigned long attrs) -{ - phys_addr_t phys = page_to_phys(page) + offset; - bool bounce = !dma_capable(dev, phys_to_dma(dev, phys), size, true); - - if (!bounce && dir == DMA_TO_DEVICE) { - arch_sync_dma_for_device(phys, size, dir); - return phys_to_dma(dev, phys); - } - - bounce = bounce || page_mapped(page); - phys = __arm64_noalias_map(dev, phys, size, dir, attrs, bounce); - if (phys == DMA_MAPPING_ERROR) - return DMA_MAPPING_ERROR; - - return phys_to_dma(dev, phys); -} - -static void arm64_noalias_unmap_page(struct device *dev, dma_addr_t dma_addr, - size_t size, enum dma_data_direction dir, - unsigned long attrs) -{ - if (dir == DMA_TO_DEVICE) - return; - __arm64_noalias_unmap(dev, dma_to_phys(dev, dma_addr), size, dir, attrs); -} - -static void arm64_noalias_unmap_sg(struct device *dev, struct scatterlist *sgl, int nents, - enum dma_data_direction dir, unsigned long attrs) -{ - struct scatterlist *sg; - int i; - - if (dir == DMA_TO_DEVICE) - return; - for_each_sg (sgl, sg, nents, i) - __arm64_noalias_unmap(dev, dma_to_phys(dev, sg->dma_address), - sg->length, dir, attrs); -} - -static int arm64_noalias_map_sg(struct device *dev, struct scatterlist *sgl, int nents, - enum dma_data_direction dir, unsigned long attrs) -{ - int i; - struct scatterlist *sg; - - for_each_sg(sgl, sg, nents, i) { - sg->dma_address = arm64_noalias_map_page(dev, sg_page(sg), sg->offset, - 
sg->length, dir, attrs); - if (sg->dma_address == DMA_MAPPING_ERROR) - goto out_unmap; - sg->dma_length = sg->length; - } - - return nents; - -out_unmap: - arm64_noalias_unmap_sg(dev, sgl, i, dir, attrs | DMA_ATTR_SKIP_CPU_SYNC); - return 0; -} - -static void arm64_noalias_sync_single_for_device(struct device *dev, dma_addr_t addr, - size_t size, enum dma_data_direction dir) -{ - __arm64_noalias_sync_for_device(dev, dma_to_phys(dev, addr), size, dir); -} - -static void arm64_noalias_sync_single_for_cpu(struct device *dev, dma_addr_t addr, - size_t size, enum dma_data_direction dir) -{ - __arm64_noalias_sync_for_cpu(dev, dma_to_phys(dev, addr), size, dir); -} - -static void arm64_noalias_sync_sg_for_device(struct device *dev, struct scatterlist *sgl, - int nents, enum dma_data_direction dir) -{ - struct scatterlist *sg; - int i; - - for_each_sg(sgl, sg, nents, i) - arm64_noalias_sync_single_for_device(dev, sg->dma_address, sg->length, dir); -} - -static void arm64_noalias_sync_sg_for_cpu(struct device *dev, struct scatterlist *sgl, - int nents, enum dma_data_direction dir) -{ - struct scatterlist *sg; - int i; - - for_each_sg(sgl, sg, nents, i) - arm64_noalias_sync_single_for_cpu(dev, sg->dma_address, sg->length, dir); -} - -static const struct dma_map_ops arm64_noalias_ops = { - .alloc = arm64_noalias_alloc, - .free = arm64_noalias_free, - .alloc_pages = dma_common_alloc_pages, - .free_pages = dma_common_free_pages, - .mmap = dma_common_mmap, - .get_sgtable = dma_common_get_sgtable, - .map_page = arm64_noalias_map_page, - .unmap_page = arm64_noalias_unmap_page, - .map_sg = arm64_noalias_map_sg, - .unmap_sg = arm64_noalias_unmap_sg, - .sync_single_for_cpu = arm64_noalias_sync_single_for_cpu, - .sync_single_for_device = arm64_noalias_sync_single_for_device, - .sync_sg_for_cpu = arm64_noalias_sync_sg_for_cpu, - .sync_sg_for_device = arm64_noalias_sync_sg_for_device, - .dma_supported = dma_direct_supported, - .get_required_mask = dma_direct_get_required_mask, - 
.max_mapping_size = swiotlb_max_mapping_size, -}; - #ifdef CONFIG_IOMMU_DMA -static const struct dma_map_ops *iommu_dma_ops; - -static void *arm64_iommu_alloc(struct device *dev, size_t size, - dma_addr_t *dma_addr, gfp_t gfp, unsigned long attrs) -{ - struct page **pages; - void *ret; - int i; - - size = PAGE_ALIGN(size); - if (!gfpflags_allow_blocking(gfp) || (attrs & DMA_ATTR_FORCE_CONTIGUOUS)) { - ret = dma_common_alloc_pages(dev, size, dma_addr, 0, gfp); - return ret ? page_address(ret) : NULL; - } - - ret = iommu_dma_ops->alloc(dev, size, dma_addr, gfp, attrs); - if (ret) { - pages = dma_common_find_pages(ret); - for (i = 0; i < size / PAGE_SIZE; i++) - if (set_nc(page_address(pages[i]), PAGE_SIZE)) - goto err; - } - return ret; - -err: - while (i--) - clear_nc(page_address(pages[i]), PAGE_SIZE); - iommu_dma_ops->free(dev, size, ret, *dma_addr, attrs); - return NULL; -} - -static void arm64_iommu_free(struct device *dev, size_t size, void *cpu_addr, - dma_addr_t dma_addr, unsigned long attrs) -{ - struct page **pages = dma_common_find_pages(cpu_addr); - int i; - - size = PAGE_ALIGN(size); - if (!pages) - return dma_common_free_pages(dev, size, virt_to_page(cpu_addr), dma_addr, 0); - - for (i = 0; i < size / PAGE_SIZE; i++) - clear_nc(page_address(pages[i]), PAGE_SIZE); - iommu_dma_ops->free(dev, size, cpu_addr, dma_addr, attrs); -} - -static dma_addr_t arm64_iommu_map_page(struct device *dev, struct page *page, - unsigned long offset, size_t size, - enum dma_data_direction dir, unsigned long attrs) -{ - phys_addr_t phys = page_to_phys(page) + offset; - dma_addr_t ret; - - if (dir == DMA_TO_DEVICE) - return iommu_dma_ops->map_page(dev, page, offset, size, dir, attrs); - - phys = __arm64_noalias_map(dev, phys, size, dir, attrs, page_mapped(page)); - if (phys == DMA_MAPPING_ERROR) - return DMA_MAPPING_ERROR; - - attrs |= DMA_ATTR_SKIP_CPU_SYNC; - ret = iommu_dma_ops->map_page(dev, phys_to_page(phys), offset_in_page(phys), - size, dir, attrs); - if (ret == 
DMA_MAPPING_ERROR) - __arm64_noalias_unmap(dev, phys, size, dir, attrs); - return ret; -} - -static void arm64_iommu_unmap_page(struct device *dev, dma_addr_t addr, size_t size, - enum dma_data_direction dir, unsigned long attrs) -{ - phys_addr_t phys; - - if (dir == DMA_TO_DEVICE) - return iommu_dma_ops->unmap_page(dev, addr, size, dir, attrs); - - phys = iommu_iova_to_phys(iommu_get_dma_domain(dev), addr); - iommu_dma_ops->unmap_page(dev, addr, size, dir, attrs | DMA_ATTR_SKIP_CPU_SYNC); - __arm64_noalias_unmap(dev, phys, size, dir, attrs); -} - -static int arm64_iommu_map_sg(struct device *dev, struct scatterlist *sgl, int nents, - enum dma_data_direction dir, unsigned long attrs) -{ - int i, ret; - struct scatterlist *sg; - phys_addr_t *orig_phys; - - if (dir == DMA_TO_DEVICE) - return iommu_dma_ops->map_sg(dev, sgl, nents, dir, attrs); - - orig_phys = kmalloc_array(nents, sizeof(*orig_phys), GFP_ATOMIC); - if (!orig_phys) - return 0; - - for_each_sg(sgl, sg, nents, i) { - phys_addr_t phys = sg_phys(sg); - /* - * Note we do not have the page_mapped() check here, since - * bouncing plays complete havoc with dma-buf imports. Those - * may well be mapped in userspace, but we hope and pray that - * it's via dma_mmap_attrs() so any such mappings are safely - * non-cacheable. DO NOT allow a block device or other similar - * scatterlist user to get here (disable IOMMUs if necessary), - * since we can't mitigate for both conflicting use-cases. 
- */ - phys = __arm64_noalias_map(dev, phys, sg->length, dir, attrs, false); - if (phys == DMA_MAPPING_ERROR) - goto out_unmap; - - orig_phys[i] = sg_phys(sg); - sg_assign_page(sg, phys_to_page(phys)); - sg->offset = offset_in_page(phys); - } - ret = iommu_dma_ops->map_sg(dev, sgl, nents, dir, attrs | DMA_ATTR_SKIP_CPU_SYNC); - if (ret <= 0) - goto out_unmap; - - for_each_sg(sgl, sg, nents, i) { - sg_assign_page(sg, phys_to_page(orig_phys[i])); - sg->offset = offset_in_page(orig_phys[i]); - } - - kfree(orig_phys); - return ret; - -out_unmap: - for_each_sg(sgl, sg, nents, i) { - __arm64_noalias_unmap(dev, sg_phys(sg), sg->length, dir, attrs); - sg_assign_page(sg, phys_to_page(orig_phys[i])); - sg->offset = offset_in_page(orig_phys[i]); - } - kfree(orig_phys); - return 0; -} - -static void arm64_iommu_unmap_sg(struct device *dev, struct scatterlist *sgl, int nents, - enum dma_data_direction dir, unsigned long attrs) -{ - struct iommu_domain *domain; - struct scatterlist *sg, *tmp; - dma_addr_t iova; - int i; - - if (dir == DMA_TO_DEVICE) - return iommu_dma_ops->unmap_sg(dev, sgl, nents, dir, attrs); - - domain = iommu_get_dma_domain(dev); - iova = sgl->dma_address; - tmp = sgl; - for_each_sg(sgl, sg, nents, i) { - phys_addr_t phys = iommu_iova_to_phys(domain, iova); - - __arm64_noalias_unmap(dev, phys, sg->length, dir, attrs); - iova += sg->length; - if (iova == tmp->dma_address + tmp->dma_length && !sg_is_last(tmp)) { - tmp = sg_next(tmp); - iova = tmp->dma_address; - } - } - iommu_dma_ops->unmap_sg(dev, sgl, nents, dir, attrs | DMA_ATTR_SKIP_CPU_SYNC); -} - -static void arm64_iommu_sync_single_for_device(struct device *dev, dma_addr_t addr, - size_t size, enum dma_data_direction dir) -{ - phys_addr_t phys = iommu_iova_to_phys(iommu_get_dma_domain(dev), addr); - - __arm64_noalias_sync_for_device(dev, phys, size, dir); -} - -static void arm64_iommu_sync_single_for_cpu(struct device *dev, dma_addr_t addr, - size_t size, enum dma_data_direction dir) -{ - phys_addr_t 
phys = iommu_iova_to_phys(iommu_get_dma_domain(dev), addr); - - __arm64_noalias_sync_for_cpu(dev, phys, size, dir); -} - -static void arm64_iommu_sync_sg_for_device(struct device *dev, struct scatterlist *sgl, - int nents, enum dma_data_direction dir) -{ - struct iommu_domain *domain = iommu_get_dma_domain(dev); - struct scatterlist *sg, *tmp = sgl; - dma_addr_t iova = sgl->dma_address; - int i; - - for_each_sg(sgl, sg, nents, i) { - phys_addr_t phys = iommu_iova_to_phys(domain, iova); - - __arm64_noalias_sync_for_device(dev, phys, sg->length, dir); - iova += sg->length; - if (iova == tmp->dma_address + tmp->dma_length && !sg_is_last(tmp)) { - tmp = sg_next(tmp); - iova = tmp->dma_address; - } - } -} - -static void arm64_iommu_sync_sg_for_cpu(struct device *dev, struct scatterlist *sgl, - int nents, enum dma_data_direction dir) -{ - struct iommu_domain *domain = iommu_get_dma_domain(dev); - struct scatterlist *sg, *tmp = sgl; - dma_addr_t iova = sgl->dma_address; - int i; - - for_each_sg(sgl, sg, nents, i) { - phys_addr_t phys = iommu_iova_to_phys(domain, iova); - - __arm64_noalias_sync_for_cpu(dev, phys, sg->length, dir); - iova += sg->length; - if (iova == tmp->dma_address + tmp->dma_length && !sg_is_last(tmp)) { - tmp = sg_next(tmp); - iova = tmp->dma_address; - } - } -} - -static struct dma_map_ops arm64_iommu_ops = { - .alloc = arm64_iommu_alloc, - .free = arm64_iommu_free, - .alloc_pages = dma_common_alloc_pages, - .free_pages = dma_common_free_pages, - .map_page = arm64_iommu_map_page, - .unmap_page = arm64_iommu_unmap_page, - .map_sg = arm64_iommu_map_sg, - .unmap_sg = arm64_iommu_unmap_sg, - .sync_single_for_cpu = arm64_iommu_sync_single_for_cpu, - .sync_single_for_device = arm64_iommu_sync_single_for_device, - .sync_sg_for_cpu = arm64_iommu_sync_sg_for_cpu, - .sync_sg_for_device = arm64_iommu_sync_sg_for_device, -}; - -static void arm64_init_iommu_ops(struct device *dev) -{ - const struct dma_map_ops *ops = dev->dma_ops; - - dev->dma_ops = 
&arm64_iommu_ops; - if (iommu_dma_ops) - return; - - iommu_dma_ops = ops; - arm64_iommu_ops.mmap = ops->mmap; - arm64_iommu_ops.get_sgtable = ops->get_sgtable; - arm64_iommu_ops.map_resource = ops->map_resource; - arm64_iommu_ops.unmap_resource = ops->unmap_resource; - arm64_iommu_ops.get_merge_boundary = ops->get_merge_boundary; -} -#endif /* CONFIG_IOMMU_DMA */ -#endif /* CONFIG_ARM64_ERRATUM_2454944*/ - -#ifdef CONFIG_ARCH_HAS_TEARDOWN_DMA_OPS void arch_teardown_dma_ops(struct device *dev) { dev->dma_ops = NULL; @@ -634,14 +42,6 @@ void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size, const struct iommu_ops *iommu, bool coherent) { int cls = cache_line_size_of_cpu(); -#ifdef CONFIG_ARM64_ERRATUM_2454944 - bool noalias = !coherent && cpus_have_cap(ARM64_WORKAROUND_NO_DMA_ALIAS); - - if (noalias) { - dev_info(dev, "applying no-alias DMA workaround\n"); - dev->dma_ops = &arm64_noalias_ops; - } -#endif WARN_TAINT(!coherent && cls > ARCH_DMA_MINALIGN, TAINT_CPU_OUT_OF_SPEC, @@ -655,10 +55,6 @@ void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size, trace_android_vh_iommu_setup_dma_ops(dev, dma_base, size); trace_android_rvh_iommu_setup_dma_ops(dev, dma_base, size); } -#if defined(CONFIG_ARM64_ERRATUM_2454944) && defined(CONFIG_IOMMU_DMA) - if (noalias && dev->dma_ops != &arm64_noalias_ops) - arm64_init_iommu_ops(dev); -#endif #ifdef CONFIG_XEN if (xen_initial_domain()) diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c index 275f190f0c3d..0cbb63cf955f 100644 --- a/arch/arm64/mm/init.c +++ b/arch/arm64/mm/init.c @@ -581,8 +581,7 @@ static void __init free_unused_memmap(void) void __init mem_init(void) { if (swiotlb_force == SWIOTLB_FORCE || - max_pfn > PFN_DOWN(arm64_dma_phys_limit) || - cpus_have_cap(ARM64_WORKAROUND_NO_DMA_ALIAS)) + max_pfn > PFN_DOWN(arm64_dma_phys_limit)) swiotlb_init(1); else swiotlb_force = SWIOTLB_NO_FORCE; diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index 57383bcbb6cb..cce252206962 100644 --- 
a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -502,7 +502,7 @@ static void __init map_mem(pgd_t *pgdp) u64 i; if (rodata_full || debug_pagealloc_enabled() || - IS_ENABLED(CONFIG_KFENCE) || cpus_have_cap(ARM64_WORKAROUND_NO_DMA_ALIAS)) + IS_ENABLED(CONFIG_KFENCE)) flags = NO_BLOCK_MAPPINGS | NO_CONT_MAPPINGS; /* From dcdb7eadfde5639d57508570d72b873da60e5bb1 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Mon, 21 Nov 2022 12:21:05 +0000 Subject: [PATCH 091/186] Revert "FROMGIT: mm/vmalloc: Add override for lazy vunmap" Revert submission 2302443 Reason for revert: Series is not queued in a maintainer tree and has not been posted to a public mailing list. Reverted Changes: Iffd38bf97:FROMGIT: arm64: Work around Cortex-A510 erratum 24... I694523564:FROMGIT: mm/vmalloc: Add override for lazy vunmap Change-Id: I345e32bac76292413908b4a81295a228003fa4c0 Signed-off-by: Will Deacon --- include/linux/vmalloc.h | 4 ---- mm/vmalloc.c | 3 --- 2 files changed, 7 deletions(-) diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h index 12c55bbec8c0..0549ca17ba6f 100644 --- a/include/linux/vmalloc.h +++ b/include/linux/vmalloc.h @@ -245,8 +245,4 @@ pcpu_free_vm_areas(struct vm_struct **vms, int nr_vms) int register_vmap_purge_notifier(struct notifier_block *nb); int unregister_vmap_purge_notifier(struct notifier_block *nb); -#ifndef arch_disable_lazy_vunmap -#define arch_disable_lazy_vunmap false -#endif - #endif /* _LINUX_VMALLOC_H */ diff --git a/mm/vmalloc.c b/mm/vmalloc.c index 50d49ca35885..817a472ee30f 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -1297,9 +1297,6 @@ static unsigned long lazy_max_pages(void) { unsigned int log; - if (arch_disable_lazy_vunmap) - return 0; - log = fls(num_online_cpus()); return log * (32UL * 1024 * 1024 / PAGE_SIZE); From c7eb32a49fdb0079349e1218187c4a280c837eec Mon Sep 17 00:00:00 2001 From: Luffy Ou Date: Mon, 21 Nov 2022 18:00:23 +0800 Subject: [PATCH 092/186] ANDROID: GKI: Update symbols to symbol list Leaf changes summary: 8 
artifacts changed Changed leaf types summary: 0 leaf type changed Removed/Changed/Added functions summary: 0 Removed, 0 Changed, 8 Added functions Removed/Changed/Added variables summary: 0 Removed, 0 Changed, 0 Added variable 8 Added functions: [A] 'function int __bio_crypt_clone(bio*, bio*, gfp_t)' [A] 'function void address_space_init_once(address_space*)' [A] 'function void bio_crypt_set_ctx(bio*, const blk_crypto_key*, const u64*, gfp_t)' [A] 'function bool blk_mq_queue_inflight(request_queue*)' [A] 'function bool prepare_to_wait_exclusive(wait_queue_head*, wait_queue_entry*, int)' [A] 'function void* radix_tree_delete_item(xarray*, unsigned long int, void*)' [A] 'function void** radix_tree_lookup_slot(const xarray*, unsigned long int)' [A] 'function void radix_tree_replace_slot(xarray*, void**, void*)' Bug: 257892683 Signed-off-by: Luffy Ou Change-Id: I2cc1d97c7b086604b27163cb306e51bad20f5b53 --- android/abi_gki_aarch64.xml | 1219 +++++++++++++++++---------------- android/abi_gki_aarch64_oplus | 8 + 2 files changed, 644 insertions(+), 583 deletions(-) diff --git a/android/abi_gki_aarch64.xml b/android/abi_gki_aarch64.xml index 1b91e08e38ce..51b59f6d7cb4 100644 --- a/android/abi_gki_aarch64.xml +++ b/android/abi_gki_aarch64.xml @@ -18,6 +18,7 @@ + @@ -814,6 +815,7 @@ + @@ -898,6 +900,7 @@ + @@ -953,6 +956,7 @@ + @@ -4276,6 +4280,7 @@ + @@ -4339,14 +4344,17 @@ + + + @@ -22536,222 +22544,222 @@ - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + @@ -26153,7 +26161,7 @@ - + @@ -26459,12 +26467,12 @@ - + - + - + @@ -48528,24 +48536,24 @@ - + - + - + - + - + - + - + @@ -49036,18 +49044,18 @@ - + - + - + - + - + @@ -75803,18 +75811,18 @@ - + - + - + - + - + @@ -91113,6 +91121,7 @@ + @@ -115281,36 +115290,36 @@ - + - + - + - + - + 
- + - + - + - + - + - + @@ -115560,9 +115569,9 @@ - + - + @@ -115576,9 +115585,9 @@ - - - + + + @@ -115620,6 +115629,12 @@ + + + + + + @@ -116912,9 +116927,9 @@ - - - + + + @@ -117131,12 +117146,12 @@ - - + + - - + + @@ -117161,10 +117176,10 @@ - - - - + + + + @@ -120828,12 +120843,12 @@ - - - - - - + + + + + + @@ -120918,6 +120933,10 @@ + + + + @@ -121049,12 +121068,12 @@ - - - - - - + + + + + + @@ -121378,6 +121397,13 @@ + + + + + + + @@ -121598,8 +121624,8 @@ - - + + @@ -121629,8 +121655,8 @@ - - + + @@ -121646,15 +121672,15 @@ - - + + - - - - - + + + + + @@ -121667,6 +121693,10 @@ + + + + @@ -121676,8 +121706,8 @@ - - + + @@ -121685,9 +121715,9 @@ - - - + + + @@ -121714,13 +121744,13 @@ - - - + + + - - + + @@ -121750,9 +121780,9 @@ - - - + + + @@ -121765,10 +121795,10 @@ - - - - + + + + @@ -122815,9 +122845,9 @@ - - - + + + @@ -122854,8 +122884,8 @@ - - + + @@ -122887,14 +122917,14 @@ - - - + + + - - - + + + @@ -122905,9 +122935,9 @@ - - - + + + @@ -123019,8 +123049,8 @@ - - + + @@ -123055,26 +123085,26 @@ - - - - - + + + + + - - - + + + - - + + - - - - + + + + @@ -124561,23 +124591,23 @@ - - - + + + - - + + - - - + + + - - - + + + @@ -125340,9 +125370,9 @@ - - - + + + @@ -125357,9 +125387,9 @@ - - - + + + @@ -125751,10 +125781,10 @@ - - - - + + + + @@ -129567,10 +129597,10 @@ - - - - + + + + @@ -130169,10 +130199,10 @@ - - - - + + + + @@ -130475,32 +130505,32 @@ - - - + + + - - - - + + + + - - - - + + + + - - - + + + - - - - + + + + @@ -136177,53 +136207,53 @@ - - - - + + + + - - - - + + + + - - + + - - - + + + - - - + + + - - + + - - + + - - - + + + - - - + + + - - - + + + @@ -136231,14 +136261,14 @@ - - - + + + - - - + + + @@ -136425,28 +136455,28 @@ - - - + + + - - - + + + - - - + + + - - - + + + - - + + @@ -138699,33 +138729,33 @@ - - - + + + - - - + + + - - - - + + + + - - + + - - + + - - - + + + @@ -138963,6 +138993,12 @@ + + + + + + @@ -139299,6 +139335,12 @@ + + + + + + @@ -139328,6 +139370,11 @@ + + + + + @@ -139342,6 +139389,12 @@ + + + + + + @@ 
-141015,10 +141068,10 @@ - - - - + + + + @@ -142096,16 +142149,16 @@ - - - - + + + + - - - - + + + + @@ -142161,18 +142214,18 @@ - - - - - + + + + + - - - - - + + + + + @@ -142184,8 +142237,8 @@ - - + + @@ -142195,9 +142248,9 @@ - - - + + + @@ -142205,11 +142258,11 @@ - - - - - + + + + + @@ -142226,11 +142279,11 @@ - - - - - + + + + + @@ -142238,12 +142291,12 @@ - - + + - - + + @@ -142252,9 +142305,9 @@ - - - + + + @@ -142268,49 +142321,49 @@ - - - - + + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - + + - - - + + + - - - + + + @@ -142325,33 +142378,33 @@ - - - - - + + + + + - - - - - + + + + + - - - + + + - - - + + + - - - + + + @@ -143895,9 +143948,9 @@ - - - + + + @@ -143942,9 +143995,9 @@ - - - + + + @@ -144555,29 +144608,29 @@ - - + + - - - + + + - - + + - - + + - - + + @@ -144590,36 +144643,36 @@ - - - + + + - - + + - - + + - - + + - - + + - - - - - + + + + + - - + + @@ -145637,26 +145690,26 @@ - - - + + + - - - + + + - - + + - - + + - - + + @@ -145666,13 +145719,13 @@ - - - + + + - - + + @@ -145681,28 +145734,28 @@ - - - + + + - - - + + + - - - + + + - - - + + + - - + + @@ -145710,19 +145763,19 @@ - - - - - + + + + + - - - - - - + + + + + + @@ -145732,9 +145785,9 @@ - - - + + + @@ -145747,12 +145800,12 @@ - - + + - - + + @@ -145760,18 +145813,18 @@ - - + + - - + + - - - - + + + + @@ -145803,91 +145856,91 @@ - - - - - - - + + + + + + + - - - - - - - + + + + + + + - - - - - - - - + + + + + + + + - - - - - - + + + + + + - - - - - - + + + + + + - - - - - - - + + + + + + + - - + + - - + + - - + + - - + + - - + + - - + + - - - - + + + + @@ -148427,11 +148480,11 @@ - - - - - + + + + + @@ -148464,16 +148517,16 @@ - - - - + + + + - - - - + + + + @@ -148810,15 +148863,15 @@ - - - - + + + + - - - + + + @@ -148842,19 +148895,19 @@ - - - + + + - - - - + + + + @@ -148867,13 +148920,13 @@ - - - + + + - - + + @@ -148891,9 +148944,9 @@ - - - + + + @@ -148916,9 +148969,9 @@ - - - + + + @@ -148968,8 +149021,8 @@ - - + + diff --git 
a/android/abi_gki_aarch64_oplus b/android/abi_gki_aarch64_oplus index 4479f29e3b31..ac8c1165135b 100644 --- a/android/abi_gki_aarch64_oplus +++ b/android/abi_gki_aarch64_oplus @@ -5,6 +5,7 @@ add_device_randomness add_memory add_memory_subsection + address_space_init_once add_swap_extent add_taint add_timer @@ -64,6 +65,8 @@ bio_add_pc_page bio_alloc_bioset bio_associate_blkg_from_css + __bio_crypt_clone + bio_crypt_set_ctx bio_endio bio_put bio_reset @@ -89,6 +92,7 @@ blk_execute_rq_nowait blk_get_request blk_mq_free_request + blk_mq_queue_inflight blk_mq_rq_cpu blk_mq_sched_mark_restart_hctx blk_mq_start_request @@ -2006,6 +2010,7 @@ preempt_schedule_notrace prepare_to_wait prepare_to_wait_event + prepare_to_wait_exclusive print_hex_dump printk printk_deferred @@ -2047,11 +2052,14 @@ qcom_smem_state_update_bits queue_delayed_work_on queue_work_on + radix_tree_delete_item radix_tree_gang_lookup radix_tree_insert radix_tree_iter_delete radix_tree_lookup + radix_tree_lookup_slot radix_tree_next_chunk + radix_tree_replace_slot radix_tree_preload ___ratelimit rational_best_approximation From d84fac979544e22591afc13ed40c9355c8b8a6a2 Mon Sep 17 00:00:00 2001 From: Pavankumar Kondeti Date: Wed, 9 Nov 2022 10:48:36 +0530 Subject: [PATCH 093/186] FROMGIT: mm/madvise: fix madvise_pageout for private file mappings When MADV_PAGEOUT is called on a private file mapping VMA region, we bail out early if the process is neither owner nor write capable of the file. However, this VMA may have both private/shared clean pages and private dirty pages. The opportunity of paging out the private dirty pages (Anon pages) is missed. Fix this by caching the file access check and use it later along with PageAnon() during page walk. We observe ~10% improvement in zram usage, thus leaving more available memory on a 4GB RAM system running Android. 
Link: https://lkml.kernel.org/r/1667971116-12900-1-git-send-email-quic_pkondeti@quicinc.com Signed-off-by: Pavankumar Kondeti Cc: Charan Teja Kalla Cc: Minchan Kim Cc: Suren Baghdasaryan Signed-off-by: Andrew Morton (cherry picked from commit 8fc5be8efc3cf356f25098fbd4bda7c0e949c2ea git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm.git mm-unstable) Bug: 259329159 Signed-off-by: Pavankumar Kondeti Change-Id: I5f2d425aec94e5a75ebeaf90f9f5d7adf1975c59 --- mm/madvise.c | 30 +++++++++++++++++++++++------- 1 file changed, 23 insertions(+), 7 deletions(-) diff --git a/mm/madvise.c b/mm/madvise.c index 2758648a60f6..3e431f71fb27 100644 --- a/mm/madvise.c +++ b/mm/madvise.c @@ -38,6 +38,7 @@ struct madvise_walk_private { struct mmu_gather *tlb; bool pageout; + bool can_pageout_file; }; /* @@ -313,6 +314,7 @@ static int madvise_cold_or_pageout_pte_range(pmd_t *pmd, struct madvise_walk_private *private = walk->private; struct mmu_gather *tlb = private->tlb; bool pageout = private->pageout; + bool pageout_anon_only = pageout && !private->can_pageout_file; struct mm_struct *mm = tlb->mm; struct vm_area_struct *vma = walk->vma; pte_t *orig_pte, *pte, ptent; @@ -351,6 +353,9 @@ static int madvise_cold_or_pageout_pte_range(pmd_t *pmd, if (page_mapcount(page) != 1) goto huge_unlock; + if (pageout_anon_only && !PageAnon(page)) + goto huge_unlock; + if (next - addr != HPAGE_PMD_SIZE) { int err; @@ -419,6 +424,8 @@ regular_page: if (PageTransCompound(page)) { if (page_mapcount(page) != 1) break; + if (pageout_anon_only && !PageAnon(page)) + break; get_page(page); if (!trylock_page(page)) { put_page(page); @@ -443,6 +450,9 @@ regular_page: if (!allow_shared && page_mapcount(page) != 1) continue; + if (pageout_anon_only && !PageAnon(page)) + continue; + VM_BUG_ON_PAGE(PageTransCompound(page), page); if (pte_young(ptent)) { @@ -524,11 +534,13 @@ static long madvise_cold(struct vm_area_struct *vma, static void madvise_pageout_page_range(struct mmu_gather *tlb, struct vm_area_struct 
*vma, - unsigned long addr, unsigned long end) + unsigned long addr, unsigned long end, + bool can_pageout_file) { struct madvise_walk_private walk_private = { .pageout = true, .tlb = tlb, + .can_pageout_file = can_pageout_file, }; vm_write_begin(vma); @@ -538,10 +550,8 @@ static void madvise_pageout_page_range(struct mmu_gather *tlb, vm_write_end(vma); } -static inline bool can_do_pageout(struct vm_area_struct *vma) +static inline bool can_do_file_pageout(struct vm_area_struct *vma) { - if (vma_is_anonymous(vma)) - return true; if (!vma->vm_file) return false; /* @@ -560,17 +570,23 @@ static long madvise_pageout(struct vm_area_struct *vma, { struct mm_struct *mm = vma->vm_mm; struct mmu_gather tlb; + bool can_pageout_file; *prev = vma; if (!can_madv_lru_vma(vma)) return -EINVAL; - if (!can_do_pageout(vma)) - return 0; + /* + * If the VMA belongs to a private file mapping, there can be private + * dirty pages which can be paged out if even this process is neither + * owner nor write capable of the file. Cache the file access check + * here and use it later during page walk. + */ + can_pageout_file = can_do_file_pageout(vma); lru_add_drain(); tlb_gather_mmu(&tlb, mm, start_addr, end_addr); - madvise_pageout_page_range(&tlb, vma, start_addr, end_addr); + madvise_pageout_page_range(&tlb, vma, start_addr, end_addr, can_pageout_file); tlb_finish_mmu(&tlb, start_addr, end_addr); return 0; From 1169f70f8f15ea4378ecadb9baba8791824c8b2a Mon Sep 17 00:00:00 2001 From: Suren Baghdasaryan Date: Fri, 18 Nov 2022 15:05:48 -0800 Subject: [PATCH 094/186] ANDROID: mm: skip pte_alloc during speculative page fault Speculative page fault checks pmd to be valid before starting to handle the page fault and pte_alloc() should do nothing if pmd stays valid. If pmd gets changed during speculative page fault, we will detect the change later and retry with mmap_lock. 
Therefore pte_alloc() can be safely skipped and this prevents the racy pmd_lock() call which can access pmd->ptl after pmd was cleared. Bug: 257443051 Change-Id: Iec57df5530dba6e0e0bdf9f7500f910851c3d3fd Signed-off-by: Suren Baghdasaryan --- mm/memory.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/mm/memory.c b/mm/memory.c index 1b768d29f9d2..639b9b09e36e 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -3858,6 +3858,10 @@ static vm_fault_t do_anonymous_page(struct vm_fault *vmf) if (vmf->vma_flags & VM_SHARED) return VM_FAULT_SIGBUS; + /* Do not check unstable pmd, if it's changed will retry later */ + if (vmf->flags & FAULT_FLAG_SPECULATIVE) + goto skip_pmd_checks; + /* * Use pte_alloc() instead of pte_alloc_map(). We can't run * pte_offset_map() on pmds where a huge pmd might be created @@ -3875,6 +3879,7 @@ static vm_fault_t do_anonymous_page(struct vm_fault *vmf) if (unlikely(pmd_trans_unstable(vmf->pmd))) return 0; +skip_pmd_checks: /* Use the zero-page for reads */ if (!(vmf->flags & FAULT_FLAG_WRITE) && !mm_forbids_zeropage(vma->vm_mm)) { From 0560f5f7b3a8f3ecf598e477a3d9d8cac5a5c75d Mon Sep 17 00:00:00 2001 From: Suren Baghdasaryan Date: Fri, 18 Nov 2022 15:20:34 -0800 Subject: [PATCH 095/186] ANDROID: mm: prevent speculative page fault handling for userfaults handle_userfault() should be protected against a concurrent userfaultfd_release(), therefore handling userfaults speculatively without mmap_lock protection should be disallowed.
Bug: 257443051 Signed-off-by: Suren Baghdasaryan Change-Id: Ic6ae39329c73e8849048ea15b5351a49346404d3 --- mm/memory.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/mm/memory.c b/mm/memory.c index 639b9b09e36e..62fba9f3fd67 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -3387,6 +3387,8 @@ static vm_fault_t do_wp_page(struct vm_fault *vmf) if (userfaultfd_pte_wp(vma, *vmf->pte)) { pte_unmap_unlock(vmf->pte, vmf->ptl); + if (vmf->flags & FAULT_FLAG_SPECULATIVE) + return VM_FAULT_RETRY; return handle_userfault(vmf, VM_UFFD_WP); } @@ -5010,7 +5012,7 @@ static vm_fault_t ___handle_speculative_fault(struct mm_struct *mm, vmf.vma_page_prot = READ_ONCE(vmf.vma->vm_page_prot); /* Can't call userland page fault handler in the speculative path */ - if (unlikely(vmf.vma_flags & VM_UFFD_MISSING)) { + if (unlikely(vmf.vma_flags & __VM_UFFD_FLAGS)) { trace_spf_vma_notsup(_RET_IP_, vmf.vma, address); return VM_FAULT_RETRY; } From 4b388752aca20f2588212251ad59d80a2cc5d214 Mon Sep 17 00:00:00 2001 From: Suren Baghdasaryan Date: Fri, 18 Nov 2022 15:23:53 -0800 Subject: [PATCH 096/186] ANDROID: mm: prevent speculative page fault handling for in do_swap_page() do_swap_page() uses migration_entry_wait() which operates on page tables without protection. Disable speculative page fault handling. 
Bug: 257443051 Change-Id: I677eb1ee85707dce533d5d811dcde5f5dabcfdf3 Signed-off-by: Suren Baghdasaryan --- mm/memory.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/mm/memory.c b/mm/memory.c index 62fba9f3fd67..739f0983bb8c 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -3601,6 +3601,11 @@ vm_fault_t do_swap_page(struct vm_fault *vmf) vm_fault_t ret; void *shadow = NULL; + if (vmf->flags & FAULT_FLAG_SPECULATIVE) { + pte_unmap(vmf->pte); + return VM_FAULT_RETRY; + } + ret = pte_unmap_same(vmf); if (ret) { /* From 2bb39b912175c3c087978ae5547e277a8422c601 Mon Sep 17 00:00:00 2001 From: Suren Baghdasaryan Date: Fri, 18 Nov 2022 15:36:13 -0800 Subject: [PATCH 097/186] ANDROID: mm: prevent reads of unstable pmd during speculation Checks of pmd during speculative page fault handling are racy because pmd is unprotected and might be modified or cleared. This might cause use-after-free reads from speculative path, therefore prevent such checks. At the beginning of speculation pmd is checked to be valid and if it's changed before page fault is handled, the change will be detected and page fault will be retried under mmap_lock protection. 
Bug: 257443051 Change-Id: I0cbd3b0b44e8296cf0d6cb298fae48c696580068 Signed-off-by: Suren Baghdasaryan --- mm/filemap.c | 18 ++++-------------- mm/memory.c | 31 +++++++++++++++++++------------ 2 files changed, 23 insertions(+), 26 deletions(-) diff --git a/mm/filemap.c b/mm/filemap.c index 632734e9f8f1..a5a347b8157a 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -2874,11 +2874,6 @@ static bool filemap_map_pmd(struct vm_fault *vmf, struct page *page) } if (pmd_none(*vmf->pmd)) { - if (vmf->flags & FAULT_FLAG_SPECULATIVE) { - unlock_page(page); - put_page(page); - return true; - } vmf->ptl = pmd_lock(mm, vmf->pmd); if (likely(pmd_none(*vmf->pmd))) { mm_inc_nr_ptes(mm); @@ -2976,20 +2971,16 @@ vm_fault_t filemap_map_pages(struct vm_fault *vmf, XA_STATE(xas, &mapping->i_pages, start_pgoff); struct page *head, *page; unsigned int mmap_miss = READ_ONCE(file->f_ra.mmap_miss); - vm_fault_t ret = 0; + vm_fault_t ret = (vmf->flags & FAULT_FLAG_SPECULATIVE) ? + VM_FAULT_RETRY : 0; rcu_read_lock(); head = first_map_page(mapping, &xas, end_pgoff); if (!head) goto out; - if (filemap_map_pmd(vmf, head)) { - if (pmd_none(*vmf->pmd) && - vmf->flags & FAULT_FLAG_SPECULATIVE) { - ret = VM_FAULT_RETRY; - goto out; - } - + if (!(vmf->flags & FAULT_FLAG_SPECULATIVE) && + filemap_map_pmd(vmf, head)) { ret = VM_FAULT_NOPAGE; goto out; } @@ -2998,7 +2989,6 @@ vm_fault_t filemap_map_pages(struct vm_fault *vmf, if (!pte_map_lock_addr(vmf, addr)) { unlock_page(head); put_page(head); - ret = VM_FAULT_RETRY; goto out; } diff --git a/mm/memory.c b/mm/memory.c index 739f0983bb8c..58c05487277c 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -3994,6 +3994,10 @@ static vm_fault_t __do_fault(struct vm_fault *vmf) struct vm_area_struct *vma = vmf->vma; vm_fault_t ret; + /* Do not check unstable pmd, if it's changed will retry later */ + if (vmf->flags & FAULT_FLAG_SPECULATIVE) + goto skip_pmd_checks; + /* * Preallocate pte before we take page_lock because this might lead to * deadlocks for memcg reclaim 
which waits for pages under writeback: @@ -4016,6 +4020,7 @@ static vm_fault_t __do_fault(struct vm_fault *vmf) smp_wmb(); /* See comment in __pte_alloc() */ } +skip_pmd_checks: ret = vma->vm_ops->fault(vmf); if (unlikely(ret & (VM_FAULT_ERROR | VM_FAULT_NOPAGE | VM_FAULT_RETRY | VM_FAULT_DONE_COW))) @@ -4191,7 +4196,11 @@ vm_fault_t finish_fault(struct vm_fault *vmf) return ret; } - if (pmd_none(*vmf->pmd) && !(vmf->flags & FAULT_FLAG_SPECULATIVE)) { + /* Do not check unstable pmd, if it's changed will retry later */ + if (vmf->flags & FAULT_FLAG_SPECULATIVE) + goto skip_pmd_checks; + + if (pmd_none(*vmf->pmd)) { if (PageTransCompound(page)) { ret = do_set_pmd(vmf, page); if (ret != VM_FAULT_FALLBACK) @@ -4218,6 +4227,7 @@ vm_fault_t finish_fault(struct vm_fault *vmf) if (pmd_devmap_trans_unstable(vmf->pmd)) return VM_FAULT_NOPAGE; +skip_pmd_checks: if (!pte_map_lock(vmf)) return VM_FAULT_RETRY; @@ -4317,7 +4327,8 @@ static vm_fault_t do_fault_around(struct vm_fault *vmf) end_pgoff = min3(end_pgoff, vma_pages(vmf->vma) + vmf->vma->vm_pgoff - 1, start_pgoff + nr_pages - 1); - if (pmd_none(*vmf->pmd)) { + if (!(vmf->flags & FAULT_FLAG_SPECULATIVE) && + pmd_none(*vmf->pmd)) { vmf->prealloc_pte = pte_alloc_one(vmf->vma->vm_mm); if (!vmf->prealloc_pte) return VM_FAULT_OOM; @@ -4675,16 +4686,11 @@ static vm_fault_t handle_pte_fault(struct vm_fault *vmf) pte_t entry; vm_fault_t ret = 0; + /* Do not check unstable pmd, if it's changed will retry later */ + if (vmf->flags & FAULT_FLAG_SPECULATIVE) + goto skip_pmd_checks; + if (unlikely(pmd_none(*vmf->pmd))) { - /* - * In the case of the speculative page fault handler we abort - * the speculative path immediately as the pmd is probably - * in the way to be converted in a huge one. We will try - * again holding the mmap_sem (which implies that the collapse - * operation is done). 
- */ - if (vmf->flags & FAULT_FLAG_SPECULATIVE) - return VM_FAULT_RETRY; /* * Leave __pte_alloc() until later: because vm_ops->fault may * want to allocate huge page, and if we expose page table @@ -4692,7 +4698,7 @@ static vm_fault_t handle_pte_fault(struct vm_fault *vmf) * concurrent faults and from rmap lookups. */ vmf->pte = NULL; - } else if (!(vmf->flags & FAULT_FLAG_SPECULATIVE)) { + } else { /* * If a huge pmd materialized under us just retry later. Use * pmd_trans_unstable() via pmd_devmap_trans_unstable() instead @@ -4734,6 +4740,7 @@ static vm_fault_t handle_pte_fault(struct vm_fault *vmf) } } +skip_pmd_checks: if (!vmf->pte) { if (vma_is_anonymous(vmf->vma)) return do_anonymous_page(vmf); From 59d4d125b7d0108b54860ea8584679d514ef07b0 Mon Sep 17 00:00:00 2001 From: Michel Lespinasse Date: Thu, 29 Apr 2021 10:28:25 -0700 Subject: [PATCH 098/186] BACKPORT: FROMLIST: mm: implement speculative handling in filemap_fault() Extend filemap_fault() to handle speculative faults. In the speculative case, we will only be fishing existing pages out of the page cache. The logic we use mirrors what is done in the non-speculative case, assuming that pages are found in the page cache, are up to date and not already locked, and that readahead is not necessary at this time. In all other cases, the fault is aborted to be handled non-speculatively. Signed-off-by: Michel Lespinasse Link: https://lore.kernel.org/all/20210407014502.24091-26-michel@lespinasse.org/ Conflicts: mm/filemap.c 1. Added back file_ra_state variable used by SPF path. 2. Updated comment for filemap_fault to reflect SPF locking rules. 
Bug: 161210518 Signed-off-by: Suren Baghdasaryan Change-Id: I82eba7fcfc81876245c2e65bc5ae3d33ddfcc368 --- mm/filemap.c | 45 ++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 44 insertions(+), 1 deletion(-) diff --git a/mm/filemap.c b/mm/filemap.c index a5a347b8157a..9a42edafb5fb 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -2706,7 +2706,9 @@ static struct file *do_async_mmap_readahead(struct vm_fault *vmf, * it in the page cache, and handles the special cases reasonably without * having a lot of duplicated code. * - * vma->vm_mm->mmap_lock must be held on entry. + * If FAULT_FLAG_SPECULATIVE is set, this function runs with elevated vma + * refcount and with mmap lock not held. + * Otherwise, vma->vm_mm->mmap_lock must be held on entry. * * If our return value has VM_FAULT_RETRY set, it's because the mmap_lock * may be dropped before doing I/O or by lock_page_maybe_drop_mmap(). @@ -2732,6 +2734,47 @@ vm_fault_t filemap_fault(struct vm_fault *vmf) vm_fault_t ret = 0; bool retry = false; + if (vmf->flags & FAULT_FLAG_SPECULATIVE) { + page = find_get_page(mapping, offset); + if (unlikely(!page) || unlikely(PageReadahead(page))) + return VM_FAULT_RETRY; + + if (!trylock_page(page)) + return VM_FAULT_RETRY; + + if (unlikely(compound_head(page)->mapping != mapping)) + goto page_unlock; + VM_BUG_ON_PAGE(page_to_pgoff(page) != offset, page); + if (unlikely(!PageUptodate(page))) + goto page_unlock; + + max_off = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE); + if (unlikely(offset >= max_off)) + goto page_unlock; + + /* + * Update readahead mmap_miss statistic. + * + * Note that we are not sure if finish_fault() will + * manage to complete the transaction. If it fails, + * we'll come back to filemap_fault() non-speculative + * case which will update mmap_miss a second time. + * This is not ideal, we would prefer to guarantee the + * update will happen exactly once. 
+ */ + if (!(vmf->vma->vm_flags & VM_RAND_READ) && ra->ra_pages) { + unsigned int mmap_miss = READ_ONCE(ra->mmap_miss); + if (mmap_miss) + WRITE_ONCE(ra->mmap_miss, --mmap_miss); + } + + vmf->page = page; + return VM_FAULT_LOCKED; +page_unlock: + unlock_page(page); + return VM_FAULT_RETRY; + } + max_off = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE); if (unlikely(offset >= max_off)) return VM_FAULT_SIGBUS; From 5ed391bd8ad8481d82c1bbb05a35f5538966dce9 Mon Sep 17 00:00:00 2001 From: Suren Baghdasaryan Date: Mon, 21 Nov 2022 12:15:43 -0800 Subject: [PATCH 099/186] ANDROID: mm/khugepaged: add missing vm_write_{begin|end} Speculative page fault handler needs to detect concurrent pmd changes and relies on vma seqcount for that. pmdp_collapse_flush(), set_huge_pmd() and collapse_and_free_pmd() can modify a pmd. vm_write_{begin|end} are needed in the paths which can call these functions for page fault handler to detect pmd changes. Bug: 257443051 Change-Id: Ieb784b5f44901b66a594f61b9e7c91190ff97f80 Signed-off-by: Suren Baghdasaryan --- mm/khugepaged.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/mm/khugepaged.c b/mm/khugepaged.c index 8008e6c2714e..87be82c20290 100644 --- a/mm/khugepaged.c +++ b/mm/khugepaged.c @@ -1472,6 +1472,7 @@ void collapse_pte_mapped_thp(struct mm_struct *mm, unsigned long addr) if (!pmd) goto drop_hpage; + vm_write_begin(vma); start_pte = pte_offset_map_lock(mm, pmd, haddr, &ptl); /* step 1: check all mapped PTEs are to the right huge page */ @@ -1521,6 +1522,7 @@ void collapse_pte_mapped_thp(struct mm_struct *mm, unsigned long addr) ptl = pmd_lock(vma->vm_mm, pmd); _pmd = pmdp_collapse_flush(vma, haddr, pmd); spin_unlock(ptl); + vm_write_end(vma); mm_dec_nr_ptes(mm); pte_free(mm, pmd_pgtable(_pmd)); @@ -1531,6 +1533,7 @@ drop_hpage: abort: pte_unmap_unlock(start_pte, ptl); + vm_write_end(vma); goto drop_hpage; } @@ -1602,10 +1605,12 @@ static void retract_page_tables(struct address_space *mapping, pgoff_t pgoff) */ if 
(mmap_write_trylock(mm)) { if (!khugepaged_test_exit(mm)) { + vm_write_begin(vma); spinlock_t *ptl = pmd_lock(mm, pmd); /* assume page table is clear */ _pmd = pmdp_collapse_flush(vma, addr, pmd); spin_unlock(ptl); + vm_write_end(vma); mm_dec_nr_ptes(mm); pte_free(mm, pmd_pgtable(_pmd)); } From bfdcf47ca34dc3b7b63ca16b0a1856e57c57ee47 Mon Sep 17 00:00:00 2001 From: Suren Baghdasaryan Date: Tue, 15 Nov 2022 10:38:43 -0800 Subject: [PATCH 100/186] ANDROID: mm: remove sequence counting when mmap_lock is not exclusively owned In a number of cases vm_write_{begin|end} is called while mmap_lock is not owned exclusively. This is unnecessary and can affect correctness of the sequence counting protecting speculative page fault handlers. Remove extra calls. Bug: 257443051 Change-Id: I1278638a0794448e22fbdab5601212b3b2eaebdc Signed-off-by: Suren Baghdasaryan --- mm/madvise.c | 6 ------ mm/memory.c | 2 -- mm/mempolicy.c | 2 -- 3 files changed, 10 deletions(-) diff --git a/mm/madvise.c b/mm/madvise.c index 3e431f71fb27..01d6a36af0ea 100644 --- a/mm/madvise.c +++ b/mm/madvise.c @@ -506,11 +506,9 @@ static void madvise_cold_page_range(struct mmu_gather *tlb, .tlb = tlb, }; - vm_write_begin(vma); tlb_start_vma(tlb, vma); walk_page_range(vma->vm_mm, addr, end, &cold_walk_ops, &walk_private); tlb_end_vma(tlb, vma); - vm_write_end(vma); } static long madvise_cold(struct vm_area_struct *vma, @@ -543,11 +541,9 @@ static void madvise_pageout_page_range(struct mmu_gather *tlb, .can_pageout_file = can_pageout_file, }; - vm_write_begin(vma); tlb_start_vma(tlb, vma); walk_page_range(vma->vm_mm, addr, end, &cold_walk_ops, &walk_private); tlb_end_vma(tlb, vma); - vm_write_end(vma); } static inline bool can_do_file_pageout(struct vm_area_struct *vma) @@ -754,12 +750,10 @@ static int madvise_free_single_vma(struct vm_area_struct *vma, update_hiwater_rss(mm); mmu_notifier_invalidate_range_start(&range); - vm_write_begin(vma); tlb_start_vma(&tlb, vma); walk_page_range(vma->vm_mm, range.start, 
range.end, &madvise_free_walk_ops, &tlb); tlb_end_vma(&tlb, vma); - vm_write_end(vma); mmu_notifier_invalidate_range_end(&range); tlb_finish_mmu(&tlb, range.start, range.end); diff --git a/mm/memory.c b/mm/memory.c index 58c05487277c..918ed4f45d1f 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -1495,7 +1495,6 @@ void unmap_page_range(struct mmu_gather *tlb, unsigned long next; BUG_ON(addr >= end); - vm_write_begin(vma); tlb_start_vma(tlb, vma); pgd = pgd_offset(vma->vm_mm, addr); do { @@ -1505,7 +1504,6 @@ void unmap_page_range(struct mmu_gather *tlb, next = zap_p4d_range(tlb, vma, pgd, addr, next, details); } while (pgd++, addr = next, addr != end); tlb_end_vma(tlb, vma); - vm_write_end(vma); } diff --git a/mm/mempolicy.c b/mm/mempolicy.c index c4c1c7bf51d7..142929e67a97 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c @@ -657,11 +657,9 @@ unsigned long change_prot_numa(struct vm_area_struct *vma, { int nr_updated; - vm_write_begin(vma); nr_updated = change_protection(vma, addr, end, PAGE_NONE, MM_CP_PROT_NUMA); if (nr_updated) count_vm_numa_events(NUMA_PTE_UPDATES, nr_updated); - vm_write_end(vma); return nr_updated; } From d65d4a0538c3511eb02fed4b628e3588715c90d8 Mon Sep 17 00:00:00 2001 From: Suren Baghdasaryan Date: Tue, 15 Nov 2022 10:40:41 -0800 Subject: [PATCH 101/186] ANDROID: mm: assert that mmap_lock is taken exclusively in vm_write_begin vm_write_{begin|end} has to be called when mmap_lock is taken exclusively. Add an assert statement in vm_write_begin to enforce that. free_pgtables can free page tables without exclusive mmap_lock if the vma was isolated, therefore avoid assertions in such cases.
Bug: 257443051 Change-Id: Ie81aefe025c743cda6f66717d2f08f4d78a55608 Signed-off-by: Suren Baghdasaryan --- include/linux/mm.h | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/include/linux/mm.h b/include/linux/mm.h index 1a64ab7ba67a..ba42e189790e 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1758,6 +1758,12 @@ int generic_access_phys(struct vm_area_struct *vma, unsigned long addr, #ifdef CONFIG_SPECULATIVE_PAGE_FAULT static inline void vm_write_begin(struct vm_area_struct *vma) { + /* + * Isolated vma might be freed without exclusive mmap_lock but + * speculative page fault handler still needs to know it was changed. + */ + if (!RB_EMPTY_NODE(&vma->vm_rb)) + mmap_assert_write_locked(vma->vm_mm); /* * The reads never spins and preemption * disablement is not required. From 0f43357d37e4451cdc12a60895b6e4929a87adf7 Mon Sep 17 00:00:00 2001 From: Suren Baghdasaryan Date: Fri, 18 Nov 2022 17:06:03 -0800 Subject: [PATCH 102/186] ANDROID: disable page table moves when speculative page faults are enabled move_page_tables() can move entire pmd or pud without locking individual ptes. This is problematic for speculative page faults which do not take mmap_lock because they rely on ptl lock when writing new pte value. To avoid possible race, disable move_page_tables() optimization when CONFIG_SPECULATIVE_PAGE_FAULT is enabled. Bug: 257443051 Change-Id: Ib48dda08ecad1abc60d08fc089a6566a63393c13 Signed-off-by: Suren Baghdasaryan --- mm/mremap.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/mm/mremap.c b/mm/mremap.c index 07f7c869a38f..5a18cec23fa7 100644 --- a/mm/mremap.c +++ b/mm/mremap.c @@ -210,7 +210,11 @@ static void move_ptes(struct vm_area_struct *vma, pmd_t *old_pmd, drop_rmap_locks(vma); } -#ifdef CONFIG_HAVE_MOVE_PMD +/* + * Speculative page fault handlers will not detect page table changes done + * without ptl locking. 
+ */ +#if defined(CONFIG_HAVE_MOVE_PMD) && !defined(CONFIG_SPECULATIVE_PAGE_FAULT) static bool move_normal_pmd(struct vm_area_struct *vma, unsigned long old_addr, unsigned long new_addr, pmd_t *old_pmd, pmd_t *new_pmd) { @@ -277,7 +281,11 @@ static inline bool move_normal_pmd(struct vm_area_struct *vma, } #endif -#ifdef CONFIG_HAVE_MOVE_PUD +/* + * Speculative page fault handlers will not detect page table changes done + * without ptl locking. + */ +#if defined(CONFIG_HAVE_MOVE_PUD) && !defined(CONFIG_SPECULATIVE_PAGE_FAULT) static bool move_normal_pud(struct vm_area_struct *vma, unsigned long old_addr, unsigned long new_addr, pud_t *old_pud, pud_t *new_pud) { From 1900436df5d947c2ee74bd78cde1366556c93b51 Mon Sep 17 00:00:00 2001 From: Suren Baghdasaryan Date: Tue, 22 Nov 2022 10:51:25 -0800 Subject: [PATCH 103/186] ANDROID: mm: fix invalid backport in speculative page fault path Invalid condition was introduced when porting the original SPF patch which would affect NUMA mode. Fixes: 736ae8bde8da3 ("FROMLIST: mm: adding speculative page fault failure trace events") Bug: 257443051 Change-Id: Ib20c625615b279dc467588933a1f598dc179861b Signed-off-by: Suren Baghdasaryan --- mm/memory.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/mm/memory.c b/mm/memory.c index 918ed4f45d1f..e37dfad032ed 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -5064,11 +5064,10 @@ static vm_fault_t ___handle_speculative_fault(struct mm_struct *mm, pol = __get_vma_policy(vmf.vma, address); if (!pol) pol = get_task_policy(current); - if (!pol) - if (pol && pol->mode == MPOL_INTERLEAVE) { - trace_spf_vma_notsup(_RET_IP_, vmf.vma, address); - return VM_FAULT_RETRY; - } + if (pol && pol->mode == MPOL_INTERLEAVE) { + trace_spf_vma_notsup(_RET_IP_, vmf.vma, address); + return VM_FAULT_RETRY; + } #endif /* From 1c828eb3dad11730aa8881aa9e462aee8e603289 Mon Sep 17 00:00:00 2001 From: Suren Baghdasaryan Date: Tue, 22 Nov 2022 11:37:46 -0800 Subject: [PATCH 104/186] ANDROID: mm: 
disable speculative page faults for CONFIG_NUMA NUMA support with speculative page faults might be broken if vma_replace_policy() replaces the mempolicy object used in do_anonymous_page() -> alloc_zeroed_user_highpage_movable() -> alloc_page_vma() -> alloc_pages_vma() -> get_vma_policy(). __get_vma_policy() in the speculative path does not always refcount the mempolicy object, and therefore can't be relied on to stabilize it. Rather than fixing this, just disable speculation for CONFIG_NUMA for now and fix it if it's ever needed in Android. Bug: 257443051 Change-Id: Ib5750b9809979a69a42ebfa6c130e123f416f1aa Signed-off-by: Suren Baghdasaryan --- mm/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/Kconfig b/mm/Kconfig index 6a020a0d7bc5..03bfe7bd8183 100644 --- a/mm/Kconfig +++ b/mm/Kconfig @@ -843,7 +843,7 @@ config SPECULATIVE_PAGE_FAULT bool "Speculative page faults" default y depends on ARCH_SUPPORTS_SPECULATIVE_PAGE_FAULT - depends on MMU && SMP + depends on MMU && SMP && !NUMA help Try to handle user space page faults without holding the mmap_sem. From aaf03dd58cbef07e89dacd0b3784bcbda99691c7 Mon Sep 17 00:00:00 2001 From: Suren Baghdasaryan Date: Sun, 27 Nov 2022 09:18:39 -0800 Subject: [PATCH 105/186] ANDROID: mm: fix build issue in spf when CONFIG_USERFAULTFD=n When CONFIG_USERFAULTFD=n __VM_UFFD_FLAGS mask is undefined and produces a build error. Fix it by making the check conditional on CONFIG_USERFAULTFD.
Fixes: ("ANDROID: mm: prevent speculative page fault handling for userfaults") Bug: 257443051 Change-Id: Ie9ff98b840032eb18183b49e3566cf178359948f Signed-off-by: Suren Baghdasaryan --- mm/memory.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/mm/memory.c b/mm/memory.c index e37dfad032ed..cdd492d7f980 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -5021,11 +5021,13 @@ static vm_fault_t ___handle_speculative_fault(struct mm_struct *mm, vmf.vma_flags = READ_ONCE(vmf.vma->vm_flags); vmf.vma_page_prot = READ_ONCE(vmf.vma->vm_page_prot); +#ifdef CONFIG_USERFAULTFD /* Can't call userland page fault handler in the speculative path */ if (unlikely(vmf.vma_flags & __VM_UFFD_FLAGS)) { trace_spf_vma_notsup(_RET_IP_, vmf.vma, address); return VM_FAULT_RETRY; } +#endif if (vmf.vma_flags & VM_GROWSDOWN || vmf.vma_flags & VM_GROWSUP) { /* From d95f5e3da7848cdaf2bd14217d9772938d41f991 Mon Sep 17 00:00:00 2001 From: Suren Baghdasaryan Date: Sun, 27 Nov 2022 10:00:18 -0800 Subject: [PATCH 106/186] ANDROID: khugepaged: fix mixing declarations warning in retract_page_tables vm_write_begin() was added before variable definition, producing a "mixing declarations and code is a C99 extension" warning. Fix by rearranging the code. 
Fixes: ("ANDROID: mm/khugepaged: add missing vm_write_{begin|end}") Bug: 257443051 Change-Id: I6e85ccfabd5e37b1397c654d61d0b8177326c3d8 Signed-off-by: Suren Baghdasaryan --- mm/khugepaged.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/mm/khugepaged.c b/mm/khugepaged.c index 87be82c20290..b40bd0efd37e 100644 --- a/mm/khugepaged.c +++ b/mm/khugepaged.c @@ -1605,8 +1605,9 @@ static void retract_page_tables(struct address_space *mapping, pgoff_t pgoff) */ if (mmap_write_trylock(mm)) { if (!khugepaged_test_exit(mm)) { + spinlock_t *ptl; vm_write_begin(vma); - spinlock_t *ptl = pmd_lock(mm, pmd); + ptl = pmd_lock(mm, pmd); /* assume page table is clear */ _pmd = pmdp_collapse_flush(vma, addr, pmd); spin_unlock(ptl); From 92a864d9db5dcef1ed29900e7954082f2c8d6f5c Mon Sep 17 00:00:00 2001 From: Todd Kjos Date: Tue, 30 Nov 2021 10:51:50 -0800 Subject: [PATCH 107/186] UPSTREAM: binder: avoid potential data leakage when copying txn Transactions are copied from the sender to the target first and objects like BINDER_TYPE_PTR and BINDER_TYPE_FDA are then fixed up. This means there is a short period where the sender's version of these objects are visible to the target prior to the fixups. Instead of copying all of the data first, copy data only after any needed fixups have been applied. 
Fixes: 457b9a6f09f0 ("Staging: android: add binder driver") Reviewed-by: Martijn Coenen Acked-by: Christian Brauner Signed-off-by: Todd Kjos Link: https://lore.kernel.org/r/20211130185152.437403-3-tkjos@google.com Signed-off-by: Greg Kroah-Hartman Bug: 137131904 Bug: 257685302 (cherry picked from commit 6d98eb95b450a75adb4516a1d33652dc78d2b20c) Change-Id: I8c14a03a2ee23c5f060c82e1626686f72eff33d9 Signed-off-by: Carlos Llamas --- drivers/android/binder.c | 94 ++++++++++++++++++++++++++++++---------- 1 file changed, 70 insertions(+), 24 deletions(-) diff --git a/drivers/android/binder.c b/drivers/android/binder.c index e2f6da3332a6..1b5092c54c3a 100644 --- a/drivers/android/binder.c +++ b/drivers/android/binder.c @@ -1774,15 +1774,21 @@ static void binder_cleanup_transaction(struct binder_transaction *t, /** * binder_get_object() - gets object and checks for valid metadata * @proc: binder_proc owning the buffer + * @u: sender's user pointer to base of buffer * @buffer: binder_buffer that we're parsing. * @offset: offset in the @buffer at which to validate an object. * @object: struct binder_object to read into * - * Return: If there's a valid metadata object at @offset in @buffer, the + * Copy the binder object at the given offset into @object. If @u is + * provided then the copy is from the sender's buffer. If not, then + * it is copied from the target's @buffer. + * + * Return: If there's a valid metadata object at @offset, the * size of that object. Otherwise, it returns zero. The object * is read into the struct binder_object pointed to by @object. 
*/ static size_t binder_get_object(struct binder_proc *proc, + const void __user *u, struct binder_buffer *buffer, unsigned long offset, struct binder_object *object) @@ -1792,10 +1798,16 @@ static size_t binder_get_object(struct binder_proc *proc, size_t object_size = 0; read_size = min_t(size_t, sizeof(*object), buffer->data_size - offset); - if (offset > buffer->data_size || read_size < sizeof(*hdr) || - binder_alloc_copy_from_buffer(&proc->alloc, object, buffer, - offset, read_size)) + if (offset > buffer->data_size || read_size < sizeof(*hdr)) return 0; + if (u) { + if (copy_from_user(object, u + offset, read_size)) + return 0; + } else { + if (binder_alloc_copy_from_buffer(&proc->alloc, object, buffer, + offset, read_size)) + return 0; + } /* Ok, now see if we read a complete object. */ hdr = &object->hdr; @@ -1868,7 +1880,7 @@ static struct binder_buffer_object *binder_validate_ptr( b, buffer_offset, sizeof(object_offset))) return NULL; - object_size = binder_get_object(proc, b, object_offset, object); + object_size = binder_get_object(proc, NULL, b, object_offset, object); if (!object_size || object->hdr.type != BINDER_TYPE_PTR) return NULL; if (object_offsetp) @@ -1933,7 +1945,8 @@ static bool binder_validate_fixup(struct binder_proc *proc, unsigned long buffer_offset; struct binder_object last_object; struct binder_buffer_object *last_bbo; - size_t object_size = binder_get_object(proc, b, last_obj_offset, + size_t object_size = binder_get_object(proc, NULL, b, + last_obj_offset, &last_object); if (object_size != sizeof(*last_bbo)) return false; @@ -2048,7 +2061,7 @@ static void binder_transaction_buffer_release(struct binder_proc *proc, if (!binder_alloc_copy_from_buffer(&proc->alloc, &object_offset, buffer, buffer_offset, sizeof(object_offset))) - object_size = binder_get_object(proc, buffer, + object_size = binder_get_object(proc, NULL, buffer, object_offset, &object); if (object_size == 0) { pr_err("transaction release %d bad object at offset %lld, 
size %zd\n", @@ -2715,6 +2728,7 @@ static void binder_transaction(struct binder_proc *proc, binder_size_t off_start_offset, off_end_offset; binder_size_t off_min; binder_size_t sg_buf_offset, sg_buf_end_offset; + binder_size_t user_offset = 0; struct binder_proc *target_proc = NULL; struct binder_thread *target_thread = NULL; struct binder_node *target_node = NULL; @@ -2729,6 +2743,8 @@ static void binder_transaction(struct binder_proc *proc, int t_debug_id = atomic_inc_return(&binder_last_id); char *secctx = NULL; u32 secctx_sz = 0; + const void __user *user_buffer = (const void __user *) + (uintptr_t)tr->data.ptr.buffer; e = binder_transaction_log_add(&binder_transaction_log); e->debug_id = t_debug_id; @@ -3064,19 +3080,6 @@ static void binder_transaction(struct binder_proc *proc, t->buffer->clear_on_free = !!(t->flags & TF_CLEAR_BUF); trace_binder_transaction_alloc_buf(t->buffer); - if (binder_alloc_copy_user_to_buffer( - &target_proc->alloc, - t->buffer, 0, - (const void __user *) - (uintptr_t)tr->data.ptr.buffer, - tr->data_size)) { - binder_user_error("%d:%d got transaction with invalid data ptr\n", - proc->pid, thread->pid); - return_error = BR_FAILED_REPLY; - return_error_param = -EFAULT; - return_error_line = __LINE__; - goto err_copy_data_failed; - } if (binder_alloc_copy_user_to_buffer( &target_proc->alloc, t->buffer, @@ -3121,6 +3124,7 @@ static void binder_transaction(struct binder_proc *proc, size_t object_size; struct binder_object object; binder_size_t object_offset; + binder_size_t copy_size; if (binder_alloc_copy_from_buffer(&target_proc->alloc, &object_offset, @@ -3132,8 +3136,27 @@ static void binder_transaction(struct binder_proc *proc, return_error_line = __LINE__; goto err_bad_offset; } - object_size = binder_get_object(target_proc, t->buffer, - object_offset, &object); + + /* + * Copy the source user buffer up to the next object + * that will be processed. 
+ */ + copy_size = object_offset - user_offset; + if (copy_size && (user_offset > object_offset || + binder_alloc_copy_user_to_buffer( + &target_proc->alloc, + t->buffer, user_offset, + user_buffer + user_offset, + copy_size))) { + binder_user_error("%d:%d got transaction with invalid data ptr\n", + proc->pid, thread->pid); + return_error = BR_FAILED_REPLY; + return_error_param = -EFAULT; + return_error_line = __LINE__; + goto err_copy_data_failed; + } + object_size = binder_get_object(target_proc, user_buffer, + t->buffer, object_offset, &object); if (object_size == 0 || object_offset < off_min) { binder_user_error("%d:%d got transaction with invalid offset (%lld, min %lld max %lld) or object.\n", proc->pid, thread->pid, @@ -3145,6 +3168,11 @@ static void binder_transaction(struct binder_proc *proc, return_error_line = __LINE__; goto err_bad_offset; } + /* + * Set offset to the next buffer fragment to be + * copied + */ + user_offset = object_offset + object_size; hdr = &object.hdr; off_min = object_offset + object_size; @@ -3240,9 +3268,14 @@ static void binder_transaction(struct binder_proc *proc, } ret = binder_translate_fd_array(fda, parent, t, thread, in_reply_to); - if (ret < 0) { + if (!ret) + ret = binder_alloc_copy_to_buffer(&target_proc->alloc, + t->buffer, + object_offset, + fda, sizeof(*fda)); + if (ret) { return_error = BR_FAILED_REPLY; - return_error_param = ret; + return_error_param = ret > 0 ? 
-EINVAL : ret; return_error_line = __LINE__; goto err_translate_failed; } @@ -3312,6 +3345,19 @@ static void binder_transaction(struct binder_proc *proc, goto err_bad_object_type; } } + /* Done processing objects, copy the rest of the buffer */ + if (binder_alloc_copy_user_to_buffer( + &target_proc->alloc, + t->buffer, user_offset, + user_buffer + user_offset, + tr->data_size - user_offset)) { + binder_user_error("%d:%d got transaction with invalid data ptr\n", + proc->pid, thread->pid); + return_error = BR_FAILED_REPLY; + return_error_param = -EFAULT; + return_error_line = __LINE__; + goto err_copy_data_failed; + } if (t->buffer->oneway_spam_suspect) tcomplete->type = BINDER_WORK_TRANSACTION_ONEWAY_SPAM_SUSPECT; else From 58fd89db0dbf4394f069aa40fe3cc65f02877ca2 Mon Sep 17 00:00:00 2001 From: Todd Kjos Date: Tue, 30 Nov 2021 10:51:51 -0800 Subject: [PATCH 108/186] UPSTREAM: binder: read pre-translated fds from sender buffer This patch is to prepare for an upcoming patch where we read pre-translated fds from the sender buffer and translate them before copying them to the target. It does not change runtime behavior. The patch adds two new parameters to binder_translate_fd_array() to hold the sender buffer and sender buffer parent. These parameters let us call copy_from_user() directly from the sender instead of using binder_alloc_copy_from_buffer() to copy from the target. Also the patch adds some new alignment checks. Previously the alignment checks would have been done in a different place, but this lets us print more useful error messages.
Reviewed-by: Martijn Coenen Acked-by: Christian Brauner Signed-off-by: Todd Kjos Link: https://lore.kernel.org/r/20211130185152.437403-4-tkjos@google.com Signed-off-by: Greg Kroah-Hartman Bug: 137131904 Bug: 257685302 (cherry picked from commit 656e01f3ab54afe71bed066996fc2640881e1220) Change-Id: Ib786020e49bd33e35aec88d43965f9d98021fa53 Signed-off-by: Carlos Llamas --- drivers/android/binder.c | 39 ++++++++++++++++++++++++++++++++------- 1 file changed, 32 insertions(+), 7 deletions(-) diff --git a/drivers/android/binder.c b/drivers/android/binder.c index 1b5092c54c3a..3789e187c20c 100644 --- a/drivers/android/binder.c +++ b/drivers/android/binder.c @@ -2403,15 +2403,17 @@ err_fd_not_accepted: } static int binder_translate_fd_array(struct binder_fd_array_object *fda, + const void __user *sender_ubuffer, struct binder_buffer_object *parent, + struct binder_buffer_object *sender_uparent, struct binder_transaction *t, struct binder_thread *thread, struct binder_transaction *in_reply_to) { binder_size_t fdi, fd_buf_size; binder_size_t fda_offset; + const void __user *sender_ufda_base; struct binder_proc *proc = thread->proc; - struct binder_proc *target_proc = t->to_proc; fd_buf_size = sizeof(u32) * fda->num_fds; if (fda->num_fds >= SIZE_MAX / sizeof(u32)) { @@ -2435,7 +2437,10 @@ static int binder_translate_fd_array(struct binder_fd_array_object *fda, */ fda_offset = (parent->buffer - (uintptr_t)t->buffer->user_data) + fda->parent_offset; - if (!IS_ALIGNED((unsigned long)fda_offset, sizeof(u32))) { + sender_ufda_base = (void __user *)sender_uparent->buffer + fda->parent_offset; + + if (!IS_ALIGNED((unsigned long)fda_offset, sizeof(u32)) || + !IS_ALIGNED((unsigned long)sender_ufda_base, sizeof(u32))) { binder_user_error("%d:%d parent offset not aligned correctly.\n", proc->pid, thread->pid); return -EINVAL; @@ -2444,10 +2449,9 @@ static int binder_translate_fd_array(struct binder_fd_array_object *fda, u32 fd; int ret; binder_size_t offset = fda_offset + fdi * 
sizeof(fd); + binder_size_t sender_uoffset = fdi * sizeof(fd); - ret = binder_alloc_copy_from_buffer(&target_proc->alloc, - &fd, t->buffer, - offset, sizeof(fd)); + ret = copy_from_user(&fd, sender_ufda_base + sender_uoffset, sizeof(fd)); if (!ret) ret = binder_translate_fd(fd, offset, t, thread, in_reply_to); @@ -3235,6 +3239,8 @@ static void binder_transaction(struct binder_proc *proc, case BINDER_TYPE_FDA: { struct binder_object ptr_object; binder_size_t parent_offset; + struct binder_object user_object; + size_t user_parent_size; struct binder_fd_array_object *fda = to_binder_fd_array_object(hdr); size_t num_valid = (buffer_offset - off_start_offset) / @@ -3266,8 +3272,27 @@ static void binder_transaction(struct binder_proc *proc, return_error_line = __LINE__; goto err_bad_parent; } - ret = binder_translate_fd_array(fda, parent, t, thread, - in_reply_to); + /* + * We need to read the user version of the parent + * object to get the original user offset + */ + user_parent_size = + binder_get_object(proc, user_buffer, t->buffer, + parent_offset, &user_object); + if (user_parent_size != sizeof(user_object.bbo)) { + binder_user_error("%d:%d invalid ptr object size: %zd vs %zd\n", + proc->pid, thread->pid, + user_parent_size, + sizeof(user_object.bbo)); + return_error = BR_FAILED_REPLY; + return_error_param = -EINVAL; + return_error_line = __LINE__; + goto err_bad_parent; + } + ret = binder_translate_fd_array(fda, user_buffer, + parent, + &user_object.bbo, t, + thread, in_reply_to); if (!ret) ret = binder_alloc_copy_to_buffer(&target_proc->alloc, t->buffer, From 3e7e7f4b103673bad73dabe1ec9bec8306e76443 Mon Sep 17 00:00:00 2001 From: Todd Kjos Date: Tue, 30 Nov 2021 10:51:52 -0800 Subject: [PATCH 109/186] UPSTREAM: binder: defer copies of pre-patched txn data BINDER_TYPE_PTR objects point to memory areas in the source process to be copied into the target buffer as part of a transaction. 
This implements a scatter-gather model where non-contiguous memory in a source process is "gathered" into a contiguous region in the target buffer. The data can include pointers that must be fixed up to correctly point to the copied data. To avoid making source process pointers visible to the target process, this patch defers the copy until the fixups are known and then copies and fixups are done together. There is a special case of BINDER_TYPE_FDA which applies the fixup later in the target process context. In this case the user data is skipped (so no untranslated fds become visible to the target). Reviewed-by: Martijn Coenen Signed-off-by: Todd Kjos Link: https://lore.kernel.org/r/20211130185152.437403-5-tkjos@google.com Signed-off-by: Greg Kroah-Hartman Bug: 137131904 Bug: 257685302 (cherry picked from commit 09184ae9b5756cc469db6fd1d1cfdcffbf627c2d) [cmllamas: fix trivial merge conflict] Change-Id: I6de75b192d1e3b2cc73c8d91077d97b608e8c5a9 Signed-off-by: Carlos Llamas --- drivers/android/binder.c | 299 +++++++++++++++++++++++++++++++++++---- 1 file changed, 274 insertions(+), 25 deletions(-) diff --git a/drivers/android/binder.c b/drivers/android/binder.c index 3789e187c20c..3af96469ec3d 100644 --- a/drivers/android/binder.c +++ b/drivers/android/binder.c @@ -2402,7 +2402,246 @@ err_fd_not_accepted: return ret; } -static int binder_translate_fd_array(struct binder_fd_array_object *fda, +/** + * struct binder_ptr_fixup - data to be fixed-up in target buffer + * @offset offset in target buffer to fixup + * @skip_size bytes to skip in copy (fixup will be written later) + * @fixup_data data to write at fixup offset + * @node list node + * + * This is used for the pointer fixup list (pf) which is created and consumed + * during binder_transaction() and is only accessed locally. No + * locking is necessary. + * + * The list is ordered by @offset.
+ */ +struct binder_ptr_fixup { + binder_size_t offset; + size_t skip_size; + binder_uintptr_t fixup_data; + struct list_head node; +}; + +/** + * struct binder_sg_copy - scatter-gather data to be copied + * @offset offset in target buffer + * @sender_uaddr user address in source buffer + * @length bytes to copy + * @node list node + * + * This is used for the sg copy list (sgc) which is created and consumed + * during binder_transaction() and is only accessed locally. No + * locking is necessary. + * + * The list is ordered by @offset. + */ +struct binder_sg_copy { + binder_size_t offset; + const void __user *sender_uaddr; + size_t length; + struct list_head node; +}; + +/** + * binder_do_deferred_txn_copies() - copy and fixup scatter-gather data + * @alloc: binder_alloc associated with @buffer + * @buffer: binder buffer in target process + * @sgc_head: list_head of scatter-gather copy list + * @pf_head: list_head of pointer fixup list + * + * Processes all elements of @sgc_head, applying fixups from @pf_head + * and copying the scatter-gather data from the source process' user + * buffer to the target's buffer. It is expected that the list creation + * and processing all occurs during binder_transaction() so these lists + * are only accessed in local context. + * + * Return: 0=success, else -errno + */ +static int binder_do_deferred_txn_copies(struct binder_alloc *alloc, + struct binder_buffer *buffer, + struct list_head *sgc_head, + struct list_head *pf_head) +{ + int ret = 0; + struct binder_sg_copy *sgc, *tmpsgc; + struct binder_ptr_fixup *pf = + list_first_entry_or_null(pf_head, struct binder_ptr_fixup, + node); + + list_for_each_entry_safe(sgc, tmpsgc, sgc_head, node) { + size_t bytes_copied = 0; + + while (bytes_copied < sgc->length) { + size_t copy_size; + size_t bytes_left = sgc->length - bytes_copied; + size_t offset = sgc->offset + bytes_copied; + + /* + * We copy up to the fixup (pointed to by pf) + */ + copy_size = pf ? 
min(bytes_left, (size_t)pf->offset - offset) + : bytes_left; + if (!ret && copy_size) + ret = binder_alloc_copy_user_to_buffer( + alloc, buffer, + offset, + sgc->sender_uaddr + bytes_copied, + copy_size); + bytes_copied += copy_size; + if (copy_size != bytes_left) { + BUG_ON(!pf); + /* we stopped at a fixup offset */ + if (pf->skip_size) { + /* + * we are just skipping. This is for + * BINDER_TYPE_FDA where the translated + * fds will be fixed up when we get + * to target context. + */ + bytes_copied += pf->skip_size; + } else { + /* apply the fixup indicated by pf */ + if (!ret) + ret = binder_alloc_copy_to_buffer( + alloc, buffer, + pf->offset, + &pf->fixup_data, + sizeof(pf->fixup_data)); + bytes_copied += sizeof(pf->fixup_data); + } + list_del(&pf->node); + kfree(pf); + pf = list_first_entry_or_null(pf_head, + struct binder_ptr_fixup, node); + } + } + list_del(&sgc->node); + kfree(sgc); + } + BUG_ON(!list_empty(pf_head)); + BUG_ON(!list_empty(sgc_head)); + + return ret > 0 ? -EINVAL : ret; +} + +/** + * binder_cleanup_deferred_txn_lists() - free specified lists + * @sgc_head: list_head of scatter-gather copy list + * @pf_head: list_head of pointer fixup list + * + * Called to clean up @sgc_head and @pf_head if there is an + * error. + */ +static void binder_cleanup_deferred_txn_lists(struct list_head *sgc_head, + struct list_head *pf_head) +{ + struct binder_sg_copy *sgc, *tmpsgc; + struct binder_ptr_fixup *pf, *tmppf; + + list_for_each_entry_safe(sgc, tmpsgc, sgc_head, node) { + list_del(&sgc->node); + kfree(sgc); + } + list_for_each_entry_safe(pf, tmppf, pf_head, node) { + list_del(&pf->node); + kfree(pf); + } +} + +/** + * binder_defer_copy() - queue a scatter-gather buffer for copy + * @sgc_head: list_head of scatter-gather copy list + * @offset: binder buffer offset in target process + * @sender_uaddr: user address in source process + * @length: bytes to copy + * + * Specify a scatter-gather block to be copied. 
The actual copy must + * be deferred until all the needed fixups are identified and queued. + * Then the copy and fixups are done together so un-translated values + * from the source are never visible in the target buffer. + * + * We are guaranteed that repeated calls to this function will have + * monotonically increasing @offset values so the list will naturally + * be ordered. + * + * Return: 0=success, else -errno + */ +static int binder_defer_copy(struct list_head *sgc_head, binder_size_t offset, + const void __user *sender_uaddr, size_t length) +{ + struct binder_sg_copy *bc = kzalloc(sizeof(*bc), GFP_KERNEL); + + if (!bc) + return -ENOMEM; + + bc->offset = offset; + bc->sender_uaddr = sender_uaddr; + bc->length = length; + INIT_LIST_HEAD(&bc->node); + + /* + * We are guaranteed that the deferred copies are in-order + * so just add to the tail. + */ + list_add_tail(&bc->node, sgc_head); + + return 0; +} + +/** + * binder_add_fixup() - queue a fixup to be applied to sg copy + * @pf_head: list_head of binder ptr fixup list + * @offset: binder buffer offset in target process + * @fixup: bytes to be copied for fixup + * @skip_size: bytes to skip when copying (fixup will be applied later) + * + * Add the specified fixup to a list ordered by @offset. When copying + * the scatter-gather buffers, the fixup will be copied instead of + * data from the source buffer. For BINDER_TYPE_FDA fixups, the fixup + * will be applied later (in target process context), so we just skip + * the bytes specified by @skip_size. If @skip_size is 0, we copy the + * value in @fixup. + * + * This function is called *mostly* in @offset order, but there are + * exceptions. Since out-of-order inserts are relatively uncommon, + * we insert the new element by searching backward from the tail of + * the list. 
+ * + * Return: 0=success, else -errno + */ +static int binder_add_fixup(struct list_head *pf_head, binder_size_t offset, + binder_uintptr_t fixup, size_t skip_size) +{ + struct binder_ptr_fixup *pf = kzalloc(sizeof(*pf), GFP_KERNEL); + struct binder_ptr_fixup *tmppf; + + if (!pf) + return -ENOMEM; + + pf->offset = offset; + pf->fixup_data = fixup; + pf->skip_size = skip_size; + INIT_LIST_HEAD(&pf->node); + + /* Fixups are *mostly* added in-order, but there are some + * exceptions. Look backwards through list for insertion point. + */ + list_for_each_entry_reverse(tmppf, pf_head, node) { + if (tmppf->offset < pf->offset) { + list_add(&pf->node, &tmppf->node); + return 0; + } + } + /* + * if we get here, then the new offset is the lowest so + * insert at the head + */ + list_add(&pf->node, pf_head); + return 0; +} + +static int binder_translate_fd_array(struct list_head *pf_head, + struct binder_fd_array_object *fda, const void __user *sender_ubuffer, struct binder_buffer_object *parent, struct binder_buffer_object *sender_uparent, @@ -2414,6 +2653,7 @@ static int binder_translate_fd_array(struct binder_fd_array_object *fda, binder_size_t fda_offset; const void __user *sender_ufda_base; struct binder_proc *proc = thread->proc; + int ret; fd_buf_size = sizeof(u32) * fda->num_fds; if (fda->num_fds >= SIZE_MAX / sizeof(u32)) { @@ -2445,9 +2685,12 @@ static int binder_translate_fd_array(struct binder_fd_array_object *fda, proc->pid, thread->pid); return -EINVAL; } + ret = binder_add_fixup(pf_head, fda_offset, 0, fda->num_fds * sizeof(u32)); + if (ret) + return ret; + for (fdi = 0; fdi < fda->num_fds; fdi++) { u32 fd; - int ret; binder_size_t offset = fda_offset + fdi * sizeof(fd); binder_size_t sender_uoffset = fdi * sizeof(fd); @@ -2461,7 +2704,8 @@ static int binder_translate_fd_array(struct binder_fd_array_object *fda, return 0; } -static int binder_fixup_parent(struct binder_transaction *t, +static int binder_fixup_parent(struct list_head *pf_head, + struct 
binder_transaction *t, struct binder_thread *thread, struct binder_buffer_object *bp, binder_size_t off_start_offset, @@ -2507,14 +2751,7 @@ static int binder_fixup_parent(struct binder_transaction *t, } buffer_offset = bp->parent_offset + (uintptr_t)parent->buffer - (uintptr_t)b->user_data; - if (binder_alloc_copy_to_buffer(&target_proc->alloc, b, buffer_offset, - &bp->buffer, sizeof(bp->buffer))) { - binder_user_error("%d:%d got transaction with invalid parent offset\n", - proc->pid, thread->pid); - return -EINVAL; - } - - return 0; + return binder_add_fixup(pf_head, buffer_offset, bp->buffer, 0); } /** @@ -2747,8 +2984,12 @@ static void binder_transaction(struct binder_proc *proc, int t_debug_id = atomic_inc_return(&binder_last_id); char *secctx = NULL; u32 secctx_sz = 0; + struct list_head sgc_head; + struct list_head pf_head; const void __user *user_buffer = (const void __user *) (uintptr_t)tr->data.ptr.buffer; + INIT_LIST_HEAD(&sgc_head); + INIT_LIST_HEAD(&pf_head); e = binder_transaction_log_add(&binder_transaction_log); e->debug_id = t_debug_id; @@ -3289,8 +3530,8 @@ static void binder_transaction(struct binder_proc *proc, return_error_line = __LINE__; goto err_bad_parent; } - ret = binder_translate_fd_array(fda, user_buffer, - parent, + ret = binder_translate_fd_array(&pf_head, fda, + user_buffer, parent, &user_object.bbo, t, thread, in_reply_to); if (!ret) @@ -3322,19 +3563,14 @@ static void binder_transaction(struct binder_proc *proc, return_error_line = __LINE__; goto err_bad_offset; } - if (binder_alloc_copy_user_to_buffer( - &target_proc->alloc, - t->buffer, - sg_buf_offset, - (const void __user *) - (uintptr_t)bp->buffer, - bp->length)) { - binder_user_error("%d:%d got transaction with invalid offsets ptr\n", - proc->pid, thread->pid); - return_error_param = -EFAULT; + ret = binder_defer_copy(&sgc_head, sg_buf_offset, + (const void __user *)(uintptr_t)bp->buffer, + bp->length); + if (ret) { return_error = BR_FAILED_REPLY; + return_error_param = ret; 
return_error_line = __LINE__; - goto err_copy_data_failed; + goto err_translate_failed; } /* Fixup buffer pointer to target proc address space */ bp->buffer = (uintptr_t) @@ -3343,7 +3579,8 @@ static void binder_transaction(struct binder_proc *proc, num_valid = (buffer_offset - off_start_offset) / sizeof(binder_size_t); - ret = binder_fixup_parent(t, thread, bp, + ret = binder_fixup_parent(&pf_head, t, + thread, bp, off_start_offset, num_valid, last_fixup_obj_off, @@ -3383,6 +3620,17 @@ static void binder_transaction(struct binder_proc *proc, return_error_line = __LINE__; goto err_copy_data_failed; } + + ret = binder_do_deferred_txn_copies(&target_proc->alloc, t->buffer, + &sgc_head, &pf_head); + if (ret) { + binder_user_error("%d:%d got transaction with invalid offsets ptr\n", + proc->pid, thread->pid); + return_error = BR_FAILED_REPLY; + return_error_param = ret; + return_error_line = __LINE__; + goto err_copy_data_failed; + } if (t->buffer->oneway_spam_suspect) tcomplete->type = BINDER_WORK_TRANSACTION_ONEWAY_SPAM_SUSPECT; else @@ -3458,6 +3706,7 @@ err_bad_object_type: err_bad_offset: err_bad_parent: err_copy_data_failed: + binder_cleanup_deferred_txn_lists(&sgc_head, &pf_head); binder_free_txn_fixups(t); trace_binder_transaction_failed_buffer_release(t->buffer); binder_transaction_buffer_release(target_proc, NULL, t->buffer, From 8494d73eeb86cdbdcc01375450e290c29fb32865 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 7 Dec 2021 13:24:42 +0100 Subject: [PATCH 110/186] UPSTREAM: binder: fix pointer cast warning binder_uintptr_t is not the same as uintptr_t, so converting it into a pointer requires a second cast: drivers/android/binder.c: In function 'binder_translate_fd_array': drivers/android/binder.c:2511:28: error: cast to pointer from integer of different size [-Werror=int-to-pointer-cast] 2511 | sender_ufda_base = (void __user *)sender_uparent->buffer + fda->parent_offset; | ^ Fixes: 656e01f3ab54 ("binder: read pre-translated fds from sender 
buffer") Acked-by: Todd Kjos Acked-by: Randy Dunlap # build-tested Acked-by: Christian Brauner Signed-off-by: Arnd Bergmann Link: https://lore.kernel.org/r/20211207122448.1185769-1-arnd@kernel.org Signed-off-by: Greg Kroah-Hartman Bug: 257685302 (cherry picked from commit 9a0a930fe2535a76ad70d3f43caeccf0d86a3009) Change-Id: I1c9b86a90bcf2be81012e59e0c472869f551e61a Signed-off-by: Carlos Llamas --- drivers/android/binder.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/android/binder.c b/drivers/android/binder.c index 3af96469ec3d..d527b1b29265 100644 --- a/drivers/android/binder.c +++ b/drivers/android/binder.c @@ -2677,7 +2677,8 @@ static int binder_translate_fd_array(struct list_head *pf_head, */ fda_offset = (parent->buffer - (uintptr_t)t->buffer->user_data) + fda->parent_offset; - sender_ufda_base = (void __user *)sender_uparent->buffer + fda->parent_offset; + sender_ufda_base = (void __user *)(uintptr_t)sender_uparent->buffer + + fda->parent_offset; if (!IS_ALIGNED((unsigned long)fda_offset, sizeof(u32)) || !IS_ALIGNED((unsigned long)sender_ufda_base, sizeof(u32))) { From befa134c53313b5d43c2760efad1ad4bdd13a911 Mon Sep 17 00:00:00 2001 From: Alessandro Astone Date: Fri, 15 Apr 2022 14:00:15 +0200 Subject: [PATCH 111/186] UPSTREAM: binder: Address corner cases in deferred copy and fixup When handling a BINDER_TYPE_FDA object we are pushing a parent fixup with a certain skip_size but no scatter-gather copy object, since the copy is handled standalone. If BINDER_TYPE_FDA is the last child, the scatter-gather copy loop will never stop to skip it, thus we are left with an item in the parent fixup list. This will trigger the BUG_ON(). This is reproducible in Android when playing a video.
We receive a transaction that looks like this: obj[0] BINDER_TYPE_PTR, parent obj[1] BINDER_TYPE_PTR, child obj[2] BINDER_TYPE_PTR, child obj[3] BINDER_TYPE_FDA, child Fixes: 09184ae9b575 ("binder: defer copies of pre-patched txn data") Acked-by: Todd Kjos Cc: stable Signed-off-by: Alessandro Astone Link: https://lore.kernel.org/r/20220415120015.52684-2-ales.astone@gmail.com Signed-off-by: Greg Kroah-Hartman Bug: 257685302 (cherry picked from commit 2d1746e3fda0c3612143d7c06f8e1d1830c13e23) Change-Id: I3963a98dfc48b01d7bb8166aaa90341818bf6416 Signed-off-by: Carlos Llamas --- drivers/android/binder.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/android/binder.c b/drivers/android/binder.c index d527b1b29265..7346d3c57afb 100644 --- a/drivers/android/binder.c +++ b/drivers/android/binder.c @@ -2464,6 +2464,7 @@ static int binder_do_deferred_txn_copies(struct binder_alloc *alloc, { int ret = 0; struct binder_sg_copy *sgc, *tmpsgc; + struct binder_ptr_fixup *tmppf; struct binder_ptr_fixup *pf = list_first_entry_or_null(pf_head, struct binder_ptr_fixup, node); @@ -2518,7 +2519,11 @@ static int binder_do_deferred_txn_copies(struct binder_alloc *alloc, list_del(&sgc->node); kfree(sgc); } - BUG_ON(!list_empty(pf_head)); + list_for_each_entry_safe(pf, tmppf, pf_head, node) { + BUG_ON(pf->skip_size == 0); + list_del(&pf->node); + kfree(pf); + } BUG_ON(!list_empty(sgc_head)); return ret > 0 ? -EINVAL : ret; From ac30bb6084661f71225ff2b22cd09df34427b963 Mon Sep 17 00:00:00 2001 From: Alessandro Astone Date: Fri, 15 Apr 2022 14:00:14 +0200 Subject: [PATCH 112/186] UPSTREAM: binder: Gracefully handle BINDER_TYPE_FDA objects with num_fds=0 Some android userspace is sending BINDER_TYPE_FDA objects with num_fds=0. Like the previous patch, this is reproducible when playing a video. Before commit 09184ae9b575 BINDER_TYPE_FDA objects with num_fds=0 were 'correctly handled', as in no fixup was performed. 
After commit 09184ae9b575 we aggregate fixup and skip regions in binder_ptr_fixup structs and distinguish between the two by using the skip_size field: if it's 0, then it's a fixup, otherwise skip. When processing BINDER_TYPE_FDA objects with num_fds=0 we add a skip region of skip_size=0, and this causes issues because now binder_do_deferred_txn_copies will think this was a fixup region. To address that, return early from binder_translate_fd_array to avoid adding an empty skip region. Fixes: 09184ae9b575 ("binder: defer copies of pre-patched txn data") Acked-by: Todd Kjos Cc: stable Signed-off-by: Alessandro Astone Link: https://lore.kernel.org/r/20220415120015.52684-1-ales.astone@gmail.com Signed-off-by: Greg Kroah-Hartman Bug: 257685302 (cherry picked from commit ef38de9217a04c9077629a24652689d8fdb4c6c6) Change-Id: I34fab41c0c1beee366a5df4724b263e4385ad13b Signed-off-by: Carlos Llamas --- drivers/android/binder.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/android/binder.c b/drivers/android/binder.c index 7346d3c57afb..d07a6d269776 100644 --- a/drivers/android/binder.c +++ b/drivers/android/binder.c @@ -2660,6 +2660,9 @@ static int binder_translate_fd_array(struct list_head *pf_head, struct binder_proc *proc = thread->proc; int ret; + if (fda->num_fds == 0) + return 0; + fd_buf_size = sizeof(u32) * fda->num_fds; if (fda->num_fds >= SIZE_MAX / sizeof(u32)) { binder_user_error("%d:%d got transaction with invalid number of fds (%lld)\n", From d55aeb40292f52e310a6012ba2ec010632e43fb5 Mon Sep 17 00:00:00 2001 From: "T.J. Mercier" Date: Wed, 9 Nov 2022 20:25:56 +0000 Subject: [PATCH 113/186] BACKPORT: ANDROID: dma-buf: heaps: replace mutex lock with spinlock We should use spinlock to protect page pool's critical section as 1. The critical section is short, using spinlock is more efficient. 2. Spinlock could protect priority inversion. Ex. Low priority thread (dmabuf-deferred) hold the page lock but get scheduled out under heavy loading. 
Then the other high priority threads need to wait for dmabuf-deferred to release the lock. It causes long allocation latency and possible UI jank. Also, we could move NR_KERNEL_MISC_RECLAIMABLE stat out of the critical section to make it shorter as mod_node_page_state can handle concurrent access cases. Conflicts: drivers/dma-buf/heaps/page_pool.h drivers/dma-buf/heaps/page_pool.c 1. The android12-5.10 KMI is frozen, and the modification to struct dmabuf_page_pool in the original patch would break the KMI. Instead we wrap dmabuf_page_pool allocations in a struct dmabuf_page_pool_with_spinlock which also contains a spinlock replacement for dmabuf_page_pool's mutex. No callers should attempt to acquire dmabuf_page_pool's mutex on this branch, so it is locked immediately after initialization and never unlocked. (cherry picked from commit 060e38dce1d69b81fe633f31751a610e3dd8e983) Bug: 245454030 Change-Id: I15f349f9e893621f71ca79f1de037de184c33edf Signed-off-by: T.J. Mercier --- drivers/dma-buf/heaps/page_pool.c | 37 ++++++++++++++++++++++++------- drivers/dma-buf/heaps/page_pool.h | 2 +- 2 files changed, 30 insertions(+), 9 deletions(-) diff --git a/drivers/dma-buf/heaps/page_pool.c b/drivers/dma-buf/heaps/page_pool.c index b79e737bac95..de9d72809412 100644 --- a/drivers/dma-buf/heaps/page_pool.c +++ b/drivers/dma-buf/heaps/page_pool.c @@ -11,10 +11,16 @@ #include #include #include +#include #include #include #include "page_pool.h" +struct dmabuf_page_pool_with_spinlock { + struct dmabuf_page_pool pool; + struct spinlock spinlock; +}; + static LIST_HEAD(pool_list); static DEFINE_MUTEX(pool_list_lock); @@ -35,34 +41,41 @@ static inline void dmabuf_page_pool_free_pages(struct dmabuf_page_pool *pool, static void dmabuf_page_pool_add(struct dmabuf_page_pool *pool, struct page *page) { int index; + struct dmabuf_page_pool_with_spinlock *container_pool = + container_of(pool, struct dmabuf_page_pool_with_spinlock, pool); if (PageHighMem(page)) index = POOL_HIGHPAGE; else 
index = POOL_LOWPAGE; - mutex_lock(&pool->mutex); + spin_lock(&container_pool->spinlock); list_add_tail(&page->lru, &pool->items[index]); pool->count[index]++; + spin_unlock(&container_pool->spinlock); mod_node_page_state(page_pgdat(page), NR_KERNEL_MISC_RECLAIMABLE, 1 << pool->order); - mutex_unlock(&pool->mutex); } static struct page *dmabuf_page_pool_remove(struct dmabuf_page_pool *pool, int index) { struct page *page; + struct dmabuf_page_pool_with_spinlock *container_pool = + container_of(pool, struct dmabuf_page_pool_with_spinlock, pool); - mutex_lock(&pool->mutex); + spin_lock(&container_pool->spinlock); page = list_first_entry_or_null(&pool->items[index], struct page, lru); if (page) { pool->count[index]--; list_del(&page->lru); + spin_unlock(&container_pool->spinlock); mod_node_page_state(page_pgdat(page), NR_KERNEL_MISC_RECLAIMABLE, -(1 << pool->order)); + goto out; } - mutex_unlock(&pool->mutex); + spin_unlock(&container_pool->spinlock); +out: return page; } @@ -113,19 +126,25 @@ static int dmabuf_page_pool_total(struct dmabuf_page_pool *pool, bool high) struct dmabuf_page_pool *dmabuf_page_pool_create(gfp_t gfp_mask, unsigned int order) { - struct dmabuf_page_pool *pool = kmalloc(sizeof(*pool), GFP_KERNEL); + struct dmabuf_page_pool *pool; + struct dmabuf_page_pool_with_spinlock *container_pool = + kmalloc(sizeof(*container_pool), GFP_KERNEL); int i; - if (!pool) + if (!container_pool) return NULL; + spin_lock_init(&container_pool->spinlock); + pool = &container_pool->pool; + for (i = 0; i < POOL_TYPE_SIZE; i++) { pool->count[i] = 0; INIT_LIST_HEAD(&pool->items[i]); } pool->gfp_mask = gfp_mask | __GFP_COMP; pool->order = order; - mutex_init(&pool->mutex); + mutex_init(&pool->mutex); /* No longer used! 
*/ + mutex_lock(&pool->mutex); /* Make sure anyone who attempts to acquire this hangs */ mutex_lock(&pool_list_lock); list_add(&pool->list, &pool_list); @@ -138,6 +157,7 @@ EXPORT_SYMBOL_GPL(dmabuf_page_pool_create); void dmabuf_page_pool_destroy(struct dmabuf_page_pool *pool) { struct page *page; + struct dmabuf_page_pool_with_spinlock *container_pool; int i; /* Remove us from the pool list */ @@ -151,7 +171,8 @@ void dmabuf_page_pool_destroy(struct dmabuf_page_pool *pool) dmabuf_page_pool_free_pages(pool, page); } - kfree(pool); + container_pool = container_of(pool, struct dmabuf_page_pool_with_spinlock, pool); + kfree(container_pool); } EXPORT_SYMBOL_GPL(dmabuf_page_pool_destroy); diff --git a/drivers/dma-buf/heaps/page_pool.h b/drivers/dma-buf/heaps/page_pool.h index 6b083b04f195..b578e18dc1ed 100644 --- a/drivers/dma-buf/heaps/page_pool.h +++ b/drivers/dma-buf/heaps/page_pool.h @@ -40,7 +40,7 @@ enum { struct dmabuf_page_pool { int count[POOL_TYPE_SIZE]; struct list_head items[POOL_TYPE_SIZE]; - struct mutex mutex; + struct mutex mutex; /* No longer used! */ gfp_t gfp_mask; unsigned int order; struct list_head list; From 8f0752d06e92f757d26925e94ad7610089b33149 Mon Sep 17 00:00:00 2001 From: Charan Teja Kalla Date: Wed, 30 Nov 2022 16:32:58 +0530 Subject: [PATCH 114/186] ANDROID: arm64: mm: perform clean & invalidation in __dma_map_area commit c50f11c6196f ("arm64: mm: Don't invalidate FROM_DEVICE buffers at start of DMA transfer") break assumptions of some device drivers about invalidation that happens as part of __dma_map_area(DMA_FROM_DEVICE). An example include drivers using dmabuf API dma_buf_begin_cpu_access() and dma_buf_end_cpu_access() to achieve buffer invalidation. Fix this breakage by replacing clean with clean and invalidation in __dma_map_area() for DMA inbound case. 
Bug: 260978220 Change-Id: Id1a2750c2036de693cd52e8f7316f1d820b5a262 Fixes: c50f11c6196f ("arm64: mm: Don't invalidate FROM_DEVICE buffers at start of DMA transfer") Signed-off-by: Charan Teja Kalla Signed-off-by: Shiraz Hashim Signed-off-by: Prakash Gupta Signed-off-by: Pavankumar Kondeti --- arch/arm64/mm/cache.S | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/arm64/mm/cache.S b/arch/arm64/mm/cache.S index 7b8158ae36ec..83a0b6f9960b 100644 --- a/arch/arm64/mm/cache.S +++ b/arch/arm64/mm/cache.S @@ -228,6 +228,8 @@ SYM_FUNC_END_PI(__dma_flush_area) * - dir - DMA direction */ SYM_FUNC_START_PI(__dma_map_area) + cmp w2, #DMA_FROM_DEVICE + b.eq __dma_flush_area b __dma_clean_area SYM_FUNC_END_PI(__dma_map_area) From 37725ca62ee25e8fd832a351879f73d447a30b58 Mon Sep 17 00:00:00 2001 From: Hyunwoo Kim Date: Sun, 4 Sep 2022 12:31:15 -0700 Subject: [PATCH 115/186] UPSTREAM: HID: roccat: Fix use-after-free in roccat_read() [ Upstream commit cacdb14b1c8d3804a3a7d31773bc7569837b71a4 ] roccat_report_event() is responsible for registering roccat-related reports in struct roccat_device. int roccat_report_event(int minor, u8 const *data) { struct roccat_device *device; struct roccat_reader *reader; struct roccat_report *report; uint8_t *new_value; device = devices[minor]; new_value = kmemdup(data, device->report_size, GFP_ATOMIC); if (!new_value) return -ENOMEM; report = &device->cbuf[device->cbuf_end]; /* passing NULL is safe */ kfree(report->value); ... The registered report is stored in the struct roccat_device member "struct roccat_report cbuf[ROCCAT_CBUF_SIZE];". If more reports are received than the "ROCCAT_CBUF_SIZE" value, roccat_report_event() kfree()s the saved report from cbuf[0] and allocates a new report. Since there is no lock when this kfree() is performed, kfree() can be performed even while reading the saved report. 
static ssize_t roccat_read(struct file *file, char __user *buffer, size_t count, loff_t *ppos) { struct roccat_reader *reader = file->private_data; struct roccat_device *device = reader->device; struct roccat_report *report; ssize_t retval = 0, len; DECLARE_WAITQUEUE(wait, current); mutex_lock(&device->cbuf_lock); ... report = &device->cbuf[reader->cbuf_start]; /* * If report is larger than requested amount of data, rest of report * is lost! */ len = device->report_size > count ? count : device->report_size; if (copy_to_user(buffer, report->value, len)) { retval = -EFAULT; goto exit_unlock; } ... The roccat_read() function receives the device->cbuf report and delivers it to the user through copy_to_user(). If the N+ROCCAT_CBUF_SIZE th report is received while copying of the Nth report->value is in progress, the pointer that copy_to_user() is working on is kfree()ed and UAF read may occur. (race condition) Since the device node of this driver does not set separate permissions, this is not a security vulnerability, but because it is used for requesting screen display of profile or dpi settings, a user using the roccat device can apply udev to this device node or There is a possibility to use it by giving. 
Bug: 251067658 Signed-off-by: Hyunwoo Kim Signed-off-by: Jiri Kosina Signed-off-by: Sasha Levin Signed-off-by: Lee Jones Change-Id: I90b24df9216ab87a4fec0ab06fa52e7b1eb97fd1 --- drivers/hid/hid-roccat.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/hid/hid-roccat.c b/drivers/hid/hid-roccat.c index 26373b82fe81..6da80e442fdd 100644 --- a/drivers/hid/hid-roccat.c +++ b/drivers/hid/hid-roccat.c @@ -257,6 +257,8 @@ int roccat_report_event(int minor, u8 const *data) if (!new_value) return -ENOMEM; + mutex_lock(&device->cbuf_lock); + report = &device->cbuf[device->cbuf_end]; /* passing NULL is safe */ @@ -276,6 +278,8 @@ int roccat_report_event(int minor, u8 const *data) reader->cbuf_start = (reader->cbuf_start + 1) % ROCCAT_CBUF_SIZE; } + mutex_unlock(&device->cbuf_lock); + wake_up_interruptible(&device->wait); return 0; } From 01b3d953ac01b53fd811f0a57e4c46f016cf5002 Mon Sep 17 00:00:00 2001 From: Linyu Yuan Date: Mon, 25 Apr 2022 18:18:06 +0800 Subject: [PATCH 116/186] BACKPORT: UPSTREAM: usb: typec: ucsi: Wait for the USB role switches When the role switch module probes later than the ucsi module, fwnode_usb_role_switch_get() will return -EPROBE_DEFER. It is better to restart the ucsi init work to find it again every 100ms, for a total wait time of 10 seconds. This also means changing the ucsi init work to a delayed_work. 
Reviewed-by: Heikki Krogerus Signed-off-by: Linyu Yuan Link: https://lore.kernel.org/r/1650881886-25530-3-git-send-email-quic_linyyuan@quicinc.com Signed-off-by: Greg Kroah-Hartman (cherry picked from commit 3c162511530c234f95091bdc7225f641e5f35090 https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git) Bug: 260537721 [ugoswami: Fixed KMI breakage by moving the delayed_work & work_count to a new parent structure] Change-Id: Ia4e2521c40b9b0ffe8b6f13ec19990e7f3a60e7a Signed-off-by: Udipto Goswami --- drivers/usb/typec/ucsi/ucsi.c | 52 +++++++++++++++++++++++------------ drivers/usb/typec/ucsi/ucsi.h | 22 +++++++++++++++ 2 files changed, 57 insertions(+), 17 deletions(-) diff --git a/drivers/usb/typec/ucsi/ucsi.c b/drivers/usb/typec/ucsi/ucsi.c index f253310a92b4..516d00affa82 100644 --- a/drivers/usb/typec/ucsi/ucsi.c +++ b/drivers/usb/typec/ucsi/ucsi.c @@ -1061,6 +1061,15 @@ static int ucsi_register_port(struct ucsi *ucsi, int index) con->num = index + 1; con->ucsi = ucsi; + cap->fwnode = ucsi_find_fwnode(con); + con->usb_role_sw = fwnode_usb_role_switch_get(cap->fwnode); + if (IS_ERR(con->usb_role_sw)) { + dev_err(ucsi->dev, "con%d: failed to get usb role switch\n", + con->num); + return PTR_ERR(con->usb_role_sw); + } + + /* Delay other interactions with the con until registration is complete */ mutex_lock(&con->lock); @@ -1096,7 +1105,6 @@ static int ucsi_register_port(struct ucsi *ucsi, int index) if (con->cap.op_mode & UCSI_CONCAP_OPMODE_DEBUG_ACCESSORY) *accessory = TYPEC_ACCESSORY_DEBUG; - cap->fwnode = ucsi_find_fwnode(con); cap->driver_data = con; cap->ops = &ucsi_ops; @@ -1154,13 +1162,6 @@ static int ucsi_register_port(struct ucsi *ucsi, int index) ucsi_port_psy_changed(con); } - con->usb_role_sw = fwnode_usb_role_switch_get(cap->fwnode); - if (IS_ERR(con->usb_role_sw)) { - dev_err(ucsi->dev, "con%d: failed to get usb role switch\n", - con->num); - con->usb_role_sw = NULL; - } - /* Only notify USB controller if partner supports USB data */ if 
(!(UCSI_CONSTAT_PARTNER_FLAGS(con->status.flags) & UCSI_CONSTAT_PARTNER_FLAG_USB)) u_role = USB_ROLE_NONE; @@ -1273,12 +1274,21 @@ err: static void ucsi_init_work(struct work_struct *work) { - struct ucsi *ucsi = container_of(work, struct ucsi, work); + struct ucsi_android *aucsi = container_of(work, + struct ucsi_android, work.work); int ret; - ret = ucsi_init(ucsi); + ret = ucsi_init(&aucsi->ucsi); if (ret) - dev_err(ucsi->dev, "PPM init failed (%d)\n", ret); + dev_err(aucsi->ucsi.dev, "PPM init failed (%d)\n", ret); + + if (ret == -EPROBE_DEFER) { + if (aucsi->work_count++ > UCSI_ROLE_SWITCH_WAIT_COUNT) + return; + + queue_delayed_work(system_long_wq, &aucsi->work, + UCSI_ROLE_SWITCH_INTERVAL); + } } /** @@ -1310,15 +1320,17 @@ EXPORT_SYMBOL_GPL(ucsi_set_drvdata); struct ucsi *ucsi_create(struct device *dev, const struct ucsi_operations *ops) { struct ucsi *ucsi; + struct ucsi_android *aucsi; if (!ops || !ops->read || !ops->sync_write || !ops->async_write) return ERR_PTR(-EINVAL); - ucsi = kzalloc(sizeof(*ucsi), GFP_KERNEL); - if (!ucsi) + aucsi = kzalloc(sizeof(*aucsi), GFP_KERNEL); + if (!aucsi) return ERR_PTR(-ENOMEM); - INIT_WORK(&ucsi->work, ucsi_init_work); + ucsi = &aucsi->ucsi; + INIT_DELAYED_WORK(&aucsi->work, ucsi_init_work); mutex_init(&ucsi->ppm_lock); ucsi->dev = dev; ucsi->ops = ops; @@ -1333,7 +1345,9 @@ EXPORT_SYMBOL_GPL(ucsi_create); */ void ucsi_destroy(struct ucsi *ucsi) { - kfree(ucsi); + struct ucsi_android *aucsi = container_of(ucsi, + struct ucsi_android, ucsi); + kfree(aucsi); } EXPORT_SYMBOL_GPL(ucsi_destroy); @@ -1343,6 +1357,8 @@ EXPORT_SYMBOL_GPL(ucsi_destroy); */ int ucsi_register(struct ucsi *ucsi) { + struct ucsi_android *aucsi = container_of(ucsi, + struct ucsi_android, ucsi); int ret; ret = ucsi->ops->read(ucsi, UCSI_VERSION, &ucsi->version, @@ -1353,7 +1369,7 @@ int ucsi_register(struct ucsi *ucsi) if (!ucsi->version) return -ENODEV; - queue_work(system_long_wq, &ucsi->work); + queue_delayed_work(system_long_wq, &aucsi->work, 
0); return 0; } @@ -1367,11 +1383,13 @@ EXPORT_SYMBOL_GPL(ucsi_register); */ void ucsi_unregister(struct ucsi *ucsi) { + struct ucsi_android *aucsi = container_of(ucsi, + struct ucsi_android, ucsi); u64 cmd = UCSI_SET_NOTIFICATION_ENABLE; int i; /* Make sure that we are not in the middle of driver initialization */ - cancel_work_sync(&ucsi->work); + cancel_delayed_work_sync(&aucsi->work); /* Disable notifications */ ucsi->ops->async_write(ucsi, UCSI_CONTROL, &cmd, sizeof(cmd)); diff --git a/drivers/usb/typec/ucsi/ucsi.h b/drivers/usb/typec/ucsi/ucsi.h index b3450d87f324..d366ab2add37 100644 --- a/drivers/usb/typec/ucsi/ucsi.h +++ b/drivers/usb/typec/ucsi/ucsi.h @@ -289,6 +289,9 @@ struct ucsi { struct ucsi_connector *connector; struct work_struct work; +#define UCSI_ROLE_SWITCH_RETRY_PER_HZ 10 +#define UCSI_ROLE_SWITCH_INTERVAL (HZ / UCSI_ROLE_SWITCH_RETRY_PER_HZ) +#define UCSI_ROLE_SWITCH_WAIT_COUNT (10 * UCSI_ROLE_SWITCH_RETRY_PER_HZ) /* PPM Communication lock */ struct mutex ppm_lock; @@ -304,6 +307,25 @@ struct ucsi { #define EVENT_PROCESSING 3 }; +/** + * struct ucsi_android - contains parameters without modifying the format + * of ucsi struct. + * @ucsi: contains the ucsi reference. + * @work: work structure for queuing ucsi_init_work. + * @work_count: to track the wait count(MAX= UCSI_ROLE_SWITCH_WAIT_COUNT). + * + * Required to address Bug: 260537721 + * If the role switch module probes late the + * fwnode_usb_role_switch_get() will fail with -EPROBE_DEFER. + * To recover from this, restart the ucsi_init_work + * to find the fwnode again using a delayed workqueue. 
+ */ +struct ucsi_android { + struct ucsi ucsi; + struct delayed_work work; + int work_count; +}; + #define UCSI_MAX_SVID 5 #define UCSI_MAX_ALTMODES (UCSI_MAX_SVID * 6) From 98671fcc32490eeb4eaec8fc2380bdc42ffb428e Mon Sep 17 00:00:00 2001 From: Lee Jones Date: Mon, 12 Sep 2022 14:38:55 +0100 Subject: [PATCH 117/186] UPSTREAM: bpf: Ensure correct locking around vulnerable function find_vpid() [ Upstream commit 83c10cc362d91c0d8d25e60779ee52fdbbf3894d ] The documentation for find_vpid() clearly states: "Must be called with the tasklist_lock or rcu_read_lock() held." Presently we do neither for find_vpid() instance in bpf_task_fd_query(). Add proper rcu_read_lock/unlock() to fix the issue. Bug: 232939090 Fixes: 41bdc4b40ed6f ("bpf: introduce bpf subcommand BPF_TASK_FD_QUERY") Signed-off-by: Lee Jones Signed-off-by: Daniel Borkmann Acked-by: Yonghong Song Link: https://lore.kernel.org/bpf/20220912133855.1218900-1-lee@kernel.org Signed-off-by: Sasha Levin Signed-off-by: Lee Jones Change-Id: I517b52d97a22b92be1a479279067ddb7b84efbba --- kernel/bpf/syscall.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 0c45e247f5e1..589332936c53 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -3917,7 +3917,9 @@ static int bpf_task_fd_query(const union bpf_attr *attr, if (attr->task_fd_query.flags != 0) return -EINVAL; + rcu_read_lock(); task = get_pid_task(find_vpid(pid), PIDTYPE_PID); + rcu_read_unlock(); if (!task) return -ENOENT; From e2a4080d0429343560249dcf84bb2bb535f752a7 Mon Sep 17 00:00:00 2001 From: Lee Jones Date: Fri, 25 Nov 2022 12:07:50 +0000 Subject: [PATCH 118/186] BACKPORT: Kconfig.debug: provide a little extra FRAME_WARN leeway when KASAN is enabled MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 152fe65f300e1819d59b80477d3e0999b4d5d7d2 ] When enabled, KASAN enlarges function's stack-frames. Pushing quite a few over the current threshold. 
This can mainly be seen on 32-bit architectures where the present limit (when !GCC) is a lowly 1024-Bytes. Bug: 261962742 Link: https://lkml.kernel.org/r/20221125120750.3537134-3-lee@kernel.org Signed-off-by: Lee Jones Acked-by: Arnd Bergmann Cc: Alex Deucher Cc: "Christian König" Cc: Daniel Vetter Cc: David Airlie Cc: Harry Wentland Cc: Leo Li Cc: Maarten Lankhorst Cc: Maxime Ripard Cc: Nathan Chancellor Cc: Nick Desaulniers Cc: "Pan, Xinhui" Cc: Rodrigo Siqueira Cc: Thomas Zimmermann Cc: Tom Rix Cc: Signed-off-by: Andrew Morton Signed-off-by: Sasha Levin Change-Id: I505a5187220b426fe49c0f15bf1704198082f63d Signed-off-by: Lee Jones --- lib/Kconfig.debug | 1 + 1 file changed, 1 insertion(+) diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index d0740234b87a..c21eaff38d92 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -298,6 +298,7 @@ config FRAME_WARN int "Warn for stack frames larger than" range 0 8192 default 2048 if GCC_PLUGIN_LATENT_ENTROPY + default 1280 if KASAN && !64BIT default 1280 if (!64BIT && PARISC) default 1024 if (!64BIT && !PARISC) default 2048 if 64BIT From eddb2f39cd5396c9feacda79e2ceadbdf714c7a4 Mon Sep 17 00:00:00 2001 From: Lee Jones Date: Fri, 25 Nov 2022 12:07:49 +0000 Subject: [PATCH 119/186] UPSTREAM: drm/amdgpu: temporarily disable broken Clang builds due to blown stack-frame MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 6f6cb1714365a07dbc66851879538df9f6969288 upstream. Patch series "Fix a bunch of allmodconfig errors", v2. Since b339ec9c229aa ("kbuild: Only default to -Werror if COMPILE_TEST") WERROR now defaults to COMPILE_TEST meaning that it's enabled for allmodconfig builds. This leads to some interesting build failures when using Clang, each resolved in this set. With this set applied, I am able to obtain a successful allmodconfig Arm build. 
This patch (of 2): calculate_bandwidth() is presently broken on all !(X86_64 || SPARC64 || ARM64) architectures built with Clang (all released versions), whereby the stack frame gets blown up to well over 5k. This would cause an immediate kernel panic on most architectures. We'll revert this when the following bug report has been resolved: https://github.com/llvm/llvm-project/issues/41896. Bug: 261962742 Link: https://lkml.kernel.org/r/20221125120750.3537134-1-lee@kernel.org Link: https://lkml.kernel.org/r/20221125120750.3537134-2-lee@kernel.org Signed-off-by: Lee Jones Suggested-by: Arnd Bergmann Acked-by: Arnd Bergmann Cc: Alex Deucher Cc: "Christian König" Cc: Daniel Vetter Cc: David Airlie Cc: Harry Wentland Cc: Lee Jones Cc: Leo Li Cc: Maarten Lankhorst Cc: Maxime Ripard Cc: Nathan Chancellor Cc: Nick Desaulniers Cc: "Pan, Xinhui" Cc: Rodrigo Siqueira Cc: Thomas Zimmermann Cc: Tom Rix Cc: Signed-off-by: Andrew Morton Signed-off-by: Greg Kroah-Hartman Signed-off-by: Lee Jones Change-Id: Iaa42b18cdcf9fe23d740c036371bd7950d431e14 Signed-off-by: Lee Jones --- drivers/gpu/drm/amd/display/Kconfig | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/gpu/drm/amd/display/Kconfig b/drivers/gpu/drm/amd/display/Kconfig index f3274eb6b341..6c4cba09d23b 100644 --- a/drivers/gpu/drm/amd/display/Kconfig +++ b/drivers/gpu/drm/amd/display/Kconfig @@ -5,6 +5,7 @@ menu "Display Engine Configuration" config DRM_AMD_DC bool "AMD DC - Enable new display engine" default y + depends on BROKEN || !CC_IS_CLANG || X86_64 || SPARC64 || ARM64 select SND_HDA_COMPONENT if SND_HDA_CORE select DRM_AMD_DC_DCN if (X86 || PPC64) && !(KCOV_INSTRUMENT_ALL && KCOV_ENABLE_COMPARISONS) help @@ -12,6 +13,12 @@ config DRM_AMD_DC support for AMDGPU. This adds required support for Vega and Raven ASICs. 
+ calculate_bandwidth() is presently broken on all !(X86_64 || SPARC64 || ARM64) + architectures built with Clang (all released versions), whereby the stack + frame gets blown up to well over 5k. This would cause an immediate kernel + panic on most architectures. We'll revert this when the following bug report + has been resolved: https://github.com/llvm/llvm-project/issues/41896. + config DRM_AMD_DC_DCN def_bool n help From ce18af9b5d7d0baad2ac3eea4c732d2bf128d690 Mon Sep 17 00:00:00 2001 From: Pavankumar Kondeti Date: Thu, 8 Dec 2022 16:16:37 +0530 Subject: [PATCH 120/186] ANDROID: dma-buf: don't re-purpose kobject as work_struct The commit 5aec776ef8c9 ("BACKPORT: ANDROID: dma-buf: Move sysfs work out of DMA-BUF export path) re-purposed kobject as work_struct temporarily to create the sysfs entries asynchronously. The author knows what he is doing and rightly added a build assert if kobject struct size is smaller than the work_struct size. We are hitting this build assert on a non-GKI platform where CONFIG_ANDROID_KABI_RESERVE is not set. Fix this problem by allocating a new union with dma_buf_sysfs_entry structure and temporary structure as members. We only end up allocating more memory (because of union) only when kobject size is smaller than work_struct which the original patch any way assumed would never be true. 
Bug: 261818147 Change-Id: Ifb089bf80d8a3a44ece9f05fc0b99ee76cb11645 Signed-off-by: Pavankumar Kondeti --- drivers/dma-buf/dma-buf-sysfs-stats.c | 44 +++++++++++++++------------ 1 file changed, 24 insertions(+), 20 deletions(-) diff --git a/drivers/dma-buf/dma-buf-sysfs-stats.c b/drivers/dma-buf/dma-buf-sysfs-stats.c index 7ae64cd3dbb8..5c8efa55e3aa 100644 --- a/drivers/dma-buf/dma-buf-sysfs-stats.c +++ b/drivers/dma-buf/dma-buf-sysfs-stats.c @@ -142,15 +142,21 @@ void dma_buf_uninit_sysfs_statistics(void) kset_unregister(dma_buf_stats_kset); } +struct dma_buf_create_sysfs_entry { + struct dma_buf *dmabuf; + struct work_struct work; +}; + +union dma_buf_create_sysfs_work_entry { + struct dma_buf_create_sysfs_entry create_entry; + struct dma_buf_sysfs_entry sysfs_entry; +}; + static void sysfs_add_workfn(struct work_struct *work) { - /* The ABI would have to change for this to be false, but let's be paranoid. */ - _Static_assert(sizeof(struct kobject) >= sizeof(struct work_struct), - "kobject is smaller than work_struct"); - - struct dma_buf_sysfs_entry *sysfs_entry = - container_of((struct kobject *)work, struct dma_buf_sysfs_entry, kobj); - struct dma_buf *dmabuf = sysfs_entry->dmabuf; + struct dma_buf_create_sysfs_entry *create_entry = + container_of(work, struct dma_buf_create_sysfs_entry, work); + struct dma_buf *dmabuf = create_entry->dmabuf; /* * A dmabuf is ref-counted via its file member. If this handler holds the only @@ -161,6 +167,7 @@ static void sysfs_add_workfn(struct work_struct *work) * is released, and that can't happen until the end of this function. */ if (file_count(dmabuf->file) > 1) { + dmabuf->sysfs_entry->dmabuf = dmabuf; /* * kobject_init_and_add expects kobject to be zero-filled, but we have populated it * to trigger this work function. 
@@ -185,8 +192,8 @@ static void sysfs_add_workfn(struct work_struct *work) int dma_buf_stats_setup(struct dma_buf *dmabuf) { - struct dma_buf_sysfs_entry *sysfs_entry; - struct work_struct *work; + struct dma_buf_create_sysfs_entry *create_entry; + union dma_buf_create_sysfs_work_entry *work_entry; if (!dmabuf || !dmabuf->file) return -EINVAL; @@ -196,21 +203,18 @@ int dma_buf_stats_setup(struct dma_buf *dmabuf) return -EINVAL; } - sysfs_entry = kmalloc(sizeof(struct dma_buf_sysfs_entry), GFP_KERNEL); - if (!sysfs_entry) + work_entry = kmalloc(sizeof(union dma_buf_create_sysfs_work_entry), GFP_KERNEL); + if (!work_entry) return -ENOMEM; - sysfs_entry->dmabuf = dmabuf; - dmabuf->sysfs_entry = sysfs_entry; + dmabuf->sysfs_entry = &work_entry->sysfs_entry; - /* - * The use of kobj as a work_struct is an ugly hack - * to avoid an ABI break in this frozen kernel. - */ - work = (struct work_struct *)&dmabuf->sysfs_entry->kobj; - INIT_WORK(work, sysfs_add_workfn); + create_entry = &work_entry->create_entry; + create_entry->dmabuf = dmabuf; + + INIT_WORK(&create_entry->work, sysfs_add_workfn); get_dma_buf(dmabuf); /* This reference will be dropped in sysfs_add_workfn. */ - schedule_work(work); + schedule_work(&create_entry->work); return 0; } From 8ad88eae4b6914c512569b10c19e04754def1746 Mon Sep 17 00:00:00 2001 From: Pavankumar Kondeti Date: Thu, 8 Dec 2022 15:05:08 +0530 Subject: [PATCH 121/186] ANDROID: dma-buf: Fix build breakage with !CONFIG_DMABUF_SYSFS_STATS The commit c5589c7eec41 ("ANDROID: dma-buf: Add vendor hook for deferred dmabuf sysfs stats release") introduced a build breakage on non-GKI targets which don't have CONFIG_DMABUF_SYSFS_STATS enabled. It is due to invisibility of struct dma_buf_sysfs_entry in the trace hook header file. We can get away with it by moving the header inclusion from trace hook header to vendor hooks driver. 
Bug: 261818075 Change-Id: Ibb79bd67c9f1b36fe2b5d569ab9369f376a78b77 Signed-off-by: Pavankumar Kondeti --- drivers/android/vendor_hooks.c | 1 + include/trace/hooks/dmabuf.h | 6 ------ 2 files changed, 1 insertion(+), 6 deletions(-) diff --git a/drivers/android/vendor_hooks.c b/drivers/android/vendor_hooks.c index 12066949e275..91288f59a390 100644 --- a/drivers/android/vendor_hooks.c +++ b/drivers/android/vendor_hooks.c @@ -7,6 +7,7 @@ */ #ifndef __GENKSYMS__ +#include #include #endif diff --git a/include/trace/hooks/dmabuf.h b/include/trace/hooks/dmabuf.h index 8963742273bc..0182960efca2 100644 --- a/include/trace/hooks/dmabuf.h +++ b/include/trace/hooks/dmabuf.h @@ -11,13 +11,7 @@ #include -#ifdef __GENKSYMS__ struct dma_buf_sysfs_entry; -#else -/* struct dma_buf_sysfs_entry */ -#include -#endif - DECLARE_RESTRICTED_HOOK(android_rvh_dma_buf_stats_teardown, TP_PROTO(struct dma_buf_sysfs_entry *sysfs_entry, bool *skip_sysfs_release), TP_ARGS(sysfs_entry, skip_sysfs_release), 1); From d37e563bff54a64d9cb2a7402951a733ba1d1e49 Mon Sep 17 00:00:00 2001 From: Dan Vacura Date: Thu, 8 Dec 2022 15:30:21 -0600 Subject: [PATCH 122/186] ANDROID: usb: gadget: uvc: remove duplicate code in unbind The uvc_function_unbind() was calling the same code two times, increasing a timeout that may occur. The duplicate code looks to have come in during the merge of 5.10.117. Remove the duplicate code. 
Bug: 261895714 Change-Id: I8957048bfad4a9e01baea033de9b628362b2d991 Signed-off-by: Dan Vacura --- drivers/usb/gadget/function/f_uvc.c | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/drivers/usb/gadget/function/f_uvc.c b/drivers/usb/gadget/function/f_uvc.c index 607ea0c1bfb3..1fc00cce83fb 100644 --- a/drivers/usb/gadget/function/f_uvc.c +++ b/drivers/usb/gadget/function/f_uvc.c @@ -906,18 +906,6 @@ static void uvc_function_unbind(struct usb_configuration *c, uvcg_dbg(f, "done waiting with ret: %ld\n", wait_ret); } - /* If we know we're connected via v4l2, then there should be a cleanup - * of the device from userspace either via UVC_EVENT_DISCONNECT or - * though the video device removal uevent. Allow some time for the - * application to close out before things get deleted. - */ - if (uvc->func_connected) { - uvcg_dbg(f, "waiting for clean disconnect\n"); - wait_ret = wait_event_interruptible_timeout(uvc->func_connected_queue, - uvc->func_connected == false, msecs_to_jiffies(500)); - uvcg_dbg(f, "done waiting with ret: %ld\n", wait_ret); - } - device_remove_file(&uvc->vdev.dev, &dev_attr_function_name); video_unregister_device(&uvc->vdev); v4l2_device_unregister(&uvc->v4l2_dev); From 65654da06db8f6a7e88151240d354b233e086871 Mon Sep 17 00:00:00 2001 From: Pavel Machek Date: Fri, 20 Aug 2021 10:26:24 +0200 Subject: [PATCH 123/186] UPSTREAM: Documentation: leds: standartizing LED names We have a list of valid functions, but LED names in sysfs are still far from being consistent. Create list of "well known" LED names so we nudge people towards using same LED names (except color) for same functionality. 
Signed-off-by: Pavel Machek Bug: 260685629 (cherry picked from commit 09f1273064eea23ec41fb206f6eccc2bf79d1fa1) Change-Id: Iea12a9c230d6cd072b0f4fd4e0c616348173dd53 Signed-off-by: Farid Chahla --- Documentation/leds/well-known-leds.txt | 58 ++++++++++++++++++++++++++ 1 file changed, 58 insertions(+) create mode 100644 Documentation/leds/well-known-leds.txt diff --git a/Documentation/leds/well-known-leds.txt b/Documentation/leds/well-known-leds.txt new file mode 100644 index 000000000000..4a8b9dc4bf52 --- /dev/null +++ b/Documentation/leds/well-known-leds.txt @@ -0,0 +1,58 @@ +-*- org -*- + +It is somehow important to provide consistent interface to the +userland. LED devices have one problem there, and that is naming of +directories in /sys/class/leds. It would be nice if userland would +just know right "name" for given LED function, but situation got more +complex. + +Anyway, if backwards compatibility is not an issue, new code should +use one of the "good" names from this list, and you should extend the +list where applicable. + +Legacy names are listed, too; in case you are writing application that +wants to use particular feature, you should probe for good name, first, +but then try the legacy ones, too. + +Notice there's a list of functions in include/dt-bindings/leds/common.h . + +* Keyboards + +Good: "input*:*:capslock" +Good: "input*:*:scrolllock" +Good: "input*:*:numlock" +Legacy: "shift-key-light" (Motorola Droid 4, capslock) + +Set of common keyboard LEDs, going back to PC AT or so. + +Legacy: "tpacpi::thinklight" (IBM/Lenovo Thinkpads) +Legacy: "lp5523:kb{1,2,3,4,5,6}" (Nokia N900) + +Frontlight/backlight of main keyboard. + +Legacy: "button-backlight" (Motorola Droid 4) + +Some phones have touch buttons below screen; it is different from main +keyboard. And this is their backlight. + +* Sound subsystem + +Good: "platform:*:mute" +Good: "platform:*:micmute" + +LEDs on notebook body, indicating that sound input / output is muted. 
+ +* System notification + +Legacy: "status-led:{red,green,blue}" (Motorola Droid 4) +Legacy: "lp5523:{r,g,b}" (Nokia N900) + +Phones usually have multi-color status LED. + +* Power management + +Good: "platform:*:charging" (allwinner sun50i) + +* Screen + +Good: ":backlight" (Motorola Droid 4) From e1cd3ffe478871deb44f1ce2fbe6a6c8e7e08ab3 Mon Sep 17 00:00:00 2001 From: Roderick Colenbrander Date: Wed, 8 Sep 2021 09:55:37 -0700 Subject: [PATCH 124/186] UPSTREAM: HID: playstation: expose DualSense lightbar through a multi-color LED. The DualSense lightbar has so far been supported, but it was not yet adjustable from user space. This patch exposes it through a multi-color LED. Signed-off-by: Roderick Colenbrander Signed-off-by: Jiri Kosina Bug: 260685629 (cherry picked from commit fc97b4d6a1a6d418fd4053fd7716eca746fdd163) Change-Id: I48204113da804b13ad5bed2f651a5826ab5a86f7 Signed-off-by: Farid Chahla --- drivers/hid/hid-playstation.c | 72 +++++++++++++++++++++++++++++++++++ 1 file changed, 72 insertions(+) diff --git a/drivers/hid/hid-playstation.c b/drivers/hid/hid-playstation.c index ab7c82c2e886..ff2fc315a89d 100644 --- a/drivers/hid/hid-playstation.c +++ b/drivers/hid/hid-playstation.c @@ -11,6 +11,8 @@ #include #include #include +#include +#include #include #include @@ -38,6 +40,7 @@ struct ps_device { uint8_t battery_capacity; int battery_status; + const char *input_dev_name; /* Name of primary input device. */ uint8_t mac_address[6]; /* Note: stored in little endian order. */ uint32_t hw_version; uint32_t fw_version; @@ -147,6 +150,7 @@ struct dualsense { uint8_t motor_right; /* RGB lightbar */ + struct led_classdev_mc lightbar; bool update_lightbar; uint8_t lightbar_red; uint8_t lightbar_green; @@ -288,6 +292,8 @@ static const struct {int x; int y; } ps_gamepad_hat_mapping[] = { {0, 0}, }; +static void dualsense_set_lightbar(struct dualsense *ds, uint8_t red, uint8_t green, uint8_t blue); + /* * Add a new ps_device to ps_devices if it doesn't exist. 
* Return error on duplicate device, which can happen if the same @@ -525,6 +531,45 @@ static int ps_get_report(struct hid_device *hdev, uint8_t report_id, uint8_t *bu return 0; } +/* Register a DualSense/DualShock4 RGB lightbar represented by a multicolor LED. */ +static int ps_lightbar_register(struct ps_device *ps_dev, struct led_classdev_mc *lightbar_mc_dev, + int (*brightness_set)(struct led_classdev *, enum led_brightness)) +{ + struct hid_device *hdev = ps_dev->hdev; + struct mc_subled *mc_led_info; + struct led_classdev *led_cdev; + int ret; + + mc_led_info = devm_kmalloc_array(&hdev->dev, 3, sizeof(*mc_led_info), + GFP_KERNEL | __GFP_ZERO); + if (!mc_led_info) + return -ENOMEM; + + mc_led_info[0].color_index = LED_COLOR_ID_RED; + mc_led_info[1].color_index = LED_COLOR_ID_GREEN; + mc_led_info[2].color_index = LED_COLOR_ID_BLUE; + + lightbar_mc_dev->subled_info = mc_led_info; + lightbar_mc_dev->num_colors = 3; + + led_cdev = &lightbar_mc_dev->led_cdev; + led_cdev->name = devm_kasprintf(&hdev->dev, GFP_KERNEL, "%s:rgb:indicator", + ps_dev->input_dev_name); + if (!led_cdev->name) + return -ENOMEM; + led_cdev->brightness = 255; + led_cdev->max_brightness = 255; + led_cdev->brightness_set_blocking = brightness_set; + + ret = devm_led_classdev_multicolor_register(&hdev->dev, lightbar_mc_dev); + if (ret < 0) { + hid_err(hdev, "Cannot register multicolor LED device\n"); + return ret; + } + + return 0; +} + static struct input_dev *ps_sensors_create(struct hid_device *hdev, int accel_range, int accel_res, int gyro_range, int gyro_res) { @@ -761,6 +806,22 @@ err_free: return ret; } +static int dualsense_lightbar_set_brightness(struct led_classdev *cdev, + enum led_brightness brightness) +{ + struct led_classdev_mc *mc_cdev = lcdev_to_mccdev(cdev); + struct dualsense *ds = container_of(mc_cdev, struct dualsense, lightbar); + uint8_t red, green, blue; + + led_mc_calc_color_components(mc_cdev, brightness); + red = mc_cdev->subled_info[0].brightness; + green = 
mc_cdev->subled_info[1].brightness; + blue = mc_cdev->subled_info[2].brightness; + + dualsense_set_lightbar(ds, red, green, blue); + return 0; +} + static void dualsense_init_output_report(struct dualsense *ds, struct dualsense_output_report *rp, void *buf) { @@ -1106,10 +1167,14 @@ static int dualsense_reset_leds(struct dualsense *ds) static void dualsense_set_lightbar(struct dualsense *ds, uint8_t red, uint8_t green, uint8_t blue) { + unsigned long flags; + + spin_lock_irqsave(&ds->base.lock, flags); ds->update_lightbar = true; ds->lightbar_red = red; ds->lightbar_green = green; ds->lightbar_blue = blue; + spin_unlock_irqrestore(&ds->base.lock, flags); schedule_work(&ds->output_worker); } @@ -1196,6 +1261,8 @@ static struct ps_device *dualsense_create(struct hid_device *hdev) ret = PTR_ERR(ds->gamepad); goto err; } + /* Use gamepad input device name as primary device name for e.g. LEDs */ + ps_dev->input_dev_name = dev_name(&ds->gamepad->dev); ds->sensors = ps_sensors_create(hdev, DS_ACC_RANGE, DS_ACC_RES_PER_G, DS_GYRO_RANGE, DS_GYRO_RES_PER_DEG_S); @@ -1223,6 +1290,11 @@ static struct ps_device *dualsense_create(struct hid_device *hdev) if (ret) goto err; + ret = ps_lightbar_register(ps_dev, &ds->lightbar, dualsense_lightbar_set_brightness); + if (ret) + goto err; + + /* Set default lightbar color. */ dualsense_set_lightbar(ds, 0, 0, 128); /* blue */ ret = ps_device_set_player_id(ps_dev); From a70e598cef39bf4c2df855832e3d5e121b57341d Mon Sep 17 00:00:00 2001 From: Roderick Colenbrander Date: Wed, 8 Sep 2021 09:55:38 -0700 Subject: [PATCH 125/186] UPSTREAM: leds: add new LED_FUNCTION_PLAYER for player LEDs for game controllers. Player LEDs are commonly found on game controllers from Nintendo and Sony to indicate a player ID across a number of LEDs. For example, "Player 2" might be indicated as "-x--" on a device with 4 LEDs where "x" means on. This patch introduces LED_FUNCTION_PLAYER1-5 defines to properly indicate player LEDs from the kernel. 
Until now there was no good standard, which resulted in inconsistent behavior across xpad, hid-sony, hid-wiimote and other drivers. Moving forward new drivers should use LED_FUNCTION_PLAYERx. Note: management of Player IDs is left to user space, though a kernel driver may pick a default value. Signed-off-by: Roderick Colenbrander Acked-by: Pavel Machek Signed-off-by: Jiri Kosina Bug: 260685629 (cherry picked from commit 61177c088a57bed259122f3c7bc6d61984936a12) Change-Id: Ie1de4d66304bb25fc2c9fcdb1ec9b7589ad9e7ac Signed-off-by: Farid Chahla --- Documentation/leds/well-known-leds.txt | 14 ++++++++++++++ include/dt-bindings/leds/common.h | 7 +++++++ 2 files changed, 21 insertions(+) diff --git a/Documentation/leds/well-known-leds.txt b/Documentation/leds/well-known-leds.txt index 4a8b9dc4bf52..2160382c86be 100644 --- a/Documentation/leds/well-known-leds.txt +++ b/Documentation/leds/well-known-leds.txt @@ -16,6 +16,20 @@ but then try the legacy ones, too. Notice there's a list of functions in include/dt-bindings/leds/common.h . +* Gamepads and joysticks + +Game controllers may feature LEDs to indicate a player number. This is commonly +used on game consoles in which multiple controllers can be connected to a system. +The "player LEDs" are then programmed with a pattern to indicate a particular +player. For example, a game controller with 4 LEDs, may be programmed with "x---" +to indicate player 1, "-x--" to indicate player 2 etcetera where "x" means on. +Input drivers can utilize the LED class to expose the individual player LEDs +of a game controller using the function "player". +Note: tracking and management of Player IDs is the responsibility of user space, +though drivers may pick a default value. 
+ +Good: "input*:*:player-{1,2,3,4,5} + * Keyboards Good: "input*:*:capslock" diff --git a/include/dt-bindings/leds/common.h b/include/dt-bindings/leds/common.h index 52b619d44ba2..3be89a7c20a9 100644 --- a/include/dt-bindings/leds/common.h +++ b/include/dt-bindings/leds/common.h @@ -60,6 +60,13 @@ #define LED_FUNCTION_MICMUTE "micmute" #define LED_FUNCTION_MUTE "mute" +/* Used for player LEDs as found on game controllers from e.g. Nintendo, Sony. */ +#define LED_FUNCTION_PLAYER1 "player-1" +#define LED_FUNCTION_PLAYER2 "player-2" +#define LED_FUNCTION_PLAYER3 "player-3" +#define LED_FUNCTION_PLAYER4 "player-4" +#define LED_FUNCTION_PLAYER5 "player-5" + /* Miscelleaus functions. Use functions above if you can. */ #define LED_FUNCTION_ACTIVITY "activity" #define LED_FUNCTION_ALARM "alarm" From f7901b46a2a79bcf10e46384ca32f871f8b3ea01 Mon Sep 17 00:00:00 2001 From: Roderick Colenbrander Date: Wed, 8 Sep 2021 09:55:39 -0700 Subject: [PATCH 126/186] UPSTREAM: HID: playstation: expose DualSense player LEDs through LED class. The DualSense player LEDs were so far not adjustable from user-space. This patch exposes each LED individually through the LED class. Each LED uses the new 'player' function resulting in a name like: 'inputX:white:player-1' for the first LED. 
Signed-off-by: Roderick Colenbrander Signed-off-by: Jiri Kosina Bug: 260685629 (cherry picked from commit 8c0ab553b072025530308f74b2c0223ec50dffe5) Change-Id: I49c699a99b0b8a7bb7980560e3ea7a12faf646aa Signed-off-by: Farid Chahla --- drivers/hid/hid-playstation.c | 85 ++++++++++++++++++++++++++++++++++- 1 file changed, 84 insertions(+), 1 deletion(-) diff --git a/drivers/hid/hid-playstation.c b/drivers/hid/hid-playstation.c index ff2fc315a89d..5cdfa71d1563 100644 --- a/drivers/hid/hid-playstation.c +++ b/drivers/hid/hid-playstation.c @@ -56,6 +56,13 @@ struct ps_calibration_data { int sens_denom; }; +struct ps_led_info { + const char *name; + const char *color; + enum led_brightness (*brightness_get)(struct led_classdev *cdev); + int (*brightness_set)(struct led_classdev *cdev, enum led_brightness); +}; + /* Seed values for DualShock4 / DualSense CRC32 for different report types. */ #define PS_INPUT_CRC32_SEED 0xA1 #define PS_OUTPUT_CRC32_SEED 0xA2 @@ -531,6 +538,32 @@ static int ps_get_report(struct hid_device *hdev, uint8_t report_id, uint8_t *bu return 0; } +static int ps_led_register(struct ps_device *ps_dev, struct led_classdev *led, + const struct ps_led_info *led_info) +{ + int ret; + + led->name = devm_kasprintf(&ps_dev->hdev->dev, GFP_KERNEL, + "%s:%s:%s", ps_dev->input_dev_name, led_info->color, led_info->name); + + if (!led->name) + return -ENOMEM; + + led->brightness = 0; + led->max_brightness = 1; + led->flags = LED_CORE_SUSPENDRESUME; + led->brightness_get = led_info->brightness_get; + led->brightness_set_blocking = led_info->brightness_set; + + ret = devm_led_classdev_register(&ps_dev->hdev->dev, led); + if (ret) { + hid_err(ps_dev->hdev, "Failed to register LED %s: %d\n", led_info->name, ret); + return ret; + } + + return 0; +} + /* Register a DualSense/DualShock4 RGB lightbar represented by a multicolor LED. 
*/ static int ps_lightbar_register(struct ps_device *ps_dev, struct led_classdev_mc *lightbar_mc_dev, int (*brightness_set)(struct led_classdev *, enum led_brightness)) @@ -822,6 +855,35 @@ static int dualsense_lightbar_set_brightness(struct led_classdev *cdev, return 0; } +static enum led_brightness dualsense_player_led_get_brightness(struct led_classdev *led) +{ + struct hid_device *hdev = to_hid_device(led->dev->parent); + struct dualsense *ds = hid_get_drvdata(hdev); + + return !!(ds->player_leds_state & BIT(led - ds->player_leds)); +} + +static int dualsense_player_led_set_brightness(struct led_classdev *led, enum led_brightness value) +{ + struct hid_device *hdev = to_hid_device(led->dev->parent); + struct dualsense *ds = hid_get_drvdata(hdev); + unsigned long flags; + unsigned int led_index; + + spin_lock_irqsave(&ds->base.lock, flags); + + led_index = led - ds->player_leds; + if (value == LED_OFF) + ds->player_leds_state &= ~BIT(led_index); + else + ds->player_leds_state |= BIT(led_index); + + ds->update_player_leds = true; + spin_unlock_irqrestore(&ds->base.lock, flags); + + schedule_work(&ds->output_worker); +} + static void dualsense_init_output_report(struct dualsense *ds, struct dualsense_output_report *rp, void *buf) { @@ -1207,7 +1269,20 @@ static struct ps_device *dualsense_create(struct hid_device *hdev) struct dualsense *ds; struct ps_device *ps_dev; uint8_t max_output_report_size; - int ret; + int i, ret; + + static const struct ps_led_info player_leds_info[] = { + { LED_FUNCTION_PLAYER1, "white", dualsense_player_led_get_brightness, + dualsense_player_led_set_brightness }, + { LED_FUNCTION_PLAYER2, "white", dualsense_player_led_get_brightness, + dualsense_player_led_set_brightness }, + { LED_FUNCTION_PLAYER3, "white", dualsense_player_led_get_brightness, + dualsense_player_led_set_brightness }, + { LED_FUNCTION_PLAYER4, "white", dualsense_player_led_get_brightness, + dualsense_player_led_set_brightness }, + { LED_FUNCTION_PLAYER5, "white", 
dualsense_player_led_get_brightness, + dualsense_player_led_set_brightness } + }; ds = devm_kzalloc(&hdev->dev, sizeof(*ds), GFP_KERNEL); if (!ds) @@ -1297,6 +1372,14 @@ static struct ps_device *dualsense_create(struct hid_device *hdev) /* Set default lightbar color. */ dualsense_set_lightbar(ds, 0, 0, 128); /* blue */ + for (i = 0; i < ARRAY_SIZE(player_leds_info); i++) { + const struct ps_led_info *led_info = &player_leds_info[i]; + + ret = ps_led_register(ps_dev, &ds->player_leds[i], led_info); + if (ret < 0) + goto err; + } + ret = ps_device_set_player_id(ps_dev); if (ret) { hid_err(hdev, "Failed to assign player id for DualSense: %d\n", ret); From 62964653b74c78ca511bba427fa3cd7268b542a3 Mon Sep 17 00:00:00 2001 From: Jiri Kosina Date: Wed, 27 Oct 2021 10:04:10 +0200 Subject: [PATCH 127/186] UPSTREAM: HID: playstation: fix return from dualsense_player_led_set_brightness() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit brightness_set_blocking() callback expects function returning int. 
This fixes the following build failure: drivers/hid/hid-playstation.c: In function ‘dualsense_player_led_set_brightness’: drivers/hid/hid-playstation.c:885:1: error: no return statement in function returning non-void [-Werror=return-type] } ^ Signed-off-by: Jiri Kosina Bug: 260685629 (cherry picked from commit 3c92cb4cb60c71b574e47108ead8b6f0470850db) Change-Id: Id16b960826a26ac22c1a14572444f9af29689ed6 Signed-off-by: Farid Chahla --- drivers/hid/hid-playstation.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/hid/hid-playstation.c b/drivers/hid/hid-playstation.c index 5cdfa71d1563..b1b5721b5d8f 100644 --- a/drivers/hid/hid-playstation.c +++ b/drivers/hid/hid-playstation.c @@ -882,6 +882,8 @@ static int dualsense_player_led_set_brightness(struct led_classdev *led, enum le spin_unlock_irqrestore(&ds->base.lock, flags); schedule_work(&ds->output_worker); + + return 0; } static void dualsense_init_output_report(struct dualsense *ds, struct dualsense_output_report *rp, From a301358cb5eac73b8857d50ab977e46625fd29c8 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Thu, 4 Aug 2022 13:30:52 +0200 Subject: [PATCH 128/186] UPSTREAM: HID: playstation: convert to use dev_groups There is no need for a driver to individually add/create device groups, the driver core will do it automatically for you. Convert the hid-playstation driver to use the dev_groups pointer instead of manually calling the driver core to create the group and have it be cleaned up later on by the devm core. 
Cc: Roderick Colenbrander Cc: Jiri Kosina Cc: Benjamin Tissoires Cc: linux-input@vger.kernel.org Cc: linux-kernel@vger.kernel.org Signed-off-by: Greg Kroah-Hartman Acked-by: Roderick Colenbrander Signed-off-by: Jiri Kosina Bug: 260685629 (cherry picked from commit b4a9af9be628e4f9d09997e0bdef30f6718e88ec) Change-Id: I516a1b0ef7f4f8545e0c1b9485b49879dd7a3136 Signed-off-by: Farid Chahla --- drivers/hid/hid-playstation.c | 16 +++++----------- 1 file changed, 5 insertions(+), 11 deletions(-) diff --git a/drivers/hid/hid-playstation.c b/drivers/hid/hid-playstation.c index b1b5721b5d8f..40050eb85c0a 100644 --- a/drivers/hid/hid-playstation.c +++ b/drivers/hid/hid-playstation.c @@ -692,15 +692,12 @@ static ssize_t hardware_version_show(struct device *dev, static DEVICE_ATTR_RO(hardware_version); -static struct attribute *ps_device_attributes[] = { +static struct attribute *ps_device_attrs[] = { &dev_attr_firmware_version.attr, &dev_attr_hardware_version.attr, NULL }; - -static const struct attribute_group ps_device_attribute_group = { - .attrs = ps_device_attributes, -}; +ATTRIBUTE_GROUPS(ps_device); static int dualsense_get_calibration_data(struct dualsense *ds) { @@ -1448,12 +1445,6 @@ static int ps_probe(struct hid_device *hdev, const struct hid_device_id *id) } } - ret = devm_device_add_group(&hdev->dev, &ps_device_attribute_group); - if (ret) { - hid_err(hdev, "Failed to register sysfs nodes.\n"); - goto err_close; - } - return ret; err_close: @@ -1487,6 +1478,9 @@ static struct hid_driver ps_driver = { .probe = ps_probe, .remove = ps_remove, .raw_event = ps_raw_event, + .driver = { + .dev_groups = ps_device_groups, + }, }; static int __init ps_init(void) From a3ea8fbc1fa4bc77c45bed8e65fb052b76fab1b0 Mon Sep 17 00:00:00 2001 From: Roderick Colenbrander Date: Mon, 10 Oct 2022 14:23:11 -0700 Subject: [PATCH 129/186] UPSTREAM: HID: playstation: stop DualSense output work on remove. 
Ensure we don't schedule any new output work on removal and wait for any existing work to complete. If we don't do this e.g. rumble work can get queued during deletion and we trigger a kernel crash. Signed-off-by: Roderick Colenbrander CC: stable@vger.kernel.org Signed-off-by: Benjamin Tissoires Link: https://lore.kernel.org/r/20221010212313.78275-2-roderick.colenbrander@sony.com Bug: 260685629 (cherry picked from commit 182934a1e93b17f4edf71f4fcc8d19b19a6fe67a) Change-Id: I40cadfde5765cdabf45def929860258d6019bf10 Signed-off-by: Farid Chahla --- drivers/hid/hid-playstation.c | 41 ++++++++++++++++++++++++++++++----- 1 file changed, 36 insertions(+), 5 deletions(-) diff --git a/drivers/hid/hid-playstation.c b/drivers/hid/hid-playstation.c index 40050eb85c0a..d727cd2bf44e 100644 --- a/drivers/hid/hid-playstation.c +++ b/drivers/hid/hid-playstation.c @@ -46,6 +46,7 @@ struct ps_device { uint32_t fw_version; int (*parse_report)(struct ps_device *dev, struct hid_report *report, u8 *data, int size); + void (*remove)(struct ps_device *dev); }; /* Calibration data for playstation motion sensors. */ @@ -174,6 +175,7 @@ struct dualsense { struct led_classdev player_leds[5]; struct work_struct output_worker; + bool output_worker_initialized; void *output_report_dmabuf; uint8_t output_seq; /* Sequence number for output report. 
*/ }; @@ -299,6 +301,7 @@ static const struct {int x; int y; } ps_gamepad_hat_mapping[] = { {0, 0}, }; +static inline void dualsense_schedule_work(struct dualsense *ds); static void dualsense_set_lightbar(struct dualsense *ds, uint8_t red, uint8_t green, uint8_t blue); /* @@ -789,6 +792,7 @@ err_free: return ret; } + static int dualsense_get_firmware_info(struct dualsense *ds) { uint8_t *buf; @@ -878,7 +882,7 @@ static int dualsense_player_led_set_brightness(struct led_classdev *led, enum le ds->update_player_leds = true; spin_unlock_irqrestore(&ds->base.lock, flags); - schedule_work(&ds->output_worker); + dualsense_schedule_work(ds); return 0; } @@ -922,6 +926,16 @@ static void dualsense_init_output_report(struct dualsense *ds, struct dualsense_ } } +static inline void dualsense_schedule_work(struct dualsense *ds) +{ + unsigned long flags; + + spin_lock_irqsave(&ds->base.lock, flags); + if (ds->output_worker_initialized) + schedule_work(&ds->output_worker); + spin_unlock_irqrestore(&ds->base.lock, flags); +} + /* * Helper function to send DualSense output reports. Applies a CRC at the end of a report * for Bluetooth reports. @@ -1082,7 +1096,7 @@ static int dualsense_parse_report(struct ps_device *ps_dev, struct hid_report *r spin_unlock_irqrestore(&ps_dev->lock, flags); /* Schedule updating of microphone state at hardware level. 
*/ - schedule_work(&ds->output_worker); + dualsense_schedule_work(ds); } ds->last_btn_mic_state = btn_mic_state; @@ -1197,10 +1211,22 @@ static int dualsense_play_effect(struct input_dev *dev, void *data, struct ff_ef ds->motor_right = effect->u.rumble.weak_magnitude / 256; spin_unlock_irqrestore(&ds->base.lock, flags); - schedule_work(&ds->output_worker); + dualsense_schedule_work(ds); return 0; } +static void dualsense_remove(struct ps_device *ps_dev) +{ + struct dualsense *ds = container_of(ps_dev, struct dualsense, base); + unsigned long flags; + + spin_lock_irqsave(&ds->base.lock, flags); + ds->output_worker_initialized = false; + spin_unlock_irqrestore(&ds->base.lock, flags); + + cancel_work_sync(&ds->output_worker); +} + static int dualsense_reset_leds(struct dualsense *ds) { struct dualsense_output_report report; @@ -1237,7 +1263,7 @@ static void dualsense_set_lightbar(struct dualsense *ds, uint8_t red, uint8_t gr ds->lightbar_blue = blue; spin_unlock_irqrestore(&ds->base.lock, flags); - schedule_work(&ds->output_worker); + dualsense_schedule_work(ds); } static void dualsense_set_player_leds(struct dualsense *ds) @@ -1260,7 +1286,7 @@ static void dualsense_set_player_leds(struct dualsense *ds) ds->update_player_leds = true; ds->player_leds_state = player_ids[player_id]; - schedule_work(&ds->output_worker); + dualsense_schedule_work(ds); } static struct ps_device *dualsense_create(struct hid_device *hdev) @@ -1299,7 +1325,9 @@ static struct ps_device *dualsense_create(struct hid_device *hdev) ps_dev->battery_capacity = 100; /* initial value until parse_report. 
*/ ps_dev->battery_status = POWER_SUPPLY_STATUS_UNKNOWN; ps_dev->parse_report = dualsense_parse_report; + ps_dev->remove = dualsense_remove; INIT_WORK(&ds->output_worker, dualsense_output_worker); + ds->output_worker_initialized = true; hid_set_drvdata(hdev, ds); max_output_report_size = sizeof(struct dualsense_output_report_bt); @@ -1461,6 +1489,9 @@ static void ps_remove(struct hid_device *hdev) ps_devices_list_remove(dev); ps_device_release_player_id(dev); + if (dev->remove) + dev->remove(dev); + hid_hw_close(hdev); hid_hw_stop(hdev); } From 63b2567f9de8f1b3b2b33a6af647e571e72a96a1 Mon Sep 17 00:00:00 2001 From: Roderick Colenbrander Date: Mon, 10 Oct 2022 14:23:12 -0700 Subject: [PATCH 130/186] UPSTREAM: HID: playstation: add initial DualSense Edge controller support Provide initial support for the DualSense Edge controller. This brings support up to the level of the original DualSense, but won't yet provide support for new features (e.g. reprogrammable buttons). Signed-off-by: Roderick Colenbrander CC: stable@vger.kernel.org Signed-off-by: Benjamin Tissoires Link: https://lore.kernel.org/r/20221010212313.78275-3-roderick.colenbrander@sony.com Bug: 260685629 (cherry picked from commit b8a968efab301743fd659b5649c5d7d3e30e63a6) Change-Id: I5b95de806e823085d1144f016d8cfd76e4a933ef Signed-off-by: Farid Chahla --- drivers/hid/hid-ids.h | 1 + drivers/hid/hid-playstation.c | 5 ++++- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/hid/hid-ids.h b/drivers/hid/hid-ids.h index 07ee87f828e4..6be1658f7f8b 100644 --- a/drivers/hid/hid-ids.h +++ b/drivers/hid/hid-ids.h @@ -1087,6 +1087,7 @@ #define USB_DEVICE_ID_SONY_PS4_CONTROLLER_2 0x09cc #define USB_DEVICE_ID_SONY_PS4_CONTROLLER_DONGLE 0x0ba0 #define USB_DEVICE_ID_SONY_PS5_CONTROLLER 0x0ce6 +#define USB_DEVICE_ID_SONY_PS5_CONTROLLER_2 0x0df2 #define USB_DEVICE_ID_SONY_MOTION_CONTROLLER 0x03d5 #define USB_DEVICE_ID_SONY_NAVIGATION_CONTROLLER 0x042f #define USB_DEVICE_ID_SONY_BUZZ_CONTROLLER 0x0002 diff 
--git a/drivers/hid/hid-playstation.c b/drivers/hid/hid-playstation.c index d727cd2bf44e..396356b6760a 100644 --- a/drivers/hid/hid-playstation.c +++ b/drivers/hid/hid-playstation.c @@ -1464,7 +1464,8 @@ static int ps_probe(struct hid_device *hdev, const struct hid_device_id *id) goto err_stop; } - if (hdev->product == USB_DEVICE_ID_SONY_PS5_CONTROLLER) { + if (hdev->product == USB_DEVICE_ID_SONY_PS5_CONTROLLER || + hdev->product == USB_DEVICE_ID_SONY_PS5_CONTROLLER_2) { dev = dualsense_create(hdev); if (IS_ERR(dev)) { hid_err(hdev, "Failed to create dualsense.\n"); @@ -1499,6 +1500,8 @@ static void ps_remove(struct hid_device *hdev) static const struct hid_device_id ps_devices[] = { { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_SONY, USB_DEVICE_ID_SONY_PS5_CONTROLLER) }, { HID_USB_DEVICE(USB_VENDOR_ID_SONY, USB_DEVICE_ID_SONY_PS5_CONTROLLER) }, + { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_SONY, USB_DEVICE_ID_SONY_PS5_CONTROLLER_2) }, + { HID_USB_DEVICE(USB_VENDOR_ID_SONY, USB_DEVICE_ID_SONY_PS5_CONTROLLER_2) }, { } }; MODULE_DEVICE_TABLE(hid, ps_devices); From 4aa3cab588aa55d4e847755505d039c4ce2b056e Mon Sep 17 00:00:00 2001 From: Roderick Colenbrander Date: Mon, 10 Oct 2022 14:23:13 -0700 Subject: [PATCH 131/186] UPSTREAM: HID: playstation: support updated DualSense rumble mode. Newer DualSense firmware supports a revised classic rumble mode, which feels more similar to rumble as supported on previous PlayStation controllers. It has been made the default on PlayStation and non-PlayStation devices now (e.g. iOS and Windows). Default to this new mode when supported. 
Signed-off-by: Roderick Colenbrander Signed-off-by: Benjamin Tissoires Link: https://lore.kernel.org/r/20221010212313.78275-4-roderick.colenbrander@sony.com Bug: 260685629 (cherry picked from commit 9fecab247ed15e6145c126fc56ee1e89860741a7) Change-Id: Icd330111a4d1b1e76a04cd11c623d0982ce3d66f Signed-off-by: Farid Chahla --- drivers/hid/hid-playstation.c | 37 ++++++++++++++++++++++++++++++++++- 1 file changed, 36 insertions(+), 1 deletion(-) diff --git a/drivers/hid/hid-playstation.c b/drivers/hid/hid-playstation.c index 396356b6760a..0b58763bfd30 100644 --- a/drivers/hid/hid-playstation.c +++ b/drivers/hid/hid-playstation.c @@ -108,6 +108,9 @@ struct ps_led_info { #define DS_STATUS_CHARGING GENMASK(7, 4) #define DS_STATUS_CHARGING_SHIFT 4 +/* Feature version from DualSense Firmware Info report. */ +#define DS_FEATURE_VERSION(major, minor) ((major & 0xff) << 8 | (minor & 0xff)) + /* * Status of a DualSense touch point contact. * Contact IDs, with highest bit set are 'inactive' @@ -126,6 +129,7 @@ struct ps_led_info { #define DS_OUTPUT_VALID_FLAG1_RELEASE_LEDS BIT(3) #define DS_OUTPUT_VALID_FLAG1_PLAYER_INDICATOR_CONTROL_ENABLE BIT(4) #define DS_OUTPUT_VALID_FLAG2_LIGHTBAR_SETUP_CONTROL_ENABLE BIT(1) +#define DS_OUTPUT_VALID_FLAG2_COMPATIBLE_VIBRATION2 BIT(2) #define DS_OUTPUT_POWER_SAVE_CONTROL_MIC_MUTE BIT(4) #define DS_OUTPUT_LIGHTBAR_SETUP_LIGHT_OUT BIT(1) @@ -143,6 +147,9 @@ struct dualsense { struct input_dev *sensors; struct input_dev *touchpad; + /* Update version is used as a feature/capability version. */ + uint16_t update_version; + /* Calibration data for accelerometer and gyroscope. 
*/ struct ps_calibration_data accel_calib_data[3]; struct ps_calibration_data gyro_calib_data[3]; @@ -153,6 +160,7 @@ struct dualsense { uint32_t sensor_timestamp_us; /* Compatible rumble state */ + bool use_vibration_v2; bool update_rumble; uint8_t motor_left; uint8_t motor_right; @@ -812,6 +820,15 @@ static int dualsense_get_firmware_info(struct dualsense *ds) ds->base.hw_version = get_unaligned_le32(&buf[24]); ds->base.fw_version = get_unaligned_le32(&buf[28]); + /* Update version is some kind of feature version. It is distinct from + * the firmware version as there can be many different variations of a + * controller over time with the same physical shell, but with different + * PCBs and other internal changes. The update version (internal name) is + * used as a means to detect what features are available and change behavior. + * Note: the version is different between DualSense and DualSense Edge. + */ + ds->update_version = get_unaligned_le16(&buf[44]); + err_free: kfree(buf); return ret; @@ -974,7 +991,10 @@ static void dualsense_output_worker(struct work_struct *work) if (ds->update_rumble) { /* Select classic rumble style haptics and enable it. */ common->valid_flag0 |= DS_OUTPUT_VALID_FLAG0_HAPTICS_SELECT; - common->valid_flag0 |= DS_OUTPUT_VALID_FLAG0_COMPATIBLE_VIBRATION; + if (ds->use_vibration_v2) + common->valid_flag2 |= DS_OUTPUT_VALID_FLAG2_COMPATIBLE_VIBRATION2; + else + common->valid_flag0 |= DS_OUTPUT_VALID_FLAG0_COMPATIBLE_VIBRATION; common->motor_left = ds->motor_left; common->motor_right = ds->motor_right; ds->update_rumble = false; @@ -1348,6 +1368,21 @@ static struct ps_device *dualsense_create(struct hid_device *hdev) return ERR_PTR(ret); } + /* Original DualSense firmware simulated classic controller rumble through + * its new haptics hardware. It felt different from classic rumble users + * were used to. 
Since then new firmwares were introduced to change behavior + * and make this new 'v2' behavior default on PlayStation and other platforms. + * The original DualSense requires a new enough firmware as bundled with PS5 + * software released in 2021. DualSense Edge supports it out of the box. + * Both devices also support the old mode, but it is not really used. + */ + if (hdev->product == USB_DEVICE_ID_SONY_PS5_CONTROLLER) { + /* Feature version 2.21 introduced new vibration method. */ + ds->use_vibration_v2 = ds->update_version >= DS_FEATURE_VERSION(2, 21); + } else if (hdev->product == USB_DEVICE_ID_SONY_PS5_CONTROLLER_2) { + ds->use_vibration_v2 = true; + } + ret = ps_devices_list_add(ps_dev); if (ret) return ERR_PTR(ret); From 16c03440df4a1fc175b781ed7328d1af290b54e9 Mon Sep 17 00:00:00 2001 From: Farid Chahla Date: Wed, 14 Dec 2022 12:44:43 -0800 Subject: [PATCH 132/186] ANDROID: GKI: enable multicolor-led To enable newer version of DualSense driver, i.e. hid-playstation, we need to set LEDS_CLASS_MULTICOLOR to "y".
Bug: 260685629 Change-Id: I52b0b1b6a061457e009b62a6bd6b66a91c8c37a2 Signed-off-by: Farid Chahla --- arch/arm64/configs/gki_defconfig | 1 + arch/x86/configs/gki_defconfig | 1 + 2 files changed, 2 insertions(+) diff --git a/arch/arm64/configs/gki_defconfig b/arch/arm64/configs/gki_defconfig index da8ab23b9ce5..33b4ea90b6dd 100644 --- a/arch/arm64/configs/gki_defconfig +++ b/arch/arm64/configs/gki_defconfig @@ -513,6 +513,7 @@ CONFIG_MMC_CRYPTO=y CONFIG_MMC_SDHCI=y CONFIG_MMC_SDHCI_PLTFM=y CONFIG_LEDS_CLASS_FLASH=y +CONFIG_LEDS_CLASS_MULTICOLOR=y CONFIG_LEDS_TRIGGER_TIMER=y CONFIG_LEDS_TRIGGER_TRANSIENT=y CONFIG_EDAC=y diff --git a/arch/x86/configs/gki_defconfig b/arch/x86/configs/gki_defconfig index 14ea69c0d417..47edba3df354 100644 --- a/arch/x86/configs/gki_defconfig +++ b/arch/x86/configs/gki_defconfig @@ -464,6 +464,7 @@ CONFIG_MMC_CRYPTO=y CONFIG_MMC_SDHCI=y CONFIG_MMC_SDHCI_PLTFM=y CONFIG_LEDS_CLASS_FLASH=y +CONFIG_LEDS_CLASS_MULTICOLOR=y CONFIG_LEDS_TRIGGER_TIMER=y CONFIG_LEDS_TRIGGER_TRANSIENT=y CONFIG_EDAC=y From 134c1aae4311b6fb9823722647ab9c4ab554a152 Mon Sep 17 00:00:00 2001 From: Kalesh Singh Date: Mon, 19 Dec 2022 21:07:49 -0800 Subject: [PATCH 133/186] ANDROID: Make SPF aware of fast mremaps SPF attempts page faults without taking the mmap lock, but takes the PTL. If there is a concurrent fast mremap (at PMD/PUD level), this can lead to a UAF as fast mremap will only take the PTL locks at the PMD/PUD level. SPF cannot take the PTL locks at the larger subtree granularity since this introduces much contention in the page fault paths. To address the race: 1) Fast mremaps wait until there are no users of the VMA. 2) Speculative faults detect ongoing fast mremaps and fallback to conventional fault handling (taking mmap read lock). Since this race condition is very rare the performance impact is negligible. 
Bug: 263177905 Change-Id: If9755aa4261337fe180e3093a3cefaae8ac9ff1a Signed-off-by: Kalesh Singh --- include/linux/mm.h | 3 ++ mm/mmap.c | 30 ++++++++++++-- mm/mremap.c | 100 ++++++++++++++++++++++++++++++++++++++------- 3 files changed, 116 insertions(+), 17 deletions(-) diff --git a/include/linux/mm.h b/include/linux/mm.h index dfefcfa1d6a4..5de4309bfa14 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1758,6 +1758,9 @@ int generic_access_phys(struct vm_area_struct *vma, unsigned long addr, void *buf, int len, int write); #ifdef CONFIG_SPECULATIVE_PAGE_FAULT +extern wait_queue_head_t vma_users_wait; +extern atomic_t vma_user_waiters; + static inline void vm_write_begin(struct vm_area_struct *vma) { /* diff --git a/mm/mmap.c b/mm/mmap.c index fba57c628671..c3dfbfdb674a 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -180,7 +180,17 @@ static void __free_vma(struct vm_area_struct *vma) #ifdef CONFIG_SPECULATIVE_PAGE_FAULT void put_vma(struct vm_area_struct *vma) { - if (atomic_dec_and_test(&vma->vm_ref_count)) + int ref_count = atomic_dec_return(&vma->vm_ref_count); + + /* + * Implicit smp_mb due to atomic_dec_return. + * + * If this is the last reference, wake up the mremap waiter + * (if any). + */ + if (ref_count == 1 && unlikely(atomic_read(&vma_user_waiters) > 0)) + wake_up(&vma_users_wait); + else if (ref_count <= 0) __free_vma(vma); } #else @@ -2421,8 +2431,22 @@ struct vm_area_struct *get_vma(struct mm_struct *mm, unsigned long addr) read_lock(&mm->mm_rb_lock); vma = __find_vma(mm, addr); - if (vma) - atomic_inc(&vma->vm_ref_count); + + /* + * If there is a concurrent fast mremap, bail out since the entire + * PMD/PUD subtree may have been remapped. + * + * This is usually safe for conventional mremap since it takes the + * PTE locks as does SPF. However fast mremap only takes the lock + * at the PMD/PUD level which is ok as it is done with the mmap + * write lock held. 
But since SPF, as the term implies, forgoes + * taking the mmap read lock and also cannot take PTL lock at the + * larger PMD/PUD granularity, since it would introduce huge + * contention in the page fault path; fall back to regular fault + * handling. + */ + if (vma && !atomic_inc_unless_negative(&vma->vm_ref_count)) + vma = NULL; read_unlock(&mm->mm_rb_lock); return vma; diff --git a/mm/mremap.c b/mm/mremap.c index 5a18cec23fa7..0763b83ef779 100644 --- a/mm/mremap.c +++ b/mm/mremap.c @@ -210,17 +210,74 @@ static void move_ptes(struct vm_area_struct *vma, pmd_t *old_pmd, drop_rmap_locks(vma); } +#ifdef CONFIG_SPECULATIVE_PAGE_FAULT +DECLARE_WAIT_QUEUE_HEAD(vma_users_wait); +atomic_t vma_user_waiters = ATOMIC_INIT(0); + +static inline void wait_for_vma_users(struct vm_area_struct *vma) +{ + /* + * If we have the only reference, swap the refcount to -1. This + * will prevent other concurrent references by get_vma() for SPFs. + */ + if (likely(atomic_cmpxchg(&vma->vm_ref_count, 1, -1) == 1)) + return; + + /* Indicate we are waiting for other users of the VMA to finish. */ + atomic_inc(&vma_user_waiters); + + /* Failed atomic_cmpxchg; no implicit barrier, use an explicit one. */ + smp_mb(); + + /* + * Callers cannot handle failure, sleep uninterruptibly until there + * are no other users of this VMA. + * + * We don't need to worry about references from concurrent waiters, + * since this is only used in the context of fast mremaps, with + * exclusive mmap write lock held. + */ + wait_event(vma_users_wait, atomic_cmpxchg(&vma->vm_ref_count, 1, -1) == 1); + + atomic_dec(&vma_user_waiters); +} + + /* - * Speculative page fault handlers will not detect page table changes done - * without ptl locking. + * Restore the VMA reference count to 1 after a fast mremap.
*/ -#if defined(CONFIG_HAVE_MOVE_PMD) && !defined(CONFIG_SPECULATIVE_PAGE_FAULT) +static inline void restore_vma_ref_count(struct vm_area_struct *vma) +{ + /* + * This should only be called after a corresponding, + * wait_for_vma_users() + */ + VM_BUG_ON_VMA(atomic_cmpxchg(&vma->vm_ref_count, -1, 1) != -1, + vma); +} +#else /* !CONFIG_SPECULATIVE_PAGE_FAULT */ +static inline void wait_for_vma_users(struct vm_area_struct *vma) +{ +} +static inline void restore_vma_ref_count(struct vm_area_struct *vma) +{ +} +#endif /* CONFIG_SPECULATIVE_PAGE_FAULT */ + +#ifdef CONFIG_HAVE_MOVE_PMD static bool move_normal_pmd(struct vm_area_struct *vma, unsigned long old_addr, unsigned long new_addr, pmd_t *old_pmd, pmd_t *new_pmd) { spinlock_t *old_ptl, *new_ptl; struct mm_struct *mm = vma->vm_mm; pmd_t pmd; + bool ret; + + /* + * Wait for concurrent users, since these can potentially be + * speculative page faults. + */ + wait_for_vma_users(vma); /* * The destination pmd shouldn't be established, free_pgtables() @@ -245,8 +302,10 @@ static bool move_normal_pmd(struct vm_area_struct *vma, unsigned long old_addr, * One alternative might be to just unmap the target pmd at * this point, and verify that it really is empty. We'll see. */ - if (WARN_ON_ONCE(!pmd_none(*new_pmd))) - return false; + if (WARN_ON_ONCE(!pmd_none(*new_pmd))) { + ret = false; + goto out; + } /* * We don't have to worry about the ordering of src and dst @@ -270,7 +329,11 @@ static bool move_normal_pmd(struct vm_area_struct *vma, unsigned long old_addr, spin_unlock(new_ptl); spin_unlock(old_ptl); - return true; + ret = true; + +out: + restore_vma_ref_count(vma); + return ret; } #else static inline bool move_normal_pmd(struct vm_area_struct *vma, @@ -281,24 +344,29 @@ static inline bool move_normal_pmd(struct vm_area_struct *vma, } #endif -/* - * Speculative page fault handlers will not detect page table changes done - * without ptl locking. 
- */ -#if defined(CONFIG_HAVE_MOVE_PUD) && !defined(CONFIG_SPECULATIVE_PAGE_FAULT) +#ifdef CONFIG_HAVE_MOVE_PUD static bool move_normal_pud(struct vm_area_struct *vma, unsigned long old_addr, unsigned long new_addr, pud_t *old_pud, pud_t *new_pud) { spinlock_t *old_ptl, *new_ptl; struct mm_struct *mm = vma->vm_mm; pud_t pud; + bool ret; + + /* + * Wait for concurrent users, since these can potentially be + * speculative page faults. + */ + wait_for_vma_users(vma); /* * The destination pud shouldn't be established, free_pgtables() * should have released it. */ - if (WARN_ON_ONCE(!pud_none(*new_pud))) - return false; + if (WARN_ON_ONCE(!pud_none(*new_pud))) { + ret = false; + goto out; + } /* * We don't have to worry about the ordering of src and dst @@ -322,7 +390,11 @@ static bool move_normal_pud(struct vm_area_struct *vma, unsigned long old_addr, spin_unlock(new_ptl); spin_unlock(old_ptl); - return true; + ret = true; + +out: + restore_vma_ref_count(vma); + return ret; } #else static inline bool move_normal_pud(struct vm_area_struct *vma, From 05a8f2c4d2f5b0664b97f3c426783cc26a946b3b Mon Sep 17 00:00:00 2001 From: Pradeep P V K Date: Mon, 6 Dec 2021 14:16:45 +0530 Subject: [PATCH 134/186] FROMLIST: fuse: give wakeup hints to the scheduler The synchronous wakeup interface is available only for the interruptible wakeup. Add it for normal wakeup and use this synchronous wakeup interface to wakeup the userspace daemon. Scheduler can make use of this hint to find a better CPU for the waker task. With this change the performance numbers for compress, decompress and copy use-cases on /sdcard path has improved by ~30%. Use-case details: 1. copy 10000 files of each 4k size into /sdcard path 2. use any File explorer application that has compress/decompress support 3. start compress/decompress and capture the time. 
------------------------------------------------- | Default | wakeup support | Improvement/Diff | ------------------------------------------------- | 13.8 sec | 9.9 sec | 3.9 sec (28.26%) | ------------------------------------------------- Co-developed-by: Pavankumar Kondeti Signed-off-by: Pradeep P V K Bug: 216261533 Link: https://lore.kernel.org/lkml/1638780405-38026-1-git-send-email-quic_pragalla@quicinc.com/ Change-Id: I9ac89064e34b1e0605064bf4d2d3a310679cb605 Signed-off-by: Pradeep P V K Signed-off-by: Alessio Balsini (cherry picked from commit 30d72758dbe0e7fa9992f5d21ee8d23eec27934a) --- fs/fuse/dev.c | 21 ++++++++++++--------- fs/fuse/fuse_i.h | 6 +++--- fs/fuse/virtio_fs.c | 8 +++++--- include/linux/wait.h | 1 + 4 files changed, 21 insertions(+), 15 deletions(-) diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c index e9a1543ba7cb..47eef2496230 100644 --- a/fs/fuse/dev.c +++ b/fs/fuse/dev.c @@ -208,10 +208,13 @@ static unsigned int fuse_req_hash(u64 unique) /** * A new request is available, wake fiq->waitq */ -static void fuse_dev_wake_and_unlock(struct fuse_iqueue *fiq) +static void fuse_dev_wake_and_unlock(struct fuse_iqueue *fiq, bool sync) __releases(fiq->lock) { - wake_up(&fiq->waitq); + if (sync) + wake_up_sync(&fiq->waitq); + else + wake_up(&fiq->waitq); kill_fasync(&fiq->fasync, SIGIO, POLL_IN); spin_unlock(&fiq->lock); } @@ -224,14 +227,14 @@ const struct fuse_iqueue_ops fuse_dev_fiq_ops = { EXPORT_SYMBOL_GPL(fuse_dev_fiq_ops); static void queue_request_and_unlock(struct fuse_iqueue *fiq, - struct fuse_req *req) + struct fuse_req *req, bool sync) __releases(fiq->lock) { req->in.h.len = sizeof(struct fuse_in_header) + fuse_len_args(req->args->in_numargs, (struct fuse_arg *) req->args->in_args); list_add_tail(&req->list, &fiq->pending); - fiq->ops->wake_pending_and_unlock(fiq); + fiq->ops->wake_pending_and_unlock(fiq, sync); } void fuse_queue_forget(struct fuse_conn *fc, struct fuse_forget_link *forget, @@ -246,7 +249,7 @@ void fuse_queue_forget(struct 
fuse_conn *fc, struct fuse_forget_link *forget, if (fiq->connected) { fiq->forget_list_tail->next = forget; fiq->forget_list_tail = forget; - fiq->ops->wake_forget_and_unlock(fiq); + fiq->ops->wake_forget_and_unlock(fiq, false); } else { kfree(forget); spin_unlock(&fiq->lock); @@ -266,7 +269,7 @@ static void flush_bg_queue(struct fuse_conn *fc) fc->active_background++; spin_lock(&fiq->lock); req->in.h.unique = fuse_get_unique(fiq); - queue_request_and_unlock(fiq, req); + queue_request_and_unlock(fiq, req, false); } } @@ -359,7 +362,7 @@ static int queue_interrupt(struct fuse_req *req) spin_unlock(&fiq->lock); return 0; } - fiq->ops->wake_interrupt_and_unlock(fiq); + fiq->ops->wake_interrupt_and_unlock(fiq, false); } else { spin_unlock(&fiq->lock); } @@ -426,7 +429,7 @@ static void __fuse_request_send(struct fuse_req *req) /* acquire extra reference, since request is still needed after fuse_request_end() */ __fuse_get_request(req); - queue_request_and_unlock(fiq, req); + queue_request_and_unlock(fiq, req, true); request_wait_answer(req); /* Pairs with smp_wmb() in fuse_request_end() */ @@ -601,7 +604,7 @@ static int fuse_simple_notify_reply(struct fuse_mount *fm, spin_lock(&fiq->lock); if (fiq->connected) { - queue_request_and_unlock(fiq, req); + queue_request_and_unlock(fiq, req, false); } else { err = -ENODEV; spin_unlock(&fiq->lock); diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h index 69a631c12b15..e20c341864b5 100644 --- a/fs/fuse/fuse_i.h +++ b/fs/fuse/fuse_i.h @@ -412,19 +412,19 @@ struct fuse_iqueue_ops { /** * Signal that a forget has been queued */ - void (*wake_forget_and_unlock)(struct fuse_iqueue *fiq) + void (*wake_forget_and_unlock)(struct fuse_iqueue *fiq, bool sync) __releases(fiq->lock); /** * Signal that an INTERRUPT request has been queued */ - void (*wake_interrupt_and_unlock)(struct fuse_iqueue *fiq) + void (*wake_interrupt_and_unlock)(struct fuse_iqueue *fiq, bool sync) __releases(fiq->lock); /** * Signal that a request has been queued */ - 
void (*wake_pending_and_unlock)(struct fuse_iqueue *fiq) + void (*wake_pending_and_unlock)(struct fuse_iqueue *fiq, bool sync) __releases(fiq->lock); /** diff --git a/fs/fuse/virtio_fs.c b/fs/fuse/virtio_fs.c index b9cfb1165ff4..90a574ba92cf 100644 --- a/fs/fuse/virtio_fs.c +++ b/fs/fuse/virtio_fs.c @@ -971,7 +971,7 @@ static struct virtio_driver virtio_fs_driver = { #endif }; -static void virtio_fs_wake_forget_and_unlock(struct fuse_iqueue *fiq) +static void virtio_fs_wake_forget_and_unlock(struct fuse_iqueue *fiq, bool sync) __releases(fiq->lock) { struct fuse_forget_link *link; @@ -1006,7 +1006,8 @@ __releases(fiq->lock) kfree(link); } -static void virtio_fs_wake_interrupt_and_unlock(struct fuse_iqueue *fiq) +static void virtio_fs_wake_interrupt_and_unlock(struct fuse_iqueue *fiq, + bool sync) __releases(fiq->lock) { /* @@ -1221,7 +1222,8 @@ out: return ret; } -static void virtio_fs_wake_pending_and_unlock(struct fuse_iqueue *fiq) +static void virtio_fs_wake_pending_and_unlock(struct fuse_iqueue *fiq, + bool sync) __releases(fiq->lock) { unsigned int queue_id = VQ_REQUEST; /* TODO multiqueue */ diff --git a/include/linux/wait.h b/include/linux/wait.h index 1663e47681a3..e9966f3929f6 100644 --- a/include/linux/wait.h +++ b/include/linux/wait.h @@ -219,6 +219,7 @@ void __wake_up_pollfree(struct wait_queue_head *wq_head); #define wake_up_interruptible_nr(x, nr) __wake_up(x, TASK_INTERRUPTIBLE, nr, NULL) #define wake_up_interruptible_all(x) __wake_up(x, TASK_INTERRUPTIBLE, 0, NULL) #define wake_up_interruptible_sync(x) __wake_up_sync((x), TASK_INTERRUPTIBLE) +#define wake_up_sync(x) __wake_up_sync((x), TASK_NORMAL) /* * Wakeup macros to be used to report events to the targets. From 7bc2b8c400c0abae519fa73f0bed2fe8e844322e Mon Sep 17 00:00:00 2001 From: Mayank Rana Date: Wed, 18 May 2022 11:12:52 -0700 Subject: [PATCH 135/186] UPSTREAM: usb: dwc3: core: Add error log when core soft reset failed DWC3 controller soft reset is important operation for USB functionality. 
In case when it fails, currently there is no failure log. Hence add error log when core soft reset failed. Signed-off-by: Mayank Rana Signed-off-by: Greg Kroah-Hartman Bug: 235863377 (cherry picked from commit 859bdc359567f5fa8e8dc780d7b5e53ea43d9ce9) Change-Id: I60500f66af47d93cf9d60bdecab32e6dc48d4b7c Signed-off-by: Mayank Rana (cherry picked from commit d03bf01b43ff923e066938895a6867338778be7a) --- drivers/usb/dwc3/core.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/usb/dwc3/core.c b/drivers/usb/dwc3/core.c index 83a850038c51..40d358ca8b58 100644 --- a/drivers/usb/dwc3/core.c +++ b/drivers/usb/dwc3/core.c @@ -301,6 +301,7 @@ int dwc3_core_soft_reset(struct dwc3 *dwc) udelay(1); } while (--retries); + dev_warn(dwc->dev, "DWC3 controller soft reset failed.\n"); return -ETIMEDOUT; done: From 9e6fb5ac724218ac74f83762a4cc2e34e1158b21 Mon Sep 17 00:00:00 2001 From: Kever Yang Date: Sun, 18 Dec 2022 18:43:21 +0800 Subject: [PATCH 136/186] ANDROID: GKI: rockchip: Add symbol clk_hw_set_parent Leaf changes summary: 1 artifact changed Changed leaf types summary: 0 leaf type changed Removed/Changed/Added functions summary: 0 Removed, 0 Changed, 1 Added function Removed/Changed/Added variables summary: 0 Removed, 0 Changed, 0 Added variable 1 Added function: [A] 'function int clk_hw_set_parent(clk_hw*, clk_hw*)' Bug: 239396464 Signed-off-by: Kever Yang Change-Id: Id24cdc739a6254f7676a1f60fd8ecbd0066ca4b0 --- android/abi_gki_aarch64.xml | 6 ++++++ android/abi_gki_aarch64_rockchip | 3 ++- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/android/abi_gki_aarch64.xml b/android/abi_gki_aarch64.xml index ba1c34e4205c..15fe4032346b 100644 --- a/android/abi_gki_aarch64.xml +++ b/android/abi_gki_aarch64.xml @@ -1173,6 +1173,7 @@ + @@ -122879,6 +122880,11 @@ + + + + + diff --git a/android/abi_gki_aarch64_rockchip b/android/abi_gki_aarch64_rockchip index c1df5fc9548f..6489a344910d 100644 --- a/android/abi_gki_aarch64_rockchip +++ 
b/android/abi_gki_aarch64_rockchip @@ -98,6 +98,7 @@ clk_hw_get_flags clk_hw_get_name clk_hw_get_parent + clk_hw_get_parent_by_index clk_hw_get_rate __clk_mux_determine_rate clk_notifier_register @@ -1596,7 +1597,6 @@ # required by clk-rockchip-regmap.ko clk_hw_get_num_parents - clk_hw_get_parent_by_index divider_recalc_rate divider_round_rate_parent @@ -1608,6 +1608,7 @@ __clk_get_hw clk_hw_register_composite clk_hw_round_rate + clk_hw_set_parent clk_mux_ops clk_mux_ro_ops clk_register_divider_table From a43cd1f2bb8e2a4b6b1943faccf05198afbbd9e9 Mon Sep 17 00:00:00 2001 From: Suren Baghdasaryan Date: Thu, 8 Dec 2022 17:26:26 +0000 Subject: [PATCH 137/186] Revert "Revert "ANDROID: vendor_hooks:vendor hook for __alloc_pages_slowpath."" This reverts commit cc51dcbc60c4492d68e3b075ff4d8bd61729dae4. Reason for revert: The vendor hooks were reverted but they are needed. Bug: 243629905 Signed-off-by: xiaofeng Signed-off-by: Suren Baghdasaryan Change-Id: I4b2eab1a9bf3bbbb200f9d09f2c57fb4d9f2c143 --- drivers/android/vendor_hooks.c | 2 ++ include/trace/hooks/mm.h | 8 ++++++++ mm/page_alloc.c | 11 +++++++++++ 3 files changed, 21 insertions(+) diff --git a/drivers/android/vendor_hooks.c b/drivers/android/vendor_hooks.c index 91288f59a390..ebcc005090c1 100644 --- a/drivers/android/vendor_hooks.c +++ b/drivers/android/vendor_hooks.c @@ -465,6 +465,8 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(android_vh_alloc_si); EXPORT_TRACEPOINT_SYMBOL_GPL(android_vh_free_pages); EXPORT_TRACEPOINT_SYMBOL_GPL(android_vh_set_shmem_page_flag); EXPORT_TRACEPOINT_SYMBOL_GPL(android_vh_sched_pelt_multiplier); +EXPORT_TRACEPOINT_SYMBOL_GPL(android_vh_alloc_pages_reclaim_bypass); +EXPORT_TRACEPOINT_SYMBOL_GPL(android_vh_alloc_pages_failure_bypass); EXPORT_TRACEPOINT_SYMBOL_GPL(android_vh_check_page_look_around_ref); EXPORT_TRACEPOINT_SYMBOL_GPL(android_vh_look_around); EXPORT_TRACEPOINT_SYMBOL_GPL(android_vh_look_around_migrate_page); diff --git a/include/trace/hooks/mm.h b/include/trace/hooks/mm.h index 
3f32c876441f..a4b855e0a81a 100644 --- a/include/trace/hooks/mm.h +++ b/include/trace/hooks/mm.h @@ -287,6 +287,14 @@ DECLARE_HOOK(android_vh_set_shmem_page_flag, DECLARE_HOOK(android_vh_remove_vmalloc_stack, TP_PROTO(struct vm_struct *vm), TP_ARGS(vm)); +DECLARE_HOOK(android_vh_alloc_pages_reclaim_bypass, + TP_PROTO(gfp_t gfp_mask, int order, int alloc_flags, + int migratetype, struct page **page), + TP_ARGS(gfp_mask, order, alloc_flags, migratetype, page)); +DECLARE_HOOK(android_vh_alloc_pages_failure_bypass, + TP_PROTO(gfp_t gfp_mask, int order, int alloc_flags, + int migratetype, struct page **page), + TP_ARGS(gfp_mask, order, alloc_flags, migratetype, page)); DECLARE_HOOK(android_vh_test_clear_look_around_ref, TP_PROTO(struct page *page), TP_ARGS(page)); diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 337fba577a09..18a969923841 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -4956,6 +4956,12 @@ retry: if (current->flags & PF_MEMALLOC) goto nopage; + trace_android_vh_alloc_pages_reclaim_bypass(gfp_mask, order, + alloc_flags, ac->migratetype, &page); + + if (page) + goto got_pg; + /* Try direct reclaim and then allocating */ page = __alloc_pages_direct_reclaim(gfp_mask, order, alloc_flags, ac, &did_some_progress); @@ -5071,6 +5077,11 @@ nopage: goto retry; } fail: + trace_android_vh_alloc_pages_failure_bypass(gfp_mask, order, + alloc_flags, ac->migratetype, &page); + if (page) + goto got_pg; + warn_alloc(gfp_mask, ac->nodemask, "page allocation failure: order:%u", order); got_pg: From f677efbea129421d02bbba6b5856985f451d08ed Mon Sep 17 00:00:00 2001 From: Suren Baghdasaryan Date: Wed, 14 Dec 2022 22:08:04 +0000 Subject: [PATCH 138/186] Revert "Revert "ANDROID: vendor_hooks:vendor hook for mmput"" This reverts commit 501063ce66dd386c16c47c463c8a3df0e810435f. 
Reason for revert: The vendor hook is actually needed by a partner Bug: 238821038 Signed-off-by: Suren Baghdasaryan Change-Id: I1c19add348792967975369a10ec9cb41fa268236 --- drivers/android/vendor_hooks.c | 1 + include/trace/hooks/sched.h | 4 ++++ kernel/fork.c | 4 +++- 3 files changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/android/vendor_hooks.c b/drivers/android/vendor_hooks.c index ebcc005090c1..3ff54d2aa04d 100644 --- a/drivers/android/vendor_hooks.c +++ b/drivers/android/vendor_hooks.c @@ -464,6 +464,7 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(android_rvh_alloc_si); EXPORT_TRACEPOINT_SYMBOL_GPL(android_vh_alloc_si); EXPORT_TRACEPOINT_SYMBOL_GPL(android_vh_free_pages); EXPORT_TRACEPOINT_SYMBOL_GPL(android_vh_set_shmem_page_flag); +EXPORT_TRACEPOINT_SYMBOL_GPL(android_vh_mmput); EXPORT_TRACEPOINT_SYMBOL_GPL(android_vh_sched_pelt_multiplier); EXPORT_TRACEPOINT_SYMBOL_GPL(android_vh_alloc_pages_reclaim_bypass); EXPORT_TRACEPOINT_SYMBOL_GPL(android_vh_alloc_pages_failure_bypass); diff --git a/include/trace/hooks/sched.h b/include/trace/hooks/sched.h index 6488dee32a88..5f63128bb5c6 100644 --- a/include/trace/hooks/sched.h +++ b/include/trace/hooks/sched.h @@ -391,6 +391,10 @@ DECLARE_HOOK(android_vh_setscheduler_uclamp, TP_PROTO(struct task_struct *tsk, int clamp_id, unsigned int value), TP_ARGS(tsk, clamp_id, value)); +DECLARE_HOOK(android_vh_mmput, + TP_PROTO(void *unused), + TP_ARGS(unused)); + DECLARE_HOOK(android_vh_sched_pelt_multiplier, TP_PROTO(unsigned int old, unsigned int cur, int *ret), TP_ARGS(old, cur, ret)); diff --git a/kernel/fork.c b/kernel/fork.c index b2ab89589a7d..d515aa5b7eb5 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1150,8 +1150,10 @@ void mmput(struct mm_struct *mm) { might_sleep(); - if (atomic_dec_and_test(&mm->mm_users)) + if (atomic_dec_and_test(&mm->mm_users)) { + trace_android_vh_mmput(NULL); __mmput(mm); + } } EXPORT_SYMBOL_GPL(mmput); From ad1f2eebadfe1d409f9bdf17d3d5e53a9dd724c7 Mon Sep 17 00:00:00 2001 From: xiaofeng 
Date: Wed, 14 Dec 2022 16:57:31 +0800 Subject: [PATCH 139/186] ANDROID: GKI: update xiaomi symbol list Leaf changes summary: 6 artifacts changed Changed leaf types summary: 0 leaf type changed Removed/Changed/Added functions summary: 0 Removed, 0 Changed, 3 Added functions Removed/Changed/Added variables summary: 0 Removed, 0 Changed, 3 Added variables 3 Added functions: [A] 'function int __traceiter_android_vh_alloc_pages_failure_bypass(void*, gfp_t, int, int, int, page**)' [A] 'function int __traceiter_android_vh_alloc_pages_reclaim_bypass(void*, gfp_t, int, int, int, page**)' [A] 'function int __traceiter_android_vh_mmput(void*, mm_struct*)' 3 Added variables: [A] 'tracepoint __tracepoint_android_vh_alloc_pages_failure_bypass' [A] 'tracepoint __tracepoint_android_vh_alloc_pages_reclaim_bypass' [A] 'tracepoint __tracepoint_android_vh_mmput' Bug:262486564 Change-Id: I7c78cc3c5fde8a15c8a30073fcb1cb01708d9d37 Signed-off-by: xiaofeng --- android/abi_gki_aarch64.xml | 322 ++++++++++++++++++--------------- android/abi_gki_aarch64_xiaomi | 8 + 2 files changed, 185 insertions(+), 145 deletions(-) diff --git a/android/abi_gki_aarch64.xml b/android/abi_gki_aarch64.xml index 15fe4032346b..5477e84597cf 100644 --- a/android/abi_gki_aarch64.xml +++ b/android/abi_gki_aarch64.xml @@ -406,6 +406,8 @@ + + @@ -544,6 +546,7 @@ + @@ -6369,6 +6372,8 @@ + + @@ -6512,6 +6517,7 @@ + @@ -38009,63 +38015,63 @@ - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + @@ -53461,13 +53467,13 @@ - + - + - + @@ -60736,24 +60742,24 @@ - + - + - + - + - + - + - + @@ -101122,21 +101128,21 @@ - + - + - + - + - + - + @@ -115571,9 +115577,9 @@ - + - + @@ -115602,11 +115608,11 @@ - - - - - + + + + + @@ -116242,9 +116248,9 @@ - - - + + + @@ -116297,9 +116303,9 @@ - - - + + + @@ -116810,9 +116816,9 @@ - - - + + + @@ -118005,6 +118011,24 @@ + + + + + + + + + + + + + + + + + + @@ -118789,18 +118813,18 @@ - - - - - - + + + + + + - - - - + + + + @@ -118904,6 +118928,11 @@ + + + + + @@ 
-119137,11 +119166,11 @@ - - - - - + + + + + @@ -119368,9 +119397,9 @@ - - - + + + @@ -120096,6 +120125,8 @@ + + @@ -120220,8 +120251,8 @@ - - + + @@ -120239,6 +120270,7 @@ + @@ -120288,7 +120320,7 @@ - + @@ -120330,7 +120362,7 @@ - + @@ -120939,9 +120971,9 @@ - - - + + + @@ -121065,9 +121097,9 @@ - - - + + + @@ -122880,9 +122912,9 @@ - - - + + + @@ -126734,22 +126766,22 @@ - - + + - - - + + + - - + + - - - + + + @@ -130091,9 +130123,9 @@ - - - + + + @@ -130145,9 +130177,9 @@ - - - + + + @@ -130266,14 +130298,14 @@ - - - + + + - - - + + + @@ -131149,12 +131181,12 @@ - - + + - - + + @@ -131173,12 +131205,12 @@ - - - - - - + + + + + + @@ -131221,8 +131253,8 @@ - - + + @@ -137059,14 +137091,14 @@ - - - - + + + + - - + + @@ -142086,11 +142118,11 @@ - + - - + + @@ -145708,21 +145740,21 @@ - - - + + + - - + + - - + + - - + + @@ -145732,13 +145764,13 @@ - - - + + + - - + + @@ -148372,9 +148404,9 @@ - - - + + + @@ -148386,8 +148418,8 @@ - - + + diff --git a/android/abi_gki_aarch64_xiaomi b/android/abi_gki_aarch64_xiaomi index a162683958cd..ebbbb0e81420 100644 --- a/android/abi_gki_aarch64_xiaomi +++ b/android/abi_gki_aarch64_xiaomi @@ -200,3 +200,11 @@ wakeup_sources_read_unlock wakeup_sources_walk_start wakeup_sources_walk_next + +#required by mi_mempool.ko module + __traceiter_android_vh_mmput + __tracepoint_android_vh_mmput + __traceiter_android_vh_alloc_pages_reclaim_bypass + __tracepoint_android_vh_alloc_pages_reclaim_bypass + __traceiter_android_vh_alloc_pages_failure_bypass + __tracepoint_android_vh_alloc_pages_failure_bypass From 6d015667ce74bf8a3cd2400ad6516f24c6a827d6 Mon Sep 17 00:00:00 2001 From: Bing-Jhong Billy Jheng Date: Thu, 15 Dec 2022 06:43:56 -0800 Subject: [PATCH 140/186] UPSTREAM: io_uring: add missing item types for splice request Splice is like read/write and should grab current->nsproxy, denoted by IO_WQ_WORK_FILES as it refers to current->files as well Change-Id: I94a99fdef5764e7eda5da778b5b52a150b9fe5eb Signed-off-by: Bing-Jhong Billy Jheng 
Reviewed-by: Jens Axboe Signed-off-by: Greg Kroah-Hartman (cherry picked from commit 75454b4bbfc7e6a4dd8338556f36ea9107ddf61a) Signed-off-by: Greg Kroah-Hartman --- fs/io_uring.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index a952288b2ab8..5538906e47fe 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -935,7 +935,7 @@ static const struct io_op_def io_op_defs[] = { .needs_file = 1, .hash_reg_file = 1, .unbound_nonreg_file = 1, - .work_flags = IO_WQ_WORK_BLKCG, + .work_flags = IO_WQ_WORK_BLKCG | IO_WQ_WORK_FILES, }, [IORING_OP_PROVIDE_BUFFERS] = {}, [IORING_OP_REMOVE_BUFFERS] = {}, From fe60669d0308bfdd1423976ac24b54c2c386f736 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Thu, 15 Dec 2022 21:53:37 +0000 Subject: [PATCH 141/186] ANDROID: fips140: add dump_jitterentropy command to fips140_lab_util For the entropy analysis, we must provide some output from the Jitter RNG: a large amount of output from one instance, and a smaller amount of output from each of a certain number of instances. The original plan was to use a build of the userspace jitterentropy library that matches the kernel's jitterentropy_rng as closely as possible. However, it's now being requested that the output be gotten from the kernel instead. Now that fips140_lab_util depends on AF_ALG anyway, it's straightforward to dump output from jitterentropy_rng instances using AF_ALG. Therefore, add a command dump_jitterentropy which supports this. 
Bug: 188620248 Change-Id: I78eb26250e88f2fc28fc44aa201acbe5b84df8bb Signed-off-by: Eric Biggers (cherry picked from commit dc015032666ece133065b7fea73f5709f735c9b0) --- samples/crypto/fips140_lab_util.c | 88 +++++++++++++++++++++++++++++++ 1 file changed, 88 insertions(+) diff --git a/samples/crypto/fips140_lab_util.c b/samples/crypto/fips140_lab_util.c index 996839dbd2e3..5f8e9018013a 100644 --- a/samples/crypto/fips140_lab_util.c +++ b/samples/crypto/fips140_lab_util.c @@ -24,6 +24,7 @@ #include #include +#include #include #include #include @@ -45,6 +46,8 @@ * ---------------------------------------------------------------------------*/ #define ARRAY_SIZE(A) (sizeof(A) / sizeof((A)[0])) +#define MIN(a, b) ((a) < (b) ? (a) : (b)) +#define MAX(a, b) ((a) > (b) ? (a) : (b)) static void __attribute__((noreturn)) do_die(const char *format, va_list va, int err) @@ -109,6 +112,23 @@ static const char *bytes_to_hex(const uint8_t *bytes, size_t count) return hex; } +static void full_write(int fd, const void *buf, size_t count) +{ + while (count) { + ssize_t ret = write(fd, buf, count); + + if (ret < 0) + die_errno("write failed"); + buf += ret; + count -= ret; + } +} + +enum { + OPT_AMOUNT, + OPT_ITERATIONS, +}; + static void usage(void); /* --------------------------------------------------------------------------- @@ -226,6 +246,68 @@ static int get_req_fd(int alg_fd, const char *alg_name) return req_fd; } +/* --------------------------------------------------------------------------- + * dump_jitterentropy command + * ---------------------------------------------------------------------------*/ + +static void dump_from_jent_fd(int fd, size_t count) +{ + uint8_t buf[AF_ALG_MAX_RNG_REQUEST_SIZE]; + + while (count) { + ssize_t ret; + + memset(buf, 0, sizeof(buf)); + ret = read(fd, buf, MIN(count, sizeof(buf))); + if (ret < 0) + die_errno("error reading from jitterentropy_rng"); + full_write(STDOUT_FILENO, buf, ret); + count -= ret; + } +} + +static int 
cmd_dump_jitterentropy(int argc, char *argv[]) +{ + static const struct option longopts[] = { + { "amount", required_argument, NULL, OPT_AMOUNT }, + { "iterations", required_argument, NULL, OPT_ITERATIONS }, + { NULL, 0, NULL, 0 }, + }; + size_t amount = 128; + size_t iterations = 1; + size_t i; + int c; + + while ((c = getopt_long(argc, argv, "", longopts, NULL)) != -1) { + switch (c) { + case OPT_AMOUNT: + amount = strtoul(optarg, NULL, 0); + if (amount <= 0 || amount >= ULONG_MAX) + die("invalid argument to --amount"); + break; + case OPT_ITERATIONS: + iterations = strtoul(optarg, NULL, 0); + if (iterations <= 0 || iterations >= ULONG_MAX) + die("invalid argument to --iterations"); + break; + default: + usage(); + return 1; + } + } + + for (i = 0; i < iterations; i++) { + int alg_fd = get_alg_fd("rng", "jitterentropy_rng"); + int req_fd = get_req_fd(alg_fd, "jitterentropy_rng"); + + dump_from_jent_fd(req_fd, amount); + + close(req_fd); + close(alg_fd); + } + return 0; +} + /* --------------------------------------------------------------------------- * show_invalid_inputs command * ---------------------------------------------------------------------------*/ @@ -510,6 +592,7 @@ static const struct command { const char *name; int (*func)(int argc, char *argv[]); } commands[] = { + { "dump_jitterentropy", cmd_dump_jitterentropy }, { "show_invalid_inputs", cmd_show_invalid_inputs }, { "show_module_version", cmd_show_module_version }, { "show_service_indicators", cmd_show_service_indicators }, @@ -519,9 +602,14 @@ static void usage(void) { fprintf(stderr, "Usage:\n" +" fips140_lab_util dump_jitterentropy [OPTION]...\n" " fips140_lab_util show_invalid_inputs\n" " fips140_lab_util show_module_version\n" " fips140_lab_util show_service_indicators [SERVICE]...\n" +"\n" +"Options for dump_jitterentropy:\n" +" --amount=AMOUNT Amount to dump in bytes per iteration (default 128)\n" +" --iterations=COUNT Number of start-up iterations (default 1)\n" ); } From 
135145909741b7d15018babc3f3d45e0c308aa7b Mon Sep 17 00:00:00 2001 From: Wei Liu Date: Wed, 28 Dec 2022 14:39:41 +0800 Subject: [PATCH 142/186] ANDROID: GKI: Update symbols to symbol list Update symbols to symbol list externed by oppo network group. 1 Added function: [A] 'function int __rtnl_link_register(rtnl_link_ops*)' Bug: 193384408 Signed-off-by: Wei Liu Change-Id: Ibd8f74fa1f3b68047f6fed9b5c4154c51f23b821 --- android/abi_gki_aarch64.xml | 121 +++++++++++++++++++++++++++++++++- android/abi_gki_aarch64_oplus | 1 + 2 files changed, 120 insertions(+), 2 deletions(-) diff --git a/android/abi_gki_aarch64.xml b/android/abi_gki_aarch64.xml index 5477e84597cf..49832c52416e 100644 --- a/android/abi_gki_aarch64.xml +++ b/android/abi_gki_aarch64.xml @@ -255,6 +255,7 @@ + @@ -13101,6 +13102,26 @@ + + + + + + + + + + + + + + + + + + + + @@ -22429,6 +22450,14 @@ + + + + + + + + @@ -69208,7 +69237,20 @@ - + + + + + + + + + + + + + + @@ -70110,6 +70152,7 @@ + @@ -76600,6 +76643,7 @@ + @@ -88213,7 +88257,65 @@ - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + @@ -112859,6 +112961,17 @@ + + + + + + + + + + + @@ -117062,6 +117175,10 @@ + + + + diff --git a/android/abi_gki_aarch64_oplus b/android/abi_gki_aarch64_oplus index ac8c1165135b..d1c65a2281cb 100644 --- a/android/abi_gki_aarch64_oplus +++ b/android/abi_gki_aarch64_oplus @@ -2297,6 +2297,7 @@ rtc_update_irq rtc_valid_tm rtnl_is_locked + __rtnl_link_register __rtnl_link_unregister rtnl_lock rtnl_unlock From c67f268c849edcebe3bccbb8797da6c31b7f1bb5 Mon Sep 17 00:00:00 2001 From: Kalesh Singh Date: Fri, 6 Jan 2023 13:07:18 -0800 Subject: [PATCH 143/186] Revert "ANDROID: Make SPF aware of fast mremaps" This reverts commit 134c1aae4311b6fb9823722647ab9c4ab554a152. 
Reason for revert: vts_linux_kselftest_arm_64 timeout Bug: 263479421 Bug: 263177905 Change-Id: I123c56741c982d1539ceebd8bfde2443871aa1de Signed-off-by: Kalesh Singh --- include/linux/mm.h | 3 -- mm/mmap.c | 30 ++------------ mm/mremap.c | 100 +++++++-------------------------------------- 3 files changed, 17 insertions(+), 116 deletions(-) diff --git a/include/linux/mm.h b/include/linux/mm.h index 5de4309bfa14..dfefcfa1d6a4 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1758,9 +1758,6 @@ int generic_access_phys(struct vm_area_struct *vma, unsigned long addr, void *buf, int len, int write); #ifdef CONFIG_SPECULATIVE_PAGE_FAULT -extern wait_queue_head_t vma_users_wait; -extern atomic_t vma_user_waiters; - static inline void vm_write_begin(struct vm_area_struct *vma) { /* diff --git a/mm/mmap.c b/mm/mmap.c index c3dfbfdb674a..fba57c628671 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -180,17 +180,7 @@ static void __free_vma(struct vm_area_struct *vma) #ifdef CONFIG_SPECULATIVE_PAGE_FAULT void put_vma(struct vm_area_struct *vma) { - int ref_count = atomic_dec_return(&vma->vm_ref_count); - - /* - * Implicit smp_mb due to atomic_dec_return. - * - * If this is the last reference, wake up the mremap waiter - * (if any). - */ - if (ref_count == 1 && unlikely(atomic_read(&vma_user_waiters) > 0)) - wake_up(&vma_users_wait); - else if (ref_count <= 0) + if (atomic_dec_and_test(&vma->vm_ref_count)) __free_vma(vma); } #else @@ -2431,22 +2421,8 @@ struct vm_area_struct *get_vma(struct mm_struct *mm, unsigned long addr) read_lock(&mm->mm_rb_lock); vma = __find_vma(mm, addr); - - /* - * If there is a concurrent fast mremap, bail out since the entire - * PMD/PUD subtree may have been remapped. - * - * This is usually safe for conventional mremap since it takes the - * PTE locks as does SPF. However fast mremap only takes the lock - * at the PMD/PUD level which is ok as it is done with the mmap - * write lock held. 
But since SPF, as the term implies forgoes, - * taking the mmap read lock and also cannot take PTL lock at the - * larger PMD/PUD granualrity, since it would introduce huge - * contention in the page fault path; fall back to regular fault - * handling. - */ - if (vma && !atomic_inc_unless_negative(&vma->vm_ref_count)) - vma = NULL; + if (vma) + atomic_inc(&vma->vm_ref_count); read_unlock(&mm->mm_rb_lock); return vma; diff --git a/mm/mremap.c b/mm/mremap.c index 0763b83ef779..5a18cec23fa7 100644 --- a/mm/mremap.c +++ b/mm/mremap.c @@ -210,74 +210,17 @@ static void move_ptes(struct vm_area_struct *vma, pmd_t *old_pmd, drop_rmap_locks(vma); } -#ifdef CONFIG_SPECULATIVE_PAGE_FAULT -DECLARE_WAIT_QUEUE_HEAD(vma_users_wait); -atomic_t vma_user_waiters = ATOMIC_INIT(0); - -static inline void wait_for_vma_users(struct vm_area_struct *vma) -{ - /* - * If we have the only reference, swap the refcount to -1. This - * will prevent other concurrent references by get_vma() for SPFs. - */ - if (likely(atomic_cmpxchg(&vma->vm_ref_count, 1, -1) == 1)) - return; - - /* Indicate we are waiting for other users of the VMA to finish. */ - atomic_inc(&vma_user_waiters); - - /* Failed atomic_cmpxchg; no implicit barrier, use an explicit one. */ - smp_mb(); - - /* - * Callers cannot handle failure, sleep uninterruptibly until there - * are no other users of this VMA. - * - * We don't need to worry about references from concurrent waiters, - * since this is only used in the context of fast mremaps, with - * exclusive mmap write lock held. - */ - wait_event(vma_users_wait, atomic_cmpxchg(&vma->vm_ref_count, 1, -1) == 1); - - atomic_dec(&vma_user_waiters); -} - - /* - * Restore the VMA reference count to 1 after a fast mremap. + * Speculative page fault handlers will not detect page table changes done + * without ptl locking. 
*/ -static inline void restore_vma_ref_count(struct vm_area_struct *vma) -{ - /* - * This should only be called after a corresponding, - * wait_for_vma_users() - */ - VM_BUG_ON_VMA(atomic_cmpxchg(&vma->vm_ref_count, -1, 1) != -1, - vma); -} -#else /* !CONFIG_SPECULATIVE_PAGE_FAULT */ -static inline void wait_for_vma_users(struct vm_area_struct *vma) -{ -} -static inline void restore_vma_ref_count(struct vm_area_struct *vma) -{ -} -#endif /* CONFIG_SPECULATIVE_PAGE_FAULT */ - -#ifdef CONFIG_HAVE_MOVE_PMD +#if defined(CONFIG_HAVE_MOVE_PMD) && !defined(CONFIG_SPECULATIVE_PAGE_FAULT) static bool move_normal_pmd(struct vm_area_struct *vma, unsigned long old_addr, unsigned long new_addr, pmd_t *old_pmd, pmd_t *new_pmd) { spinlock_t *old_ptl, *new_ptl; struct mm_struct *mm = vma->vm_mm; pmd_t pmd; - bool ret; - - /* - * Wait for concurrent users, since these can potentially be - * speculative page faults. - */ - wait_for_vma_users(vma); /* * The destination pmd shouldn't be established, free_pgtables() @@ -302,10 +245,8 @@ static bool move_normal_pmd(struct vm_area_struct *vma, unsigned long old_addr, * One alternative might be to just unmap the target pmd at * this point, and verify that it really is empty. We'll see. */ - if (WARN_ON_ONCE(!pmd_none(*new_pmd))) { - ret = false; - goto out; - } + if (WARN_ON_ONCE(!pmd_none(*new_pmd))) + return false; /* * We don't have to worry about the ordering of src and dst @@ -329,11 +270,7 @@ static bool move_normal_pmd(struct vm_area_struct *vma, unsigned long old_addr, spin_unlock(new_ptl); spin_unlock(old_ptl); - ret = true; - -out: - restore_vma_ref_count(vma); - return ret; + return true; } #else static inline bool move_normal_pmd(struct vm_area_struct *vma, @@ -344,29 +281,24 @@ static inline bool move_normal_pmd(struct vm_area_struct *vma, } #endif -#ifdef CONFIG_HAVE_MOVE_PUD +/* + * Speculative page fault handlers will not detect page table changes done + * without ptl locking. 
+ */ +#if defined(CONFIG_HAVE_MOVE_PUD) && !defined(CONFIG_SPECULATIVE_PAGE_FAULT) static bool move_normal_pud(struct vm_area_struct *vma, unsigned long old_addr, unsigned long new_addr, pud_t *old_pud, pud_t *new_pud) { spinlock_t *old_ptl, *new_ptl; struct mm_struct *mm = vma->vm_mm; pud_t pud; - bool ret; - - /* - * Wait for concurrent users, since these can potentially be - * speculative page faults. - */ - wait_for_vma_users(vma); /* * The destination pud shouldn't be established, free_pgtables() * should have released it. */ - if (WARN_ON_ONCE(!pud_none(*new_pud))) { - ret = false; - goto out; - } + if (WARN_ON_ONCE(!pud_none(*new_pud))) + return false; /* * We don't have to worry about the ordering of src and dst @@ -390,11 +322,7 @@ static bool move_normal_pud(struct vm_area_struct *vma, unsigned long old_addr, spin_unlock(new_ptl); spin_unlock(old_ptl); - ret = true; - -out: - restore_vma_ref_count(vma); - return ret; + return true; } #else static inline bool move_normal_pud(struct vm_area_struct *vma, From 529351c4c8202aa7f5bc4a8a100e583a70ab6110 Mon Sep 17 00:00:00 2001 From: Kalesh Singh Date: Mon, 19 Dec 2022 21:07:49 -0800 Subject: [PATCH 144/186] ANDROID: Re-enable fast mremap and fix UAF with SPF SPF attempts page faults without taking the mmap lock, but takes the PTL. If there is a concurrent fast mremap (at PMD/PUD level), this can lead to a UAF as fast mremap will only take the PTL locks at the PMD/PUD level. SPF cannot take the PTL locks at the larger subtree granularity since this introduces much contention in the page fault paths. To address the race: 1) Only try fast mremaps if there are no users of the VMA. Android is concerned with this optimization in the context of GC stop-the-world pause. So there are no other threads active and this should almost always succeed. 2) Speculative faults detect ongoing fast mremaps and fallback to conventional fault handling (taking mmap read lock). 
Bug: 263177905 Change-Id: I23917e493ddc8576de19883cac053dfde9982b7f Signed-off-by: Kalesh Singh --- mm/mmap.c | 18 +++++++++++++++-- mm/mremap.c | 58 +++++++++++++++++++++++++++++++++++++++++++++-------- 2 files changed, 66 insertions(+), 10 deletions(-) diff --git a/mm/mmap.c b/mm/mmap.c index fba57c628671..dd52d8764c2f 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -2421,8 +2421,22 @@ struct vm_area_struct *get_vma(struct mm_struct *mm, unsigned long addr) read_lock(&mm->mm_rb_lock); vma = __find_vma(mm, addr); - if (vma) - atomic_inc(&vma->vm_ref_count); + + /* + * If there is a concurrent fast mremap, bail out since the entire + * PMD/PUD subtree may have been remapped. + * + * This is usually safe for conventional mremap since it takes the + * PTE locks as does SPF. However fast mremap only takes the lock + * at the PMD/PUD level which is ok as it is done with the mmap + * write lock held. But since SPF, as the term implies, forgoes + * taking the mmap read lock and also cannot take PTL lock at the + * larger PMD/PUD granularity, since it would introduce huge + * contention in the page fault path; fall back to regular fault + * handling. + */ + if (vma && !atomic_inc_unless_negative(&vma->vm_ref_count)) + vma = NULL; read_unlock(&mm->mm_rb_lock); return vma; diff --git a/mm/mremap.c b/mm/mremap.c index 5a18cec23fa7..b1263e9a16af 100644 --- a/mm/mremap.c +++ b/mm/mremap.c @@ -210,11 +210,39 @@ static void move_ptes(struct vm_area_struct *vma, pmd_t *old_pmd, drop_rmap_locks(vma); } +#ifdef CONFIG_SPECULATIVE_PAGE_FAULT +static inline bool trylock_vma_ref_count(struct vm_area_struct *vma) +{ + /* + * If we have the only reference, swap the refcount to -1. This + * will prevent other concurrent references by get_vma() for SPFs. + */ + return atomic_cmpxchg(&vma->vm_ref_count, 1, -1) == 1; +} + /* - * Speculative page fault handlers will not detect page table changes done - * without ptl locking. + * Restore the VMA reference count to 1 after a fast mremap. 
*/ -#if defined(CONFIG_HAVE_MOVE_PMD) && !defined(CONFIG_SPECULATIVE_PAGE_FAULT) +static inline void unlock_vma_ref_count(struct vm_area_struct *vma) +{ + /* + * Only valid after a successful trylock_vma_ref_count(). Swap outside + * VM_BUG_ON_VMA(): its condition is not evaluated without DEBUG_VM. + */ + int old = atomic_cmpxchg(&vma->vm_ref_count, -1, 1); + VM_BUG_ON_VMA(old != -1, vma); +} +#else /* !CONFIG_SPECULATIVE_PAGE_FAULT */ +static inline bool trylock_vma_ref_count(struct vm_area_struct *vma) +{ + return true; +} +static inline void unlock_vma_ref_count(struct vm_area_struct *vma) +{ +} +#endif /* CONFIG_SPECULATIVE_PAGE_FAULT */ + +#ifdef CONFIG_HAVE_MOVE_PMD static bool move_normal_pmd(struct vm_area_struct *vma, unsigned long old_addr, unsigned long new_addr, pmd_t *old_pmd, pmd_t *new_pmd) { @@ -248,6 +276,14 @@ static bool move_normal_pmd(struct vm_area_struct *vma, unsigned long old_addr, if (WARN_ON_ONCE(!pmd_none(*new_pmd))) return false; + /* + * We hold both exclusive mmap_lock and rmap_lock at this point and + * cannot block. If we cannot immediately take exclusive ownership + * of the VMA fallback to the move_ptes(). + */ + if (!trylock_vma_ref_count(vma)) + return false; + /* * We don't have to worry about the ordering of src and dst * ptlocks because exclusive mmap_lock prevents deadlock. @@ -270,6 +306,7 @@ static bool move_normal_pmd(struct vm_area_struct *vma, unsigned long old_addr, spin_unlock(new_ptl); spin_unlock(old_ptl); + unlock_vma_ref_count(vma); return true; } #else @@ -281,11 +318,7 @@ static inline bool move_normal_pmd(struct vm_area_struct *vma, } #endif -/* - * Speculative page fault handlers will not detect page table changes done - * without ptl locking. 
- */ -#if defined(CONFIG_HAVE_MOVE_PUD) && !defined(CONFIG_SPECULATIVE_PAGE_FAULT) +#ifdef CONFIG_HAVE_MOVE_PUD static bool move_normal_pud(struct vm_area_struct *vma, unsigned long old_addr, unsigned long new_addr, pud_t *old_pud, pud_t *new_pud) { @@ -300,6 +333,14 @@ static bool move_normal_pud(struct vm_area_struct *vma, unsigned long old_addr, if (WARN_ON_ONCE(!pud_none(*new_pud))) return false; + /* + * We hold both exclusive mmap_lock and rmap_lock at this point and + * cannot block. If we cannot immediately take exclusive ownership + * of the VMA fallback to the move_ptes(). + */ + if (!trylock_vma_ref_count(vma)) + return false; + /* * We don't have to worry about the ordering of src and dst * ptlocks because exclusive mmap_lock prevents deadlock. @@ -322,6 +363,7 @@ static bool move_normal_pud(struct vm_area_struct *vma, unsigned long old_addr, spin_unlock(new_ptl); spin_unlock(old_ptl); + unlock_vma_ref_count(vma); return true; } #else From 1960d4cfad2d1a1685e84a2056d42c56daee2c16 Mon Sep 17 00:00:00 2001 From: Kever Yang Date: Wed, 21 Dec 2022 23:08:35 +0800 Subject: [PATCH 145/186] ANDROID: GKI: rockchip: Enable symbols for Ethernet Leaf changes summary: 28 artifacts changed Changed leaf types summary: 0 leaf type changed Removed/Changed/Added functions summary: 0 Removed, 0 Changed, 28 Added functions Removed/Changed/Added variables summary: 0 Removed, 0 Changed, 0 Added variable 28 Added functions: [A] 'function void ethtool_convert_legacy_u32_to_link_mode(unsigned long int*, u32)' [A] 'function bool ethtool_convert_link_mode_to_legacy_u32(unsigned int*, const unsigned long int*)' [A] 'function int flow_block_cb_setup_simple(flow_block_offload*, list_head*, flow_setup_cb_t*, void*, void*, bool)' [A] 'function void flow_rule_match_basic(const flow_rule*, flow_match_basic*)' [A] 'function void flow_rule_match_ipv4_addrs(const flow_rule*, flow_match_ipv4_addrs*)' [A] 'function void flow_rule_match_ports(const flow_rule*, flow_match_ports*)' [A] 
'function void netdev_rss_key_fill(void*, size_t)' [A] 'function page* page_pool_alloc_pages(page_pool*, gfp_t)' [A] 'function page_pool* page_pool_create(const page_pool_params*)' [A] 'function void page_pool_destroy(page_pool*)' [A] 'function void page_pool_put_page(page_pool*, page*, unsigned int, bool)' [A] 'function void page_pool_release_page(page_pool*, page*)' [A] 'function void phylink_disconnect_phy(phylink*)' [A] 'function int phylink_ethtool_get_eee(phylink*, ethtool_eee*)' [A] 'function void phylink_ethtool_get_pauseparam(phylink*, ethtool_pauseparam*)' [A] 'function void phylink_ethtool_get_wol(phylink*, ethtool_wolinfo*)' [A] 'function int phylink_ethtool_ksettings_get(phylink*, ethtool_link_ksettings*)' [A] 'function int phylink_ethtool_ksettings_set(phylink*, const ethtool_link_ksettings*)' [A] 'function int phylink_ethtool_nway_reset(phylink*)' [A] 'function int phylink_ethtool_set_eee(phylink*, ethtool_eee*)' [A] 'function int phylink_ethtool_set_pauseparam(phylink*, ethtool_pauseparam*)' [A] 'function int phylink_ethtool_set_wol(phylink*, ethtool_wolinfo*)' [A] 'function int phylink_get_eee_err(phylink*)' [A] 'function void phylink_mac_change(phylink*, bool)' [A] 'function int phylink_mii_ioctl(phylink*, ifreq*, int)' [A] 'function int phylink_speed_down(phylink*, bool)' [A] 'function int phylink_speed_up(phylink*)' [A] 'function void phylink_stop(phylink*)' Bug: 239396464 Signed-off-by: Kever Yang Change-Id: I6adf45c9a7159ef07a1913222248128afc0dcccb --- android/abi_gki_aarch64.xml | 711 +++++++++++++++++++++++++++++++ android/abi_gki_aarch64_rockchip | 134 +++++- 2 files changed, 822 insertions(+), 23 deletions(-) diff --git a/android/abi_gki_aarch64.xml b/android/abi_gki_aarch64.xml index 49832c52416e..67f1874ac840 100644 --- a/android/abi_gki_aarch64.xml +++ b/android/abi_gki_aarch64.xml @@ -2499,6 +2499,8 @@ + + @@ -2562,6 +2564,10 @@ + + + + @@ -3667,6 +3673,7 @@ + @@ -3912,6 +3919,11 @@ + + + + + @@ -4144,9 +4156,25 @@ + + + + + + + + + + 
+ + + + + + @@ -7170,6 +7198,7 @@ + @@ -7294,6 +7323,9 @@ + + + @@ -10036,11 +10068,20 @@ + + + + + + + + + @@ -11797,6 +11838,14 @@ + + + + + + + + @@ -11983,6 +12032,17 @@ + + + + + + + + + + + @@ -15162,6 +15222,62 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + @@ -19310,6 +19426,32 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + @@ -21332,6 +21474,20 @@ + + + + + + + + + + + + + + @@ -21871,6 +22027,14 @@ + + + + + + + + @@ -22301,6 +22465,7 @@ + @@ -23535,6 +23700,7 @@ + @@ -27508,6 +27674,7 @@ + @@ -27535,6 +27702,26 @@ + + + + + + + + + + + + + + + + + + + + @@ -31735,6 +31922,7 @@ + @@ -33688,6 +33876,17 @@ + + + + + + + + + + + @@ -34300,6 +34499,9 @@ + + + @@ -38886,6 +39088,14 @@ + + + + + + + + @@ -39921,6 +40131,17 @@ + + + + + + + + + + + @@ -40490,6 +40711,7 @@ + @@ -40788,6 +41010,14 @@ + + + + + + + + @@ -41378,6 +41608,7 @@ + @@ -41767,6 +41998,7 @@ + @@ -46527,6 +46759,7 @@ + @@ -46639,6 +46872,32 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + @@ -48756,6 +49015,7 @@ + @@ -49016,6 +49276,7 @@ + @@ -50662,6 +50923,41 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + @@ -51599,6 +51895,14 @@ + + + + + + + + @@ -52087,6 +52391,20 @@ + + + + + + + + + + + + + + @@ -53056,6 +53374,7 @@ + @@ -54164,6 +54483,20 @@ + + + + + + + + + + + + + + @@ -54977,6 +55310,17 @@ + + + + + + + + + + + @@ -59147,6 +59491,7 @@ + @@ -60256,6 +60601,44 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + @@ -62717,6 +63100,7 @@ + @@ -63894,6 +64278,7 @@ + @@ -64694,6 +65079,11 @@ + + + + + @@ -65669,6 +66059,7 @@ + @@ -66257,6 +66648,20 @@ + + + + + + + + + + + + + + @@ -66555,6 +66960,17 @@ + + + + + + + + + + + @@ -67200,6 +67616,39 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + @@ -71936,6 +72385,14 @@ + + + + + + + + @@ -75007,6 +75464,11 @@ + + + + + @@ -75646,6 +76108,7 @@ + @@ -77713,6 +78176,14 @@ + + + + + + + 
+ @@ -83297,6 +83768,14 @@ + + + + + + + + @@ -88787,6 +89266,15 @@ + + + + + + + + + @@ -91285,6 +91773,7 @@ + @@ -93341,6 +93830,17 @@ + + + + + + + + + + + @@ -93815,6 +94315,7 @@ + @@ -94366,6 +94867,11 @@ + + + + + @@ -94997,6 +95503,29 @@ + + + + + + + + + + + + + + + + + + + + + + + @@ -100058,6 +100587,7 @@ + @@ -101118,6 +101648,23 @@ + + + + + + + + + + + + + + + + + @@ -102500,6 +103047,14 @@ + + + + + + + + @@ -110691,6 +111246,14 @@ + + + + + + + + @@ -111371,6 +111934,14 @@ + + + + + + + + @@ -129997,6 +130568,16 @@ + + + + + + + + + + @@ -130331,7 +130912,31 @@ + + + + + + + + + + + + + + + + + + + + + + + + @@ -135962,6 +136567,11 @@ + + + + + @@ -137236,6 +137846,31 @@ + + + + + + + + + + + + + + + + + + + + + + + + + @@ -138448,6 +139083,69 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + @@ -138458,10 +139156,23 @@ + + + + + + + + + + + + + diff --git a/android/abi_gki_aarch64_rockchip b/android/abi_gki_aarch64_rockchip index 6489a344910d..12d5af00e945 100644 --- a/android/abi_gki_aarch64_rockchip +++ b/android/abi_gki_aarch64_rockchip @@ -22,6 +22,8 @@ bdget_disk bdput _bin2bcd + __bitmap_and + __bitmap_andnot blk_cleanup_queue blk_execute_rq_nowait blk_mq_free_request @@ -174,6 +176,7 @@ debugfs_create_file debugfs_create_regset32 debugfs_remove + debugfs_rename default_llseek delayed_work_timer_fn del_gendisk @@ -200,6 +203,7 @@ device_del device_destroy device_get_child_node_count + device_get_match_data device_get_named_child_node device_get_next_child_node device_initialize @@ -285,6 +289,7 @@ devm_snd_soc_register_component devm_usb_get_phy _dev_notice + dev_open dev_pm_domain_detach dev_pm_opp_find_freq_ceil dev_pm_opp_find_freq_floor @@ -469,6 +474,8 @@ enable_irq eth_mac_addr eth_platform_get_mac_address + ethtool_op_get_link + ethtool_op_get_ts_info eth_type_trans eth_validate_addr event_triggers_call @@ -480,6 +487,7 @@ extcon_set_state_sync 
extcon_unregister_notifier failure_tracking + fasync_helper fd_install find_next_bit find_next_zero_bit @@ -584,6 +592,9 @@ i2c_smbus_xfer i2c_transfer i2c_transfer_buffer_flags + ida_alloc_range + ida_destroy + ida_free idr_alloc idr_destroy idr_find @@ -661,6 +672,7 @@ kfree_const kfree_sensitive kfree_skb + kill_fasync kimage_voffset __kmalloc kmalloc_caches @@ -682,6 +694,10 @@ kstrtouint_from_user kstrtoull kthread_create_on_node + kthread_create_worker + kthread_destroy_worker + kthread_flush_worker + kthread_queue_work kthread_should_stop kthread_stop ktime_get @@ -703,7 +719,10 @@ __log_read_mmio __log_write_mmio lzo1x_decompress_safe + mdiobus_alloc_size + mdiobus_free mdiobus_read + mdiobus_unregister mdiobus_write media_create_pad_link media_device_init @@ -753,10 +772,14 @@ mutex_lock_interruptible mutex_trylock mutex_unlock + napi_gro_receive __netdev_alloc_skb netdev_err netdev_info + netdev_update_features netdev_warn + netif_carrier_off + netif_carrier_on netif_rx netif_rx_ni netif_tx_wake_queue @@ -850,6 +873,7 @@ perf_trace_buf_alloc perf_trace_run_bpf_submit pfn_valid + phy_attached_info phy_configure phy_drivers_register phy_drivers_unregister @@ -965,6 +989,7 @@ __register_chrdev register_chrdev_region register_inetaddr_notifier + register_netdev register_netdevice register_netdevice_notifier register_pm_notifier @@ -1069,6 +1094,7 @@ simple_strtoul single_open single_release + skb_add_rx_frag skb_clone skb_copy skb_copy_bits @@ -1126,6 +1152,7 @@ sscanf __stack_chk_fail __stack_chk_guard + strcasecmp strchr strcmp strcpy @@ -1147,6 +1174,7 @@ sync_file_create sync_file_get_fence synchronize_irq + synchronize_net synchronize_rcu syscon_node_to_regmap syscon_regmap_lookup_by_phandle @@ -1194,6 +1222,7 @@ __unregister_chrdev unregister_chrdev_region unregister_inetaddr_notifier + unregister_netdev unregister_netdevice_notifier unregister_netdevice_queue unregister_reboot_notifier @@ -1389,7 +1418,6 @@ # required by bcmdhd.ko alloc_etherdev_mqs 
complete_and_exit - dev_open down_interruptible down_timeout iwe_stream_add_event @@ -1400,7 +1428,6 @@ mmc_set_data_timeout mmc_sw_reset mmc_wait_for_req - netdev_update_features netif_napi_add __netif_napi_del netif_set_xps_queue @@ -1411,7 +1438,6 @@ __nlmsg_put _raw_read_lock_bh _raw_read_unlock_bh - register_netdev sched_set_fifo_low sdio_claim_host sdio_disable_func @@ -1445,13 +1471,11 @@ strcat strspn sys_tz - unregister_netdev unregister_pm_notifier wireless_send_event # required by bifrost_kbase.ko __arch_clear_user - __bitmap_andnot __bitmap_equal __bitmap_or __bitmap_weight @@ -1537,7 +1561,6 @@ # required by cfg80211.ko bpf_trace_run10 _ctype - debugfs_rename dev_change_net_namespace __dev_get_by_index dev_get_by_index @@ -1570,7 +1593,6 @@ rfkill_blocked rfkill_pause_polling rfkill_resume_polling - skb_add_rx_frag __sock_create sock_release unregister_pernet_device @@ -1759,6 +1781,11 @@ usb_speed_string usb_wakeup_enabled_descendants +# required by dwmac-rockchip.ko + csum_tcpudp_nofold + ip_send_check + of_get_phy_mode + # required by fusb302.ko extcon_get_extcon_dev fwnode_create_software_node @@ -1881,7 +1908,6 @@ dev_fetch_sw_netstats dev_queue_xmit ether_setup - ethtool_op_get_link get_random_u32 __hw_addr_init __hw_addr_sync @@ -1890,10 +1916,7 @@ kernel_param_unlock kfree_skb_list ktime_get_seconds - napi_gro_receive netdev_set_default_ethtool_ops - netif_carrier_off - netif_carrier_on netif_receive_skb netif_receive_skb_list netif_tx_stop_all_queues @@ -1919,7 +1942,6 @@ skb_queue_head skb_queue_purge skb_queue_tail - synchronize_net unregister_inet6addr_notifier unregister_netdevice_many @@ -1956,9 +1978,6 @@ dev_pm_qos_expose_latency_tolerance dev_pm_qos_hide_latency_tolerance dev_pm_qos_update_user_latency_tolerance - ida_alloc_range - ida_destroy - ida_free init_srcu_struct memchr_inv param_ops_ulong @@ -2107,7 +2126,6 @@ extcon_sync # required by phy-rockchip-inno-usb3.ko - strcasecmp usb_add_phy # required by 
phy-rockchip-samsung-hdptx-hdmi.ko @@ -2159,6 +2177,18 @@ pm_genpd_init pm_genpd_remove +# required by pps_core.ko + kobject_get + +# required by ptp.ko + kthread_cancel_delayed_work_sync + kthread_delayed_work_timer_fn + kthread_mod_delayed_work + kthread_queue_delayed_work + ktime_get_snapshot + posix_clock_register + posix_clock_unregister + # required by pwm-regulator.ko regulator_map_voltage_iterate @@ -2562,10 +2592,8 @@ blk_verify_command cdev_alloc class_interface_unregister - fasync_helper get_sg_io_hdr import_iovec - kill_fasync put_sg_io_hdr _raw_read_lock_irqsave _raw_read_unlock_irqrestore @@ -2593,12 +2621,7 @@ # required by smsc95xx.ko csum_partial - ethtool_op_get_ts_info - mdiobus_alloc_size - mdiobus_free __mdiobus_register - mdiobus_unregister - phy_attached_info phy_connect_direct phy_disconnect phy_ethtool_get_link_ksettings @@ -2677,6 +2700,71 @@ spi_setup stream_open +# required by stmmac-platform.ko + device_get_phy_mode + of_get_mac_address + of_phy_is_fixed_link + platform_get_irq_byname_optional + +# required by stmmac.ko + devm_alloc_etherdev_mqs + dql_completed + dql_reset + ethtool_convert_legacy_u32_to_link_mode + ethtool_convert_link_mode_to_legacy_u32 + flow_block_cb_setup_simple + flow_rule_match_basic + flow_rule_match_ipv4_addrs + flow_rule_match_ports + mdiobus_get_phy + __napi_alloc_skb + napi_complete_done + napi_disable + __napi_schedule + napi_schedule_prep + netdev_alert + netdev_pick_tx + netdev_rss_key_fill + netif_device_attach + netif_device_detach + netif_napi_add + __netif_napi_del + netif_schedule_queue + netif_set_real_num_rx_queues + netif_set_real_num_tx_queues + of_mdiobus_register + page_pool_alloc_pages + page_pool_create + page_pool_destroy + page_pool_put_page + page_pool_release_page + phy_init_eee + phylink_connect_phy + phylink_create + phylink_destroy + phylink_disconnect_phy + phylink_ethtool_get_eee + phylink_ethtool_get_pauseparam + phylink_ethtool_get_wol + phylink_ethtool_ksettings_get + 
phylink_ethtool_ksettings_set + phylink_ethtool_nway_reset + phylink_ethtool_set_eee + phylink_ethtool_set_pauseparam + phylink_ethtool_set_wol + phylink_get_eee_err + phylink_mac_change + phylink_mii_ioctl + phylink_of_phy_connect + phylink_set_port_modes + phylink_speed_down + phylink_speed_up + phylink_start + phylink_stop + pm_wakeup_dev_event + reset_control_reset + skb_tstamp_tx + # required by sw_sync.ko dma_fence_free dma_fence_signal_locked From 91e760f1f24b5dbc72e570863a42b6fc20f3a60f Mon Sep 17 00:00:00 2001 From: Kever Yang Date: Wed, 21 Dec 2022 23:34:03 +0800 Subject: [PATCH 146/186] ANDROID: GKI: rockchip: Enable symbols for HDMIRX Leaf changes summary: 1 artifact changed Changed leaf types summary: 0 leaf type changed Removed/Changed/Added functions summary: 0 Removed, 0 Changed, 1 Added function Removed/Changed/Added variables summary: 0 Removed, 0 Changed, 0 Added variable 1 Added function: [A] 'function bool v4l2_find_dv_timings_cap(v4l2_dv_timings*, const v4l2_dv_timings_cap*, unsigned int, v4l2_check_dv_timings_fnc*, void*)' Bug: 239396464 Signed-off-by: Kever Yang Change-Id: I45009e2791f99b65476daafaff343df33af72433 --- android/abi_gki_aarch64.xml | 9 +++++++++ android/abi_gki_aarch64_rockchip | 19 +++++++++++++++---- 2 files changed, 24 insertions(+), 4 deletions(-) diff --git a/android/abi_gki_aarch64.xml b/android/abi_gki_aarch64.xml index 67f1874ac840..bda4150b97dd 100644 --- a/android/abi_gki_aarch64.xml +++ b/android/abi_gki_aarch64.xml @@ -5954,6 +5954,7 @@ + @@ -148267,6 +148268,14 @@ + + + + + + + + diff --git a/android/abi_gki_aarch64_rockchip b/android/abi_gki_aarch64_rockchip index 12d5af00e945..ad2a3046e510 100644 --- a/android/abi_gki_aarch64_rockchip +++ b/android/abi_gki_aarch64_rockchip @@ -122,6 +122,7 @@ __const_udelay consume_skb cpu_bit_bitmap + cpufreq_cpu_get __cpufreq_driver_target cpufreq_generic_suspend cpufreq_register_governor @@ -219,6 +220,7 @@ device_remove_file device_set_wakeup_capable device_set_wakeup_enable 
+ device_unregister device_wakeup_enable _dev_info __dev_kfree_skb_any @@ -1187,6 +1189,7 @@ sysfs_remove_link sysfs_streq system_freezable_wq + system_highpri_wq system_long_wq system_power_efficient_wq system_state @@ -1528,7 +1531,6 @@ simple_open strcspn system_freezing_cnt - system_highpri_wq _totalram_pages __traceiter_gpu_mem_total trace_output_call @@ -2069,7 +2071,6 @@ __arm_smccc_hvc bus_for_each_dev device_register - device_unregister free_pages_exact memremap memunmap @@ -2320,6 +2321,18 @@ dev_pm_opp_put_prop_name dev_pm_opp_set_supported_hw +# required by rockchip-hdmirx.ko + cec_s_phys_addr_from_edid + cpu_latency_qos_remove_request + device_create_with_groups + of_reserved_mem_device_release + v4l2_ctrl_log_status + v4l2_ctrl_subscribe_event + v4l2_find_dv_timings_cap + v4l2_src_change_event_subscribe + vb2_dma_contig_memops + vb2_fop_read + # required by rockchip-rng.ko devm_hwrng_register devm_of_iomap @@ -2328,7 +2341,6 @@ cpu_topology # required by rockchip_dmc.ko - cpufreq_cpu_get cpufreq_cpu_put cpufreq_quick_get devfreq_event_disable_edev @@ -2574,7 +2586,6 @@ sdhci_setup_host # required by sdhci-of-dwcmshc.ko - device_get_match_data devm_clk_bulk_get_optional dma_get_required_mask sdhci_adma_write_desc From b460d3c09a7ba6f020b532a3a21acb58acebc315 Mon Sep 17 00:00:00 2001 From: Kever Yang Date: Thu, 22 Dec 2022 11:03:08 +0800 Subject: [PATCH 147/186] ANDROID: GKI: rockchip: Update module fragment and symbol list Add fragment need by rockchip platform and sync the symbol list to the latest source code. This patch does not add or remove symbol from xml file. 
Bug: 239396464 Signed-off-by: Kever Yang Change-Id: I54e37e865124cbc7f70646481ca798e27fcc4706 --- android/abi_gki_aarch64_rockchip | 132 +++++++++++++---------- arch/arm64/configs/rockchip_gki.fragment | 12 ++- 2 files changed, 84 insertions(+), 60 deletions(-) diff --git a/android/abi_gki_aarch64_rockchip b/android/abi_gki_aarch64_rockchip index ad2a3046e510..ec0b9c854e41 100644 --- a/android/abi_gki_aarch64_rockchip +++ b/android/abi_gki_aarch64_rockchip @@ -82,10 +82,13 @@ __cfi_slowpath __check_object_size __class_create + class_create_file_ns class_destroy class_for_each_device __class_register + class_remove_file_ns class_unregister + __ClearPageMovable clk_bulk_disable clk_bulk_enable clk_bulk_prepare @@ -171,6 +174,7 @@ crypto_unregister_shash crypto_unregister_template __crypto_xor + _ctype debugfs_attr_read debugfs_attr_write debugfs_create_dir @@ -183,7 +187,6 @@ del_gendisk del_timer del_timer_sync - desc_to_gpio destroy_workqueue dev_close dev_driver_string @@ -192,6 +195,7 @@ devfreq_add_governor devfreq_recommended_opp devfreq_register_opp_notifier + devfreq_remove_governor devfreq_resume_device devfreq_suspend_device devfreq_unregister_opp_notifier @@ -234,6 +238,7 @@ devm_devfreq_add_device devm_devfreq_event_add_edev devm_devfreq_register_opp_notifier + devm_device_add_group devm_extcon_dev_allocate devm_extcon_dev_register devm_free_irq @@ -306,6 +311,7 @@ dev_pm_opp_register_set_opp_helper dev_pm_opp_set_rate dev_pm_opp_set_regulators + dev_pm_opp_set_supported_hw dev_pm_opp_unregister_set_opp_helper dev_printk devres_add @@ -368,8 +374,8 @@ driver_register driver_unregister drm_add_edid_modes + drm_add_modes_noedid drm_atomic_get_crtc_state - drm_atomic_get_new_bridge_state drm_atomic_get_new_connector_for_encoder drm_atomic_helper_bridge_destroy_state drm_atomic_helper_bridge_duplicate_state @@ -387,8 +393,12 @@ drm_compat_ioctl drm_connector_attach_encoder drm_connector_cleanup + drm_connector_has_possible_encoder drm_connector_init 
drm_connector_init_with_ddc + drm_connector_list_iter_begin + drm_connector_list_iter_end + drm_connector_list_iter_next drm_connector_unregister drm_connector_update_edid_property __drm_dbg @@ -406,7 +416,9 @@ drm_dp_aux_register drm_dp_aux_unregister drm_dp_bw_code_to_link_rate + drm_dp_channel_eq_ok drm_dp_dpcd_read + drm_dp_dpcd_read_link_status drm_dp_dpcd_write drm_dp_get_phy_test_pattern drm_dp_link_rate_to_bw_code @@ -550,6 +562,7 @@ gpiod_get_value_cansleep gpiod_set_consumer_name gpiod_set_raw_value + gpiod_set_raw_value_cansleep gpiod_set_value gpiod_set_value_cansleep gpiod_to_irq @@ -579,6 +592,7 @@ hrtimer_start_range_ns i2c_adapter_type i2c_add_adapter + i2c_add_numbered_adapter i2c_del_adapter i2c_del_driver i2c_get_adapter @@ -651,7 +665,6 @@ irq_find_mapping irq_get_irq_data irq_modify_status - irq_of_parse_and_map irq_set_affinity_hint irq_set_chained_handler_and_data irq_set_chip @@ -659,6 +672,7 @@ irq_set_chip_data irq_set_irq_type irq_set_irq_wake + irq_to_desc is_vmalloc_addr jiffies jiffies_to_msecs @@ -717,6 +731,7 @@ __list_add_valid __list_del_entry_valid __local_bh_enable_ip + __lock_page __log_post_read_mmio __log_read_mmio __log_write_mmio @@ -739,7 +754,6 @@ media_pipeline_start media_pipeline_stop memcpy - __memcpy_fromio memdup_user memmove memset @@ -758,7 +772,9 @@ mipi_dsi_host_unregister misc_deregister misc_register + mmc_cqe_request_done mmc_of_parse + mmc_request_done __mmdrop mod_delayed_work_on mod_timer @@ -814,6 +830,7 @@ of_device_is_available of_device_is_compatible of_drm_find_bridge + of_drm_find_panel of_find_compatible_node of_find_device_by_node of_find_i2c_device_by_node @@ -832,6 +849,7 @@ of_get_parent of_get_property of_get_regulator_init_data + of_graph_get_endpoint_by_regs of_graph_get_next_endpoint of_graph_get_remote_node of_graph_get_remote_port_parent @@ -914,6 +932,7 @@ platform_driver_unregister platform_get_irq platform_get_irq_byname + platform_get_irq_optional platform_get_resource 
platform_get_resource_byname platform_irq_count @@ -940,6 +959,7 @@ power_supply_changed power_supply_class power_supply_get_battery_info + power_supply_get_by_name power_supply_get_by_phandle power_supply_get_drvdata power_supply_get_property @@ -963,6 +983,7 @@ put_disk __put_page __put_task_struct + put_unused_fd pwm_adjust_config pwm_apply_state queue_delayed_work_on @@ -973,6 +994,7 @@ _raw_spin_lock_bh _raw_spin_lock_irq _raw_spin_lock_irqsave + _raw_spin_trylock _raw_spin_unlock _raw_spin_unlock_bh _raw_spin_unlock_irq @@ -1080,6 +1102,7 @@ seq_puts seq_read set_page_dirty_lock + __SetPageMovable sg_alloc_table sg_alloc_table_from_pages sg_free_table @@ -1113,6 +1136,7 @@ skcipher_walk_virt snd_pcm_format_width snd_soc_add_component_controls + snd_soc_add_dai_controls snd_soc_card_jack_new snd_soc_component_read snd_soc_component_set_jack @@ -1145,6 +1169,7 @@ snd_soc_pm_ops snd_soc_put_enum_double snd_soc_put_volsw + snd_soc_register_component snd_soc_unregister_component snprintf sort @@ -1222,6 +1247,7 @@ typec_switch_register typec_switch_unregister __udelay + unlock_page __unregister_chrdev unregister_chrdev_region unregister_inetaddr_notifier @@ -1431,8 +1457,6 @@ mmc_set_data_timeout mmc_sw_reset mmc_wait_for_req - netif_napi_add - __netif_napi_del netif_set_xps_queue __netlink_kernel_create netlink_kernel_release @@ -1482,6 +1506,7 @@ __bitmap_equal __bitmap_or __bitmap_weight + __bitmap_xor cache_line_size clear_page complete_all @@ -1542,9 +1567,6 @@ vmalloc_user vmf_insert_pfn_prot -# required by bq25700_charger.ko - power_supply_get_by_name - # required by cdc-wdm.ko cdc_parse_cdc_header @@ -1562,7 +1584,6 @@ # required by cfg80211.ko bpf_trace_run10 - _ctype dev_change_net_namespace __dev_get_by_index dev_get_by_index @@ -1652,11 +1673,6 @@ # required by cm3218.ko i2c_smbus_write_word_data -# required by cma_heap.ko - cma_get_name - dma_heap_get_drvdata - dma_heap_put - # required by cpufreq-dt.ko cpufreq_enable_boost_support 
cpufreq_freq_attr_scaling_available_freqs @@ -1691,7 +1707,6 @@ # required by cqhci.ko devm_blk_ksm_init - mmc_cqe_request_done # required by cryptodev.ko crypto_ahash_final @@ -1702,7 +1717,11 @@ sg_last unregister_sysctl_table +# required by cw221x_battery.ko + power_supply_is_system_supplied + # required by display-connector.ko + drm_atomic_get_new_bridge_state drm_probe_ddc # required by dm9601.ko @@ -1720,7 +1739,6 @@ # required by dw-hdmi.ko drm_connector_attach_max_bpc_property drm_default_rgb_quant_range - of_graph_get_endpoint_by_regs # required by dw-mipi-dsi.ko drm_panel_bridge_add_typed @@ -1746,14 +1764,12 @@ mmc_regulator_set_ocr mmc_regulator_set_vqmmc mmc_remove_host - mmc_request_done sdio_signal_irq sg_miter_next sg_miter_start sg_miter_stop # required by dw_wdt.ko - platform_get_irq_optional watchdog_init_timeout watchdog_register_device watchdog_set_restart_priority @@ -1764,7 +1780,6 @@ bitmap_find_next_zero_area_off __bitmap_set phy_reset - _raw_spin_trylock usb_add_gadget_udc usb_del_gadget_udc usb_ep_set_maxpacket_limit @@ -1791,6 +1806,7 @@ # required by fusb302.ko extcon_get_extcon_dev fwnode_create_software_node + sched_set_fifo tcpm_cc_change tcpm_pd_hard_reset tcpm_pd_receive @@ -1842,6 +1858,7 @@ i2c_verify_client # required by i2c-gpio.ko + desc_to_gpio i2c_bit_add_numbered_bus # required by i2c-hid.ko @@ -1852,7 +1869,6 @@ hid_parse_report # required by i2c-mux.ko - i2c_add_numbered_adapter __i2c_transfer rt_mutex_lock rt_mutex_trylock @@ -2097,6 +2113,7 @@ # required by pcie-dw-rockchip.ko cpumask_next_and + debugfs_create_devm_seqfile dw_pcie_find_ext_capability dw_pcie_host_init dw_pcie_link_up @@ -2173,10 +2190,13 @@ clk_bulk_put of_genpd_add_provider_onecell panic + param_get_bool + param_set_bool pm_clk_add_clk pm_genpd_add_subdomain pm_genpd_init pm_genpd_remove + pm_wq # required by pps_core.ko kobject_get @@ -2205,6 +2225,13 @@ pwm_free pwm_request +# required by pwrseq_simple.ko + bitmap_alloc + devm_gpiod_get_array + 
gpiod_set_array_value_cansleep + mmc_pwrseq_register + mmc_pwrseq_unregister + # required by reboot-mode.ko devres_release kernel_kobj @@ -2218,6 +2245,7 @@ alloc_iova_fast dma_fence_wait_timeout free_iova_fast + idr_alloc_cyclic kstrdup_quotable_cmdline mmput @@ -2226,9 +2254,6 @@ irq_domain_xlate_onetwocell irq_set_parent -# required by rk628_dsi.ko - of_drm_find_panel - # required by rk805-pwrkey.ko devm_request_any_context_irq @@ -2253,6 +2278,10 @@ # required by rk860x-regulator.ko regulator_suspend_enable +# required by rk_cma_heap.ko + dma_heap_get_drvdata + dma_heap_put + # required by rk_crypto.ko crypto_ahash_digest crypto_dequeue_request @@ -2281,8 +2310,15 @@ # required by rk_ircut.ko drain_workqueue +# required by rk_system_heap.ko + deferred_free + dmabuf_page_pool_alloc + dmabuf_page_pool_create + dmabuf_page_pool_destroy + dmabuf_page_pool_free + swiotlb_max_segment + # required by rk_vcodec.ko - devfreq_remove_governor devm_iounmap dev_pm_domain_attach dev_pm_opp_get_freq @@ -2292,9 +2328,7 @@ __fdget iommu_device_unregister iommu_dma_reserve_iova - kthread_flush_worker __kthread_init_worker - kthread_queue_work kthread_worker_fn of_device_alloc of_dma_configure_id @@ -2319,7 +2353,6 @@ # required by rockchip-cpufreq.ko cpufreq_unregister_notifier dev_pm_opp_put_prop_name - dev_pm_opp_set_supported_hw # required by rockchip-hdmirx.ko cec_s_phys_addr_from_edid @@ -2340,6 +2373,9 @@ # required by rockchip_bus.ko cpu_topology +# required by rockchip_debug.ko + nr_irqs + # required by rockchip_dmc.ko cpufreq_cpu_put cpufreq_quick_get @@ -2373,7 +2409,6 @@ regulator_get_linear_step # required by rockchip_pwm_remotectl.ko - irq_to_desc __tasklet_hi_schedule # required by rockchip_saradc.ko @@ -2407,7 +2442,6 @@ component_unbind_all devm_of_phy_get_by_index driver_find_device - drm_add_modes_noedid drm_atomic_commit drm_atomic_get_connector_state drm_atomic_get_plane_state @@ -2441,11 +2475,8 @@ drm_atomic_set_mode_for_crtc drm_atomic_state_alloc 
__drm_atomic_state_free + drm_bridge_chain_mode_set drm_bridge_get_edid - drm_connector_has_possible_encoder - drm_connector_list_iter_begin - drm_connector_list_iter_end - drm_connector_list_iter_next drm_connector_list_update drm_crtc_cleanup drm_crtc_enable_color_mgmt @@ -2459,9 +2490,7 @@ drm_crtc_vblank_put drm_debugfs_create_files drm_do_get_edid - drm_dp_channel_eq_ok drm_dp_clock_recovery_ok - drm_dp_dpcd_read_link_status drm_dp_get_adjust_request_pre_emphasis drm_dp_get_adjust_request_voltage drm_dp_read_desc @@ -2592,10 +2621,6 @@ sdhci_remove_host sdhci_request -# required by sensor_dev.ko - class_create_file_ns - class_remove_file_ns - # required by sg.ko blk_get_request blk_put_request @@ -2650,9 +2675,6 @@ # required by snd-soc-es8316.ko snd_pcm_hw_constraint_list -# required by snd-soc-es8326.ko - snd_soc_register_component - # required by snd-soc-hdmi-codec.ko snd_ctl_add snd_ctl_new1 @@ -2661,7 +2683,7 @@ snd_pcm_fill_iec958_consumer snd_pcm_fill_iec958_consumer_hw_params snd_pcm_hw_constraint_eld - snd_pcm_stop_xrun + snd_pcm_stop # required by snd-soc-rk817.ko snd_soc_component_exit_regmap @@ -2672,7 +2694,8 @@ # required by snd-soc-rockchip-i2s-tdm.ko clk_is_match - snd_soc_add_dai_controls + pm_runtime_forbid + snd_pcm_stop_xrun # required by snd-soc-rockchip-i2s.ko of_prop_next_string @@ -2682,8 +2705,10 @@ snd_soc_jack_add_zones snd_soc_jack_get_type +# required by snd-soc-rockchip-spdif.ko + snd_pcm_create_iec958_consumer_hw_params + # required by snd-soc-rt5640.ko - gpiod_set_raw_value_cansleep regmap_register_patch snd_soc_dapm_force_bias_level @@ -2780,15 +2805,6 @@ dma_fence_free dma_fence_signal_locked __get_task_comm - put_unused_fd - -# required by system_heap.ko - deferred_free - dmabuf_page_pool_alloc - dmabuf_page_pool_create - dmabuf_page_pool_destroy - dmabuf_page_pool_free - swiotlb_max_segment # required by tcpci_husb311.ko tcpci_get_tcpm_port @@ -2812,6 +2828,7 @@ # required by timer-rockchip.ko clockevents_config_and_register 
+ irq_of_parse_and_map # required by tps65132-regulator.ko regulator_set_active_discharge_regmap @@ -2892,6 +2909,7 @@ # required by video_rkisp.ko media_device_cleanup + __memcpy_fromio __memcpy_toio param_ops_ullong v4l2_pipeline_link_notify @@ -2931,7 +2949,6 @@ # required by zsmalloc.ko alloc_anon_inode - __ClearPageMovable contig_page_data dec_zone_page_state inc_zone_page_state @@ -2940,11 +2957,8 @@ kern_mount kern_unmount kill_anon_super - __lock_page page_mapping _raw_read_lock _raw_read_unlock _raw_write_lock _raw_write_unlock - __SetPageMovable - unlock_page diff --git a/arch/arm64/configs/rockchip_gki.fragment b/arch/arm64/configs/rockchip_gki.fragment index 6253108101a8..1eb35920205b 100644 --- a/arch/arm64/configs/rockchip_gki.fragment +++ b/arch/arm64/configs/rockchip_gki.fragment @@ -50,10 +50,16 @@ CONFIG_DRM_PANEL_SIMPLE=m CONFIG_DRM_RK1000_TVE=m CONFIG_DRM_RK630_TVE=m CONFIG_DRM_ROCKCHIP=m +CONFIG_DRM_ROCKCHIP_RK618=m CONFIG_DRM_ROCKCHIP_RK628=m CONFIG_DRM_ROHM_BU18XL82=m CONFIG_DRM_SII902X=m CONFIG_DTC_SYMBOLS=y +# CONFIG_DWMAC_GENERIC is not set +# CONFIG_DWMAC_IPQ806X is not set +# CONFIG_DWMAC_QCOM_ETHQOS is not set +# CONFIG_DWMAC_SUN8I is not set +# CONFIG_DWMAC_SUNXI is not set CONFIG_DW_WATCHDOG=m CONFIG_GPIO_ROCKCHIP=m CONFIG_GREENASIA_FF=y @@ -146,6 +152,7 @@ CONFIG_MALI_BIFROST_EXPERT=y CONFIG_MALI_CSF_SUPPORT=y CONFIG_MALI_PLATFORM_NAME="rk" CONFIG_MALI_PWRSOFT_765=y +CONFIG_MFD_RK618=m CONFIG_MFD_RK628=m CONFIG_MFD_RK630_I2C=m CONFIG_MFD_RK806_SPI=m @@ -186,6 +193,7 @@ CONFIG_PROXIMITY_DEVICE=m CONFIG_PS_STK3410=m CONFIG_PS_UCS14620=m CONFIG_PWM_ROCKCHIP=m +CONFIG_PWRSEQ_SIMPLE=m CONFIG_REGULATOR_ACT8865=m CONFIG_REGULATOR_FAN53555=m CONFIG_REGULATOR_GPIO=m @@ -236,6 +244,7 @@ CONFIG_ROCKCHIP_PM_DOMAINS=m CONFIG_ROCKCHIP_PVTM=m CONFIG_ROCKCHIP_REMOTECTL=m CONFIG_ROCKCHIP_REMOTECTL_PWM=m +CONFIG_ROCKCHIP_MULTI_RGA=m CONFIG_ROCKCHIP_RGB=y CONFIG_ROCKCHIP_RKNPU=m CONFIG_ROCKCHIP_SARADC=m @@ -278,6 +287,7 @@ CONFIG_SND_SOC_RT5640=m 
CONFIG_SND_SOC_SPDIF=m CONFIG_SPI_ROCKCHIP=m CONFIG_SPI_SPIDEV=m +CONFIG_STMMAC_ETH=m CONFIG_SW_SYNC=m CONFIG_SYSCON_REBOOT_MODE=m CONFIG_TEE=m @@ -328,8 +338,8 @@ CONFIG_VIDEO_RK628_BT1120=m CONFIG_VIDEO_RK628_CSI=m CONFIG_VIDEO_RK_IRCUT=m CONFIG_VIDEO_ROCKCHIP_CIF=m +CONFIG_VIDEO_ROCKCHIP_HDMIRX=m CONFIG_VIDEO_ROCKCHIP_ISP=m -CONFIG_VIDEO_ROCKCHIP_ISPP=m CONFIG_VIDEO_S5K3L6XX=m CONFIG_VIDEO_S5KJN1=m CONFIG_VIDEO_SGM3784=m From 91e4675508a2633a8c9d178912f40fd9b0adb299 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Thu, 5 Jan 2023 22:29:52 +0000 Subject: [PATCH 148/186] ANDROID: fips140: add crypto_memneq() back to the module crypto_memneq() is one of the utility functions that was intentionally included in the fips140 module, out of concerns that it would be seen as "cryptographic" and thus would be required to be included in the module for the FIPS certification. It should not have been removed from the module, so add it back. Bug: 188620248 Fixes: 18cd39b70602 ("Merge tag 'android12-5.10.136_r00' into android12-5.10") Change-Id: I8a19dfd73390f8c1348885f97fa42d900e47b82b Signed-off-by: Eric Biggers --- crypto/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crypto/Makefile b/crypto/Makefile index 2785e5fab9e7..8be22a6ea3d8 100644 --- a/crypto/Makefile +++ b/crypto/Makefile @@ -214,7 +214,7 @@ crypto-fips-objs := drbg.o ecb.o cbc.o ctr.o cts.o gcm.o xts.o hmac.o cmac.o \ gf128mul.o aes_generic.o lib-crypto-aes.o \ jitterentropy.o jitterentropy-kcapi.o \ sha1_generic.o sha256_generic.o sha512_generic.o \ - lib-sha1.o lib-crypto-sha256.o + lib-memneq.o lib-sha1.o lib-crypto-sha256.o crypto-fips-objs := $(foreach o,$(crypto-fips-objs),$(o:.o=-fips.o)) # get the arch to add its objects to $(crypto-fips-objs) From 447ba7ae757cd8ff9900875586396d64030933c1 Mon Sep 17 00:00:00 2001 From: Wu Bo Date: Mon, 9 Jan 2023 16:15:31 +0800 Subject: [PATCH 149/186] ANDROID: GKI: VIVO: Add a symbol to symbol list Add 'dentry_path_raw' symbol to support some
monitoring tools. This patch does not add or remove symbol from xml file. Bug: 264831214 Change-Id: I2b5aaa2945c5fd0ebe4062915b53407251a6ab77 Signed-off-by: Wu Bo --- android/abi_gki_aarch64_vivo | 1 + 1 file changed, 1 insertion(+) diff --git a/android/abi_gki_aarch64_vivo b/android/abi_gki_aarch64_vivo index c3bc87e17c9f..28c4b05aa43f 100644 --- a/android/abi_gki_aarch64_vivo +++ b/android/abi_gki_aarch64_vivo @@ -277,6 +277,7 @@ del_gendisk del_timer del_timer_sync + dentry_path_raw desc_to_gpio destroy_workqueue dev_coredumpv From 824c55581dde8710103b10f8968050f2cc9bf3fb Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 5 Sep 2021 11:24:05 -0700 Subject: [PATCH 150/186] UPSTREAM: Enable '-Werror' by default for all kernel builds ... but make it a config option so that broken environments can disable it when required. We really should always have a clean build, and will disable specific over-eager warnings as required, if we can't fix them. But while I fairly religiously enforce that in my own tree, it doesn't get enforced by various build robots that don't necessarily report warnings. So this just makes '-Werror' a default compiler flag, but allows people to disable it for their configuration if they have some particular issues. Occasionally, new compiler versions end up enabling new warnings, and it can take a while before we have them fixed (or the warnings disabled if that is what it takes), so the config option allows for that situation. Hopefully this will mean that I get fewer pull requests that have new warnings that were not noticed by various automation we have in place. Knock wood. 
Signed-off-by: Linus Torvalds (cherry picked from commit 3fe617ccafd6f5bb33c2391d6f4eeb41c1fd0151) Signed-off-by: Alistair Delva Change-Id: If981f26ebe668be7c727661fede10215c4ee5bc5 --- Makefile | 3 +++ init/Kconfig | 14 ++++++++++++++ 2 files changed, 17 insertions(+) diff --git a/Makefile b/Makefile index a413e6c0c15c..e8b8d5894f9e 100644 --- a/Makefile +++ b/Makefile @@ -793,6 +793,9 @@ stackp-flags-$(CONFIG_STACKPROTECTOR_STRONG) := -fstack-protector-strong KBUILD_CFLAGS += $(stackp-flags-y) +KBUILD_CFLAGS-$(CONFIG_WERROR) += -Werror +KBUILD_CFLAGS += $(KBUILD_CFLAGS-y) + ifdef CONFIG_CC_IS_CLANG KBUILD_CPPFLAGS += -Qunused-arguments KBUILD_CFLAGS += -Wno-format-invalid-specifier diff --git a/init/Kconfig b/init/Kconfig index 6c119a4010ee..f7b680e39f65 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -131,6 +131,20 @@ config COMPILE_TEST here. If you are a user/distributor, say N here to exclude useless drivers to be distributed. +config WERROR + bool "Compile the kernel with warnings as errors" + default y + help + A kernel build should not cause any compiler warnings, and this + enables the '-Werror' flag to enforce that rule by default. + + However, if you have a new (or very old) compiler with odd and + unusual warnings, or you have some architecture with problems, + you may need to disable this config option in order to + successfully build the kernel. + + If in doubt, say Y. + config UAPI_HEADER_TEST bool "Compile test UAPI headers" depends on HEADERS_INSTALL && CC_CAN_LINK From 7c31ae524c45378af224f52d47caaa117a582490 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Wed, 8 Dec 2021 14:13:22 +0100 Subject: [PATCH 151/186] ANDROID: allmodconfig: disable WERROR -Werror still fails on some arm and arm64 code due to clang issues (works on gcc!), so disable it when building allmodconfig builds for now. Hopefully the clang developers will work on this... 
Bug: 199872592 Signed-off-by: Greg Kroah-Hartman Change-Id: I6ccc856773c40e3c0f541a1316b20e9ae3de4380 (cherry picked from commit eb57c31115051c5404d1bb1f2daec20e051b0287) Signed-off-by: Alistair Delva --- build.config.allmodconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/build.config.allmodconfig b/build.config.allmodconfig index a48f8d420208..e5cbe7faa60a 100644 --- a/build.config.allmodconfig +++ b/build.config.allmodconfig @@ -9,6 +9,7 @@ function update_config() { -d CPU_BIG_ENDIAN \ -d DYNAMIC_FTRACE \ -e UNWINDER_FRAME_POINTER \ + -d WERROR \ (cd ${OUT_DIR} && \ make O=${OUT_DIR} $archsubarch CROSS_COMPILE=${CROSS_COMPILE} "${TOOL_ARGS[@]}" ${MAKE_ARGS} olddefconfig) From 25280f263d3c5f3d29fd00fc7bb032e99a17cfaa Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Wed, 16 Nov 2022 09:43:39 +0100 Subject: [PATCH 152/186] UPSTREAM: io_uring: kill goto error handling in io_sqpoll_wait_sq() Hunk extracted from commit 70aacfe66136809d7f080f89c492c278298719f4 upstream. If the sqpoll thread has died, the out condition doesn't remove the waiting task from the waitqueue. The goto and check are not needed, just make it a break condition after setting the error value. That ensures that we always remove ourselves from sqo_sq_wait waitqueue. 
Bug: 259534862 Reported-by: Xingyuan Mo Signed-off-by: Jens Axboe Signed-off-by: Greg Kroah-Hartman (cherry picked from commit 0f544353fec8e717d37724d95b92538e1de79e86) Signed-off-by: Lee Jones Change-Id: I453c3e23a2f0c5ce6a8dd73dac020ec6f32994ca --- fs/io_uring.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index 5538906e47fe..7baab885c6f2 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -9029,7 +9029,7 @@ static int io_sqpoll_wait_sq(struct io_ring_ctx *ctx) if (unlikely(ctx->sqo_dead)) { ret = -EOWNERDEAD; - goto out; + break; } if (!io_sqring_full(ctx)) @@ -9039,7 +9039,6 @@ static int io_sqpoll_wait_sq(struct io_ring_ctx *ctx) } while (!signal_pending(current)); finish_wait(&ctx->sqo_sq_wait, &wait); -out: return ret; } From 3112d6f5021988af4a150be1142ee68419463f38 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Wed, 4 Aug 2021 10:23:48 +0800 Subject: [PATCH 153/186] BACKPORT: f2fs: extent cache: support unaligned extent Compressed inode may suffer read performance issue because it cannot use the extent cache, so I propose to add this unaligned extent support to improve it. Currently, it only works in readonly format f2fs image. Unaligned extent: in one compressed cluster, physical block number will be less than logical block number, so we add an extra physical block length in extent info in order to indicate such extent status. The idea is if one whole cluster blocks are contiguous physically, once its mapping info has been read for the first time, we will cache an unaligned (or aligned) extent info entry in extent cache, expecting that the mapping info will be hit when rereading the cluster. Merge policy: - Aligned extents can be merged. - Aligned extent and unaligned extent can not be merged.
Bug: 264453689 Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim (cherry picked from commit 627371ed31cf9f56ba66ad8dfabbc7bc9b7a4816) Change-Id: I106279145558f38dfa295c3e99fa03f6fcd306f4 --- fs/f2fs/compress.c | 24 ++++++++++++++++++++++++ fs/f2fs/data.c | 38 +++++++++++++++++++++++++++----------- fs/f2fs/extent_cache.c | 41 +++++++++++++++++++++++++++++++++++++++++ fs/f2fs/f2fs.h | 20 ++++++++++++++++++++ fs/f2fs/node.c | 20 ++++++++++++++++++++ 5 files changed, 132 insertions(+), 11 deletions(-) diff --git a/fs/f2fs/compress.c b/fs/f2fs/compress.c index 12c789f879a1..3116b38c2f6b 100644 --- a/fs/f2fs/compress.c +++ b/fs/f2fs/compress.c @@ -1660,6 +1660,30 @@ void f2fs_put_page_dic(struct page *page) f2fs_put_dic(dic); } +/* + * check whether cluster blocks are contiguous, and add extent cache entry + * only if cluster blocks are logically and physically contiguous. + */ +unsigned int f2fs_cluster_blocks_are_contiguous(struct dnode_of_data *dn) +{ + bool compressed = f2fs_data_blkaddr(dn) == COMPRESS_ADDR; + int i = compressed ? 1 : 0; + block_t first_blkaddr = data_blkaddr(dn->inode, dn->node_page, + dn->ofs_in_node + i); + + for (i += 1; i < F2FS_I(dn->inode)->i_cluster_size; i++) { + block_t blkaddr = data_blkaddr(dn->inode, dn->node_page, + dn->ofs_in_node + i); + + if (!__is_valid_data_blkaddr(blkaddr)) + break; + if (first_blkaddr + i - (compressed ? 1 : 0) != blkaddr) + return 0; + } + + return compressed ? 
i - 1 : i; +} + const struct address_space_operations f2fs_compress_aops = { .releasepage = f2fs_release_page, .invalidatepage = f2fs_invalidate_page, diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 867b2b72ec67..279c2bf4d244 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -1151,7 +1151,7 @@ int f2fs_reserve_block(struct dnode_of_data *dn, pgoff_t index) int f2fs_get_block(struct dnode_of_data *dn, pgoff_t index) { - struct extent_info ei = {0, 0, 0}; + struct extent_info ei = {0, }; struct inode *inode = dn->inode; if (f2fs_lookup_extent_cache(inode, index, &ei)) { @@ -1168,7 +1168,7 @@ struct page *f2fs_get_read_data_page(struct inode *inode, pgoff_t index, struct address_space *mapping = inode->i_mapping; struct dnode_of_data dn; struct page *page; - struct extent_info ei = {0,0,0}; + struct extent_info ei = {0, }; int err; page = f2fs_grab_cache_page(mapping, index, for_write); @@ -1466,7 +1466,7 @@ int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map, int err = 0, ofs = 1; unsigned int ofs_in_node, last_ofs_in_node; blkcnt_t prealloc; - struct extent_info ei = {0,0,0}; + struct extent_info ei = {0, }; block_t blkaddr; unsigned int start_pgofs; @@ -2156,6 +2156,8 @@ int f2fs_read_multi_pages(struct compress_ctx *cc, struct bio **bio_ret, sector_t last_block_in_file; const unsigned blocksize = blks_to_bytes(inode, 1); struct decompress_io_ctx *dic = NULL; + struct extent_info ei = {0, }; + bool from_dnode = true; int i; int ret = 0; @@ -2188,6 +2190,12 @@ int f2fs_read_multi_pages(struct compress_ctx *cc, struct bio **bio_ret, if (f2fs_cluster_is_empty(cc)) goto out; + if (f2fs_lookup_extent_cache(inode, start_idx, &ei)) + from_dnode = false; + + if (!from_dnode) + goto skip_reading_dnode; + set_new_dnode(&dn, inode, NULL, NULL, 0); ret = f2fs_get_dnode_of_data(&dn, start_idx, LOOKUP_NODE); if (ret) @@ -2195,11 +2203,13 @@ int f2fs_read_multi_pages(struct compress_ctx *cc, struct bio **bio_ret, f2fs_bug_on(sbi, dn.data_blkaddr != 
COMPRESS_ADDR); +skip_reading_dnode: for (i = 1; i < cc->cluster_size; i++) { block_t blkaddr; - blkaddr = data_blkaddr(dn.inode, dn.node_page, - dn.ofs_in_node + i); + blkaddr = from_dnode ? data_blkaddr(dn.inode, dn.node_page, + dn.ofs_in_node + i) : + ei.blk + i - 1; if (!__is_valid_data_blkaddr(blkaddr)) break; @@ -2209,6 +2219,9 @@ int f2fs_read_multi_pages(struct compress_ctx *cc, struct bio **bio_ret, goto out_put_dnode; } cc->nr_cpages++; + + if (!from_dnode && i >= ei.c_len) + break; } /* nothing to decompress */ @@ -2228,8 +2241,9 @@ int f2fs_read_multi_pages(struct compress_ctx *cc, struct bio **bio_ret, block_t blkaddr; struct bio_post_read_ctx *ctx; - blkaddr = data_blkaddr(dn.inode, dn.node_page, - dn.ofs_in_node + i + 1); + blkaddr = from_dnode ? data_blkaddr(dn.inode, dn.node_page, + dn.ofs_in_node + i + 1) : + ei.blk + i; f2fs_wait_on_block_writeback(inode, blkaddr); @@ -2274,13 +2288,15 @@ submit_and_realloc: *last_block_in_bio = blkaddr; } - f2fs_put_dnode(&dn); + if (from_dnode) + f2fs_put_dnode(&dn); *bio_ret = bio; return 0; out_put_dnode: - f2fs_put_dnode(&dn); + if (from_dnode) + f2fs_put_dnode(&dn); out: for (i = 0; i < cc->cluster_size; i++) { if (cc->rpages[i]) { @@ -2584,7 +2600,7 @@ int f2fs_do_write_data_page(struct f2fs_io_info *fio) struct page *page = fio->page; struct inode *inode = page->mapping->host; struct dnode_of_data dn; - struct extent_info ei = {0,0,0}; + struct extent_info ei = {0, }; struct node_info ni; bool ipu_force = false; int err = 0; @@ -3265,7 +3281,7 @@ static int prepare_write_begin(struct f2fs_sb_info *sbi, struct dnode_of_data dn; struct page *ipage; bool locked = false; - struct extent_info ei = {0,0,0}; + struct extent_info ei = {0, }; int err = 0; int flag; diff --git a/fs/f2fs/extent_cache.c b/fs/f2fs/extent_cache.c index 3ebf976a682d..b120589d8517 100644 --- a/fs/f2fs/extent_cache.c +++ b/fs/f2fs/extent_cache.c @@ -661,6 +661,47 @@ static void f2fs_update_extent_tree_range(struct inode *inode, 
f2fs_mark_inode_dirty_sync(inode, true); } +#ifdef CONFIG_F2FS_FS_COMPRESSION +void f2fs_update_extent_tree_range_compressed(struct inode *inode, + pgoff_t fofs, block_t blkaddr, unsigned int llen, + unsigned int c_len) +{ + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); + struct extent_tree *et = F2FS_I(inode)->extent_tree; + struct extent_node *en = NULL; + struct extent_node *prev_en = NULL, *next_en = NULL; + struct extent_info ei; + struct rb_node **insert_p = NULL, *insert_parent = NULL; + bool leftmost = false; + + trace_f2fs_update_extent_tree_range(inode, fofs, blkaddr, llen); + + /* it is safe here to check FI_NO_EXTENT w/o et->lock in ro image */ + if (is_inode_flag_set(inode, FI_NO_EXTENT)) + return; + + write_lock(&et->lock); + + en = (struct extent_node *)f2fs_lookup_rb_tree_ret(&et->root, + (struct rb_entry *)et->cached_en, fofs, + (struct rb_entry **)&prev_en, + (struct rb_entry **)&next_en, + &insert_p, &insert_parent, false, + &leftmost); + if (en) + goto unlock_out; + + set_extent_info(&ei, fofs, blkaddr, llen); + ei.c_len = c_len; + + if (!__try_merge_extent_node(sbi, et, &ei, prev_en, next_en)) + __insert_extent_tree(sbi, et, &ei, + insert_p, insert_parent, leftmost); +unlock_out: + write_unlock(&et->lock); +} +#endif + unsigned int f2fs_shrink_extent_tree(struct f2fs_sb_info *sbi, int nr_shrink) { struct extent_tree *et, *next; diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index d1af6cc71581..441c05968aa2 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -585,6 +585,9 @@ struct extent_info { unsigned int fofs; /* start offset in a file */ unsigned int len; /* length of the extent */ u32 blk; /* start block address of the extent */ +#ifdef CONFIG_F2FS_FS_COMPRESSION + unsigned int c_len; /* physical extent length of compressed blocks */ +#endif }; struct extent_node { @@ -805,6 +808,9 @@ static inline void set_extent_info(struct extent_info *ei, unsigned int fofs, ei->fofs = fofs; ei->blk = blk; ei->len = len; +#ifdef CONFIG_F2FS_FS_COMPRESSION 
+ ei->c_len = 0; +#endif } static inline bool __is_discard_mergeable(struct discard_info *back, @@ -829,6 +835,12 @@ static inline bool __is_discard_front_mergeable(struct discard_info *cur, static inline bool __is_extent_mergeable(struct extent_info *back, struct extent_info *front) { +#ifdef CONFIG_F2FS_FS_COMPRESSION + if (back->c_len && back->len != back->c_len) + return false; + if (front->c_len && front->len != front->c_len) + return false; +#endif return (back->fofs + back->len == front->fofs && back->blk + back->len == front->blk); } @@ -4146,12 +4158,16 @@ int f2fs_write_multi_pages(struct compress_ctx *cc, struct writeback_control *wbc, enum iostat_type io_type); int f2fs_is_compressed_cluster(struct inode *inode, pgoff_t index); +void f2fs_update_extent_tree_range_compressed(struct inode *inode, + pgoff_t fofs, block_t blkaddr, unsigned int llen, + unsigned int c_len); int f2fs_read_multi_pages(struct compress_ctx *cc, struct bio **bio_ret, unsigned nr_pages, sector_t *last_block_in_bio, bool is_readahead, bool for_write); struct decompress_io_ctx *f2fs_alloc_dic(struct compress_ctx *cc); void f2fs_decompress_end_io(struct decompress_io_ctx *dic, bool failed); void f2fs_put_page_dic(struct page *page); +unsigned int f2fs_cluster_blocks_are_contiguous(struct dnode_of_data *dn); int f2fs_init_compress_ctx(struct compress_ctx *cc); void f2fs_destroy_compress_ctx(struct compress_ctx *cc, bool reuse); void f2fs_init_compress_info(struct f2fs_sb_info *sbi); @@ -4206,6 +4222,7 @@ static inline void f2fs_put_page_dic(struct page *page) { WARN_ON_ONCE(1); } +static inline unsigned int f2fs_cluster_blocks_are_contiguous(struct dnode_of_data *dn) { return 0; } static inline int f2fs_init_compress_inode(struct f2fs_sb_info *sbi) { return 0; } static inline void f2fs_destroy_compress_inode(struct f2fs_sb_info *sbi) { } static inline int f2fs_init_page_array_cache(struct f2fs_sb_info *sbi) { return 0; } @@ -4221,6 +4238,9 @@ static inline bool 
f2fs_load_compressed_page(struct f2fs_sb_info *sbi, static inline void f2fs_invalidate_compress_pages(struct f2fs_sb_info *sbi, nid_t ino) { } #define inc_compr_inode_stat(inode) do { } while (0) +static inline void f2fs_update_extent_tree_range_compressed(struct inode *inode, + pgoff_t fofs, block_t blkaddr, unsigned int llen, + unsigned int c_len) { } #endif static inline int set_compress_context(struct inode *inode) diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index ad96f7060c4d..2c71bcd2eee0 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -846,6 +846,26 @@ int f2fs_get_dnode_of_data(struct dnode_of_data *dn, pgoff_t index, int mode) dn->ofs_in_node = offset[level]; dn->node_page = npage[level]; dn->data_blkaddr = f2fs_data_blkaddr(dn); + + if (is_inode_flag_set(dn->inode, FI_COMPRESSED_FILE) && + f2fs_sb_has_readonly(sbi)) { + unsigned int c_len = f2fs_cluster_blocks_are_contiguous(dn); + block_t blkaddr; + + if (!c_len) + goto out; + + blkaddr = f2fs_data_blkaddr(dn); + if (blkaddr == COMPRESS_ADDR) + blkaddr = data_blkaddr(dn->inode, dn->node_page, + dn->ofs_in_node + 1); + + f2fs_update_extent_tree_range_compressed(dn->inode, + index, blkaddr, + F2FS_I(dn->inode)->i_cluster_size, + c_len); + } +out: return 0; release_pages: From f6b4d18df0db11ddb18515e736ae9d4961c8a00c Mon Sep 17 00:00:00 2001 From: Zhang Qilong Date: Mon, 5 Sep 2022 12:59:17 +0800 Subject: [PATCH 154/186] BACKPORT: f2fs: fix race condition on setting FI_NO_EXTENT flag The following scenarios exist. process A: process B: ->f2fs_drop_extent_tree ->f2fs_update_extent_cache_range ->f2fs_update_extent_tree_range ->write_lock ->set_inode_flag ->is_inode_flag_set ->__free_extent_tree // Shouldn't // have been // cleaned up // here ->write_lock In this case, the "FI_NO_EXTENT" flag is set between f2fs_update_extent_tree_range and is_inode_flag_set by another process. This leads to clearing the whole extent tree, which should not have happened.
We fix it by moving the setting of the flag inside the write_lock critical section. Bug: 264453689 Fixes: 5f281fab9b9a3 ("f2fs: disable extent_cache for fcollapse/finsert inodes") Signed-off-by: Zhang Qilong Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim (cherry picked from commit a597af3b5a967e810cc8155abaa49abf10d6c417) Change-Id: If36c3556c9062d46509f704f0491a7e6ad652ae6 --- fs/f2fs/extent_cache.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/fs/f2fs/extent_cache.c b/fs/f2fs/extent_cache.c index b120589d8517..3a0d9401cb9b 100644 --- a/fs/f2fs/extent_cache.c +++ b/fs/f2fs/extent_cache.c @@ -803,9 +803,8 @@ void f2fs_drop_extent_tree(struct inode *inode) if (!f2fs_may_extent_tree(inode)) return; - set_inode_flag(inode, FI_NO_EXTENT); - write_lock(&et->lock); + set_inode_flag(inode, FI_NO_EXTENT); __free_extent_tree(sbi, et); if (et->largest.len) { et->largest.len = 0; From 02cb04cb05a8f694346cd0c7988d6d432f7bc811 Mon Sep 17 00:00:00 2001 From: Zhang Qilong Date: Mon, 19 Sep 2022 19:57:09 +0800 Subject: [PATCH 155/186] BACKPORT: f2fs: add "c_len" into trace_f2fs_update_extent_tree_range for compressed file The trace_f2fs_update_extent_tree_range tracepoint could not record the compressed block length in the cluster of a compressed file, so add it.
Bug: 264453689 Signed-off-by: Zhang Qilong Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim (cherry picked from commit a95694e33ce998e808b3a72164e8df3b5e0faf87) Change-Id: Ic3702b2735be27cf1eab34191313b162805a010a --- fs/f2fs/extent_cache.c | 4 ++-- include/trace/events/f2fs.h | 13 +++++++++---- 2 files changed, 11 insertions(+), 6 deletions(-) diff --git a/fs/f2fs/extent_cache.c b/fs/f2fs/extent_cache.c index 3a0d9401cb9b..d837750f14f6 100644 --- a/fs/f2fs/extent_cache.c +++ b/fs/f2fs/extent_cache.c @@ -543,7 +543,7 @@ static void f2fs_update_extent_tree_range(struct inode *inode, if (!et) return; - trace_f2fs_update_extent_tree_range(inode, fofs, blkaddr, len); + trace_f2fs_update_extent_tree_range(inode, fofs, blkaddr, len, 0); write_lock(&et->lock); @@ -674,7 +674,7 @@ void f2fs_update_extent_tree_range_compressed(struct inode *inode, struct rb_node **insert_p = NULL, *insert_parent = NULL; bool leftmost = false; - trace_f2fs_update_extent_tree_range(inode, fofs, blkaddr, llen); + trace_f2fs_update_extent_tree_range(inode, fofs, blkaddr, llen, c_len); /* it is safe here to check FI_NO_EXTENT w/o et->lock in ro image */ if (is_inode_flag_set(inode, FI_NO_EXTENT)) diff --git a/include/trace/events/f2fs.h b/include/trace/events/f2fs.h index df293bc7f03b..02c1ccc4f925 100644 --- a/include/trace/events/f2fs.h +++ b/include/trace/events/f2fs.h @@ -1586,9 +1586,10 @@ TRACE_EVENT_CONDITION(f2fs_lookup_extent_tree_end, TRACE_EVENT(f2fs_update_extent_tree_range, TP_PROTO(struct inode *inode, unsigned int pgofs, block_t blkaddr, - unsigned int len), + unsigned int len, + unsigned int c_len), - TP_ARGS(inode, pgofs, blkaddr, len), + TP_ARGS(inode, pgofs, blkaddr, len, c_len), TP_STRUCT__entry( __field(dev_t, dev) @@ -1596,6 +1597,7 @@ TRACE_EVENT(f2fs_update_extent_tree_range, __field(unsigned int, pgofs) __field(u32, blk) __field(unsigned int, len) + __field(unsigned int, c_len) ), TP_fast_assign( @@ -1604,14 +1606,17 @@ TRACE_EVENT(f2fs_update_extent_tree_range, 
__entry->pgofs = pgofs; __entry->blk = blkaddr; __entry->len = len; + __entry->c_len = c_len; ), TP_printk("dev = (%d,%d), ino = %lu, pgofs = %u, " - "blkaddr = %u, len = %u", + "blkaddr = %u, len = %u, " + "c_len = %u", show_dev_ino(__entry), __entry->pgofs, __entry->blk, - __entry->len) + __entry->len, + __entry->c_len) ); TRACE_EVENT(f2fs_shrink_extent_tree, From b29b3bd7e10d890e56e3e4a4fc31a575b9beb8db Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Wed, 30 Nov 2022 09:36:43 -0800 Subject: [PATCH 156/186] BACKPORT: f2fs: specify extent cache for read explicitly Let's describe it explicitly as the read extent cache. Bug: 264453689 Signed-off-by: Jaegeuk Kim (cherry picked from commit 29539ed310a92b5eb1e6ce8906087536b6259d58) Change-Id: I9cb258b7ee152ead1458ef8adb3e542532f152ce --- fs/f2fs/extent_cache.c | 4 ++-- fs/f2fs/f2fs.h | 10 +++++----- fs/f2fs/inode.c | 2 +- fs/f2fs/node.c | 2 +- fs/f2fs/node.h | 2 +- fs/f2fs/segment.c | 4 ++-- fs/f2fs/super.c | 12 ++++++------ 7 files changed, 18 insertions(+), 18 deletions(-) diff --git a/fs/f2fs/extent_cache.c b/fs/f2fs/extent_cache.c index d837750f14f6..3129347a0f66 100644 --- a/fs/f2fs/extent_cache.c +++ b/fs/f2fs/extent_cache.c @@ -382,7 +382,7 @@ static void __f2fs_init_extent_tree(struct inode *inode, struct page *ipage) if (!i_ext || !i_ext->len) return; - get_extent_info(&ei, i_ext); + get_read_extent_info(&ei, i_ext); write_lock(&et->lock); if (atomic_read(&et->node_cnt)) @@ -709,7 +709,7 @@ unsigned int f2fs_shrink_extent_tree(struct f2fs_sb_info *sbi, int nr_shrink) unsigned int node_cnt = 0, tree_cnt = 0; int remained; - if (!test_opt(sbi, EXTENT_CACHE)) + if (!test_opt(sbi, READ_EXTENT_CACHE)) return 0; if (!atomic_read(&sbi->total_zombie_tree)) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 441c05968aa2..09ccdb0d599f 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -84,7 +84,7 @@ extern const char *f2fs_fault_name[FAULT_MAX]; #define F2FS_MOUNT_FLUSH_MERGE 0x00000400 #define F2FS_MOUNT_NOBARRIER 0x00000800 #define
F2FS_MOUNT_FASTBOOT 0x00001000 -#define F2FS_MOUNT_EXTENT_CACHE 0x00002000 +#define F2FS_MOUNT_READ_EXTENT_CACHE 0x00002000 #define F2FS_MOUNT_DATA_FLUSH 0x00008000 #define F2FS_MOUNT_FAULT_INJECTION 0x00010000 #define F2FS_MOUNT_USRQUOTA 0x00080000 @@ -568,7 +568,7 @@ enum { #define F2FS_MIN_EXTENT_LEN 64 /* minimum extent length */ /* number of extent info in extent cache we try to shrink */ -#define EXTENT_CACHE_SHRINK_NUMBER 128 +#define READ_EXTENT_CACHE_SHRINK_NUMBER 128 struct rb_entry { struct rb_node rb_node; /* rb node located in rb-tree */ @@ -786,7 +786,7 @@ struct f2fs_inode_info { unsigned int i_cluster_size; /* cluster size */ }; -static inline void get_extent_info(struct extent_info *ext, +static inline void get_read_extent_info(struct extent_info *ext, struct f2fs_extent *i_ext) { ext->fofs = le32_to_cpu(i_ext->fofs); @@ -794,7 +794,7 @@ static inline void get_extent_info(struct extent_info *ext, ext->len = le32_to_cpu(i_ext->len); } -static inline void set_raw_extent(struct extent_info *ext, +static inline void set_raw_read_extent(struct extent_info *ext, struct f2fs_extent *i_ext) { i_ext->fofs = cpu_to_le32(ext->fofs); @@ -4314,7 +4314,7 @@ static inline bool f2fs_may_extent_tree(struct inode *inode) { struct f2fs_sb_info *sbi = F2FS_I_SB(inode); - if (!test_opt(sbi, EXTENT_CACHE) || + if (!test_opt(sbi, READ_EXTENT_CACHE) || is_inode_flag_set(inode, FI_NO_EXTENT) || (is_inode_flag_set(inode, FI_COMPRESSED_FILE) && !f2fs_sb_has_readonly(sbi))) diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c index 29bf3e215f52..0b54b678137f 100644 --- a/fs/f2fs/inode.c +++ b/fs/f2fs/inode.c @@ -590,7 +590,7 @@ void f2fs_update_inode(struct inode *inode, struct page *node_page) if (et) { read_lock(&et->lock); - set_raw_extent(&et->largest, &ri->i_ext); + set_raw_read_extent(&et->largest, &ri->i_ext); read_unlock(&et->lock); } else { memset(&ri->i_ext, 0, sizeof(ri->i_ext)); diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 2c71bcd2eee0..a55b3676233c 100644 --- 
a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -83,7 +83,7 @@ bool f2fs_available_free_memory(struct f2fs_sb_info *sbi, int type) sizeof(struct ino_entry); mem_size >>= PAGE_SHIFT; res = mem_size < ((avail_ram * nm_i->ram_thresh / 100) >> 1); - } else if (type == EXTENT_CACHE) { + } else if (type == READ_EXTENT_CACHE) { mem_size = (atomic_read(&sbi->total_ext_tree) * sizeof(struct extent_tree) + atomic_read(&sbi->total_ext_node) * diff --git a/fs/f2fs/node.h b/fs/f2fs/node.h index ff14a6e5ac1c..2c152e677a06 100644 --- a/fs/f2fs/node.h +++ b/fs/f2fs/node.h @@ -148,7 +148,7 @@ enum mem_type { NAT_ENTRIES, /* indicates the cached nat entry */ DIRTY_DENTS, /* indicates dirty dentry pages */ INO_ENTRIES, /* indicates inode entries */ - EXTENT_CACHE, /* indicates extent cache */ + READ_EXTENT_CACHE, /* indicates read extent cache */ INMEM_PAGES, /* indicates inmemory pages */ DISCARD_CACHE, /* indicates memory of cached discard cmds */ COMPRESS_PAGE, /* indicates memory of cached compressed pages */ diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index fdb41cb3fd68..ef599fbab740 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -536,8 +536,8 @@ void f2fs_balance_fs_bg(struct f2fs_sb_info *sbi, bool from_bg) return; /* try to shrink extent cache when there is no enough memory */ - if (!f2fs_available_free_memory(sbi, EXTENT_CACHE)) - f2fs_shrink_extent_tree(sbi, EXTENT_CACHE_SHRINK_NUMBER); + if (!f2fs_available_free_memory(sbi, READ_EXTENT_CACHE)) + f2fs_shrink_extent_tree(sbi, READ_EXTENT_CACHE_SHRINK_NUMBER); /* check the # of cached NAT entries */ if (!f2fs_available_free_memory(sbi, NAT_ENTRIES)) diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 89d90d366754..98da6a8636e0 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -753,10 +753,10 @@ static int parse_options(struct super_block *sb, char *options, bool is_remount) set_opt(sbi, FASTBOOT); break; case Opt_extent_cache: - set_opt(sbi, EXTENT_CACHE); + set_opt(sbi, READ_EXTENT_CACHE); break; case 
Opt_noextent_cache: - clear_opt(sbi, EXTENT_CACHE); + clear_opt(sbi, READ_EXTENT_CACHE); break; case Opt_noinline_data: clear_opt(sbi, INLINE_DATA); @@ -1817,7 +1817,7 @@ static int f2fs_show_options(struct seq_file *seq, struct dentry *root) seq_puts(seq, ",nobarrier"); if (test_opt(sbi, FASTBOOT)) seq_puts(seq, ",fastboot"); - if (test_opt(sbi, EXTENT_CACHE)) + if (test_opt(sbi, READ_EXTENT_CACHE)) seq_puts(seq, ",extent_cache"); else seq_puts(seq, ",noextent_cache"); @@ -1922,7 +1922,7 @@ static void default_options(struct f2fs_sb_info *sbi) set_opt(sbi, INLINE_XATTR); set_opt(sbi, INLINE_DATA); set_opt(sbi, INLINE_DENTRY); - set_opt(sbi, EXTENT_CACHE); + set_opt(sbi, READ_EXTENT_CACHE); set_opt(sbi, NOHEAP); clear_opt(sbi, DISABLE_CHECKPOINT); set_opt(sbi, MERGE_CHECKPOINT); @@ -2042,7 +2042,7 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data) bool need_restart_gc = false, need_stop_gc = false; bool need_restart_ckpt = false, need_stop_ckpt = false; bool need_restart_flush = false, need_stop_flush = false; - bool no_extent_cache = !test_opt(sbi, EXTENT_CACHE); + bool no_read_extent_cache = !test_opt(sbi, READ_EXTENT_CACHE); bool disable_checkpoint = test_opt(sbi, DISABLE_CHECKPOINT); bool no_io_align = !F2FS_IO_ALIGNED(sbi); bool no_atgc = !test_opt(sbi, ATGC); @@ -2132,7 +2132,7 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data) } /* disallow enable/disable extent_cache dynamically */ - if (no_extent_cache == !!test_opt(sbi, EXTENT_CACHE)) { + if (no_read_extent_cache == !!test_opt(sbi, READ_EXTENT_CACHE)) { err = -EINVAL; f2fs_warn(sbi, "switch extent_cache option is not allowed"); goto restore_opts; From bf3cafe7f197b599848f075c3976b3ce195fa9a1 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Wed, 30 Nov 2022 09:44:58 -0800 Subject: [PATCH 157/186] BACKPORT: f2fs: move internal functions into extent_cache.c No functional change. 
Bug: 264453689 Signed-off-by: Jaegeuk Kim (cherry picked from commit f458f39556cec5660a5508fbbeea4917bc46bc9c) Change-Id: I6171ab293b214a2eeb3f420517216371ee29557e --- fs/f2fs/extent_cache.c | 88 +++++++++++++++++++++++++++++++++++++----- fs/f2fs/f2fs.h | 69 +-------------------------------- 2 files changed, 81 insertions(+), 76 deletions(-) diff --git a/fs/f2fs/extent_cache.c b/fs/f2fs/extent_cache.c index 3129347a0f66..55319f1ed9d1 100644 --- a/fs/f2fs/extent_cache.c +++ b/fs/f2fs/extent_cache.c @@ -15,6 +15,77 @@ #include "node.h" #include +static void __set_extent_info(struct extent_info *ei, + unsigned int fofs, unsigned int len, + block_t blk, bool keep_clen) +{ + ei->fofs = fofs; + ei->blk = blk; + ei->len = len; + + if (keep_clen) + return; + +#ifdef CONFIG_F2FS_FS_COMPRESSION + ei->c_len = 0; +#endif +} + +static bool f2fs_may_extent_tree(struct inode *inode) +{ + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); + + /* + * for recovered files during mount do not create extents + * if shrinker is not registered. 
+ */ + if (list_empty(&sbi->s_list)) + return false; + + if (!test_opt(sbi, READ_EXTENT_CACHE) || + is_inode_flag_set(inode, FI_NO_EXTENT) || + (is_inode_flag_set(inode, FI_COMPRESSED_FILE) && + !f2fs_sb_has_readonly(sbi))) + return false; + + return S_ISREG(inode->i_mode); +} + +static void __try_update_largest_extent(struct extent_tree *et, + struct extent_node *en) +{ + if (en->ei.len <= et->largest.len) + return; + + et->largest = en->ei; + et->largest_updated = true; +} + +static bool __is_extent_mergeable(struct extent_info *back, + struct extent_info *front) +{ +#ifdef CONFIG_F2FS_FS_COMPRESSION + if (back->c_len && back->len != back->c_len) + return false; + if (front->c_len && front->len != front->c_len) + return false; +#endif + return (back->fofs + back->len == front->fofs && + back->blk + back->len == front->blk); +} + +static bool __is_back_mergeable(struct extent_info *cur, + struct extent_info *back) +{ + return __is_extent_mergeable(back, cur); +} + +static bool __is_front_mergeable(struct extent_info *cur, + struct extent_info *front) +{ + return __is_extent_mergeable(cur, front); +} + static struct rb_entry *__lookup_rb_tree_fast(struct rb_entry *cached_re, unsigned int ofs) { @@ -590,16 +661,16 @@ static void f2fs_update_extent_tree_range(struct inode *inode, if (end < org_end && org_end - end >= F2FS_MIN_EXTENT_LEN) { if (parts) { - set_extent_info(&ei, end, - end - dei.fofs + dei.blk, - org_end - end); + __set_extent_info(&ei, + end, org_end - end, + end - dei.fofs + dei.blk, false); en1 = __insert_extent_tree(sbi, et, &ei, NULL, NULL, true); next_en = en1; } else { - en->ei.fofs = end; - en->ei.blk += end - dei.fofs; - en->ei.len -= end - dei.fofs; + __set_extent_info(&en->ei, + end, en->ei.len - (end - dei.fofs), + en->ei.blk + (end - dei.fofs), true); next_en = en; } parts++; @@ -631,8 +702,7 @@ static void f2fs_update_extent_tree_range(struct inode *inode, /* 3. 
update extent in extent cache */ if (blkaddr) { - - set_extent_info(&ei, fofs, blkaddr, len); + __set_extent_info(&ei, fofs, len, blkaddr, false); if (!__try_merge_extent_node(sbi, et, &ei, prev_en, next_en)) __insert_extent_tree(sbi, et, &ei, insert_p, insert_parent, leftmost); @@ -691,7 +761,7 @@ void f2fs_update_extent_tree_range_compressed(struct inode *inode, if (en) goto unlock_out; - set_extent_info(&ei, fofs, blkaddr, llen); + __set_extent_info(&ei, fofs, llen, blkaddr, true); ei.c_len = c_len; if (!__try_merge_extent_node(sbi, et, &ei, prev_en, next_en)) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 09ccdb0d599f..3ecb53f2f67f 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -584,7 +584,7 @@ struct rb_entry { struct extent_info { unsigned int fofs; /* start offset in a file */ unsigned int len; /* length of the extent */ - u32 blk; /* start block address of the extent */ + block_t blk; /* start block address of the extent */ #ifdef CONFIG_F2FS_FS_COMPRESSION unsigned int c_len; /* physical extent length of compressed blocks */ #endif @@ -802,17 +802,6 @@ static inline void set_raw_read_extent(struct extent_info *ext, i_ext->len = cpu_to_le32(ext->len); } -static inline void set_extent_info(struct extent_info *ei, unsigned int fofs, - u32 blk, unsigned int len) -{ - ei->fofs = fofs; - ei->blk = blk; - ei->len = len; -#ifdef CONFIG_F2FS_FS_COMPRESSION - ei->c_len = 0; -#endif -} - static inline bool __is_discard_mergeable(struct discard_info *back, struct discard_info *front, unsigned int max_len) { @@ -832,41 +821,6 @@ static inline bool __is_discard_front_mergeable(struct discard_info *cur, return __is_discard_mergeable(cur, front, max_len); } -static inline bool __is_extent_mergeable(struct extent_info *back, - struct extent_info *front) -{ -#ifdef CONFIG_F2FS_FS_COMPRESSION - if (back->c_len && back->len != back->c_len) - return false; - if (front->c_len && front->len != front->c_len) - return false; -#endif - return (back->fofs + back->len == 
front->fofs && - back->blk + back->len == front->blk); -} - -static inline bool __is_back_mergeable(struct extent_info *cur, - struct extent_info *back) -{ - return __is_extent_mergeable(back, cur); -} - -static inline bool __is_front_mergeable(struct extent_info *cur, - struct extent_info *front) -{ - return __is_extent_mergeable(cur, front); -} - -extern void f2fs_mark_inode_dirty_sync(struct inode *inode, bool sync); -static inline void __try_update_largest_extent(struct extent_tree *et, - struct extent_node *en) -{ - if (en->ei.len > et->largest.len) { - et->largest = en->ei; - et->largest_updated = true; - } -} - /* * For free nid management */ @@ -2492,6 +2446,7 @@ static inline block_t __start_sum_addr(struct f2fs_sb_info *sbi) return le32_to_cpu(F2FS_CKPT(sbi)->cp_pack_start_sum); } +extern void f2fs_mark_inode_dirty_sync(struct inode *inode, bool sync); static inline int inc_valid_node_count(struct f2fs_sb_info *sbi, struct inode *inode, bool is_inode) { @@ -4310,26 +4265,6 @@ F2FS_FEATURE_FUNCS(casefold, CASEFOLD); F2FS_FEATURE_FUNCS(compression, COMPRESSION); F2FS_FEATURE_FUNCS(readonly, RO); -static inline bool f2fs_may_extent_tree(struct inode *inode) -{ - struct f2fs_sb_info *sbi = F2FS_I_SB(inode); - - if (!test_opt(sbi, READ_EXTENT_CACHE) || - is_inode_flag_set(inode, FI_NO_EXTENT) || - (is_inode_flag_set(inode, FI_COMPRESSED_FILE) && - !f2fs_sb_has_readonly(sbi))) - return false; - - /* - * for recovered files during mount do not create extents - * if shrinker is not registered. - */ - if (list_empty(&sbi->s_list)) - return false; - - return S_ISREG(inode->i_mode); -} - #ifdef CONFIG_BLK_DEV_ZONED static inline bool f2fs_blkz_is_seq(struct f2fs_sb_info *sbi, int devi, block_t blkaddr) From 561e9febb3fe985a68bca33c35b3395ef141334b Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Wed, 30 Nov 2022 10:01:18 -0800 Subject: [PATCH 158/186] BACKPORT: f2fs: remove unnecessary __init_extent_tree Added into the caller. 
Bug: 264453689 Signed-off-by: Jaegeuk Kim (cherry picked from commit 0b461f459f0f53ff29b0ff98d4c18808b4248dcc) Change-Id: Ic52ba22c00055bfe85bb789e6fd057d5fa84c00f --- fs/f2fs/extent_cache.c | 21 +++++---------------- 1 file changed, 5 insertions(+), 16 deletions(-) diff --git a/fs/f2fs/extent_cache.c b/fs/f2fs/extent_cache.c index 55319f1ed9d1..adee87d4b18c 100644 --- a/fs/f2fs/extent_cache.c +++ b/fs/f2fs/extent_cache.c @@ -385,21 +385,6 @@ static struct extent_tree *__grab_extent_tree(struct inode *inode) return et; } -static struct extent_node *__init_extent_tree(struct f2fs_sb_info *sbi, - struct extent_tree *et, struct extent_info *ei) -{ - struct rb_node **p = &et->root.rb_root.rb_node; - struct extent_node *en; - - en = __attach_extent_node(sbi, et, ei, NULL, p, true); - if (!en) - return NULL; - - et->largest = en->ei; - et->cached_en = en; - return en; -} - static unsigned int __free_extent_tree(struct f2fs_sb_info *sbi, struct extent_tree *et) { @@ -459,8 +444,12 @@ static void __f2fs_init_extent_tree(struct inode *inode, struct page *ipage) if (atomic_read(&et->node_cnt)) goto out; - en = __init_extent_tree(sbi, et, &ei); + en = __attach_extent_node(sbi, et, &ei, NULL, + &et->root.rb_root.rb_node, true); if (en) { + et->largest = en->ei; + et->cached_en = en; + spin_lock(&sbi->extent_lock); list_add_tail(&en->list, &sbi->extent_list); spin_unlock(&sbi->extent_lock); From 72e9dd90cf433ca66645732af5e073f37060df34 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Wed, 30 Nov 2022 09:26:29 -0800 Subject: [PATCH 159/186] BACKPORT: f2fs: refactor extent_cache to support for read and more This patch prepares extent_cache to be ready for addition. 
Bug: 264453689 Signed-off-by: Jaegeuk Kim (cherry picked from commit 7cf42d77c7242d23988215297bdd2d215e208b6f) Change-Id: I5ca06c274529187b804ddd4b0834dc44fe6aa8ad --- fs/f2fs/data.c | 18 +- fs/f2fs/debug.c | 65 +++-- fs/f2fs/extent_cache.c | 465 +++++++++++++++++++++--------------- fs/f2fs/f2fs.h | 111 ++++++--- fs/f2fs/file.c | 8 +- fs/f2fs/gc.c | 4 +- fs/f2fs/inode.c | 6 +- fs/f2fs/node.c | 8 +- fs/f2fs/segment.c | 3 +- fs/f2fs/shrinker.c | 19 +- include/trace/events/f2fs.h | 62 +++-- 11 files changed, 466 insertions(+), 303 deletions(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 279c2bf4d244..991b4f9593b1 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -1085,7 +1085,7 @@ void f2fs_update_data_blkaddr(struct dnode_of_data *dn, block_t blkaddr) { dn->data_blkaddr = blkaddr; f2fs_set_data_blkaddr(dn); - f2fs_update_extent_cache(dn); + f2fs_update_read_extent_cache(dn); } /* dn->ofs_in_node will be returned with up-to-date last block pointer */ @@ -1154,7 +1154,7 @@ int f2fs_get_block(struct dnode_of_data *dn, pgoff_t index) struct extent_info ei = {0, }; struct inode *inode = dn->inode; - if (f2fs_lookup_extent_cache(inode, index, &ei)) { + if (f2fs_lookup_read_extent_cache(inode, index, &ei)) { dn->data_blkaddr = ei.blk + index - ei.fofs; return 0; } @@ -1175,7 +1175,7 @@ struct page *f2fs_get_read_data_page(struct inode *inode, pgoff_t index, if (!page) return ERR_PTR(-ENOMEM); - if (f2fs_lookup_extent_cache(inode, index, &ei)) { + if (f2fs_lookup_read_extent_cache(inode, index, &ei)) { dn.data_blkaddr = ei.blk + index - ei.fofs; if (!f2fs_is_valid_blkaddr(F2FS_I_SB(inode), dn.data_blkaddr, DATA_GENERIC_ENHANCE_READ)) { @@ -1480,7 +1480,7 @@ int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map, pgofs = (pgoff_t)map->m_lblk; end = pgofs + maxblocks; - if (!create && f2fs_lookup_extent_cache(inode, pgofs, &ei)) { + if (!create && f2fs_lookup_read_extent_cache(inode, pgofs, &ei)) { if (f2fs_lfs_mode(sbi) && flag == F2FS_GET_BLOCK_DIO && 
map->m_may_create) goto next_dnode; @@ -1654,7 +1654,7 @@ skip: if (map->m_flags & F2FS_MAP_MAPPED) { unsigned int ofs = start_pgofs - map->m_lblk; - f2fs_update_extent_cache_range(&dn, + f2fs_update_read_extent_cache_range(&dn, start_pgofs, map->m_pblk + ofs, map->m_len - ofs); } @@ -1679,7 +1679,7 @@ sync_out: if (map->m_flags & F2FS_MAP_MAPPED) { unsigned int ofs = start_pgofs - map->m_lblk; - f2fs_update_extent_cache_range(&dn, + f2fs_update_read_extent_cache_range(&dn, start_pgofs, map->m_pblk + ofs, map->m_len - ofs); } @@ -2190,7 +2190,7 @@ int f2fs_read_multi_pages(struct compress_ctx *cc, struct bio **bio_ret, if (f2fs_cluster_is_empty(cc)) goto out; - if (f2fs_lookup_extent_cache(inode, start_idx, &ei)) + if (f2fs_lookup_read_extent_cache(inode, start_idx, &ei)) from_dnode = false; if (!from_dnode) @@ -2607,7 +2607,7 @@ int f2fs_do_write_data_page(struct f2fs_io_info *fio) set_new_dnode(&dn, inode, NULL, NULL, 0); if (need_inplace_update(fio) && - f2fs_lookup_extent_cache(inode, page->index, &ei)) { + f2fs_lookup_read_extent_cache(inode, page->index, &ei)) { fio->old_blkaddr = ei.blk + page->index - ei.fofs; if (!f2fs_is_valid_blkaddr(fio->sbi, fio->old_blkaddr, @@ -3332,7 +3332,7 @@ restart: } else if (locked) { err = f2fs_get_block(&dn, index); } else { - if (f2fs_lookup_extent_cache(inode, index, &ei)) { + if (f2fs_lookup_read_extent_cache(inode, index, &ei)) { dn.data_blkaddr = ei.blk + index - ei.fofs; } else { /* hole case */ diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c index 53ed1e9191f0..75564077f2e9 100644 --- a/fs/f2fs/debug.c +++ b/fs/f2fs/debug.c @@ -72,15 +72,23 @@ static void update_general_status(struct f2fs_sb_info *sbi) si->main_area_zones = si->main_area_sections / le32_to_cpu(raw_super->secs_per_zone); - /* validation check of the segment numbers */ + /* general extent cache stats */ + for (i = 0; i < NR_EXTENT_CACHES; i++) { + struct extent_tree_info *eti = &sbi->extent_tree[i]; + + si->hit_cached[i] = 
atomic64_read(&sbi->read_hit_cached[i]); + si->hit_rbtree[i] = atomic64_read(&sbi->read_hit_rbtree[i]); + si->total_ext[i] = atomic64_read(&sbi->total_hit_ext[i]); + si->hit_total[i] = si->hit_cached[i] + si->hit_rbtree[i]; + si->ext_tree[i] = atomic_read(&eti->total_ext_tree); + si->zombie_tree[i] = atomic_read(&eti->total_zombie_tree); + si->ext_node[i] = atomic_read(&eti->total_ext_node); + } + /* read extent_cache only */ si->hit_largest = atomic64_read(&sbi->read_hit_largest); - si->hit_cached = atomic64_read(&sbi->read_hit_cached); - si->hit_rbtree = atomic64_read(&sbi->read_hit_rbtree); - si->hit_total = si->hit_largest + si->hit_cached + si->hit_rbtree; - si->total_ext = atomic64_read(&sbi->total_hit_ext); - si->ext_tree = atomic_read(&sbi->total_ext_tree); - si->zombie_tree = atomic_read(&sbi->total_zombie_tree); - si->ext_node = atomic_read(&sbi->total_ext_node); + si->hit_total[EX_READ] += si->hit_largest; + + /* validation check of the segment numbers */ si->ndirty_node = get_pages(sbi, F2FS_DIRTY_NODES); si->ndirty_dent = get_pages(sbi, F2FS_DIRTY_DENTS); si->ndirty_meta = get_pages(sbi, F2FS_DIRTY_META); @@ -299,10 +307,16 @@ get_cache: si->cache_mem += si->inmem_pages * sizeof(struct inmem_pages); for (i = 0; i < MAX_INO_ENTRY; i++) si->cache_mem += sbi->im[i].ino_num * sizeof(struct ino_entry); - si->cache_mem += atomic_read(&sbi->total_ext_tree) * + + for (i = 0; i < NR_EXTENT_CACHES; i++) { + struct extent_tree_info *eti = &sbi->extent_tree[i]; + + si->ext_mem[i] = atomic_read(&eti->total_ext_tree) * sizeof(struct extent_tree); - si->cache_mem += atomic_read(&sbi->total_ext_node) * + si->ext_mem[i] += atomic_read(&eti->total_ext_node) * sizeof(struct extent_node); + si->cache_mem += si->ext_mem[i]; + } si->page_mem = 0; if (sbi->node_inode) { @@ -471,16 +485,18 @@ static int stat_show(struct seq_file *s, void *v) si->skipped_atomic_files[BG_GC]); seq_printf(s, "BG skip : IO: %u, Other: %u\n", si->io_skip_bggc, si->other_skip_bggc); - seq_puts(s, 
"\nExtent Cache:\n"); + seq_puts(s, "\nExtent Cache (Read):\n"); seq_printf(s, " - Hit Count: L1-1:%llu L1-2:%llu L2:%llu\n", - si->hit_largest, si->hit_cached, - si->hit_rbtree); + si->hit_largest, si->hit_cached[EX_READ], + si->hit_rbtree[EX_READ]); seq_printf(s, " - Hit Ratio: %llu%% (%llu / %llu)\n", - !si->total_ext ? 0 : - div64_u64(si->hit_total * 100, si->total_ext), - si->hit_total, si->total_ext); + !si->total_ext[EX_READ] ? 0 : + div64_u64(si->hit_total[EX_READ] * 100, + si->total_ext[EX_READ]), + si->hit_total[EX_READ], si->total_ext[EX_READ]); seq_printf(s, " - Inner Struct Count: tree: %d(%d), node: %d\n", - si->ext_tree, si->zombie_tree, si->ext_node); + si->ext_tree[EX_READ], si->zombie_tree[EX_READ], + si->ext_node[EX_READ]); seq_puts(s, "\nBalancing F2FS Async:\n"); seq_printf(s, " - DIO (R: %4d, W: %4d)\n", si->nr_dio_read, si->nr_dio_write); @@ -546,8 +562,10 @@ static int stat_show(struct seq_file *s, void *v) (si->base_mem + si->cache_mem + si->page_mem) >> 10); seq_printf(s, " - static: %llu KB\n", si->base_mem >> 10); - seq_printf(s, " - cached: %llu KB\n", + seq_printf(s, " - cached all: %llu KB\n", si->cache_mem >> 10); + seq_printf(s, " - read extent cache: %llu KB\n", + si->ext_mem[EX_READ] >> 10); seq_printf(s, " - paged : %llu KB\n", si->page_mem >> 10); } @@ -579,10 +597,15 @@ int f2fs_build_stats(struct f2fs_sb_info *sbi) si->sbi = sbi; sbi->stat_info = si; - atomic64_set(&sbi->total_hit_ext, 0); - atomic64_set(&sbi->read_hit_rbtree, 0); + /* general extent cache stats */ + for (i = 0; i < NR_EXTENT_CACHES; i++) { + atomic64_set(&sbi->total_hit_ext[i], 0); + atomic64_set(&sbi->read_hit_rbtree[i], 0); + atomic64_set(&sbi->read_hit_cached[i], 0); + } + + /* read extent_cache only */ atomic64_set(&sbi->read_hit_largest, 0); - atomic64_set(&sbi->read_hit_cached, 0); atomic_set(&sbi->inline_xattr, 0); atomic_set(&sbi->inline_inode, 0); diff --git a/fs/f2fs/extent_cache.c b/fs/f2fs/extent_cache.c index adee87d4b18c..e2ef1b99511b 100644 --- 
a/fs/f2fs/extent_cache.c +++ b/fs/f2fs/extent_cache.c @@ -17,21 +17,37 @@ static void __set_extent_info(struct extent_info *ei, unsigned int fofs, unsigned int len, - block_t blk, bool keep_clen) + block_t blk, bool keep_clen, + enum extent_type type) { ei->fofs = fofs; - ei->blk = blk; ei->len = len; - if (keep_clen) - return; - + if (type == EX_READ) { + ei->blk = blk; + if (keep_clen) + return; #ifdef CONFIG_F2FS_FS_COMPRESSION - ei->c_len = 0; + ei->c_len = 0; #endif + } } -static bool f2fs_may_extent_tree(struct inode *inode) +static bool __may_read_extent_tree(struct inode *inode) +{ + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); + + if (!test_opt(sbi, READ_EXTENT_CACHE)) + return false; + if (is_inode_flag_set(inode, FI_NO_EXTENT)) + return false; + if (is_inode_flag_set(inode, FI_COMPRESSED_FILE) && + !f2fs_sb_has_readonly(sbi)) + return false; + return S_ISREG(inode->i_mode); +} + +static bool __may_extent_tree(struct inode *inode, enum extent_type type) { struct f2fs_sb_info *sbi = F2FS_I_SB(inode); @@ -42,18 +58,16 @@ static bool f2fs_may_extent_tree(struct inode *inode) if (list_empty(&sbi->s_list)) return false; - if (!test_opt(sbi, READ_EXTENT_CACHE) || - is_inode_flag_set(inode, FI_NO_EXTENT) || - (is_inode_flag_set(inode, FI_COMPRESSED_FILE) && - !f2fs_sb_has_readonly(sbi))) - return false; - - return S_ISREG(inode->i_mode); + if (type == EX_READ) + return __may_read_extent_tree(inode); + return false; } static void __try_update_largest_extent(struct extent_tree *et, struct extent_node *en) { + if (et->type != EX_READ) + return; if (en->ei.len <= et->largest.len) return; @@ -62,28 +76,31 @@ static void __try_update_largest_extent(struct extent_tree *et, } static bool __is_extent_mergeable(struct extent_info *back, - struct extent_info *front) + struct extent_info *front, enum extent_type type) { + if (type == EX_READ) { #ifdef CONFIG_F2FS_FS_COMPRESSION - if (back->c_len && back->len != back->c_len) - return false; - if (front->c_len && front->len 
!= front->c_len) - return false; + if (back->c_len && back->len != back->c_len) + return false; + if (front->c_len && front->len != front->c_len) + return false; #endif - return (back->fofs + back->len == front->fofs && - back->blk + back->len == front->blk); + return (back->fofs + back->len == front->fofs && + back->blk + back->len == front->blk); + } + return false; } static bool __is_back_mergeable(struct extent_info *cur, - struct extent_info *back) + struct extent_info *back, enum extent_type type) { - return __is_extent_mergeable(back, cur); + return __is_extent_mergeable(back, cur, type); } static bool __is_front_mergeable(struct extent_info *cur, - struct extent_info *front) + struct extent_info *front, enum extent_type type) { - return __is_extent_mergeable(cur, front); + return __is_extent_mergeable(cur, front, type); } static struct rb_entry *__lookup_rb_tree_fast(struct rb_entry *cached_re, @@ -308,6 +325,7 @@ static struct extent_node *__attach_extent_node(struct f2fs_sb_info *sbi, struct rb_node *parent, struct rb_node **p, bool leftmost) { + struct extent_tree_info *eti = &sbi->extent_tree[et->type]; struct extent_node *en; en = kmem_cache_alloc(extent_node_slab, GFP_ATOMIC); @@ -321,16 +339,18 @@ static struct extent_node *__attach_extent_node(struct f2fs_sb_info *sbi, rb_link_node(&en->rb_node, parent, p); rb_insert_color_cached(&en->rb_node, &et->root, leftmost); atomic_inc(&et->node_cnt); - atomic_inc(&sbi->total_ext_node); + atomic_inc(&eti->total_ext_node); return en; } static void __detach_extent_node(struct f2fs_sb_info *sbi, struct extent_tree *et, struct extent_node *en) { + struct extent_tree_info *eti = &sbi->extent_tree[et->type]; + rb_erase_cached(&en->rb_node, &et->root); atomic_dec(&et->node_cnt); - atomic_dec(&sbi->total_ext_node); + atomic_dec(&eti->total_ext_node); if (et->cached_en == en) et->cached_en = NULL; @@ -346,41 +366,46 @@ static void __detach_extent_node(struct f2fs_sb_info *sbi, static void __release_extent_node(struct 
f2fs_sb_info *sbi, struct extent_tree *et, struct extent_node *en) { - spin_lock(&sbi->extent_lock); + struct extent_tree_info *eti = &sbi->extent_tree[et->type]; + + spin_lock(&eti->extent_lock); f2fs_bug_on(sbi, list_empty(&en->list)); list_del_init(&en->list); - spin_unlock(&sbi->extent_lock); + spin_unlock(&eti->extent_lock); __detach_extent_node(sbi, et, en); } -static struct extent_tree *__grab_extent_tree(struct inode *inode) +static struct extent_tree *__grab_extent_tree(struct inode *inode, + enum extent_type type) { struct f2fs_sb_info *sbi = F2FS_I_SB(inode); + struct extent_tree_info *eti = &sbi->extent_tree[type]; struct extent_tree *et; nid_t ino = inode->i_ino; - mutex_lock(&sbi->extent_tree_lock); - et = radix_tree_lookup(&sbi->extent_tree_root, ino); + mutex_lock(&eti->extent_tree_lock); + et = radix_tree_lookup(&eti->extent_tree_root, ino); if (!et) { et = f2fs_kmem_cache_alloc(extent_tree_slab, GFP_NOFS); - f2fs_radix_tree_insert(&sbi->extent_tree_root, ino, et); + f2fs_radix_tree_insert(&eti->extent_tree_root, ino, et); memset(et, 0, sizeof(struct extent_tree)); et->ino = ino; + et->type = type; et->root = RB_ROOT_CACHED; et->cached_en = NULL; rwlock_init(&et->lock); INIT_LIST_HEAD(&et->list); atomic_set(&et->node_cnt, 0); - atomic_inc(&sbi->total_ext_tree); + atomic_inc(&eti->total_ext_tree); } else { - atomic_dec(&sbi->total_zombie_tree); + atomic_dec(&eti->total_zombie_tree); list_del_init(&et->list); } - mutex_unlock(&sbi->extent_tree_lock); + mutex_unlock(&eti->extent_tree_lock); /* never died until evict_inode */ - F2FS_I(inode)->extent_tree = et; + F2FS_I(inode)->extent_tree[type] = et; return et; } @@ -414,35 +439,38 @@ static void __drop_largest_extent(struct extent_tree *et, } /* return true, if inode page is changed */ -static void __f2fs_init_extent_tree(struct inode *inode, struct page *ipage) +static void __f2fs_init_extent_tree(struct inode *inode, struct page *ipage, + enum extent_type type) { struct f2fs_sb_info *sbi = 
F2FS_I_SB(inode); + struct extent_tree_info *eti = &sbi->extent_tree[type]; struct f2fs_extent *i_ext = ipage ? &F2FS_INODE(ipage)->i_ext : NULL; struct extent_tree *et; struct extent_node *en; struct extent_info ei; - if (!f2fs_may_extent_tree(inode)) { - /* drop largest extent */ - if (i_ext && i_ext->len) { + if (!__may_extent_tree(inode, type)) { + /* drop largest read extent */ + if (type == EX_READ && i_ext && i_ext->len) { f2fs_wait_on_page_writeback(ipage, NODE, true, true); i_ext->len = 0; set_page_dirty(ipage); - return; } - return; + goto out; } - et = __grab_extent_tree(inode); + et = __grab_extent_tree(inode, type); if (!i_ext || !i_ext->len) - return; + goto out; + + BUG_ON(type != EX_READ); get_read_extent_info(&ei, i_ext); write_lock(&et->lock); if (atomic_read(&et->node_cnt)) - goto out; + goto unlock_out; en = __attach_extent_node(sbi, et, &ei, NULL, &et->root.rb_root.rb_node, true); @@ -450,37 +478,40 @@ static void __f2fs_init_extent_tree(struct inode *inode, struct page *ipage) et->largest = en->ei; et->cached_en = en; - spin_lock(&sbi->extent_lock); - list_add_tail(&en->list, &sbi->extent_list); - spin_unlock(&sbi->extent_lock); + spin_lock(&eti->extent_lock); + list_add_tail(&en->list, &eti->extent_list); + spin_unlock(&eti->extent_lock); } -out: +unlock_out: write_unlock(&et->lock); +out: + if (type == EX_READ && !F2FS_I(inode)->extent_tree[EX_READ]) + set_inode_flag(inode, FI_NO_EXTENT); } void f2fs_init_extent_tree(struct inode *inode, struct page *ipage) { - __f2fs_init_extent_tree(inode, ipage); - - if (!F2FS_I(inode)->extent_tree) - set_inode_flag(inode, FI_NO_EXTENT); + /* initialize read cache */ + __f2fs_init_extent_tree(inode, ipage, EX_READ); } -static bool f2fs_lookup_extent_tree(struct inode *inode, pgoff_t pgofs, - struct extent_info *ei) +static bool __lookup_extent_tree(struct inode *inode, pgoff_t pgofs, + struct extent_info *ei, enum extent_type type) { struct f2fs_sb_info *sbi = F2FS_I_SB(inode); - struct extent_tree *et = 
F2FS_I(inode)->extent_tree; + struct extent_tree_info *eti = &sbi->extent_tree[type]; + struct extent_tree *et = F2FS_I(inode)->extent_tree[type]; struct extent_node *en; bool ret = false; f2fs_bug_on(sbi, !et); - trace_f2fs_lookup_extent_tree_start(inode, pgofs); + trace_f2fs_lookup_extent_tree_start(inode, pgofs, type); read_lock(&et->lock); - if (et->largest.fofs <= pgofs && + if (type == EX_READ && + et->largest.fofs <= pgofs && et->largest.fofs + et->largest.len > pgofs) { *ei = et->largest; ret = true; @@ -494,23 +525,24 @@ static bool f2fs_lookup_extent_tree(struct inode *inode, pgoff_t pgofs, goto out; if (en == et->cached_en) - stat_inc_cached_node_hit(sbi); + stat_inc_cached_node_hit(sbi, type); else - stat_inc_rbtree_node_hit(sbi); + stat_inc_rbtree_node_hit(sbi, type); *ei = en->ei; - spin_lock(&sbi->extent_lock); + spin_lock(&eti->extent_lock); if (!list_empty(&en->list)) { - list_move_tail(&en->list, &sbi->extent_list); + list_move_tail(&en->list, &eti->extent_list); et->cached_en = en; } - spin_unlock(&sbi->extent_lock); + spin_unlock(&eti->extent_lock); ret = true; out: - stat_inc_total_hit(sbi); + stat_inc_total_hit(sbi, type); read_unlock(&et->lock); - trace_f2fs_lookup_extent_tree_end(inode, pgofs, ei); + if (type == EX_READ) + trace_f2fs_lookup_read_extent_tree_end(inode, pgofs, ei); return ret; } @@ -519,18 +551,20 @@ static struct extent_node *__try_merge_extent_node(struct f2fs_sb_info *sbi, struct extent_node *prev_ex, struct extent_node *next_ex) { + struct extent_tree_info *eti = &sbi->extent_tree[et->type]; struct extent_node *en = NULL; - if (prev_ex && __is_back_mergeable(ei, &prev_ex->ei)) { + if (prev_ex && __is_back_mergeable(ei, &prev_ex->ei, et->type)) { prev_ex->ei.len += ei->len; ei = &prev_ex->ei; en = prev_ex; } - if (next_ex && __is_front_mergeable(ei, &next_ex->ei)) { + if (next_ex && __is_front_mergeable(ei, &next_ex->ei, et->type)) { next_ex->ei.fofs = ei->fofs; - next_ex->ei.blk = ei->blk; next_ex->ei.len += ei->len; + if 
(et->type == EX_READ) + next_ex->ei.blk = ei->blk; if (en) __release_extent_node(sbi, et, prev_ex); @@ -542,12 +576,12 @@ static struct extent_node *__try_merge_extent_node(struct f2fs_sb_info *sbi, __try_update_largest_extent(et, en); - spin_lock(&sbi->extent_lock); + spin_lock(&eti->extent_lock); if (!list_empty(&en->list)) { - list_move_tail(&en->list, &sbi->extent_list); + list_move_tail(&en->list, &eti->extent_list); et->cached_en = en; } - spin_unlock(&sbi->extent_lock); + spin_unlock(&eti->extent_lock); return en; } @@ -557,6 +591,7 @@ static struct extent_node *__insert_extent_tree(struct f2fs_sb_info *sbi, struct rb_node *insert_parent, bool leftmost) { + struct extent_tree_info *eti = &sbi->extent_tree[et->type]; struct rb_node **p; struct rb_node *parent = NULL; struct extent_node *en = NULL; @@ -579,48 +614,51 @@ do_insert: __try_update_largest_extent(et, en); /* update in global extent list */ - spin_lock(&sbi->extent_lock); - list_add_tail(&en->list, &sbi->extent_list); + spin_lock(&eti->extent_lock); + list_add_tail(&en->list, &eti->extent_list); et->cached_en = en; - spin_unlock(&sbi->extent_lock); + spin_unlock(&eti->extent_lock); return en; } -static void f2fs_update_extent_tree_range(struct inode *inode, - pgoff_t fofs, block_t blkaddr, unsigned int len) +static void __update_extent_tree_range(struct inode *inode, + struct extent_info *tei, enum extent_type type) { struct f2fs_sb_info *sbi = F2FS_I_SB(inode); - struct extent_tree *et = F2FS_I(inode)->extent_tree; + struct extent_tree *et = F2FS_I(inode)->extent_tree[type]; struct extent_node *en = NULL, *en1 = NULL; struct extent_node *prev_en = NULL, *next_en = NULL; struct extent_info ei, dei, prev; struct rb_node **insert_p = NULL, *insert_parent = NULL; + unsigned int fofs = tei->fofs, len = tei->len; unsigned int end = fofs + len; - unsigned int pos = (unsigned int)fofs; bool updated = false; bool leftmost = false; if (!et) return; - trace_f2fs_update_extent_tree_range(inode, fofs, blkaddr, 
len, 0); - + if (type == EX_READ) + trace_f2fs_update_read_extent_tree_range(inode, fofs, len, + tei->blk, 0); write_lock(&et->lock); - if (is_inode_flag_set(inode, FI_NO_EXTENT)) { - write_unlock(&et->lock); - return; + if (type == EX_READ) { + if (is_inode_flag_set(inode, FI_NO_EXTENT)) { + write_unlock(&et->lock); + return; + } + + prev = et->largest; + dei.len = 0; + + /* + * drop largest extent before lookup, in case it's already + * been shrunk from extent tree + */ + __drop_largest_extent(et, fofs, len); } - prev = et->largest; - dei.len = 0; - - /* - * drop largest extent before lookup, in case it's already - * been shrunk from extent tree - */ - __drop_largest_extent(et, fofs, len); - /* 1. lookup first extent node in range [fofs, fofs + len - 1] */ en = (struct extent_node *)f2fs_lookup_rb_tree_ret(&et->root, (struct rb_entry *)et->cached_en, fofs, @@ -640,26 +678,30 @@ static void f2fs_update_extent_tree_range(struct inode *inode, dei = en->ei; org_end = dei.fofs + dei.len; - f2fs_bug_on(sbi, pos >= org_end); + f2fs_bug_on(sbi, fofs >= org_end); - if (pos > dei.fofs && pos - dei.fofs >= F2FS_MIN_EXTENT_LEN) { - en->ei.len = pos - en->ei.fofs; + if (fofs > dei.fofs && (type != EX_READ || + fofs - dei.fofs >= F2FS_MIN_EXTENT_LEN)) { + en->ei.len = fofs - en->ei.fofs; prev_en = en; parts = 1; } - if (end < org_end && org_end - end >= F2FS_MIN_EXTENT_LEN) { + if (end < org_end && (type != EX_READ || + org_end - end >= F2FS_MIN_EXTENT_LEN)) { if (parts) { __set_extent_info(&ei, end, org_end - end, - end - dei.fofs + dei.blk, false); + end - dei.fofs + dei.blk, false, + type); en1 = __insert_extent_tree(sbi, et, &ei, NULL, NULL, true); next_en = en1; } else { __set_extent_info(&en->ei, end, en->ei.len - (end - dei.fofs), - en->ei.blk + (end - dei.fofs), true); + en->ei.blk + (end - dei.fofs), true, + type); next_en = en; } parts++; @@ -689,9 +731,11 @@ static void f2fs_update_extent_tree_range(struct inode *inode, en = next_en; } - /* 3. 
update extent in extent cache */ - if (blkaddr) { - __set_extent_info(&ei, fofs, len, blkaddr, false); + /* 3. update extent in read extent cache */ + BUG_ON(type != EX_READ); + + if (tei->blk) { + __set_extent_info(&ei, fofs, len, tei->blk, false, EX_READ); if (!__try_merge_extent_node(sbi, et, &ei, prev_en, next_en)) __insert_extent_tree(sbi, et, &ei, insert_p, insert_parent, leftmost); @@ -721,19 +765,20 @@ static void f2fs_update_extent_tree_range(struct inode *inode, } #ifdef CONFIG_F2FS_FS_COMPRESSION -void f2fs_update_extent_tree_range_compressed(struct inode *inode, +void f2fs_update_read_extent_tree_range_compressed(struct inode *inode, pgoff_t fofs, block_t blkaddr, unsigned int llen, unsigned int c_len) { struct f2fs_sb_info *sbi = F2FS_I_SB(inode); - struct extent_tree *et = F2FS_I(inode)->extent_tree; + struct extent_tree *et = F2FS_I(inode)->extent_tree[EX_READ]; struct extent_node *en = NULL; struct extent_node *prev_en = NULL, *next_en = NULL; struct extent_info ei; struct rb_node **insert_p = NULL, *insert_parent = NULL; bool leftmost = false; - trace_f2fs_update_extent_tree_range(inode, fofs, blkaddr, llen, c_len); + trace_f2fs_update_read_extent_tree_range(inode, fofs, llen, + blkaddr, c_len); /* it is safe here to check FI_NO_EXTENT w/o et->lock in ro image */ if (is_inode_flag_set(inode, FI_NO_EXTENT)) @@ -750,7 +795,7 @@ void f2fs_update_extent_tree_range_compressed(struct inode *inode, if (en) goto unlock_out; - __set_extent_info(&ei, fofs, llen, blkaddr, true); + __set_extent_info(&ei, fofs, llen, blkaddr, true, EX_READ); ei.c_len = c_len; if (!__try_merge_extent_node(sbi, et, &ei, prev_en, next_en)) @@ -761,24 +806,43 @@ unlock_out: } #endif -unsigned int f2fs_shrink_extent_tree(struct f2fs_sb_info *sbi, int nr_shrink) +static void __update_extent_cache(struct dnode_of_data *dn, enum extent_type type) { + struct extent_info ei; + + if (!__may_extent_tree(dn->inode, type)) + return; + + ei.fofs = 
f2fs_start_bidx_of_node(ofs_of_node(dn->node_page), dn->inode) + + dn->ofs_in_node; + ei.len = 1; + + if (type == EX_READ) { + if (dn->data_blkaddr == NEW_ADDR) + ei.blk = NULL_ADDR; + else + ei.blk = dn->data_blkaddr; + } + __update_extent_tree_range(dn->inode, &ei, type); +} + +static unsigned int __shrink_extent_tree(struct f2fs_sb_info *sbi, int nr_shrink, + enum extent_type type) +{ + struct extent_tree_info *eti = &sbi->extent_tree[type]; struct extent_tree *et, *next; struct extent_node *en; unsigned int node_cnt = 0, tree_cnt = 0; int remained; - if (!test_opt(sbi, READ_EXTENT_CACHE)) - return 0; - - if (!atomic_read(&sbi->total_zombie_tree)) + if (!atomic_read(&eti->total_zombie_tree)) goto free_node; - if (!mutex_trylock(&sbi->extent_tree_lock)) + if (!mutex_trylock(&eti->extent_tree_lock)) goto out; /* 1. remove unreferenced extent tree */ - list_for_each_entry_safe(et, next, &sbi->zombie_list, list) { + list_for_each_entry_safe(et, next, &eti->zombie_list, list) { if (atomic_read(&et->node_cnt)) { write_lock(&et->lock); node_cnt += __free_extent_tree(sbi, et); @@ -786,61 +850,100 @@ unsigned int f2fs_shrink_extent_tree(struct f2fs_sb_info *sbi, int nr_shrink) } f2fs_bug_on(sbi, atomic_read(&et->node_cnt)); list_del_init(&et->list); - radix_tree_delete(&sbi->extent_tree_root, et->ino); + radix_tree_delete(&eti->extent_tree_root, et->ino); kmem_cache_free(extent_tree_slab, et); - atomic_dec(&sbi->total_ext_tree); - atomic_dec(&sbi->total_zombie_tree); + atomic_dec(&eti->total_ext_tree); + atomic_dec(&eti->total_zombie_tree); tree_cnt++; if (node_cnt + tree_cnt >= nr_shrink) goto unlock_out; cond_resched(); } - mutex_unlock(&sbi->extent_tree_lock); + mutex_unlock(&eti->extent_tree_lock); free_node: /* 2. 
remove LRU extent entries */ - if (!mutex_trylock(&sbi->extent_tree_lock)) + if (!mutex_trylock(&eti->extent_tree_lock)) goto out; remained = nr_shrink - (node_cnt + tree_cnt); - spin_lock(&sbi->extent_lock); + spin_lock(&eti->extent_lock); for (; remained > 0; remained--) { - if (list_empty(&sbi->extent_list)) + if (list_empty(&eti->extent_list)) break; - en = list_first_entry(&sbi->extent_list, + en = list_first_entry(&eti->extent_list, struct extent_node, list); et = en->et; if (!write_trylock(&et->lock)) { /* refresh this extent node's position in extent list */ - list_move_tail(&en->list, &sbi->extent_list); + list_move_tail(&en->list, &eti->extent_list); continue; } list_del_init(&en->list); - spin_unlock(&sbi->extent_lock); + spin_unlock(&eti->extent_lock); __detach_extent_node(sbi, et, en); write_unlock(&et->lock); node_cnt++; - spin_lock(&sbi->extent_lock); + spin_lock(&eti->extent_lock); } - spin_unlock(&sbi->extent_lock); + spin_unlock(&eti->extent_lock); unlock_out: - mutex_unlock(&sbi->extent_tree_lock); + mutex_unlock(&eti->extent_tree_lock); out: - trace_f2fs_shrink_extent_tree(sbi, node_cnt, tree_cnt); + trace_f2fs_shrink_extent_tree(sbi, node_cnt, tree_cnt, type); return node_cnt + tree_cnt; } -unsigned int f2fs_destroy_extent_node(struct inode *inode) +/* read extent cache operations */ +bool f2fs_lookup_read_extent_cache(struct inode *inode, pgoff_t pgofs, + struct extent_info *ei) +{ + if (!__may_extent_tree(inode, EX_READ)) + return false; + + return __lookup_extent_tree(inode, pgofs, ei, EX_READ); +} + +void f2fs_update_read_extent_cache(struct dnode_of_data *dn) +{ + return __update_extent_cache(dn, EX_READ); +} + +void f2fs_update_read_extent_cache_range(struct dnode_of_data *dn, + pgoff_t fofs, block_t blkaddr, unsigned int len) +{ + struct extent_info ei = { + .fofs = fofs, + .len = len, + .blk = blkaddr, + }; + + if (!__may_extent_tree(dn->inode, EX_READ)) + return; + + __update_extent_tree_range(dn->inode, &ei, EX_READ); +} + +unsigned 
int f2fs_shrink_read_extent_tree(struct f2fs_sb_info *sbi, int nr_shrink) +{ + if (!test_opt(sbi, READ_EXTENT_CACHE)) + return 0; + + return __shrink_extent_tree(sbi, nr_shrink, EX_READ); +} + +static unsigned int __destroy_extent_node(struct inode *inode, + enum extent_type type) { struct f2fs_sb_info *sbi = F2FS_I_SB(inode); - struct extent_tree *et = F2FS_I(inode)->extent_tree; + struct extent_tree *et = F2FS_I(inode)->extent_tree[type]; unsigned int node_cnt = 0; if (!et || !atomic_read(&et->node_cnt)) @@ -853,31 +956,44 @@ unsigned int f2fs_destroy_extent_node(struct inode *inode) return node_cnt; } -void f2fs_drop_extent_tree(struct inode *inode) +void f2fs_destroy_extent_node(struct inode *inode) +{ + __destroy_extent_node(inode, EX_READ); +} + +static void __drop_extent_tree(struct inode *inode, enum extent_type type) { struct f2fs_sb_info *sbi = F2FS_I_SB(inode); - struct extent_tree *et = F2FS_I(inode)->extent_tree; + struct extent_tree *et = F2FS_I(inode)->extent_tree[type]; bool updated = false; - if (!f2fs_may_extent_tree(inode)) + if (!__may_extent_tree(inode, type)) return; write_lock(&et->lock); - set_inode_flag(inode, FI_NO_EXTENT); __free_extent_tree(sbi, et); - if (et->largest.len) { - et->largest.len = 0; - updated = true; + if (type == EX_READ) { + set_inode_flag(inode, FI_NO_EXTENT); + if (et->largest.len) { + et->largest.len = 0; + updated = true; + } } write_unlock(&et->lock); if (updated) f2fs_mark_inode_dirty_sync(inode, true); } -void f2fs_destroy_extent_tree(struct inode *inode) +void f2fs_drop_extent_tree(struct inode *inode) +{ + __drop_extent_tree(inode, EX_READ); +} + +static void __destroy_extent_tree(struct inode *inode, enum extent_type type) { struct f2fs_sb_info *sbi = F2FS_I_SB(inode); - struct extent_tree *et = F2FS_I(inode)->extent_tree; + struct extent_tree_info *eti = &sbi->extent_tree[type]; + struct extent_tree *et = F2FS_I(inode)->extent_tree[type]; unsigned int node_cnt = 0; if (!et) @@ -885,76 +1001,49 @@ void 
f2fs_destroy_extent_tree(struct inode *inode) if (inode->i_nlink && !is_bad_inode(inode) && atomic_read(&et->node_cnt)) { - mutex_lock(&sbi->extent_tree_lock); - list_add_tail(&et->list, &sbi->zombie_list); - atomic_inc(&sbi->total_zombie_tree); - mutex_unlock(&sbi->extent_tree_lock); + mutex_lock(&eti->extent_tree_lock); + list_add_tail(&et->list, &eti->zombie_list); + atomic_inc(&eti->total_zombie_tree); + mutex_unlock(&eti->extent_tree_lock); return; } /* free all extent info belong to this extent tree */ - node_cnt = f2fs_destroy_extent_node(inode); + node_cnt = __destroy_extent_node(inode, type); /* delete extent tree entry in radix tree */ - mutex_lock(&sbi->extent_tree_lock); + mutex_lock(&eti->extent_tree_lock); f2fs_bug_on(sbi, atomic_read(&et->node_cnt)); - radix_tree_delete(&sbi->extent_tree_root, inode->i_ino); + radix_tree_delete(&eti->extent_tree_root, inode->i_ino); kmem_cache_free(extent_tree_slab, et); - atomic_dec(&sbi->total_ext_tree); - mutex_unlock(&sbi->extent_tree_lock); + atomic_dec(&eti->total_ext_tree); + mutex_unlock(&eti->extent_tree_lock); - F2FS_I(inode)->extent_tree = NULL; + F2FS_I(inode)->extent_tree[type] = NULL; - trace_f2fs_destroy_extent_tree(inode, node_cnt); + trace_f2fs_destroy_extent_tree(inode, node_cnt, type); } -bool f2fs_lookup_extent_cache(struct inode *inode, pgoff_t pgofs, - struct extent_info *ei) +void f2fs_destroy_extent_tree(struct inode *inode) { - if (!f2fs_may_extent_tree(inode)) - return false; - - return f2fs_lookup_extent_tree(inode, pgofs, ei); + __destroy_extent_tree(inode, EX_READ); } -void f2fs_update_extent_cache(struct dnode_of_data *dn) +static void __init_extent_tree_info(struct extent_tree_info *eti) { - pgoff_t fofs; - block_t blkaddr; - - if (!f2fs_may_extent_tree(dn->inode)) - return; - - if (dn->data_blkaddr == NEW_ADDR) - blkaddr = NULL_ADDR; - else - blkaddr = dn->data_blkaddr; - - fofs = f2fs_start_bidx_of_node(ofs_of_node(dn->node_page), dn->inode) + - dn->ofs_in_node; - 
f2fs_update_extent_tree_range(dn->inode, fofs, blkaddr, 1); -} - -void f2fs_update_extent_cache_range(struct dnode_of_data *dn, - pgoff_t fofs, block_t blkaddr, unsigned int len) - -{ - if (!f2fs_may_extent_tree(dn->inode)) - return; - - f2fs_update_extent_tree_range(dn->inode, fofs, blkaddr, len); + INIT_RADIX_TREE(&eti->extent_tree_root, GFP_NOIO); + mutex_init(&eti->extent_tree_lock); + INIT_LIST_HEAD(&eti->extent_list); + spin_lock_init(&eti->extent_lock); + atomic_set(&eti->total_ext_tree, 0); + INIT_LIST_HEAD(&eti->zombie_list); + atomic_set(&eti->total_zombie_tree, 0); + atomic_set(&eti->total_ext_node, 0); } void f2fs_init_extent_cache_info(struct f2fs_sb_info *sbi) { - INIT_RADIX_TREE(&sbi->extent_tree_root, GFP_NOIO); - mutex_init(&sbi->extent_tree_lock); - INIT_LIST_HEAD(&sbi->extent_list); - spin_lock_init(&sbi->extent_lock); - atomic_set(&sbi->total_ext_tree, 0); - INIT_LIST_HEAD(&sbi->zombie_list); - atomic_set(&sbi->total_zombie_tree, 0); - atomic_set(&sbi->total_ext_node, 0); + __init_extent_tree_info(&sbi->extent_tree[EX_READ]); } int __init f2fs_create_extent_cache(void) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 3ecb53f2f67f..fff32fc07c2b 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -570,6 +570,12 @@ enum { /* number of extent info in extent cache we try to shrink */ #define READ_EXTENT_CACHE_SHRINK_NUMBER 128 +/* extent cache type */ +enum extent_type { + EX_READ, + NR_EXTENT_CACHES, +}; + struct rb_entry { struct rb_node rb_node; /* rb node located in rb-tree */ union { @@ -584,10 +590,17 @@ struct rb_entry { struct extent_info { unsigned int fofs; /* start offset in a file */ unsigned int len; /* length of the extent */ - block_t blk; /* start block address of the extent */ + union { + /* read extent_cache */ + struct { + /* start block address of the extent */ + block_t blk; #ifdef CONFIG_F2FS_FS_COMPRESSION - unsigned int c_len; /* physical extent length of compressed blocks */ + /* physical extent length of compressed blocks 
*/ + unsigned int c_len; #endif + }; + }; }; struct extent_node { @@ -599,13 +612,25 @@ struct extent_node { struct extent_tree { nid_t ino; /* inode number */ + enum extent_type type; /* keep the extent tree type */ struct rb_root_cached root; /* root of extent info rb-tree */ struct extent_node *cached_en; /* recently accessed extent node */ - struct extent_info largest; /* largested extent info */ struct list_head list; /* to be used by sbi->zombie_list */ rwlock_t lock; /* protect extent info rb-tree */ atomic_t node_cnt; /* # of extent node in rb-tree*/ bool largest_updated; /* largest extent updated */ + struct extent_info largest; /* largest cached extent for EX_READ */ +}; + +struct extent_tree_info { + struct radix_tree_root extent_tree_root;/* cache extent cache entries */ + struct mutex extent_tree_lock; /* locking extent radix tree */ + struct list_head extent_list; /* lru list for shrinker */ + spinlock_t extent_lock; /* locking extent lru list */ + atomic_t total_ext_tree; /* extent tree count */ + struct list_head zombie_list; /* extent zombie tree list */ + atomic_t total_zombie_tree; /* extent zombie tree count */ + atomic_t total_ext_node; /* extent info count */ }; /* @@ -764,7 +789,8 @@ struct f2fs_inode_info { struct list_head inmem_pages; /* inmemory pages managed by f2fs */ struct task_struct *inmem_task; /* store inmemory task */ struct mutex inmem_lock; /* lock for inmemory pages */ - struct extent_tree *extent_tree; /* cached extent_tree entry */ + struct extent_tree *extent_tree[NR_EXTENT_CACHES]; + /* cached extent_tree entry */ /* avoid racing between foreground op and gc */ struct f2fs_rwsem i_gc_rwsem[2]; @@ -1562,14 +1588,7 @@ struct f2fs_sb_info { struct mutex flush_lock; /* for flush exclusion */ /* for extent tree cache */ - struct radix_tree_root extent_tree_root;/* cache extent cache entries */ - struct mutex extent_tree_lock; /* locking extent radix tree */ - struct list_head extent_list; /* lru list for shrinker */ - 
spinlock_t extent_lock; /* locking extent lru list */ - atomic_t total_ext_tree; /* extent tree count */ - struct list_head zombie_list; /* extent zombie tree list */ - atomic_t total_zombie_tree; /* extent zombie tree count */ - atomic_t total_ext_node; /* extent info count */ + struct extent_tree_info extent_tree[NR_EXTENT_CACHES]; /* basic filesystem units */ unsigned int log_sectors_per_block; /* log2 sectors per block */ @@ -1650,10 +1669,14 @@ struct f2fs_sb_info { unsigned int segment_count[2]; /* # of allocated segments */ unsigned int block_count[2]; /* # of allocated blocks */ atomic_t inplace_count; /* # of inplace update */ - atomic64_t total_hit_ext; /* # of lookup extent cache */ - atomic64_t read_hit_rbtree; /* # of hit rbtree extent node */ - atomic64_t read_hit_largest; /* # of hit largest extent node */ - atomic64_t read_hit_cached; /* # of hit cached extent node */ + /* # of lookup extent cache */ + atomic64_t total_hit_ext[NR_EXTENT_CACHES]; + /* # of hit rbtree extent node */ + atomic64_t read_hit_rbtree[NR_EXTENT_CACHES]; + /* # of hit cached extent node */ + atomic64_t read_hit_cached[NR_EXTENT_CACHES]; + /* # of hit largest extent node in read extent cache */ + atomic64_t read_hit_largest; atomic_t inline_xattr; /* # of inline_xattr inodes */ atomic_t inline_inode; /* # of inline_data inodes */ atomic_t inline_dir; /* # of inline_dentry inodes */ @@ -3736,9 +3759,17 @@ struct f2fs_stat_info { struct f2fs_sb_info *sbi; int all_area_segs, sit_area_segs, nat_area_segs, ssa_area_segs; int main_area_segs, main_area_sections, main_area_zones; - unsigned long long hit_largest, hit_cached, hit_rbtree; - unsigned long long hit_total, total_ext; - int ext_tree, zombie_tree, ext_node; + unsigned long long hit_cached[NR_EXTENT_CACHES]; + unsigned long long hit_rbtree[NR_EXTENT_CACHES]; + unsigned long long total_ext[NR_EXTENT_CACHES]; + unsigned long long hit_total[NR_EXTENT_CACHES]; + int ext_tree[NR_EXTENT_CACHES]; + int zombie_tree[NR_EXTENT_CACHES]; 
+ int ext_node[NR_EXTENT_CACHES]; + /* to count memory footprint */ + unsigned long long ext_mem[NR_EXTENT_CACHES]; + /* for read extent cache */ + unsigned long long hit_largest; int ndirty_node, ndirty_dent, ndirty_meta, ndirty_imeta; int ndirty_data, ndirty_qdata; int inmem_pages; @@ -3799,10 +3830,10 @@ static inline struct f2fs_stat_info *F2FS_STAT(struct f2fs_sb_info *sbi) #define stat_other_skip_bggc_count(sbi) ((sbi)->other_skip_bggc++) #define stat_inc_dirty_inode(sbi, type) ((sbi)->ndirty_inode[type]++) #define stat_dec_dirty_inode(sbi, type) ((sbi)->ndirty_inode[type]--) -#define stat_inc_total_hit(sbi) (atomic64_inc(&(sbi)->total_hit_ext)) -#define stat_inc_rbtree_node_hit(sbi) (atomic64_inc(&(sbi)->read_hit_rbtree)) +#define stat_inc_total_hit(sbi, type) (atomic64_inc(&(sbi)->total_hit_ext[type])) +#define stat_inc_rbtree_node_hit(sbi, type) (atomic64_inc(&(sbi)->read_hit_rbtree[type])) #define stat_inc_largest_node_hit(sbi) (atomic64_inc(&(sbi)->read_hit_largest)) -#define stat_inc_cached_node_hit(sbi) (atomic64_inc(&(sbi)->read_hit_cached)) +#define stat_inc_cached_node_hit(sbi, type) (atomic64_inc(&(sbi)->read_hit_cached[type])) #define stat_inc_inline_xattr(inode) \ do { \ if (f2fs_has_inline_xattr(inode)) \ @@ -3928,10 +3959,10 @@ void f2fs_update_sit_info(struct f2fs_sb_info *sbi); #define stat_other_skip_bggc_count(sbi) do { } while (0) #define stat_inc_dirty_inode(sbi, type) do { } while (0) #define stat_dec_dirty_inode(sbi, type) do { } while (0) -#define stat_inc_total_hit(sbi) do { } while (0) -#define stat_inc_rbtree_node_hit(sbi) do { } while (0) +#define stat_inc_total_hit(sbi, type) do { } while (0) +#define stat_inc_rbtree_node_hit(sbi, type) do { } while (0) #define stat_inc_largest_node_hit(sbi) do { } while (0) -#define stat_inc_cached_node_hit(sbi) do { } while (0) +#define stat_inc_cached_node_hit(sbi, type) do { } while (0) #define stat_inc_inline_xattr(inode) do { } while (0) #define stat_dec_inline_xattr(inode) do { } while (0) 
#define stat_inc_inline_inode(inode) do { } while (0) @@ -4036,20 +4067,23 @@ struct rb_entry *f2fs_lookup_rb_tree_ret(struct rb_root_cached *root, bool force, bool *leftmost); bool f2fs_check_rb_tree_consistence(struct f2fs_sb_info *sbi, struct rb_root_cached *root, bool check_key); -unsigned int f2fs_shrink_extent_tree(struct f2fs_sb_info *sbi, int nr_shrink); void f2fs_init_extent_tree(struct inode *inode, struct page *ipage); void f2fs_drop_extent_tree(struct inode *inode); -unsigned int f2fs_destroy_extent_node(struct inode *inode); +void f2fs_destroy_extent_node(struct inode *inode); void f2fs_destroy_extent_tree(struct inode *inode); -bool f2fs_lookup_extent_cache(struct inode *inode, pgoff_t pgofs, - struct extent_info *ei); -void f2fs_update_extent_cache(struct dnode_of_data *dn); -void f2fs_update_extent_cache_range(struct dnode_of_data *dn, - pgoff_t fofs, block_t blkaddr, unsigned int len); void f2fs_init_extent_cache_info(struct f2fs_sb_info *sbi); int __init f2fs_create_extent_cache(void); void f2fs_destroy_extent_cache(void); +/* read extent cache ops */ +bool f2fs_lookup_read_extent_cache(struct inode *inode, pgoff_t pgofs, + struct extent_info *ei); +void f2fs_update_read_extent_cache(struct dnode_of_data *dn); +void f2fs_update_read_extent_cache_range(struct dnode_of_data *dn, + pgoff_t fofs, block_t blkaddr, unsigned int len); +unsigned int f2fs_shrink_read_extent_tree(struct f2fs_sb_info *sbi, + int nr_shrink); + /* * sysfs.c */ @@ -4113,9 +4147,9 @@ int f2fs_write_multi_pages(struct compress_ctx *cc, struct writeback_control *wbc, enum iostat_type io_type); int f2fs_is_compressed_cluster(struct inode *inode, pgoff_t index); -void f2fs_update_extent_tree_range_compressed(struct inode *inode, - pgoff_t fofs, block_t blkaddr, unsigned int llen, - unsigned int c_len); +void f2fs_update_read_extent_tree_range_compressed(struct inode *inode, + pgoff_t fofs, block_t blkaddr, + unsigned int llen, unsigned int c_len); int f2fs_read_multi_pages(struct 
compress_ctx *cc, struct bio **bio_ret, unsigned nr_pages, sector_t *last_block_in_bio, bool is_readahead, bool for_write); @@ -4193,9 +4227,10 @@ static inline bool f2fs_load_compressed_page(struct f2fs_sb_info *sbi, static inline void f2fs_invalidate_compress_pages(struct f2fs_sb_info *sbi, nid_t ino) { } #define inc_compr_inode_stat(inode) do { } while (0) -static inline void f2fs_update_extent_tree_range_compressed(struct inode *inode, - pgoff_t fofs, block_t blkaddr, unsigned int llen, - unsigned int c_len) { } +static inline void f2fs_update_read_extent_tree_range_compressed( + struct inode *inode, + pgoff_t fofs, block_t blkaddr, + unsigned int llen, unsigned int c_len) { } #endif static inline int set_compress_context(struct inode *inode) diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 36d8f0376f76..18da58068820 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -607,7 +607,7 @@ void f2fs_truncate_data_blocks_range(struct dnode_of_data *dn, int count) */ fofs = f2fs_start_bidx_of_node(ofs_of_node(dn->node_page), dn->inode) + ofs; - f2fs_update_extent_cache_range(dn, fofs, 0, len); + f2fs_update_read_extent_cache_range(dn, fofs, 0, len); dec_valid_block_count(sbi, dn->inode, nr_free); } dn->ofs_in_node = ofs; @@ -1430,7 +1430,7 @@ static int f2fs_do_zero_range(struct dnode_of_data *dn, pgoff_t start, f2fs_set_data_blkaddr(dn); } - f2fs_update_extent_cache_range(dn, start, 0, index - start); + f2fs_update_read_extent_cache_range(dn, start, 0, index - start); return ret; } @@ -2590,7 +2590,7 @@ static int f2fs_defragment_range(struct f2fs_sb_info *sbi, struct f2fs_map_blocks map = { .m_next_extent = NULL, .m_seg_type = NO_CHECK_TYPE, .m_may_create = false }; - struct extent_info ei = {0, 0, 0}; + struct extent_info ei = {0, }; pgoff_t pg_start, pg_end, next_pgofs; unsigned int blk_per_seg = sbi->blocks_per_seg; unsigned int total = 0, sec_num; @@ -2622,7 +2622,7 @@ static int f2fs_defragment_range(struct f2fs_sb_info *sbi, * lookup mapping info in extent 
cache, skip defragmenting if physical * block addresses are continuous. */ - if (f2fs_lookup_extent_cache(inode, pg_start, &ei)) { + if (f2fs_lookup_read_extent_cache(inode, pg_start, &ei)) { if (ei.fofs + ei.len >= pg_end) goto out; } diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index 30949eac81c3..fe9ea12e0a5f 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -1054,7 +1054,7 @@ static int ra_data_block(struct inode *inode, pgoff_t index) struct address_space *mapping = inode->i_mapping; struct dnode_of_data dn; struct page *page; - struct extent_info ei = {0, 0, 0}; + struct extent_info ei = {0, }; struct f2fs_io_info fio = { .sbi = sbi, .ino = inode->i_ino, @@ -1072,7 +1072,7 @@ static int ra_data_block(struct inode *inode, pgoff_t index) if (!page) return -ENOMEM; - if (f2fs_lookup_extent_cache(inode, index, &ei)) { + if (f2fs_lookup_read_extent_cache(inode, index, &ei)) { dn.data_blkaddr = ei.blk + index - ei.fofs; if (unlikely(!f2fs_is_valid_blkaddr(sbi, dn.data_blkaddr, DATA_GENERIC_ENHANCE_READ))) { diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c index 0b54b678137f..cedad8613f89 100644 --- a/fs/f2fs/inode.c +++ b/fs/f2fs/inode.c @@ -260,8 +260,8 @@ static bool sanity_check_inode(struct inode *inode, struct page *node_page) return false; } - if (F2FS_I(inode)->extent_tree) { - struct extent_info *ei = &F2FS_I(inode)->extent_tree->largest; + if (fi->extent_tree[EX_READ]) { + struct extent_info *ei = &fi->extent_tree[EX_READ]->largest; if (ei->len && (!f2fs_is_valid_blkaddr(sbi, ei->blk, @@ -571,7 +571,7 @@ retry: void f2fs_update_inode(struct inode *inode, struct page *node_page) { struct f2fs_inode *ri; - struct extent_tree *et = F2FS_I(inode)->extent_tree; + struct extent_tree *et = F2FS_I(inode)->extent_tree[EX_READ]; f2fs_wait_on_page_writeback(node_page, NODE, true, true); set_page_dirty(node_page); diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index a55b3676233c..4e5cbd856df7 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -84,9 +84,11 @@ bool 
f2fs_available_free_memory(struct f2fs_sb_info *sbi, int type) mem_size >>= PAGE_SHIFT; res = mem_size < ((avail_ram * nm_i->ram_thresh / 100) >> 1); } else if (type == READ_EXTENT_CACHE) { - mem_size = (atomic_read(&sbi->total_ext_tree) * + struct extent_tree_info *eti = &sbi->extent_tree[EX_READ]; + + mem_size = (atomic_read(&eti->total_ext_tree) * sizeof(struct extent_tree) + - atomic_read(&sbi->total_ext_node) * + atomic_read(&eti->total_ext_node) * sizeof(struct extent_node)) >> PAGE_SHIFT; res = mem_size < ((avail_ram * nm_i->ram_thresh / 100) >> 1); } else if (type == INMEM_PAGES) { @@ -860,7 +862,7 @@ int f2fs_get_dnode_of_data(struct dnode_of_data *dn, pgoff_t index, int mode) blkaddr = data_blkaddr(dn->inode, dn->node_page, dn->ofs_in_node + 1); - f2fs_update_extent_tree_range_compressed(dn->inode, + f2fs_update_read_extent_tree_range_compressed(dn->inode, index, blkaddr, F2FS_I(dn->inode)->i_cluster_size, c_len); diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index ef599fbab740..512781ad3e61 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -537,7 +537,8 @@ void f2fs_balance_fs_bg(struct f2fs_sb_info *sbi, bool from_bg) /* try to shrink extent cache when there is no enough memory */ if (!f2fs_available_free_memory(sbi, READ_EXTENT_CACHE)) - f2fs_shrink_extent_tree(sbi, READ_EXTENT_CACHE_SHRINK_NUMBER); + f2fs_shrink_read_extent_tree(sbi, + READ_EXTENT_CACHE_SHRINK_NUMBER); /* check the # of cached NAT entries */ if (!f2fs_available_free_memory(sbi, NAT_ENTRIES)) diff --git a/fs/f2fs/shrinker.c b/fs/f2fs/shrinker.c index dd3c3c7a90ec..33c490e69ae3 100644 --- a/fs/f2fs/shrinker.c +++ b/fs/f2fs/shrinker.c @@ -28,10 +28,13 @@ static unsigned long __count_free_nids(struct f2fs_sb_info *sbi) return count > 0 ? 
count : 0; } -static unsigned long __count_extent_cache(struct f2fs_sb_info *sbi) +static unsigned long __count_extent_cache(struct f2fs_sb_info *sbi, + enum extent_type type) { - return atomic_read(&sbi->total_zombie_tree) + - atomic_read(&sbi->total_ext_node); + struct extent_tree_info *eti = &sbi->extent_tree[type]; + + return atomic_read(&eti->total_zombie_tree) + + atomic_read(&eti->total_ext_node); } unsigned long f2fs_shrink_count(struct shrinker *shrink, @@ -53,8 +56,8 @@ unsigned long f2fs_shrink_count(struct shrinker *shrink, } spin_unlock(&f2fs_list_lock); - /* count extent cache entries */ - count += __count_extent_cache(sbi); + /* count read extent cache entries */ + count += __count_extent_cache(sbi, EX_READ); /* count clean nat cache entries */ count += __count_nat_entries(sbi); @@ -99,8 +102,8 @@ unsigned long f2fs_shrink_scan(struct shrinker *shrink, sbi->shrinker_run_no = run_no; - /* shrink extent cache entries */ - freed += f2fs_shrink_extent_tree(sbi, nr >> 1); + /* shrink read extent cache entries */ + freed += f2fs_shrink_read_extent_tree(sbi, nr >> 1); /* shrink clean nat cache entries */ if (freed < nr) @@ -130,7 +133,7 @@ void f2fs_join_shrinker(struct f2fs_sb_info *sbi) void f2fs_leave_shrinker(struct f2fs_sb_info *sbi) { - f2fs_shrink_extent_tree(sbi, __count_extent_cache(sbi)); + f2fs_shrink_read_extent_tree(sbi, __count_extent_cache(sbi, EX_READ)); spin_lock(&f2fs_list_lock); list_del_init(&sbi->s_list); diff --git a/include/trace/events/f2fs.h b/include/trace/events/f2fs.h index 02c1ccc4f925..81cb234cfaf6 100644 --- a/include/trace/events/f2fs.h +++ b/include/trace/events/f2fs.h @@ -52,6 +52,7 @@ TRACE_DEFINE_ENUM(CP_DISCARD); TRACE_DEFINE_ENUM(CP_TRIMMED); TRACE_DEFINE_ENUM(CP_PAUSE); TRACE_DEFINE_ENUM(CP_RESIZE); +TRACE_DEFINE_ENUM(EX_READ); #define show_block_type(type) \ __print_symbolic(type, \ @@ -1526,28 +1527,31 @@ TRACE_EVENT(f2fs_issue_flush, TRACE_EVENT(f2fs_lookup_extent_tree_start, - TP_PROTO(struct inode *inode, unsigned 
int pgofs), + TP_PROTO(struct inode *inode, unsigned int pgofs, enum extent_type type), - TP_ARGS(inode, pgofs), + TP_ARGS(inode, pgofs, type), TP_STRUCT__entry( __field(dev_t, dev) __field(ino_t, ino) __field(unsigned int, pgofs) + __field(enum extent_type, type) ), TP_fast_assign( __entry->dev = inode->i_sb->s_dev; __entry->ino = inode->i_ino; __entry->pgofs = pgofs; + __entry->type = type; ), - TP_printk("dev = (%d,%d), ino = %lu, pgofs = %u", + TP_printk("dev = (%d,%d), ino = %lu, pgofs = %u, type = %s", show_dev_ino(__entry), - __entry->pgofs) + __entry->pgofs, + __entry->type == EX_READ ? "Read" : "N/A") ); -TRACE_EVENT_CONDITION(f2fs_lookup_extent_tree_end, +TRACE_EVENT_CONDITION(f2fs_lookup_read_extent_tree_end, TP_PROTO(struct inode *inode, unsigned int pgofs, struct extent_info *ei), @@ -1561,8 +1565,8 @@ TRACE_EVENT_CONDITION(f2fs_lookup_extent_tree_end, __field(ino_t, ino) __field(unsigned int, pgofs) __field(unsigned int, fofs) - __field(u32, blk) __field(unsigned int, len) + __field(u32, blk) ), TP_fast_assign( @@ -1570,26 +1574,26 @@ TRACE_EVENT_CONDITION(f2fs_lookup_extent_tree_end, __entry->ino = inode->i_ino; __entry->pgofs = pgofs; __entry->fofs = ei->fofs; - __entry->blk = ei->blk; __entry->len = ei->len; + __entry->blk = ei->blk; ), TP_printk("dev = (%d,%d), ino = %lu, pgofs = %u, " - "ext_info(fofs: %u, blk: %u, len: %u)", + "read_ext_info(fofs: %u, len: %u, blk: %u)", show_dev_ino(__entry), __entry->pgofs, __entry->fofs, - __entry->blk, - __entry->len) + __entry->len, + __entry->blk) ); -TRACE_EVENT(f2fs_update_extent_tree_range, +TRACE_EVENT(f2fs_update_read_extent_tree_range, - TP_PROTO(struct inode *inode, unsigned int pgofs, block_t blkaddr, - unsigned int len, + TP_PROTO(struct inode *inode, unsigned int pgofs, unsigned int len, + block_t blkaddr, unsigned int c_len), - TP_ARGS(inode, pgofs, blkaddr, len, c_len), + TP_ARGS(inode, pgofs, len, blkaddr, c_len), TP_STRUCT__entry( __field(dev_t, dev) @@ -1604,67 +1608,73 @@ 
TRACE_EVENT(f2fs_update_extent_tree_range, __entry->dev = inode->i_sb->s_dev; __entry->ino = inode->i_ino; __entry->pgofs = pgofs; - __entry->blk = blkaddr; __entry->len = len; + __entry->blk = blkaddr; __entry->c_len = c_len; ), TP_printk("dev = (%d,%d), ino = %lu, pgofs = %u, " - "blkaddr = %u, len = %u, " - "c_len = %u", + "len = %u, blkaddr = %u, c_len = %u", show_dev_ino(__entry), __entry->pgofs, - __entry->blk, __entry->len, + __entry->blk, __entry->c_len) ); TRACE_EVENT(f2fs_shrink_extent_tree, TP_PROTO(struct f2fs_sb_info *sbi, unsigned int node_cnt, - unsigned int tree_cnt), + unsigned int tree_cnt, enum extent_type type), - TP_ARGS(sbi, node_cnt, tree_cnt), + TP_ARGS(sbi, node_cnt, tree_cnt, type), TP_STRUCT__entry( __field(dev_t, dev) __field(unsigned int, node_cnt) __field(unsigned int, tree_cnt) + __field(enum extent_type, type) ), TP_fast_assign( __entry->dev = sbi->sb->s_dev; __entry->node_cnt = node_cnt; __entry->tree_cnt = tree_cnt; + __entry->type = type; ), - TP_printk("dev = (%d,%d), shrunk: node_cnt = %u, tree_cnt = %u", + TP_printk("dev = (%d,%d), shrunk: node_cnt = %u, tree_cnt = %u, type = %s", show_dev(__entry->dev), __entry->node_cnt, - __entry->tree_cnt) + __entry->tree_cnt, + __entry->type == EX_READ ? 
"Read" : "N/A") ); TRACE_EVENT(f2fs_destroy_extent_tree, - TP_PROTO(struct inode *inode, unsigned int node_cnt), + TP_PROTO(struct inode *inode, unsigned int node_cnt, + enum extent_type type), - TP_ARGS(inode, node_cnt), + TP_ARGS(inode, node_cnt, type), TP_STRUCT__entry( __field(dev_t, dev) __field(ino_t, ino) __field(unsigned int, node_cnt) + __field(enum extent_type, type) ), TP_fast_assign( __entry->dev = inode->i_sb->s_dev; __entry->ino = inode->i_ino; __entry->node_cnt = node_cnt; + __entry->type = type; ), - TP_printk("dev = (%d,%d), ino = %lu, destroyed: node_cnt = %u", + TP_printk("dev = (%d,%d), ino = %lu, destroyed: node_cnt = %u, type = %s", show_dev_ino(__entry), - __entry->node_cnt) + __entry->node_cnt, + __entry->type == EX_READ ? "Read" : "N/A") ); DECLARE_EVENT_CLASS(f2fs_sync_dirty_inodes, From d6ba4dceab763d99617760d8f0b1cd05451d675c Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Fri, 2 Dec 2022 13:51:09 -0800 Subject: [PATCH 160/186] BACKPORT: f2fs: allocate the extent_cache by default Let's allocate it to remove the runtime complexity. 
Bug: 264453689 Signed-off-by: Jaegeuk Kim (cherry picked from commit 693658e0c0eca25087c1ffb318d85f8d392d6d27) Change-Id: Ib46b6edd4f4a6232f3451498ee5c2f246dd37682 --- fs/f2fs/extent_cache.c | 38 +++++++++++++++++++------------------- fs/f2fs/f2fs.h | 3 ++- fs/f2fs/inode.c | 6 ++++-- fs/f2fs/namei.c | 4 ++-- 4 files changed, 27 insertions(+), 24 deletions(-) diff --git a/fs/f2fs/extent_cache.c b/fs/f2fs/extent_cache.c index e2ef1b99511b..73616ba7ea73 100644 --- a/fs/f2fs/extent_cache.c +++ b/fs/f2fs/extent_cache.c @@ -47,20 +47,23 @@ static bool __may_read_extent_tree(struct inode *inode) return S_ISREG(inode->i_mode); } +static bool __init_may_extent_tree(struct inode *inode, enum extent_type type) +{ + if (type == EX_READ) + return __may_read_extent_tree(inode); + return false; +} + static bool __may_extent_tree(struct inode *inode, enum extent_type type) { - struct f2fs_sb_info *sbi = F2FS_I_SB(inode); - /* * for recovered files during mount do not create extents * if shrinker is not registered. */ - if (list_empty(&sbi->s_list)) + if (list_empty(&F2FS_I_SB(inode)->s_list)) return false; - if (type == EX_READ) - return __may_read_extent_tree(inode); - return false; + return __init_may_extent_tree(inode, type); } static void __try_update_largest_extent(struct extent_tree *et, @@ -438,20 +441,18 @@ static void __drop_largest_extent(struct extent_tree *et, } } -/* return true, if inode page is changed */ -static void __f2fs_init_extent_tree(struct inode *inode, struct page *ipage, - enum extent_type type) +void f2fs_init_read_extent_tree(struct inode *inode, struct page *ipage) { struct f2fs_sb_info *sbi = F2FS_I_SB(inode); - struct extent_tree_info *eti = &sbi->extent_tree[type]; - struct f2fs_extent *i_ext = ipage ? 
&F2FS_INODE(ipage)->i_ext : NULL; + struct extent_tree_info *eti = &sbi->extent_tree[EX_READ]; + struct f2fs_extent *i_ext = &F2FS_INODE(ipage)->i_ext; struct extent_tree *et; struct extent_node *en; struct extent_info ei; - if (!__may_extent_tree(inode, type)) { + if (!__may_extent_tree(inode, EX_READ)) { /* drop largest read extent */ - if (type == EX_READ && i_ext && i_ext->len) { + if (i_ext && i_ext->len) { f2fs_wait_on_page_writeback(ipage, NODE, true, true); i_ext->len = 0; set_page_dirty(ipage); @@ -459,13 +460,11 @@ static void __f2fs_init_extent_tree(struct inode *inode, struct page *ipage, goto out; } - et = __grab_extent_tree(inode, type); + et = __grab_extent_tree(inode, EX_READ); if (!i_ext || !i_ext->len) goto out; - BUG_ON(type != EX_READ); - get_read_extent_info(&ei, i_ext); write_lock(&et->lock); @@ -485,14 +484,15 @@ static void __f2fs_init_extent_tree(struct inode *inode, struct page *ipage, unlock_out: write_unlock(&et->lock); out: - if (type == EX_READ && !F2FS_I(inode)->extent_tree[EX_READ]) + if (!F2FS_I(inode)->extent_tree[EX_READ]) set_inode_flag(inode, FI_NO_EXTENT); } -void f2fs_init_extent_tree(struct inode *inode, struct page *ipage) +void f2fs_init_extent_tree(struct inode *inode) { /* initialize read cache */ - __f2fs_init_extent_tree(inode, ipage, EX_READ); + if (__init_may_extent_tree(inode, EX_READ)) + __grab_extent_tree(inode, EX_READ); } static bool __lookup_extent_tree(struct inode *inode, pgoff_t pgofs, diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index fff32fc07c2b..3dc413423be1 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -4067,7 +4067,7 @@ struct rb_entry *f2fs_lookup_rb_tree_ret(struct rb_root_cached *root, bool force, bool *leftmost); bool f2fs_check_rb_tree_consistence(struct f2fs_sb_info *sbi, struct rb_root_cached *root, bool check_key); -void f2fs_init_extent_tree(struct inode *inode, struct page *ipage); +void f2fs_init_extent_tree(struct inode *inode); void f2fs_drop_extent_tree(struct inode *inode); void 
f2fs_destroy_extent_node(struct inode *inode); void f2fs_destroy_extent_tree(struct inode *inode); @@ -4076,6 +4076,7 @@ int __init f2fs_create_extent_cache(void); void f2fs_destroy_extent_cache(void); /* read extent cache ops */ +void f2fs_init_read_extent_tree(struct inode *inode, struct page *ipage); bool f2fs_lookup_read_extent_cache(struct inode *inode, pgoff_t pgofs, struct extent_info *ei); void f2fs_update_read_extent_cache(struct dnode_of_data *dn); diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c index cedad8613f89..935bcb160602 100644 --- a/fs/f2fs/inode.c +++ b/fs/f2fs/inode.c @@ -380,8 +380,6 @@ static int do_read_inode(struct inode *inode) fi->i_pino = le32_to_cpu(ri->i_pino); fi->i_dir_level = ri->i_dir_level; - f2fs_init_extent_tree(inode, node_page); - get_inline_info(inode, ri); fi->i_extra_isize = f2fs_has_extra_attr(inode) ? @@ -469,6 +467,10 @@ static int do_read_inode(struct inode *inode) F2FS_I(inode)->i_disk_time[1] = inode->i_ctime; F2FS_I(inode)->i_disk_time[2] = inode->i_mtime; F2FS_I(inode)->i_disk_time[3] = F2FS_I(inode)->i_crtime; + + /* Need all the flag bits */ + f2fs_init_read_extent_tree(inode, node_page); + f2fs_put_page(node_page, 1); stat_inc_inline_xattr(inode); diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c index c0ca487c6a16..14b2eb8a65e0 100644 --- a/fs/f2fs/namei.c +++ b/fs/f2fs/namei.c @@ -105,8 +105,6 @@ static struct inode *f2fs_new_inode(struct inode *dir, umode_t mode) } F2FS_I(inode)->i_inline_xattr_size = xattr_size; - f2fs_init_extent_tree(inode, NULL); - F2FS_I(inode)->i_flags = f2fs_mask_flags(mode, F2FS_I(dir)->i_flags & F2FS_FL_INHERITED); @@ -133,6 +131,8 @@ static struct inode *f2fs_new_inode(struct inode *dir, umode_t mode) f2fs_set_inode_flags(inode); + f2fs_init_extent_tree(inode); + trace_f2fs_new_inode(inode, 0); return inode; From aa064914fdc05cae8b7aebb76ed799a53d9bcac1 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Thu, 1 Dec 2022 17:37:15 -0800 Subject: [PATCH 161/186] BACKPORT: f2fs: add 
block_age-based extent cache This patch introduces a runtime hot/cold data separation method for f2fs, in order to improve the accuracy of data temperature classification and reduce the garbage collection overhead after long-term data updates. Enhanced hot/cold data separation can record data block update frequency as "age" of the extent per inode, and make use of the age info to indicate a better temperature type for data block allocation: - It records total data blocks allocated since mount; - When a file extent has been updated, it calculates the count of data blocks allocated since the last update as the age of the extent; - Before a data block is allocated, it searches for the age info and chooses the suitable segment for allocation. Test and result: - Prepare: create about 30000 files * 3% for cold files (with cold file extension like .apk, from 3M to 10M) * 50% for warm files (with random file extension like .FcDxq, from 1K to 4M) * 47% for hot files (with hot file extension like .db, from 1K to 256K) - create(5%)/random update(90%)/delete(5%) the files * total write amount is about 70G * fsync will be called for .db files, and buffered write will be used for other files The storage of the test device is large enough (128G) so that it will not switch to SSR mode during the test.
Benefit: dirty segment count increment reduce about 14% - before: Dirty +21110 - after: Dirty +18286 Bug: 264453689 Signed-off-by: qixiaoyu1 Signed-off-by: xiongping1 Signed-off-by: Jaegeuk Kim (cherry picked from commit 729055d7f1e665c57c1c90b093501cb3eb47a876) Change-Id: I8a62846bd3d44f7243300fa9653dbb623b46a96c --- Documentation/ABI/testing/sysfs-fs-f2fs | 14 ++ Documentation/filesystems/f2fs.rst | 4 + fs/f2fs/debug.c | 21 +++ fs/f2fs/extent_cache.c | 183 +++++++++++++++++++++++- fs/f2fs/f2fs.h | 38 +++++ fs/f2fs/file.c | 1 + fs/f2fs/inode.c | 1 + fs/f2fs/node.c | 10 +- fs/f2fs/node.h | 1 + fs/f2fs/segment.c | 33 +++++ fs/f2fs/shrinker.c | 10 +- fs/f2fs/super.c | 14 ++ fs/f2fs/sysfs.c | 24 ++++ include/trace/events/f2fs.h | 86 ++++++++++- 14 files changed, 430 insertions(+), 10 deletions(-) diff --git a/Documentation/ABI/testing/sysfs-fs-f2fs b/Documentation/ABI/testing/sysfs-fs-f2fs index 163d38c0ba64..9c7ddb4d331f 100644 --- a/Documentation/ABI/testing/sysfs-fs-f2fs +++ b/Documentation/ABI/testing/sysfs-fs-f2fs @@ -514,3 +514,17 @@ Date: July 2021 Contact: "Daeho Jeong" Description: You can control for which gc mode the "gc_reclaimed_segments" node shows. Refer to the description of the modes in "gc_reclaimed_segments". + +What: /sys/fs/f2fs//hot_data_age_threshold +Date: November 2022 +Contact: "Ping Xiong" +Description: When DATA SEPARATION is on, it controls the age threshold to indicate + the data blocks as hot. By default it was initialized as 262144 blocks + (equals to 1GB). + +What: /sys/fs/f2fs//warm_data_age_threshold +Date: November 2022 +Contact: "Ping Xiong" +Description: When DATA SEPARATION is on, it controls the age threshold to indicate + the data blocks as warm. By default it was initialized as 2621440 blocks + (equals to 10GB). 
diff --git a/Documentation/filesystems/f2fs.rst b/Documentation/filesystems/f2fs.rst index b91e5a8444d5..f122a2b7c7d1 100644 --- a/Documentation/filesystems/f2fs.rst +++ b/Documentation/filesystems/f2fs.rst @@ -300,6 +300,10 @@ inlinecrypt When possible, encrypt/decrypt the contents of encrypted Documentation/block/inline-encryption.rst. atgc Enable age-threshold garbage collection, it provides high effectiveness and efficiency on background GC. +age_extent_cache Enable an age extent cache based on rb-tree. It records + data block update frequency of the extent per inode, in + order to provide better temperature hints for data block + allocation. ======================== ============================================================ Debugfs Entries diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c index 75564077f2e9..f33e64acaf2b 100644 --- a/fs/f2fs/debug.c +++ b/fs/f2fs/debug.c @@ -88,6 +88,9 @@ static void update_general_status(struct f2fs_sb_info *sbi) si->hit_largest = atomic64_read(&sbi->read_hit_largest); si->hit_total[EX_READ] += si->hit_largest; + /* block age extent_cache only */ + si->allocated_data_blocks = atomic64_read(&sbi->allocated_data_blocks); + /* validation check of the segment numbers */ si->ndirty_node = get_pages(sbi, F2FS_DIRTY_NODES); si->ndirty_dent = get_pages(sbi, F2FS_DIRTY_DENTS); @@ -497,6 +500,22 @@ static int stat_show(struct seq_file *s, void *v) seq_printf(s, " - Inner Struct Count: tree: %d(%d), node: %d\n", si->ext_tree[EX_READ], si->zombie_tree[EX_READ], si->ext_node[EX_READ]); + seq_puts(s, "\nExtent Cache (Block Age):\n"); + seq_printf(s, " - Allocated Data Blocks: %llu\n", + si->allocated_data_blocks); + seq_printf(s, " - Hit Count: L1:%llu L2:%llu\n", + si->hit_cached[EX_BLOCK_AGE], + si->hit_rbtree[EX_BLOCK_AGE]); + seq_printf(s, " - Hit Ratio: %llu%% (%llu / %llu)\n", + !si->total_ext[EX_BLOCK_AGE] ? 
0 : + div64_u64(si->hit_total[EX_BLOCK_AGE] * 100, + si->total_ext[EX_BLOCK_AGE]), + si->hit_total[EX_BLOCK_AGE], + si->total_ext[EX_BLOCK_AGE]); + seq_printf(s, " - Inner Struct Count: tree: %d(%d), node: %d\n", + si->ext_tree[EX_BLOCK_AGE], + si->zombie_tree[EX_BLOCK_AGE], + si->ext_node[EX_BLOCK_AGE]); seq_puts(s, "\nBalancing F2FS Async:\n"); seq_printf(s, " - DIO (R: %4d, W: %4d)\n", si->nr_dio_read, si->nr_dio_write); @@ -566,6 +585,8 @@ static int stat_show(struct seq_file *s, void *v) si->cache_mem >> 10); seq_printf(s, " - read extent cache: %llu KB\n", si->ext_mem[EX_READ] >> 10); + seq_printf(s, " - block age extent cache: %llu KB\n", + si->ext_mem[EX_BLOCK_AGE] >> 10); seq_printf(s, " - paged : %llu KB\n", si->page_mem >> 10); } diff --git a/fs/f2fs/extent_cache.c b/fs/f2fs/extent_cache.c index 73616ba7ea73..9a6db0b1b0eb 100644 --- a/fs/f2fs/extent_cache.c +++ b/fs/f2fs/extent_cache.c @@ -6,6 +6,10 @@ * Copyright (c) 2015 Samsung Electronics * Authors: Jaegeuk Kim * Chao Yu + * + * block_age-based extent cache added by: + * Copyright (c) 2022 xiaomi Co., Ltd. 
+ * http://www.xiaomi.com/ */ #include @@ -18,6 +22,7 @@ static void __set_extent_info(struct extent_info *ei, unsigned int fofs, unsigned int len, block_t blk, bool keep_clen, + unsigned long age, unsigned long last_blocks, enum extent_type type) { ei->fofs = fofs; @@ -30,6 +35,9 @@ static void __set_extent_info(struct extent_info *ei, #ifdef CONFIG_F2FS_FS_COMPRESSION ei->c_len = 0; #endif + } else if (type == EX_BLOCK_AGE) { + ei->age = age; + ei->last_blocks = last_blocks; } } @@ -47,10 +55,27 @@ static bool __may_read_extent_tree(struct inode *inode) return S_ISREG(inode->i_mode); } +static bool __may_age_extent_tree(struct inode *inode) +{ + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); + + if (!test_opt(sbi, AGE_EXTENT_CACHE)) + return false; + /* don't cache block age info for cold file */ + if (is_inode_flag_set(inode, FI_COMPRESSED_FILE)) + return false; + if (file_is_cold(inode)) + return false; + + return S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode); +} + static bool __init_may_extent_tree(struct inode *inode, enum extent_type type) { if (type == EX_READ) return __may_read_extent_tree(inode); + else if (type == EX_BLOCK_AGE) + return __may_age_extent_tree(inode); return false; } @@ -90,6 +115,11 @@ static bool __is_extent_mergeable(struct extent_info *back, #endif return (back->fofs + back->len == front->fofs && back->blk + back->len == front->blk); + } else if (type == EX_BLOCK_AGE) { + return (back->fofs + back->len == front->fofs && + abs(back->age - front->age) <= SAME_AGE_REGION && + abs(back->last_blocks - front->last_blocks) <= + SAME_AGE_REGION); } return false; } @@ -488,11 +518,22 @@ out: set_inode_flag(inode, FI_NO_EXTENT); } +void f2fs_init_age_extent_tree(struct inode *inode) +{ + if (!__init_may_extent_tree(inode, EX_BLOCK_AGE)) + return; + __grab_extent_tree(inode, EX_BLOCK_AGE); +} + void f2fs_init_extent_tree(struct inode *inode) { /* initialize read cache */ if (__init_may_extent_tree(inode, EX_READ)) __grab_extent_tree(inode, 
EX_READ); + + /* initialize block age cache */ + if (__init_may_extent_tree(inode, EX_BLOCK_AGE)) + __grab_extent_tree(inode, EX_BLOCK_AGE); } static bool __lookup_extent_tree(struct inode *inode, pgoff_t pgofs, @@ -543,6 +584,8 @@ out: if (type == EX_READ) trace_f2fs_lookup_read_extent_tree_end(inode, pgofs, ei); + else if (type == EX_BLOCK_AGE) + trace_f2fs_lookup_age_extent_tree_end(inode, pgofs, ei); return ret; } @@ -641,6 +684,10 @@ static void __update_extent_tree_range(struct inode *inode, if (type == EX_READ) trace_f2fs_update_read_extent_tree_range(inode, fofs, len, tei->blk, 0); + else if (type == EX_BLOCK_AGE) + trace_f2fs_update_age_extent_tree_range(inode, fofs, len, + tei->age, tei->last_blocks); + write_lock(&et->lock); if (type == EX_READ) { @@ -693,6 +740,7 @@ static void __update_extent_tree_range(struct inode *inode, __set_extent_info(&ei, end, org_end - end, end - dei.fofs + dei.blk, false, + dei.age, dei.last_blocks, type); en1 = __insert_extent_tree(sbi, et, &ei, NULL, NULL, true); @@ -701,6 +749,7 @@ static void __update_extent_tree_range(struct inode *inode, __set_extent_info(&en->ei, end, en->ei.len - (end - dei.fofs), en->ei.blk + (end - dei.fofs), true, + dei.age, dei.last_blocks, type); next_en = en; } @@ -731,11 +780,15 @@ static void __update_extent_tree_range(struct inode *inode, en = next_en; } + if (type == EX_BLOCK_AGE) + goto update_age_extent_cache; + /* 3. 
update extent in read extent cache */ BUG_ON(type != EX_READ); if (tei->blk) { - __set_extent_info(&ei, fofs, len, tei->blk, false, EX_READ); + __set_extent_info(&ei, fofs, len, tei->blk, false, + 0, 0, EX_READ); if (!__try_merge_extent_node(sbi, et, &ei, prev_en, next_en)) __insert_extent_tree(sbi, et, &ei, insert_p, insert_parent, leftmost); @@ -757,7 +810,17 @@ static void __update_extent_tree_range(struct inode *inode, et->largest_updated = false; updated = true; } + goto out_read_extent_cache; +update_age_extent_cache: + if (!tei->last_blocks) + goto out_read_extent_cache; + __set_extent_info(&ei, fofs, len, 0, false, + tei->age, tei->last_blocks, EX_BLOCK_AGE); + if (!__try_merge_extent_node(sbi, et, &ei, prev_en, next_en)) + __insert_extent_tree(sbi, et, &ei, + insert_p, insert_parent, leftmost); +out_read_extent_cache: write_unlock(&et->lock); if (updated) @@ -795,7 +858,7 @@ void f2fs_update_read_extent_tree_range_compressed(struct inode *inode, if (en) goto unlock_out; - __set_extent_info(&ei, fofs, llen, blkaddr, true, EX_READ); + __set_extent_info(&ei, fofs, llen, blkaddr, true, 0, 0, EX_READ); ei.c_len = c_len; if (!__try_merge_extent_node(sbi, et, &ei, prev_en, next_en)) @@ -806,6 +869,72 @@ unlock_out: } #endif +static unsigned long long __calculate_block_age(unsigned long long new, + unsigned long long old) +{ + unsigned long long diff; + + diff = (new >= old) ? new - (new - old) : new + (old - new); + + return div_u64(diff * LAST_AGE_WEIGHT, 100); +} + +/* This returns a new age and allocated blocks in ei */ +static int __get_new_block_age(struct inode *inode, struct extent_info *ei) +{ + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); + loff_t f_size = i_size_read(inode); + unsigned long long cur_blocks = + atomic64_read(&sbi->allocated_data_blocks); + + /* + * When I/O is not aligned to a PAGE_SIZE, update will happen to the last + * file block even in seq write. So don't record age for newly last file + * block here. 
+ */ + if ((f_size >> PAGE_SHIFT) == ei->fofs && f_size & (PAGE_SIZE - 1) && + ei->blk == NEW_ADDR) + return -EINVAL; + + if (__lookup_extent_tree(inode, ei->fofs, ei, EX_BLOCK_AGE)) { + unsigned long long cur_age; + + if (cur_blocks >= ei->last_blocks) + cur_age = cur_blocks - ei->last_blocks; + else + /* allocated_data_blocks overflow */ + cur_age = ULLONG_MAX - ei->last_blocks + cur_blocks; + + if (ei->age) + ei->age = __calculate_block_age(cur_age, ei->age); + else + ei->age = cur_age; + ei->last_blocks = cur_blocks; + WARN_ON(ei->age > cur_blocks); + return 0; + } + + f2fs_bug_on(sbi, ei->blk == NULL_ADDR); + + /* the data block was allocated for the first time */ + if (ei->blk == NEW_ADDR) + goto out; + + if (__is_valid_data_blkaddr(ei->blk) && + !f2fs_is_valid_blkaddr(sbi, ei->blk, DATA_GENERIC_ENHANCE)) { + f2fs_bug_on(sbi, 1); + return -EINVAL; + } +out: + /* + * init block age with zero, this can happen when the block age extent + * was reclaimed due to memory constraint or system reboot + */ + ei->age = 0; + ei->last_blocks = cur_blocks; + return 0; +} + static void __update_extent_cache(struct dnode_of_data *dn, enum extent_type type) { struct extent_info ei; @@ -822,6 +951,10 @@ static void __update_extent_cache(struct dnode_of_data *dn, enum extent_type typ ei.blk = NULL_ADDR; else ei.blk = dn->data_blkaddr; + } else if (type == EX_BLOCK_AGE) { + ei.blk = dn->data_blkaddr; + if (__get_new_block_age(dn->inode, &ei)) + return; } __update_extent_tree_range(dn->inode, &ei, type); } @@ -939,6 +1072,43 @@ unsigned int f2fs_shrink_read_extent_tree(struct f2fs_sb_info *sbi, int nr_shrin return __shrink_extent_tree(sbi, nr_shrink, EX_READ); } +/* block age extent cache operations */ +bool f2fs_lookup_age_extent_cache(struct inode *inode, pgoff_t pgofs, + struct extent_info *ei) +{ + if (!__may_extent_tree(inode, EX_BLOCK_AGE)) + return false; + + return __lookup_extent_tree(inode, pgofs, ei, EX_BLOCK_AGE); +} + +void f2fs_update_age_extent_cache(struct 
dnode_of_data *dn) +{ + return __update_extent_cache(dn, EX_BLOCK_AGE); +} + +void f2fs_update_age_extent_cache_range(struct dnode_of_data *dn, + pgoff_t fofs, unsigned int len) +{ + struct extent_info ei = { + .fofs = fofs, + .len = len, + }; + + if (!__may_extent_tree(dn->inode, EX_BLOCK_AGE)) + return; + + __update_extent_tree_range(dn->inode, &ei, EX_BLOCK_AGE); +} + +unsigned int f2fs_shrink_age_extent_tree(struct f2fs_sb_info *sbi, int nr_shrink) +{ + if (!test_opt(sbi, AGE_EXTENT_CACHE)) + return 0; + + return __shrink_extent_tree(sbi, nr_shrink, EX_BLOCK_AGE); +} + static unsigned int __destroy_extent_node(struct inode *inode, enum extent_type type) { @@ -959,6 +1129,7 @@ static unsigned int __destroy_extent_node(struct inode *inode, void f2fs_destroy_extent_node(struct inode *inode) { __destroy_extent_node(inode, EX_READ); + __destroy_extent_node(inode, EX_BLOCK_AGE); } static void __drop_extent_tree(struct inode *inode, enum extent_type type) @@ -987,6 +1158,7 @@ static void __drop_extent_tree(struct inode *inode, enum extent_type type) void f2fs_drop_extent_tree(struct inode *inode) { __drop_extent_tree(inode, EX_READ); + __drop_extent_tree(inode, EX_BLOCK_AGE); } static void __destroy_extent_tree(struct inode *inode, enum extent_type type) @@ -1027,6 +1199,7 @@ static void __destroy_extent_tree(struct inode *inode, enum extent_type type) void f2fs_destroy_extent_tree(struct inode *inode) { __destroy_extent_tree(inode, EX_READ); + __destroy_extent_tree(inode, EX_BLOCK_AGE); } static void __init_extent_tree_info(struct extent_tree_info *eti) @@ -1044,6 +1217,12 @@ static void __init_extent_tree_info(struct extent_tree_info *eti) void f2fs_init_extent_cache_info(struct f2fs_sb_info *sbi) { __init_extent_tree_info(&sbi->extent_tree[EX_READ]); + __init_extent_tree_info(&sbi->extent_tree[EX_BLOCK_AGE]); + + /* initialize for block age extents */ + atomic64_set(&sbi->allocated_data_blocks, 0); + sbi->hot_data_age_threshold = DEF_HOT_DATA_AGE_THRESHOLD; + 
sbi->warm_data_age_threshold = DEF_WARM_DATA_AGE_THRESHOLD; } int __init f2fs_create_extent_cache(void) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 3dc413423be1..c2075b87ec23 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -99,6 +99,7 @@ extern const char *f2fs_fault_name[FAULT_MAX]; #define F2FS_MOUNT_MERGE_CHECKPOINT 0x10000000 #define F2FS_MOUNT_GC_MERGE 0x20000000 #define F2FS_MOUNT_COMPRESS_CACHE 0x40000000 +#define F2FS_MOUNT_AGE_EXTENT_CACHE 0x80000000 #define F2FS_OPTION(sbi) ((sbi)->mount_opt) #define clear_opt(sbi, option) (F2FS_OPTION(sbi).opt &= ~F2FS_MOUNT_##option) @@ -570,9 +571,22 @@ enum { /* number of extent info in extent cache we try to shrink */ #define READ_EXTENT_CACHE_SHRINK_NUMBER 128 +/* number of age extent info in extent cache we try to shrink */ +#define AGE_EXTENT_CACHE_SHRINK_NUMBER 128 +#define LAST_AGE_WEIGHT 30 +#define SAME_AGE_REGION 1024 + +/* + * Define data block with age less than 1GB as hot data + * define data block with age less than 10GB but more than 1GB as warm data + */ +#define DEF_HOT_DATA_AGE_THRESHOLD 262144 +#define DEF_WARM_DATA_AGE_THRESHOLD 2621440 + /* extent cache type */ enum extent_type { EX_READ, + EX_BLOCK_AGE, NR_EXTENT_CACHES, }; @@ -600,6 +614,13 @@ struct extent_info { unsigned int c_len; #endif }; + /* block age extent_cache */ + struct { + /* block age of the extent */ + unsigned long long age; + /* last total blocks allocated */ + unsigned long long last_blocks; + }; }; }; @@ -1589,6 +1610,11 @@ struct f2fs_sb_info { /* for extent tree cache */ struct extent_tree_info extent_tree[NR_EXTENT_CACHES]; + atomic64_t allocated_data_blocks; /* for block age extent_cache */ + + /* The threshold used for hot and warm data seperation*/ + unsigned int hot_data_age_threshold; + unsigned int warm_data_age_threshold; /* basic filesystem units */ unsigned int log_sectors_per_block; /* log2 sectors per block */ @@ -3770,6 +3796,8 @@ struct f2fs_stat_info { unsigned long long ext_mem[NR_EXTENT_CACHES]; 
/* for read extent cache */ unsigned long long hit_largest; + /* for block age extent cache */ + unsigned long long allocated_data_blocks; int ndirty_node, ndirty_dent, ndirty_meta, ndirty_imeta; int ndirty_data, ndirty_qdata; int inmem_pages; @@ -4085,6 +4113,16 @@ void f2fs_update_read_extent_cache_range(struct dnode_of_data *dn, unsigned int f2fs_shrink_read_extent_tree(struct f2fs_sb_info *sbi, int nr_shrink); +/* block age extent cache ops */ +void f2fs_init_age_extent_tree(struct inode *inode); +bool f2fs_lookup_age_extent_cache(struct inode *inode, pgoff_t pgofs, + struct extent_info *ei); +void f2fs_update_age_extent_cache(struct dnode_of_data *dn); +void f2fs_update_age_extent_cache_range(struct dnode_of_data *dn, + pgoff_t fofs, unsigned int len); +unsigned int f2fs_shrink_age_extent_tree(struct f2fs_sb_info *sbi, + int nr_shrink); + /* * sysfs.c */ diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 18da58068820..d6ded768ac09 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -608,6 +608,7 @@ void f2fs_truncate_data_blocks_range(struct dnode_of_data *dn, int count) fofs = f2fs_start_bidx_of_node(ofs_of_node(dn->node_page), dn->inode) + ofs; f2fs_update_read_extent_cache_range(dn, fofs, 0, len); + f2fs_update_age_extent_cache_range(dn, fofs, nr_free); dec_valid_block_count(sbi, dn->inode, nr_free); } dn->ofs_in_node = ofs; diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c index 935bcb160602..5bf4f1cccd71 100644 --- a/fs/f2fs/inode.c +++ b/fs/f2fs/inode.c @@ -470,6 +470,7 @@ static int do_read_inode(struct inode *inode) /* Need all the flag bits */ f2fs_init_read_extent_tree(inode, node_page); + f2fs_init_age_extent_tree(inode); f2fs_put_page(node_page, 1); diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 4e5cbd856df7..230c63c99b55 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -58,7 +58,7 @@ bool f2fs_available_free_memory(struct f2fs_sb_info *sbi, int type) avail_ram = val.totalram - val.totalhigh; /* - * give 25%, 25%, 50%, 50%, 50% memory for each 
components respectively + * give 25%, 25%, 50%, 50%, 25%, 25% memory for each components respectively */ if (type == FREE_NIDS) { mem_size = (nm_i->nid_cnt[FREE_NID] * @@ -83,14 +83,16 @@ bool f2fs_available_free_memory(struct f2fs_sb_info *sbi, int type) sizeof(struct ino_entry); mem_size >>= PAGE_SHIFT; res = mem_size < ((avail_ram * nm_i->ram_thresh / 100) >> 1); - } else if (type == READ_EXTENT_CACHE) { - struct extent_tree_info *eti = &sbi->extent_tree[EX_READ]; + } else if (type == READ_EXTENT_CACHE || type == AGE_EXTENT_CACHE) { + enum extent_type etype = type == READ_EXTENT_CACHE ? + EX_READ : EX_BLOCK_AGE; + struct extent_tree_info *eti = &sbi->extent_tree[etype]; mem_size = (atomic_read(&eti->total_ext_tree) * sizeof(struct extent_tree) + atomic_read(&eti->total_ext_node) * sizeof(struct extent_node)) >> PAGE_SHIFT; - res = mem_size < ((avail_ram * nm_i->ram_thresh / 100) >> 1); + res = mem_size < ((avail_ram * nm_i->ram_thresh / 100) >> 2); } else if (type == INMEM_PAGES) { /* it allows 20% / total_ram for inmemory pages */ mem_size = get_pages(sbi, F2FS_INMEM_PAGES); diff --git a/fs/f2fs/node.h b/fs/f2fs/node.h index 2c152e677a06..3de7891c8e79 100644 --- a/fs/f2fs/node.h +++ b/fs/f2fs/node.h @@ -149,6 +149,7 @@ enum mem_type { DIRTY_DENTS, /* indicates dirty dentry pages */ INO_ENTRIES, /* indicates inode entries */ READ_EXTENT_CACHE, /* indicates read extent cache */ + AGE_EXTENT_CACHE, /* indicates age extent cache */ INMEM_PAGES, /* indicates inmemory pages */ DISCARD_CACHE, /* indicates memory of cached discard cmds */ COMPRESS_PAGE, /* indicates memory of cached compressed pages */ diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 512781ad3e61..63245ab3a27c 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -540,6 +540,11 @@ void f2fs_balance_fs_bg(struct f2fs_sb_info *sbi, bool from_bg) f2fs_shrink_read_extent_tree(sbi, READ_EXTENT_CACHE_SHRINK_NUMBER); + /* try to shrink age extent cache when there is no enough memory */ + if 
(!f2fs_available_free_memory(sbi, AGE_EXTENT_CACHE)) + f2fs_shrink_age_extent_tree(sbi, + AGE_EXTENT_CACHE_SHRINK_NUMBER); + /* check the # of cached NAT entries */ if (!f2fs_available_free_memory(sbi, NAT_ENTRIES)) f2fs_try_to_free_nats(sbi, NAT_ENTRY_PER_BLOCK); @@ -3293,10 +3298,28 @@ static int __get_segment_type_4(struct f2fs_io_info *fio) } } +static int __get_age_segment_type(struct inode *inode, pgoff_t pgofs) +{ + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); + struct extent_info ei; + + if (f2fs_lookup_age_extent_cache(inode, pgofs, &ei)) { + if (!ei.age) + return NO_CHECK_TYPE; + if (ei.age <= sbi->hot_data_age_threshold) + return CURSEG_HOT_DATA; + if (ei.age <= sbi->warm_data_age_threshold) + return CURSEG_WARM_DATA; + return CURSEG_COLD_DATA; + } + return NO_CHECK_TYPE; +} + static int __get_segment_type_6(struct f2fs_io_info *fio) { if (fio->type == DATA) { struct inode *inode = fio->page->mapping->host; + int type; if (is_inode_flag_set(inode, FI_ALIGNED_WRITE)) return CURSEG_COLD_DATA_PINNED; @@ -3311,6 +3334,11 @@ static int __get_segment_type_6(struct f2fs_io_info *fio) } if (file_is_cold(inode) || f2fs_need_compress_data(inode)) return CURSEG_COLD_DATA; + + type = __get_age_segment_type(inode, fio->page->index); + if (type != NO_CHECK_TYPE) + return type; + if (file_is_hot(inode) || is_inode_flag_set(inode, FI_HOT_DATA) || f2fs_is_atomic_file(inode) || @@ -3422,6 +3450,9 @@ void f2fs_allocate_data_block(struct f2fs_sb_info *sbi, struct page *page, locate_dirty_segment(sbi, GET_SEGNO(sbi, old_blkaddr)); locate_dirty_segment(sbi, GET_SEGNO(sbi, *new_blkaddr)); + if (IS_DATASEG(type)) + atomic64_inc(&sbi->allocated_data_blocks); + up_write(&sit_i->sentry_lock); if (page && IS_NODESEG(type)) { @@ -3543,6 +3574,8 @@ void f2fs_outplace_write_data(struct dnode_of_data *dn, struct f2fs_summary sum; f2fs_bug_on(sbi, dn->data_blkaddr == NULL_ADDR); + if (fio->io_type == FS_DATA_IO || fio->io_type == FS_CP_DATA_IO) + f2fs_update_age_extent_cache(dn); 
set_summary(&sum, dn->nid, dn->ofs_in_node, fio->version); do_write_page(&sum, fio); f2fs_update_data_blkaddr(dn, fio->new_blkaddr); diff --git a/fs/f2fs/shrinker.c b/fs/f2fs/shrinker.c index 33c490e69ae3..83d6fb97dcae 100644 --- a/fs/f2fs/shrinker.c +++ b/fs/f2fs/shrinker.c @@ -59,6 +59,9 @@ unsigned long f2fs_shrink_count(struct shrinker *shrink, /* count read extent cache entries */ count += __count_extent_cache(sbi, EX_READ); + /* count block age extent cache entries */ + count += __count_extent_cache(sbi, EX_BLOCK_AGE); + /* count clean nat cache entries */ count += __count_nat_entries(sbi); @@ -102,8 +105,11 @@ unsigned long f2fs_shrink_scan(struct shrinker *shrink, sbi->shrinker_run_no = run_no; + /* shrink extent cache entries */ + freed += f2fs_shrink_age_extent_tree(sbi, nr >> 2); + /* shrink read extent cache entries */ - freed += f2fs_shrink_read_extent_tree(sbi, nr >> 1); + freed += f2fs_shrink_read_extent_tree(sbi, nr >> 2); /* shrink clean nat cache entries */ if (freed < nr) @@ -134,6 +140,8 @@ void f2fs_join_shrinker(struct f2fs_sb_info *sbi) void f2fs_leave_shrinker(struct f2fs_sb_info *sbi) { f2fs_shrink_read_extent_tree(sbi, __count_extent_cache(sbi, EX_READ)); + f2fs_shrink_age_extent_tree(sbi, + __count_extent_cache(sbi, EX_BLOCK_AGE)); spin_lock(&f2fs_list_lock); list_del_init(&sbi->s_list); diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 98da6a8636e0..9650528d8f65 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -154,6 +154,7 @@ enum { Opt_atgc, Opt_gc_merge, Opt_nogc_merge, + Opt_age_extent_cache, Opt_err, }; @@ -229,6 +230,7 @@ static match_table_t f2fs_tokens = { {Opt_atgc, "atgc"}, {Opt_gc_merge, "gc_merge"}, {Opt_nogc_merge, "nogc_merge"}, + {Opt_age_extent_cache, "age_extent_cache"}, {Opt_err, NULL}, }; @@ -1148,6 +1150,9 @@ static int parse_options(struct super_block *sb, char *options, bool is_remount) case Opt_nogc_merge: clear_opt(sbi, GC_MERGE); break; + case Opt_age_extent_cache: + set_opt(sbi, AGE_EXTENT_CACHE); + 
break; default: f2fs_err(sbi, "Unrecognized mount option \"%s\" or missing value", p); @@ -1821,6 +1826,8 @@ static int f2fs_show_options(struct seq_file *seq, struct dentry *root) seq_puts(seq, ",extent_cache"); else seq_puts(seq, ",noextent_cache"); + if (test_opt(sbi, AGE_EXTENT_CACHE)) + seq_puts(seq, ",age_extent_cache"); if (test_opt(sbi, DATA_FLUSH)) seq_puts(seq, ",data_flush"); @@ -2043,6 +2050,7 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data) bool need_restart_ckpt = false, need_stop_ckpt = false; bool need_restart_flush = false, need_stop_flush = false; bool no_read_extent_cache = !test_opt(sbi, READ_EXTENT_CACHE); + bool no_age_extent_cache = !test_opt(sbi, AGE_EXTENT_CACHE); bool disable_checkpoint = test_opt(sbi, DISABLE_CHECKPOINT); bool no_io_align = !F2FS_IO_ALIGNED(sbi); bool no_atgc = !test_opt(sbi, ATGC); @@ -2137,6 +2145,12 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data) f2fs_warn(sbi, "switch extent_cache option is not allowed"); goto restore_opts; } + /* disallow enable/disable age extent_cache dynamically */ + if (no_age_extent_cache == !!test_opt(sbi, AGE_EXTENT_CACHE)) { + err = -EINVAL; + f2fs_warn(sbi, "switch age_extent_cache option is not allowed"); + goto restore_opts; + } if (no_io_align == !!F2FS_IO_ALIGNED(sbi)) { err = -EINVAL; diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c index 23eba7514c9c..c9217d7a941f 100644 --- a/fs/f2fs/sysfs.c +++ b/fs/f2fs/sysfs.c @@ -549,6 +549,24 @@ out: return count; } + if (!strcmp(a->attr.name, "hot_data_age_threshold")) { + if (t == 0 || t >= sbi->warm_data_age_threshold) + return -EINVAL; + if (t == *ui) + return count; + *ui = (unsigned int)t; + return count; + } + + if (!strcmp(a->attr.name, "warm_data_age_threshold")) { + if (t == 0 || t <= sbi->hot_data_age_threshold) + return -EINVAL; + if (t == *ui) + return count; + *ui = (unsigned int)t; + return count; + } + *ui = (unsigned int)t; return count; @@ -778,6 +796,10 @@ 
F2FS_RW_ATTR(ATGC_INFO, atgc_management, atgc_age_threshold, age_threshold); F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, gc_segment_mode, gc_segment_mode); F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, gc_reclaimed_segments, gc_reclaimed_segs); +/* For block age extent cache */ +F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, hot_data_age_threshold, hot_data_age_threshold); +F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, warm_data_age_threshold, warm_data_age_threshold); + #define ATTR_LIST(name) (&f2fs_attr_##name.attr) static struct attribute *f2fs_attrs[] = { ATTR_LIST(gc_urgent_sleep_time), @@ -853,6 +875,8 @@ static struct attribute *f2fs_attrs[] = { ATTR_LIST(atgc_age_threshold), ATTR_LIST(gc_segment_mode), ATTR_LIST(gc_reclaimed_segments), + ATTR_LIST(hot_data_age_threshold), + ATTR_LIST(warm_data_age_threshold), NULL, }; ATTRIBUTE_GROUPS(f2fs); diff --git a/include/trace/events/f2fs.h b/include/trace/events/f2fs.h index 81cb234cfaf6..e927889c7fd5 100644 --- a/include/trace/events/f2fs.h +++ b/include/trace/events/f2fs.h @@ -53,6 +53,7 @@ TRACE_DEFINE_ENUM(CP_TRIMMED); TRACE_DEFINE_ENUM(CP_PAUSE); TRACE_DEFINE_ENUM(CP_RESIZE); TRACE_DEFINE_ENUM(EX_READ); +TRACE_DEFINE_ENUM(EX_BLOCK_AGE); #define show_block_type(type) \ __print_symbolic(type, \ @@ -163,6 +164,11 @@ TRACE_DEFINE_ENUM(EX_READ); { COMPRESS_ZSTD, "ZSTD" }, \ { COMPRESS_LZORLE, "LZO-RLE" }) +#define show_extent_type(type) \ + __print_symbolic(type, \ + { EX_READ, "Read" }, \ + { EX_BLOCK_AGE, "Block Age" }) + struct f2fs_sb_info; struct f2fs_io_info; struct extent_info; @@ -1548,7 +1554,7 @@ TRACE_EVENT(f2fs_lookup_extent_tree_start, TP_printk("dev = (%d,%d), ino = %lu, pgofs = %u, type = %s", show_dev_ino(__entry), __entry->pgofs, - __entry->type == EX_READ ? 
"Read" : "N/A") + show_extent_type(__entry->type)) ); TRACE_EVENT_CONDITION(f2fs_lookup_read_extent_tree_end, @@ -1587,6 +1593,45 @@ TRACE_EVENT_CONDITION(f2fs_lookup_read_extent_tree_end, __entry->blk) ); +TRACE_EVENT_CONDITION(f2fs_lookup_age_extent_tree_end, + + TP_PROTO(struct inode *inode, unsigned int pgofs, + struct extent_info *ei), + + TP_ARGS(inode, pgofs, ei), + + TP_CONDITION(ei), + + TP_STRUCT__entry( + __field(dev_t, dev) + __field(ino_t, ino) + __field(unsigned int, pgofs) + __field(unsigned int, fofs) + __field(unsigned int, len) + __field(unsigned long long, age) + __field(unsigned long long, blocks) + ), + + TP_fast_assign( + __entry->dev = inode->i_sb->s_dev; + __entry->ino = inode->i_ino; + __entry->pgofs = pgofs; + __entry->fofs = ei->fofs; + __entry->len = ei->len; + __entry->age = ei->age; + __entry->blocks = ei->last_blocks; + ), + + TP_printk("dev = (%d,%d), ino = %lu, pgofs = %u, " + "age_ext_info(fofs: %u, len: %u, age: %llu, blocks: %llu)", + show_dev_ino(__entry), + __entry->pgofs, + __entry->fofs, + __entry->len, + __entry->age, + __entry->blocks) +); + TRACE_EVENT(f2fs_update_read_extent_tree_range, TP_PROTO(struct inode *inode, unsigned int pgofs, unsigned int len, @@ -1622,6 +1667,41 @@ TRACE_EVENT(f2fs_update_read_extent_tree_range, __entry->c_len) ); +TRACE_EVENT(f2fs_update_age_extent_tree_range, + + TP_PROTO(struct inode *inode, unsigned int pgofs, unsigned int len, + unsigned long long age, + unsigned long long last_blks), + + TP_ARGS(inode, pgofs, len, age, last_blks), + + TP_STRUCT__entry( + __field(dev_t, dev) + __field(ino_t, ino) + __field(unsigned int, pgofs) + __field(unsigned int, len) + __field(unsigned long long, age) + __field(unsigned long long, blocks) + ), + + TP_fast_assign( + __entry->dev = inode->i_sb->s_dev; + __entry->ino = inode->i_ino; + __entry->pgofs = pgofs; + __entry->len = len; + __entry->age = age; + __entry->blocks = last_blks; + ), + + TP_printk("dev = (%d,%d), ino = %lu, pgofs = %u, " + "len = %u, 
age = %llu, blocks = %llu", + show_dev_ino(__entry), + __entry->pgofs, + __entry->len, + __entry->age, + __entry->blocks) +); + TRACE_EVENT(f2fs_shrink_extent_tree, TP_PROTO(struct f2fs_sb_info *sbi, unsigned int node_cnt, @@ -1647,7 +1727,7 @@ TRACE_EVENT(f2fs_shrink_extent_tree, show_dev(__entry->dev), __entry->node_cnt, __entry->tree_cnt, - __entry->type == EX_READ ? "Read" : "N/A") + show_extent_type(__entry->type)) ); TRACE_EVENT(f2fs_destroy_extent_tree, @@ -1674,7 +1754,7 @@ TRACE_EVENT(f2fs_destroy_extent_tree, TP_printk("dev = (%d,%d), ino = %lu, destroyed: node_cnt = %u, type = %s", show_dev_ino(__entry), __entry->node_cnt, - __entry->type == EX_READ ? "Read" : "N/A") + show_extent_type(__entry->type)) ); DECLARE_EVENT_CLASS(f2fs_sync_dirty_inodes, From 073b997b02e7df2eec191d1a5278e20e73cfe126 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Fri, 16 Dec 2022 14:05:44 -0800 Subject: [PATCH 162/186] BACKPORT: f2fs: initialize extent_cache parameter This can avoid confusing tracepoint values. 
Bug: 264453689 Signed-off-by: Jaegeuk Kim (cherry picked from commit b5825de803e7cf56262f5b3a8bc692d61acfe653) Change-Id: Ia43e2c1fd405a11fc4122b68f05f40c10f47f263 --- fs/f2fs/data.c | 2 +- fs/f2fs/extent_cache.c | 2 +- fs/f2fs/file.c | 2 +- fs/f2fs/segment.c | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 991b4f9593b1..cce1a178106f 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -2156,7 +2156,7 @@ int f2fs_read_multi_pages(struct compress_ctx *cc, struct bio **bio_ret, sector_t last_block_in_file; const unsigned blocksize = blks_to_bytes(inode, 1); struct decompress_io_ctx *dic = NULL; - struct extent_info ei = {0, }; + struct extent_info ei = {}; bool from_dnode = true; int i; int ret = 0; diff --git a/fs/f2fs/extent_cache.c b/fs/f2fs/extent_cache.c index 9a6db0b1b0eb..2d45da3d8dcc 100644 --- a/fs/f2fs/extent_cache.c +++ b/fs/f2fs/extent_cache.c @@ -937,7 +937,7 @@ out: static void __update_extent_cache(struct dnode_of_data *dn, enum extent_type type) { - struct extent_info ei; + struct extent_info ei = {}; if (!__may_extent_tree(dn->inode, type)) return; diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index d6ded768ac09..ef49b3684c0c 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -2591,7 +2591,7 @@ static int f2fs_defragment_range(struct f2fs_sb_info *sbi, struct f2fs_map_blocks map = { .m_next_extent = NULL, .m_seg_type = NO_CHECK_TYPE, .m_may_create = false }; - struct extent_info ei = {0, }; + struct extent_info ei = {}; pgoff_t pg_start, pg_end, next_pgofs; unsigned int blk_per_seg = sbi->blocks_per_seg; unsigned int total = 0, sec_num; diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 63245ab3a27c..744aa9274b37 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -3301,7 +3301,7 @@ static int __get_segment_type_4(struct f2fs_io_info *fio) static int __get_age_segment_type(struct inode *inode, pgoff_t pgofs) { struct f2fs_sb_info *sbi = F2FS_I_SB(inode); - struct extent_info ei; + 
struct extent_info ei = {}; if (f2fs_lookup_age_extent_cache(inode, pgofs, &ei)) { if (!ei.age) From 937ed4edda40580f289543bca376ead46a5bd7a2 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Fri, 16 Dec 2022 14:41:54 -0800 Subject: [PATCH 163/186] BACKPORT: f2fs: don't mix to use union values in extent_info Let's explicitly use the defined values in block_age case only. Bug: 264453689 Signed-off-by: Jaegeuk Kim (cherry picked from commit 354a326851a615c7fd8f5a6bda72afc9f051c264) Change-Id: I4011cbf10117a9023ef6fc507726020159ead72d --- fs/f2fs/extent_cache.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/fs/f2fs/extent_cache.c b/fs/f2fs/extent_cache.c index 2d45da3d8dcc..3c77b9a982e4 100644 --- a/fs/f2fs/extent_cache.c +++ b/fs/f2fs/extent_cache.c @@ -880,7 +880,8 @@ static unsigned long long __calculate_block_age(unsigned long long new, } /* This returns a new age and allocated blocks in ei */ -static int __get_new_block_age(struct inode *inode, struct extent_info *ei) +static int __get_new_block_age(struct inode *inode, struct extent_info *ei, + block_t blkaddr) { struct f2fs_sb_info *sbi = F2FS_I_SB(inode); loff_t f_size = i_size_read(inode); @@ -893,7 +894,7 @@ static int __get_new_block_age(struct inode *inode, struct extent_info *ei) * block here. 
*/ if ((f_size >> PAGE_SHIFT) == ei->fofs && f_size & (PAGE_SIZE - 1) && - ei->blk == NEW_ADDR) + blkaddr == NEW_ADDR) return -EINVAL; if (__lookup_extent_tree(inode, ei->fofs, ei, EX_BLOCK_AGE)) { @@ -914,14 +915,14 @@ static int __get_new_block_age(struct inode *inode, struct extent_info *ei) return 0; } - f2fs_bug_on(sbi, ei->blk == NULL_ADDR); + f2fs_bug_on(sbi, blkaddr == NULL_ADDR); /* the data block was allocated for the first time */ - if (ei->blk == NEW_ADDR) + if (blkaddr == NEW_ADDR) goto out; - if (__is_valid_data_blkaddr(ei->blk) && - !f2fs_is_valid_blkaddr(sbi, ei->blk, DATA_GENERIC_ENHANCE)) { + if (__is_valid_data_blkaddr(blkaddr) && + !f2fs_is_valid_blkaddr(sbi, blkaddr, DATA_GENERIC_ENHANCE)) { f2fs_bug_on(sbi, 1); return -EINVAL; } @@ -952,8 +953,7 @@ static void __update_extent_cache(struct dnode_of_data *dn, enum extent_type typ else ei.blk = dn->data_blkaddr; } else if (type == EX_BLOCK_AGE) { - ei.blk = dn->data_blkaddr; - if (__get_new_block_age(dn->inode, &ei)) + if (__get_new_block_age(dn->inode, &ei, dn->data_blkaddr)) return; } __update_extent_tree_range(dn->inode, &ei, type); From 39b8fee3c007ea11f13930ce22b4ff7b4f5afe0e Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Fri, 16 Dec 2022 16:36:36 -0800 Subject: [PATCH 164/186] BACKPORT: f2fs: should use a temp extent_info for lookup Otherwise, __lookup_extent_tree() will override the given extent_info which will be used by caller. 
Bug: 264453689 Signed-off-by: Jaegeuk Kim (cherry picked from commit db640d99b1ed4745eaf3af9ea1910996cddaf30c) Change-Id: Ib37a9ec57c24cfe303ee23a5e90618e6e0dabe61 --- fs/f2fs/extent_cache.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/fs/f2fs/extent_cache.c b/fs/f2fs/extent_cache.c index 3c77b9a982e4..e9665c0cc386 100644 --- a/fs/f2fs/extent_cache.c +++ b/fs/f2fs/extent_cache.c @@ -887,6 +887,7 @@ static int __get_new_block_age(struct inode *inode, struct extent_info *ei, loff_t f_size = i_size_read(inode); unsigned long long cur_blocks = atomic64_read(&sbi->allocated_data_blocks); + struct extent_info tei = *ei; /* only fofs and len are valid */ /* * When I/O is not aligned to a PAGE_SIZE, update will happen to the last @@ -897,17 +898,17 @@ static int __get_new_block_age(struct inode *inode, struct extent_info *ei, blkaddr == NEW_ADDR) return -EINVAL; - if (__lookup_extent_tree(inode, ei->fofs, ei, EX_BLOCK_AGE)) { + if (__lookup_extent_tree(inode, ei->fofs, &tei, EX_BLOCK_AGE)) { unsigned long long cur_age; - if (cur_blocks >= ei->last_blocks) - cur_age = cur_blocks - ei->last_blocks; + if (cur_blocks >= tei.last_blocks) + cur_age = cur_blocks - tei.last_blocks; else /* allocated_data_blocks overflow */ - cur_age = ULLONG_MAX - ei->last_blocks + cur_blocks; + cur_age = ULLONG_MAX - tei.last_blocks + cur_blocks; - if (ei->age) - ei->age = __calculate_block_age(cur_age, ei->age); + if (tei.age) + ei->age = __calculate_block_age(cur_age, tei.age); else ei->age = cur_age; ei->last_blocks = cur_blocks; From 6e50bbff175ea5de46d23673fd146af3f7dfcec4 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Wed, 21 Dec 2022 16:14:10 -0800 Subject: [PATCH 165/186] BACKPORT: f2fs: let's avoid panic if extent_tree is not created This patch avoids the below panic. 
pc : __lookup_extent_tree+0xd8/0x760 lr : f2fs_do_write_data_page+0x104/0x87c sp : ffffffc010cbb3c0 x29: ffffffc010cbb3e0 x28: 0000000000000000 x27: ffffff8803e7f020 x26: ffffff8803e7ed40 x25: ffffff8803e7f020 x24: ffffffc010cbb460 x23: ffffffc010cbb480 x22: 0000000000000000 x21: 0000000000000000 x20: ffffffff22e90900 x19: 0000000000000000 x18: ffffffc010c5d080 x17: 0000000000000000 x16: 0000000000000020 x15: ffffffdb1acdbb88 x14: ffffff888759e2b0 x13: 0000000000000000 x12: ffffff802da49000 x11: 000000000a001200 x10: ffffff8803e7ed40 x9 : ffffff8023195800 x8 : ffffff802da49078 x7 : 0000000000000001 x6 : 0000000000000000 x5 : 0000000000000006 x4 : ffffffc010cbba28 x3 : 0000000000000000 x2 : ffffffc010cbb480 x1 : 0000000000000000 x0 : ffffff8803e7ed40 Call trace: __lookup_extent_tree+0xd8/0x760 f2fs_do_write_data_page+0x104/0x87c f2fs_write_single_data_page+0x420/0xb60 f2fs_write_cache_pages+0x418/0xb1c __f2fs_write_data_pages+0x428/0x58c f2fs_write_data_pages+0x30/0x40 do_writepages+0x88/0x190 __writeback_single_inode+0x48/0x448 writeback_sb_inodes+0x468/0x9e8 __writeback_inodes_wb+0xb8/0x2a4 wb_writeback+0x33c/0x740 wb_do_writeback+0x2b4/0x400 wb_workfn+0xe4/0x34c process_one_work+0x24c/0x5bc worker_thread+0x3e8/0xa50 kthread+0x150/0x1b4 Bug: 264453689 Signed-off-by: Jaegeuk Kim (cherry picked from commit 24af2f08d60039427995f78150963743dcb080de) Change-Id: I7594e80fb7df0dff3f494e79be763a9870c8f063 --- fs/f2fs/extent_cache.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/f2fs/extent_cache.c b/fs/f2fs/extent_cache.c index e9665c0cc386..567d07f6effe 100644 --- a/fs/f2fs/extent_cache.c +++ b/fs/f2fs/extent_cache.c @@ -545,7 +545,8 @@ static bool __lookup_extent_tree(struct inode *inode, pgoff_t pgofs, struct extent_node *en; bool ret = false; - f2fs_bug_on(sbi, !et); + if (!et) + return false; trace_f2fs_lookup_extent_tree_start(inode, pgofs, type); From ab89185ddb154ad928d5c29f24388ca6825b3098 Mon Sep 17 00:00:00 2001 From: Kever Yang Date: 
Mon, 9 Jan 2023 10:19:33 +0800 Subject: [PATCH 166/186] ANDROID: GKI: rockchip: Update symbols Leaf changes summary: 0 artifacts changed Changed leaf types summary: 0 leaf type changed Removed/Changed/Added functions summary: 0 Removed, 0 Changed, 2 Added functions Removed/Changed/Added variables summary: 0 Removed, 0 Changed, 0 Added variable 2 Added functions: [A] 'function void drm_hdcp_update_content_protection(drm_connector*, u64)' [A] 'function void sdhci_reset_tuning(sdhci_host*)' Bug: 239396464 Signed-off-by: Kever Yang Change-Id: I9d15fb674c1bd308e88ad34352092deef60eafcc --- android/abi_gki_aarch64.xml | 11 +++++++++++ android/abi_gki_aarch64_rockchip | 14 +++++++++++++- 2 files changed, 24 insertions(+), 1 deletion(-) diff --git a/android/abi_gki_aarch64.xml b/android/abi_gki_aarch64.xml index bda4150b97dd..b511374aa82e 100644 --- a/android/abi_gki_aarch64.xml +++ b/android/abi_gki_aarch64.xml @@ -2261,6 +2261,7 @@ + @@ -4780,6 +4781,7 @@ + @@ -129317,6 +129319,11 @@ + + + + + @@ -142184,6 +142191,10 @@ + + + + diff --git a/android/abi_gki_aarch64_rockchip b/android/abi_gki_aarch64_rockchip index ec0b9c854e41..aabe57eaca5f 100644 --- a/android/abi_gki_aarch64_rockchip +++ b/android/abi_gki_aarch64_rockchip @@ -1087,6 +1087,7 @@ scsi_ioctl_block_when_processing_errors sdev_prefix_printk sdhci_add_host + sdhci_execute_tuning sdhci_get_property sdhci_pltfm_clk_get_max_clock sdhci_pltfm_free @@ -2442,9 +2443,11 @@ component_unbind_all devm_of_phy_get_by_index driver_find_device + drm_add_modes_noedid drm_atomic_commit drm_atomic_get_connector_state drm_atomic_get_plane_state + drm_atomic_helper_bridge_propagate_bus_fmt drm_atomic_helper_check drm_atomic_helper_check_plane_state drm_atomic_helper_cleanup_planes @@ -2477,6 +2480,10 @@ __drm_atomic_state_free drm_bridge_chain_mode_set drm_bridge_get_edid + drm_connector_attach_content_protection_property + drm_connector_list_iter_begin + drm_connector_list_iter_end + drm_connector_list_iter_next 
drm_connector_list_update drm_crtc_cleanup drm_crtc_enable_color_mgmt @@ -2522,6 +2529,7 @@ drm_gem_unmap_dma_buf drm_get_format_info drm_get_format_name + drm_hdcp_update_content_protection drm_helper_mode_fill_fb_struct drm_kms_helper_poll_enable drm_kms_helper_poll_fini @@ -2547,6 +2555,7 @@ drm_mode_prune_invalid drm_mode_set_crtcinfo drm_modeset_lock_all + drm_modeset_unlock drm_modeset_unlock_all drm_mode_sort drm_mode_validate_size @@ -2560,6 +2569,7 @@ drm_plane_create_zpos_property drm_prime_get_contiguous_size __drm_printfn_seq_file + drm_property_blob_put drm_property_create drm_property_create_bitmask drm_property_create_bool @@ -2567,6 +2577,8 @@ drm_property_create_object drm_property_create_range drm_property_destroy + drm_property_lookup_blob + drm_property_replace_blob __drm_puts_seq_file drm_rect_calc_hscale drm_self_refresh_helper_cleanup @@ -2608,7 +2620,6 @@ sdhci_cqe_irq sdhci_dumpregs sdhci_enable_clk - sdhci_execute_tuning sdhci_pltfm_unregister sdhci_set_power_and_bus_voltage sdhci_set_uhs_signaling @@ -2620,6 +2631,7 @@ sdhci_adma_write_desc sdhci_remove_host sdhci_request + sdhci_reset_tuning # required by sg.ko blk_get_request From c83ab50b6e835f5f63100153229944a10ea90763 Mon Sep 17 00:00:00 2001 From: Ye Bin Date: Thu, 14 Apr 2022 10:52:23 +0800 Subject: [PATCH 167/186] BACKPORT: ext4: fix use-after-free in ext4_rename_dir_prepare commit 0be698ecbe4471fcad80e81ec6a05001421041b3 upstream. We got issue as follows: EXT4-fs (loop0): mounted filesystem without journal. 
Opts: ,errors=continue ext4_get_first_dir_block: bh->b_data=0xffff88810bee6000 len=34478 ext4_get_first_dir_block: *parent_de=0xffff88810beee6ae bh->b_data=0xffff88810bee6000 ext4_rename_dir_prepare: [1] parent_de=0xffff88810beee6ae ================================================================== BUG: KASAN: use-after-free in ext4_rename_dir_prepare+0x152/0x220 Read of size 4 at addr ffff88810beee6ae by task rep/1895 CPU: 13 PID: 1895 Comm: rep Not tainted 5.10.0+ #241 Call Trace: dump_stack+0xbe/0xf9 print_address_description.constprop.0+0x1e/0x220 kasan_report.cold+0x37/0x7f ext4_rename_dir_prepare+0x152/0x220 ext4_rename+0xf44/0x1ad0 ext4_rename2+0x11c/0x170 vfs_rename+0xa84/0x1440 do_renameat2+0x683/0x8f0 __x64_sys_renameat+0x53/0x60 do_syscall_64+0x33/0x40 entry_SYSCALL_64_after_hwframe+0x44/0xa9 RIP: 0033:0x7f45a6fc41c9 RSP: 002b:00007ffc5a470218 EFLAGS: 00000246 ORIG_RAX: 0000000000000108 RAX: ffffffffffffffda RBX: 0000000000000000 RCX: 00007f45a6fc41c9 RDX: 0000000000000005 RSI: 0000000020000180 RDI: 0000000000000005 RBP: 00007ffc5a470240 R08: 00007ffc5a470160 R09: 0000000020000080 R10: 00000000200001c0 R11: 0000000000000246 R12: 0000000000400bb0 R13: 00007ffc5a470320 R14: 0000000000000000 R15: 0000000000000000 The buggy address belongs to the page: page:00000000440015ce refcount:0 mapcount:0 mapping:0000000000000000 index:0x1 pfn:0x10beee flags: 0x200000000000000() raw: 0200000000000000 ffffea00043ff4c8 ffffea0004325608 0000000000000000 raw: 0000000000000001 0000000000000000 00000000ffffffff 0000000000000000 page dumped because: kasan: bad access detected Memory state around the buggy address: ffff88810beee580: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ffff88810beee600: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff >ffff88810beee680: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ^ ffff88810beee700: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ffff88810beee780: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff 
================================================================== Disabling lock debugging due to kernel taint ext4_rename_dir_prepare: [2] parent_de->inode=3537895424 ext4_rename_dir_prepare: [3] dir=0xffff888124170140 ext4_rename_dir_prepare: [4] ino=2 ext4_rename_dir_prepare: ent->dir->i_ino=2 parent=-757071872 The reason is that the 'rec_len' of the first directory entry is 34478, so we end up with an illegal parent entry. Currently, we do not check the directory entries after reading the directory block in 'ext4_get_first_dir_block'. To solve this issue, check the directory entries in 'ext4_get_first_dir_block'. [ Trigger an ext4_error() instead of just warning if the directory is missing a '.' or '..' entry. Also make sure we return an error code if the file system is corrupted. -TYT ] Signed-off-by: Ye Bin Reviewed-by: Jan Kara Link: https://lore.kernel.org/r/20220414025223.4113128-1-yebin10@huawei.com Signed-off-by: Theodore Ts'o Cc: stable@kernel.org Signed-off-by: Greg Kroah-Hartman [ta: Adapt patch to cope with the android specific changes introduced in commit 705a3e5b1852 ("ANDROID: ext4: Handle casefolding with encryption"). Pass zero value for lblk when calling ext4_check_dir_entry().]
Cc: Daniel Rosenberg Reported-and-tested-by: syzbot+a07b88e6427ec1c97aa5@syzkaller.appspotmail.com Signed-off-by: Tudor Ambarus Change-Id: I9d4218ffa0ddae2aa75aa4755221ef7f856b04e9 --- fs/ext4/namei.c | 30 +++++++++++++++++++++++++++--- 1 file changed, 27 insertions(+), 3 deletions(-) diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index 6b27be96b6e5..3a71f883e73f 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c @@ -3647,6 +3647,9 @@ static struct buffer_head *ext4_get_first_dir_block(handle_t *handle, struct buffer_head *bh; if (!ext4_has_inline_data(inode)) { + struct ext4_dir_entry_2 *de; + unsigned int offset; + /* The first directory block must not be a hole, so * treat it as DIRENT_HTREE */ @@ -3655,9 +3658,30 @@ static struct buffer_head *ext4_get_first_dir_block(handle_t *handle, *retval = PTR_ERR(bh); return NULL; } - *parent_de = ext4_next_entry( - (struct ext4_dir_entry_2 *)bh->b_data, - inode->i_sb->s_blocksize); + + de = (struct ext4_dir_entry_2 *) bh->b_data; + if (ext4_check_dir_entry(inode, NULL, de, bh, bh->b_data, + bh->b_size, 0, 0) || + le32_to_cpu(de->inode) != inode->i_ino || + strcmp(".", de->name)) { + EXT4_ERROR_INODE(inode, "directory missing '.'"); + brelse(bh); + *retval = -EFSCORRUPTED; + return NULL; + } + offset = ext4_rec_len_from_disk(de->rec_len, + inode->i_sb->s_blocksize); + de = ext4_next_entry(de, inode->i_sb->s_blocksize); + if (ext4_check_dir_entry(inode, NULL, de, bh, bh->b_data, + bh->b_size, 0, offset) || + le32_to_cpu(de->inode) == 0 || strcmp("..", de->name)) { + EXT4_ERROR_INODE(inode, "directory missing '..'"); + brelse(bh); + *retval = -EFSCORRUPTED; + return NULL; + } + *parent_de = de; + return bh; } From 7d1d982f57843d0b494f903f90be7cb75b572261 Mon Sep 17 00:00:00 2001 From: Luca Stefani Date: Thu, 22 Dec 2022 14:10:49 +0100 Subject: [PATCH 168/186] UPSTREAM: pstore: Properly assign mem_type property If mem-type is specified in the device tree it would end up overriding the record_size field instead of populating 
mem_type. As record_size is currently parsed after the improper assignment with default size 0 it continued to work as expected regardless of the value found in the device tree. Simply changing the target field of the struct is enough to get mem-type working as expected. Fixes: 9d843e8fafc7 ("pstore: Add mem_type property DT parsing support") Cc: stable@vger.kernel.org Signed-off-by: Luca Stefani Signed-off-by: Kees Cook Link: https://lore.kernel.org/r/20221222131049.286288-1-luca@osomprivacy.com Fixes: 1c4893edfe02 ("FROMGIT: pstore: Add mem_type property DT parsing support") Change-Id: Iea62cd7b26effc158500f097474d4df316285ac4 (cherry picked from commit beca3e311a49cd3c55a056096531737d7afa4361 ) Signed-off-by: Luca Stefani --- fs/pstore/ram.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/pstore/ram.c b/fs/pstore/ram.c index 7183fe4b5f72..df2950500334 100644 --- a/fs/pstore/ram.c +++ b/fs/pstore/ram.c @@ -679,7 +679,7 @@ static int ramoops_parse_dt(struct platform_device *pdev, field = value; \ } - parse_u32("mem-type", pdata->record_size, pdata->mem_type); + parse_u32("mem-type", pdata->mem_type, pdata->mem_type); parse_u32("record-size", pdata->record_size, 0); parse_u32("console-size", pdata->console_size, 0); parse_u32("ftrace-size", pdata->ftrace_size, 0); From 0761373d2f04c6a7a6c4ffe0d7eb12e9666ce239 Mon Sep 17 00:00:00 2001 From: Thinh Nguyen Date: Mon, 29 Nov 2021 18:53:09 -0800 Subject: [PATCH 169/186] UPSTREAM: usb: dwc3: gadget: Skip checking Update Transfer status If we're not setting CMDACT (from "No Response" Update Transfer command), then there's no point in checking for the command status. So skip it. This can reduce a register read delay and improve performance. 
Signed-off-by: Thinh Nguyen Link: https://lore.kernel.org/r/3dc31cf11581ae3ee82d9202dda3fc17d897d786.1638240306.git.Thinh.Nguyen@synopsys.com Signed-off-by: Greg Kroah-Hartman (cherry picked from commit bc27117c7cdd4b57d0f079620b483f3b229074bd) Bug: 263189538 Change-Id: Ia06408ffeeb561683662f00938bd9ab2f866eb89 Signed-off-by: Krishna Kurapati --- drivers/usb/dwc3/gadget.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c index b222ffd8f8ef..89c8c4202aac 100644 --- a/drivers/usb/dwc3/gadget.c +++ b/drivers/usb/dwc3/gadget.c @@ -357,6 +357,12 @@ int dwc3_send_gadget_ep_cmd(struct dwc3_ep *dep, unsigned int cmd, cmd |= DWC3_DEPCMD_CMDACT; dwc3_writel(dep->regs, DWC3_DEPCMD, cmd); + + if (!(cmd & DWC3_DEPCMD_CMDACT)) { + ret = 0; + goto skip_status; + } + do { reg = dwc3_readl(dep->regs, DWC3_DEPCMD); if (!(reg & DWC3_DEPCMD_CMDACT)) { @@ -398,6 +404,7 @@ int dwc3_send_gadget_ep_cmd(struct dwc3_ep *dep, unsigned int cmd, cmd_status = -ETIMEDOUT; } +skip_status: trace_dwc3_gadget_ep_cmd(dep, cmd, params, cmd_status); if (DWC3_DEPCMD_CMD(cmd) == DWC3_DEPCMD_STARTTRANSFER) { From 15b61c98d6dc9fae6f63d68d49b0e97c45edc3f6 Mon Sep 17 00:00:00 2001 From: Thinh Nguyen Date: Mon, 29 Nov 2021 18:53:16 -0800 Subject: [PATCH 170/186] UPSTREAM: usb: dwc3: gadget: Ignore Update Transfer cmd params The controller doesn't check for Update Transfer command parameters DEPCMDPAR{0,1,2}. Writing to these registers is unnecessary. Ignoring this improves performance slightly by removing the register write delay. 
Signed-off-by: Thinh Nguyen Link: https://lore.kernel.org/r/997d9ebf38c6bba920d4ee77bd8c77bf81978a55.1638240306.git.Thinh.Nguyen@synopsys.com Signed-off-by: Greg Kroah-Hartman (cherry picked from commit a02a26eb0aeaa2859a8b2f182bd5b5f7228ee1fe) Bug: 263189538 Change-Id: Ie30380e71dfd7febb71eb0c3c725f336f4d6da9a Signed-off-by: Krishna Kurapati --- drivers/usb/dwc3/gadget.c | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c index 89c8c4202aac..b3e096744d48 100644 --- a/drivers/usb/dwc3/gadget.c +++ b/drivers/usb/dwc3/gadget.c @@ -331,9 +331,17 @@ int dwc3_send_gadget_ep_cmd(struct dwc3_ep *dep, unsigned int cmd, } } - dwc3_writel(dep->regs, DWC3_DEPCMDPAR0, params->param0); - dwc3_writel(dep->regs, DWC3_DEPCMDPAR1, params->param1); - dwc3_writel(dep->regs, DWC3_DEPCMDPAR2, params->param2); + /* + * For some commands such as Update Transfer command, DEPCMDPARn + * registers are reserved. Since the driver often sends Update Transfer + * command, don't write to DEPCMDPARn to avoid register write delays and + * improve performance. + */ + if (DWC3_DEPCMD_CMD(cmd) != DWC3_DEPCMD_UPDATETRANSFER) { + dwc3_writel(dep->regs, DWC3_DEPCMDPAR0, params->param0); + dwc3_writel(dep->regs, DWC3_DEPCMDPAR1, params->param1); + dwc3_writel(dep->regs, DWC3_DEPCMDPAR2, params->param2); + } /* * Synopsys Databook 2.60a states in section 6.3.2.5.6 of that if we're From d099b9a70e5382c5bbf131d71c0383cca0bc8d73 Mon Sep 17 00:00:00 2001 From: Thinh Nguyen Date: Mon, 29 Nov 2021 18:53:22 -0800 Subject: [PATCH 171/186] UPSTREAM: usb: dwc3: gadget: Skip reading GEVNTSIZn The driver knows what it needs to set for GEVNTSIZn, and the controller doesn't modify this register unless there's a hard reset. To save a few microseconds of register read in read-modify-write operation, simply do register write with the expected values. 
This can improve performance when there are many interrupts generated, which the driver needs to check and handle. Signed-off-by: Thinh Nguyen Link: https://lore.kernel.org/r/efddf4ee5821c4bc5ae7ad90d629ec7a0ebcbf9a.1638240306.git.Thinh.Nguyen@synopsys.com Signed-off-by: Greg Kroah-Hartman (cherry picked from commit 76c4c95dac0da03f64d8b59a8834faf8020af8dc) Bug: 263189538 Change-Id: I7dec7b568f12b1529ab1e52a600056be98910f02 Signed-off-by: Krishna Kurapati --- drivers/usb/dwc3/gadget.c | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c index b3e096744d48..4de2f1d3d35f 100644 --- a/drivers/usb/dwc3/gadget.c +++ b/drivers/usb/dwc3/gadget.c @@ -4201,7 +4201,6 @@ static irqreturn_t dwc3_process_event_buf(struct dwc3_event_buffer *evt) struct dwc3 *dwc = evt->dwc; irqreturn_t ret = IRQ_NONE; int left; - u32 reg; left = evt->count; @@ -4232,9 +4231,8 @@ static irqreturn_t dwc3_process_event_buf(struct dwc3_event_buffer *evt) ret = IRQ_HANDLED; /* Unmask interrupt */ - reg = dwc3_readl(dwc->regs, DWC3_GEVNTSIZ(0)); - reg &= ~DWC3_GEVNTSIZ_INTMASK; - dwc3_writel(dwc->regs, DWC3_GEVNTSIZ(0), reg); + dwc3_writel(dwc->regs, DWC3_GEVNTSIZ(0), + DWC3_GEVNTSIZ_SIZE(evt->length)); if (dwc->imod_interval) { dwc3_writel(dwc->regs, DWC3_GEVNTCOUNT(0), DWC3_GEVNTCOUNT_EHB); @@ -4268,7 +4266,6 @@ static irqreturn_t dwc3_check_event_buf(struct dwc3_event_buffer *evt) struct dwc3 *dwc = evt->dwc; u32 amount; u32 count; - u32 reg; if (pm_runtime_suspended(dwc->dev)) { pm_runtime_get(dwc->dev); @@ -4295,9 +4292,8 @@ static irqreturn_t dwc3_check_event_buf(struct dwc3_event_buffer *evt) evt->flags |= DWC3_EVENT_PENDING; /* Mask interrupt */ - reg = dwc3_readl(dwc->regs, DWC3_GEVNTSIZ(0)); - reg |= DWC3_GEVNTSIZ_INTMASK; - dwc3_writel(dwc->regs, DWC3_GEVNTSIZ(0), reg); + dwc3_writel(dwc->regs, DWC3_GEVNTSIZ(0), + DWC3_GEVNTSIZ_INTMASK | DWC3_GEVNTSIZ_SIZE(evt->length)); amount = min(count, evt->length - 
evt->lpos); memcpy(evt->cache + evt->lpos, evt->buf + evt->lpos, amount); From 33b5613a43848bc4fb0d401ba22ce2789136c69a Mon Sep 17 00:00:00 2001 From: Michael Grzeschik Date: Sun, 6 Mar 2022 22:12:50 +0100 Subject: [PATCH 172/186] UPSTREAM: usb: dwc3: gadget: ep_queue simplify isoc start condition To improve reading the code this patch moves the cases to start_isoc or return the function under one common condition check. Reviewed-by: Thinh Nguyen Signed-off-by: Michael Grzeschik Link: https://lore.kernel.org/r/20220306211251.2281335-2-m.grzeschik@pengutronix.de Signed-off-by: Greg Kroah-Hartman (cherry picked from commit 26d27a1080a7d0e8f0c1a56dc50aae336f9525d1) Bug: 263189538 Change-Id: I60ada6edcc6f1a505f2a3d86fc4a6d8b7d22f089 Signed-off-by: Krishna Kurapati --- drivers/usb/dwc3/gadget.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c index 4de2f1d3d35f..f03646b827f1 100644 --- a/drivers/usb/dwc3/gadget.c +++ b/drivers/usb/dwc3/gadget.c @@ -1900,13 +1900,11 @@ static int __dwc3_gadget_ep_queue(struct dwc3_ep *dep, struct dwc3_request *req) * errors which will force us issue EndTransfer command. */ if (usb_endpoint_xfer_isoc(dep->endpoint.desc)) { - if (!(dep->flags & DWC3_EP_PENDING_REQUEST) && - !(dep->flags & DWC3_EP_TRANSFER_STARTED)) - return 0; - - if ((dep->flags & DWC3_EP_PENDING_REQUEST)) { - if (!(dep->flags & DWC3_EP_TRANSFER_STARTED)) + if (!(dep->flags & DWC3_EP_TRANSFER_STARTED)) { + if ((dep->flags & DWC3_EP_PENDING_REQUEST)) return __dwc3_gadget_start_isoc(dep); + + return 0; } } From 802b4745f707458ddc7c0b3cbc0eef1da83d9e2f Mon Sep 17 00:00:00 2001 From: Krishna Kurapati Date: Wed, 21 Dec 2022 21:37:56 +0530 Subject: [PATCH 173/186] UPSTREAM: usb: dwc3: gadget: move cmd_endtransfer to extra function This patch adds the extra function __dwc3_stop_active_transfer to consolidate the same codepath. 
Signed-off-by: Michael Grzeschik Link: https://lore.kernel.org/r/20220306211251.2281335-3-m.grzeschik@pengutronix.de Signed-off-by: Greg Kroah-Hartman (cherry picked from commit e192cc7b52399d1b073f88cd3ba128b74d3a57f1) Bug: 263189538 Change-Id: Icbc4c63e380e2a88973e739b5513528cc5dbde66 Signed-off-by: Krishna Kurapati --- drivers/usb/dwc3/gadget.c | 70 +++++++++++++++++++++------------------ 1 file changed, 37 insertions(+), 33 deletions(-) diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c index f03646b827f1..9bfcfc7ccc38 100644 --- a/drivers/usb/dwc3/gadget.c +++ b/drivers/usb/dwc3/gadget.c @@ -1653,6 +1653,40 @@ static int __dwc3_gadget_get_frame(struct dwc3 *dwc) return DWC3_DSTS_SOFFN(reg); } +/** + * __dwc3_stop_active_transfer - stop the current active transfer + * @dep: isoc endpoint + * @force: set forcerm bit in the command + * @interrupt: command complete interrupt after End Transfer command + * + * When setting force, the ForceRM bit will be set. In that case + * the controller won't update the TRB progress on command + * completion. It also won't clear the HWO bit in the TRB. + * The command will also not complete immediately in that case. + */ +static int __dwc3_stop_active_transfer(struct dwc3_ep *dep, bool force, bool interrupt) +{ + struct dwc3_gadget_ep_cmd_params params; + u32 cmd; + int ret; + + cmd = DWC3_DEPCMD_ENDTRANSFER; + cmd |= force ? DWC3_DEPCMD_HIPRI_FORCERM : 0; + cmd |= interrupt ? 
DWC3_DEPCMD_CMDIOC : 0; + cmd |= DWC3_DEPCMD_PARAM(dep->resource_index); + memset(&params, 0, sizeof(params)); + ret = dwc3_send_gadget_ep_cmd(dep, cmd, &params); + WARN_ON_ONCE(ret); + dep->resource_index = 0; + + if (!interrupt) + dep->flags &= ~DWC3_EP_TRANSFER_STARTED; + else if (!ret) + dep->flags |= DWC3_EP_END_TRANSFER_PENDING; + + return ret; +} + /** * dwc3_gadget_start_isoc_quirk - workaround invalid frame number * @dep: isoc endpoint @@ -1828,21 +1862,8 @@ static int __dwc3_gadget_start_isoc(struct dwc3_ep *dep) * status, issue END_TRANSFER command and retry on the next XferNotReady * event. */ - if (ret == -EAGAIN) { - struct dwc3_gadget_ep_cmd_params params; - u32 cmd; - - cmd = DWC3_DEPCMD_ENDTRANSFER | - DWC3_DEPCMD_CMDIOC | - DWC3_DEPCMD_PARAM(dep->resource_index); - - dep->resource_index = 0; - memset(&params, 0, sizeof(params)); - - ret = dwc3_send_gadget_ep_cmd(dep, cmd, &params); - if (!ret) - dep->flags |= DWC3_EP_END_TRANSFER_PENDING; - } + if (ret == -EAGAIN) + ret = __dwc3_stop_active_transfer(dep, false, true); return ret; } @@ -3648,10 +3669,6 @@ static void dwc3_reset_gadget(struct dwc3 *dwc) void dwc3_stop_active_transfer(struct dwc3_ep *dep, bool force, bool interrupt) { - struct dwc3_gadget_ep_cmd_params params; - u32 cmd; - int ret; - if (!(dep->flags & DWC3_EP_TRANSFER_STARTED) || (dep->flags & DWC3_EP_DELAY_STOP) || (dep->flags & DWC3_EP_END_TRANSFER_PENDING)) @@ -3683,20 +3700,7 @@ void dwc3_stop_active_transfer(struct dwc3_ep *dep, bool force, * * This mode is NOT available on the DWC_usb31 IP. */ - - cmd = DWC3_DEPCMD_ENDTRANSFER; - cmd |= force ? DWC3_DEPCMD_HIPRI_FORCERM : 0; - cmd |= interrupt ? 
DWC3_DEPCMD_CMDIOC : 0; - cmd |= DWC3_DEPCMD_PARAM(dep->resource_index); - memset(&params, 0, sizeof(params)); - ret = dwc3_send_gadget_ep_cmd(dep, cmd, &params); - WARN_ON_ONCE(ret); - dep->resource_index = 0; - - if (!interrupt) - dep->flags &= ~DWC3_EP_TRANSFER_STARTED; - else - dep->flags |= DWC3_EP_END_TRANSFER_PENDING; + __dwc3_stop_active_transfer(dep, force, interrupt); } EXPORT_SYMBOL_GPL(dwc3_stop_active_transfer); From a79e848e5299ab2a3c86f4e83e2e3946e5208639 Mon Sep 17 00:00:00 2001 From: Krishna Kurapati Date: Wed, 21 Dec 2022 21:51:36 +0530 Subject: [PATCH 174/186] UPSTREAM: usb: dwc3: ep0: Don't prepare beyond Setup stage Since we can't guarantee that the host won't send new Setup packet before going through the device-initiated disconnect, don't prepare beyond the Setup stage and keep the device in EP0_SETUP_PHASE. This ensures that the device-initated disconnect sequence can go through gracefully. Note that the controller won't service the End Transfer command if it can't DMA out the Setup packet. 
Signed-off-by: Thinh Nguyen Link: https://lore.kernel.org/r/6bacec56ecabb2c6e49a09cedfcac281fdc97de0.1650593829.git.Thinh.Nguyen@synopsys.com Signed-off-by: Greg Kroah-Hartman (cherry picked from commit c96683798e272366866a5c0ce3073c0b5a256db7) Bug: 263189538 Change-Id: Ice2d6837acebd312e732527504f4013f33a8fe6d Signed-off-by: Krishna Kurapati --- drivers/usb/dwc3/ep0.c | 2 +- drivers/usb/dwc3/gadget.c | 30 +++++++++++++++++------------- 2 files changed, 18 insertions(+), 14 deletions(-) diff --git a/drivers/usb/dwc3/ep0.c b/drivers/usb/dwc3/ep0.c index 9b6ebc3c902d..5af4d3794e88 100644 --- a/drivers/usb/dwc3/ep0.c +++ b/drivers/usb/dwc3/ep0.c @@ -813,7 +813,7 @@ static void dwc3_ep0_inspect_setup(struct dwc3 *dwc, int ret = -EINVAL; u32 len; - if (!dwc->gadget_driver) + if (!dwc->gadget_driver || !dwc->connected) goto out; trace_dwc3_ctrl_req(ctrl); diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c index 9bfcfc7ccc38..c6398dfd1627 100644 --- a/drivers/usb/dwc3/gadget.c +++ b/drivers/usb/dwc3/gadget.c @@ -2493,6 +2493,23 @@ static int dwc3_gadget_soft_disconnect(struct dwc3 *dwc) spin_lock_irqsave(&dwc->lock, flags); dwc->connected = false; + /* + * Per databook, when we want to stop the gadget, if a control transfer + * is still in process, complete it and get the core into setup phase. + */ + if (dwc->ep0state != EP0_SETUP_PHASE) { + int ret; + + reinit_completion(&dwc->ep0_in_setup); + + spin_unlock_irqrestore(&dwc->lock, flags); + ret = wait_for_completion_timeout(&dwc->ep0_in_setup, + msecs_to_jiffies(DWC3_PULL_UP_TIMEOUT)); + spin_lock_irqsave(&dwc->lock, flags); + if (ret == 0) + dev_warn(dwc->dev, "timed out waiting for SETUP phase\n"); + } + /* * In the Synopsys DesignWare Cores USB3 Databook Rev. 
3.30a * Section 4.1.8 Table 4-7, it states that for a device-initiated @@ -2524,19 +2541,6 @@ static int dwc3_gadget_pullup(struct usb_gadget *g, int is_on) vdwc->softconnect = is_on; - /* - * Per databook, when we want to stop the gadget, if a control transfer - * is still in process, complete it and get the core into setup phase. - */ - if (!is_on && dwc->ep0state != EP0_SETUP_PHASE) { - reinit_completion(&dwc->ep0_in_setup); - - ret = wait_for_completion_timeout(&dwc->ep0_in_setup, - msecs_to_jiffies(DWC3_PULL_UP_TIMEOUT)); - if (ret == 0) - dev_warn(dwc->dev, "timed out waiting for SETUP phase\n"); - } - /* * Avoid issuing a runtime resume if the device is already in the * suspended state during gadget disconnect. DWC3 gadget was already From 4678fabb6e69f56d1616c4e3b77aedbf87efd641 Mon Sep 17 00:00:00 2001 From: Thinh Nguyen Date: Thu, 21 Apr 2022 19:22:57 -0700 Subject: [PATCH 175/186] UPSTREAM: usb: dwc3: gadget: Only End Transfer for ep0 data phase The driver shouldn't be able to issue End Transfer to the control endpoint at anytime. Typically we should only do so in error cases such as invalid/unexpected direction of Data Phase as described in the control transfer flow of the programming guide. It _may_ end started data phase during controller deinitialization from soft disconnect or driver removal. However, that should not happen because the driver should be maintained in EP0_SETUP_PHASE during driver tear-down. On soft-connect, the controller should be reset from a soft-reset and there should be no issue starting the control endpoint. 
Signed-off-by: Thinh Nguyen Link: https://lore.kernel.org/r/3c6643678863a26702e4115e9e19d7d94a30d49c.1650593829.git.Thinh.Nguyen@synopsys.com Signed-off-by: Greg Kroah-Hartman (cherry picked from commit ace17b6ee4f92ab0375d12a1b42494f8590a96b6) Bug: 263189538 Change-Id: Ifd58ee4b1f213db72d6bcc7137c96ee2a399e21c Signed-off-by: Krishna Kurapati --- drivers/usb/dwc3/gadget.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c index c6398dfd1627..22840e1373b9 100644 --- a/drivers/usb/dwc3/gadget.c +++ b/drivers/usb/dwc3/gadget.c @@ -3673,6 +3673,17 @@ static void dwc3_reset_gadget(struct dwc3 *dwc) void dwc3_stop_active_transfer(struct dwc3_ep *dep, bool force, bool interrupt) { + struct dwc3 *dwc = dep->dwc; + + /* + * Only issue End Transfer command to the control endpoint of a started + * Data Phase. Typically we should only do so in error cases such as + * invalid/unexpected direction as described in the control transfer + * flow of the programming guide. + */ + if (dep->number <= 1 && dwc->ep0state != EP0_DATA_PHASE) + return; + if (!(dep->flags & DWC3_EP_TRANSFER_STARTED) || (dep->flags & DWC3_EP_DELAY_STOP) || (dep->flags & DWC3_EP_END_TRANSFER_PENDING)) From 7b90f7b05e1991454bd7c5aac259d0d12807e71c Mon Sep 17 00:00:00 2001 From: Thinh Nguyen Date: Thu, 21 Apr 2022 19:23:03 -0700 Subject: [PATCH 176/186] UPSTREAM: usb: dwc3: gadget: Delay issuing End Transfer If the controller hasn't DMA'ed the Setup data from its fifo, it won't process the End Transfer command. Polling for the command completion may block the driver from servicing the Setup phase and cause a timeout. Previously we only check and delay issuing End Transfer in the case of endpoint dequeue. Let's do that for all End Transfer scenarios. 
Signed-off-by: Thinh Nguyen Link: https://lore.kernel.org/r/2fcf3b5d90068d549589a57a27a79f76c6769b04.1650593829.git.Thinh.Nguyen@synopsys.com Signed-off-by: Greg Kroah-Hartman (cherry picked from commit f66eef8fb8989a7193cafc3870f7c7b2b97f16cb) Bug: 263189538 Change-Id: I58dc40ff7defdd03f52da900ee0b179ffe55d2cc Signed-off-by: Krishna Kurapati --- drivers/usb/dwc3/gadget.c | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c index 22840e1373b9..83b4d3cd0f0c 100644 --- a/drivers/usb/dwc3/gadget.c +++ b/drivers/usb/dwc3/gadget.c @@ -2045,16 +2045,6 @@ static int dwc3_gadget_ep_dequeue(struct usb_ep *ep, if (r == req) { struct dwc3_request *t; - /* - * If a Setup packet is received but yet to DMA out, the controller will - * not process the End Transfer command of any endpoint. Polling of its - * DEPCMD.CmdAct may block setting up TRB for Setup packet, causing a - * timeout. Delay issuing the End Transfer command until the Setup TRB is - * prepared. - */ - if (dwc->ep0state != EP0_SETUP_PHASE && !dwc->delayed_status) - dep->flags |= DWC3_EP_DELAY_STOP; - /* wait until it is processed */ dwc3_stop_active_transfer(dep, true, true); @@ -3689,6 +3679,18 @@ void dwc3_stop_active_transfer(struct dwc3_ep *dep, bool force, (dep->flags & DWC3_EP_END_TRANSFER_PENDING)) return; + /* + * If a Setup packet is received but yet to DMA out, the controller will + * not process the End Transfer command of any endpoint. Polling of its + * DEPCMD.CmdAct may block setting up TRB for Setup packet, causing a + * timeout. Delay issuing the End Transfer command until the Setup TRB is + * prepared. + */ + if (dwc->ep0state != EP0_SETUP_PHASE && !dwc->delayed_status) { + dep->flags |= DWC3_EP_DELAY_STOP; + return; + } + /* * NOTICE: We are violating what the Databook says about the * EndTransfer command. 
Ideally we would _always_ wait for the From 767a360826692da89ef2335ae62b81ee4a007416 Mon Sep 17 00:00:00 2001 From: Wesley Cheng Date: Mon, 23 May 2022 14:39:48 -0700 Subject: [PATCH 177/186] UPSTREAM: usb: dwc3: gadget: Fix IN endpoint max packet size allocation The current logic to assign the max packet limit for IN endpoints attempts to take the default HW value and apply the optimal endpoint settings based on it. However, if the default value reports a TxFIFO size large enough for only one max packet, it will divide the value and assign a smaller ep max packet limit. For example, if the default TxFIFO size fits 1024B, current logic will assign 1024/3 = 341B to ep max packet size. If function drivers attempt to request for an endpoint with a wMaxPacketSize of 1024B (SS BULK max packet size) then it will fail, as the gadget is unable to find an endpoint which can fit the requested size. Functionally, if the TxFIFO has enough space to fit one max packet, it will be sufficient, at least when initializing the endpoints. 
Fixes: d94ea5319813 ("usb: dwc3: gadget: Properly set maxpacket limit") Cc: stable Signed-off-by: Wesley Cheng Link: https://lore.kernel.org/r/20220523213948.22142-1-quic_wcheng@quicinc.com Signed-off-by: Greg Kroah-Hartman (cherry picked from commit 9c1e916960c1192e746bf615e4dae25423473a64) Bug: 263189538 Change-Id: I56995b3d5bbfa8dc61a0e4084d6109d90951244d Signed-off-by: Krishna Kurapati --- drivers/usb/dwc3/gadget.c | 26 +++++++++++++++----------- 1 file changed, 15 insertions(+), 11 deletions(-) diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c index 83b4d3cd0f0c..29ed10733254 100644 --- a/drivers/usb/dwc3/gadget.c +++ b/drivers/usb/dwc3/gadget.c @@ -2965,6 +2965,7 @@ static int dwc3_gadget_init_in_endpoint(struct dwc3_ep *dep) struct dwc3 *dwc = dep->dwc; u32 mdwidth; int size; + int maxpacket; mdwidth = dwc3_mdwidth(dwc); @@ -2977,21 +2978,24 @@ static int dwc3_gadget_init_in_endpoint(struct dwc3_ep *dep) else size = DWC31_GTXFIFOSIZ_TXFDEP(size); - /* FIFO Depth is in MDWDITH bytes. Multiply */ - size *= mdwidth; - /* - * To meet performance requirement, a minimum TxFIFO size of 3x - * MaxPacketSize is recommended for endpoints that support burst and a - * minimum TxFIFO size of 2x MaxPacketSize for endpoints that don't - * support burst. Use those numbers and we can calculate the max packet - * limit as below. 
+ * maxpacket size is determined as part of the following, after assuming + * a mult value of one maxpacket: + * DWC3 revision 280A and prior: + * fifo_size = mult * (max_packet / mdwidth) + 1; + * maxpacket = mdwidth * (fifo_size - 1); + * + * DWC3 revision 290A and onwards: + * fifo_size = mult * ((max_packet + mdwidth)/mdwidth + 1) + 1 + * maxpacket = mdwidth * ((fifo_size - 1) - 1) - mdwidth; */ - if (dwc->maximum_speed >= USB_SPEED_SUPER) - size /= 3; + if (DWC3_VER_IS_PRIOR(DWC3, 290A)) + maxpacket = mdwidth * (size - 1); else - size /= 2; + maxpacket = mdwidth * ((size - 1) - 1) - mdwidth; + /* Functionally, space for one max packet is sufficient */ + size = min_t(int, maxpacket, 1024); usb_ep_set_maxpacket_limit(&dep->endpoint, size); dep->endpoint.max_streams = 16; From 75a4f0b5e1f4cb19b69472d465edc5f3a7c5ea73 Mon Sep 17 00:00:00 2001 From: Wesley Cheng Date: Mon, 11 Jul 2022 18:44:03 -0700 Subject: [PATCH 178/186] UPSTREAM: usb: dwc3: ep0: Properly handle setup_packet_pending scenario in data stage During a 3 stage SETUP transfer, if the host sends another SETUP token before completing the status phase, it signifies that the host has aborted the current control transfer. Currently, if a setup_packet_pending is received, there are no subsequent calls to dwc3_ep0_out_start() to fetch the new SETUP packet. This leads to a stall on EP0, as host does not expect another STATUS phase as it has aborted the current transfer. Fix this issue by explicitly stalling and restarting EP0, as well as resetting the trb_enqueue indexes. 
(without this, there is a chance the SETUP TRB is set up on trb_endqueue == 1) Signed-off-by: Wesley Cheng Link: https://lore.kernel.org/r/20220712014403.2977-1-quic_wcheng@quicinc.com Signed-off-by: Greg Kroah-Hartman (cherry picked from commit 5e76ee96be8f7bbf9416a5edddc8c064e7e7c6ac) Bug: 263189538 Change-Id: Ia0cc555ca8cd94048308406d975b3b4f4aa6ca54 Signed-off-by: Krishna Kurapati --- drivers/usb/dwc3/ep0.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/usb/dwc3/ep0.c b/drivers/usb/dwc3/ep0.c index 5af4d3794e88..7f01513df0f2 100644 --- a/drivers/usb/dwc3/ep0.c +++ b/drivers/usb/dwc3/ep0.c @@ -239,6 +239,8 @@ void dwc3_ep0_stall_and_restart(struct dwc3 *dwc) dwc3_gadget_giveback(dep, req, -ECONNRESET); } + dwc->eps[0]->trb_enqueue = 0; + dwc->eps[1]->trb_enqueue = 0; dwc->ep0state = EP0_SETUP_PHASE; dwc3_ep0_out_start(dwc); } @@ -1139,6 +1141,11 @@ static void dwc3_ep0_xfernotready(struct dwc3 *dwc, if (dwc->ep0_next_event != DWC3_EP0_NRDY_STATUS) return; + if (dwc->setup_packet_pending) { + dwc3_ep0_stall_and_restart(dwc); + return; + } + dwc->ep0state = EP0_STATUS_PHASE; if (dwc->delayed_status) { From d53fb78733b6686fde47ef96a064d6b8b8a77bcd Mon Sep 17 00:00:00 2001 From: Michael Grzeschik Date: Wed, 20 Jul 2022 23:35:23 +0200 Subject: [PATCH 179/186] UPSTREAM: usb: dwc3: gadget: conditionally remove requests The functions stop_active_transfers and ep_disable are both calling remove_requests. This functions in both cases will giveback the requests with status ESHUTDOWN, which also represents an physical disconnection. For ep_disable this is not true. This patch adds the status parameter to remove_requests and sets the status to ECONNRESET on ep_disable. 
Signed-off-by: Michael Grzeschik Link: https://lore.kernel.org/r/20220720213523.1055897-1-m.grzeschik@pengutronix.de Signed-off-by: Greg Kroah-Hartman (cherry picked from commit b44c0e7fef51ee7e8ca8c6efbf706f5613787100) Bug: 263189538 Change-Id: I1bd7a42e6f2f99a0ce021ef3c94dc630ae9260df Signed-off-by: Krishna Kurapati --- drivers/usb/dwc3/gadget.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c index 29ed10733254..8611f3c59677 100644 --- a/drivers/usb/dwc3/gadget.c +++ b/drivers/usb/dwc3/gadget.c @@ -966,7 +966,7 @@ out: return 0; } -static void dwc3_remove_requests(struct dwc3 *dwc, struct dwc3_ep *dep) +static void dwc3_remove_requests(struct dwc3 *dwc, struct dwc3_ep *dep, int status) { struct dwc3_request *req; @@ -976,19 +976,19 @@ static void dwc3_remove_requests(struct dwc3 *dwc, struct dwc3_ep *dep) while (!list_empty(&dep->started_list)) { req = next_request(&dep->started_list); - dwc3_gadget_giveback(dep, req, -ESHUTDOWN); + dwc3_gadget_giveback(dep, req, status); } while (!list_empty(&dep->pending_list)) { req = next_request(&dep->pending_list); - dwc3_gadget_giveback(dep, req, -ESHUTDOWN); + dwc3_gadget_giveback(dep, req, status); } while (!list_empty(&dep->cancelled_list)) { req = next_request(&dep->cancelled_list); - dwc3_gadget_giveback(dep, req, -ESHUTDOWN); + dwc3_gadget_giveback(dep, req, status); } } @@ -1023,7 +1023,7 @@ static int __dwc3_gadget_ep_disable(struct dwc3_ep *dep) dep->endpoint.desc = NULL; } - dwc3_remove_requests(dwc, dep); + dwc3_remove_requests(dwc, dep, -ECONNRESET); dep->stream_capable = false; dep->type = 0; @@ -2326,7 +2326,7 @@ static void dwc3_stop_active_transfers(struct dwc3 *dwc) if (!dep) continue; - dwc3_remove_requests(dwc, dep); + dwc3_remove_requests(dwc, dep, -ESHUTDOWN); } } From b44330f50ced30c4bdbfbd5d0bf42ad13948987b Mon Sep 17 00:00:00 2001 From: Udipto Goswami Date: Thu, 15 Dec 2022 10:59:05 +0530 Subject: [PATCH 180/186] 
UPSTREAM: usb: gadget: f_fs: Prevent race during ffs_ep0_queue_wait While performing fast composition switch, there is a possibility that the process of ffs_ep0_write/ffs_ep0_read get into a race condition due to ep0req being freed up from functionfs_unbind. Consider the scenario that the ffs_ep0_write calls the ffs_ep0_queue_wait by taking a lock &ffs->ev.waitq.lock. However, the functionfs_unbind isn't bounded so it can go ahead and mark the ep0req to NULL, and since there is no NULL check in ffs_ep0_queue_wait we will end up in use-after-free. Fix this by making a serialized execution between the two functions using a mutex_lock(ffs->mutex). Fixes: ddf8abd25994 ("USB: f_fs: the FunctionFS driver") Signed-off-by: Udipto Goswami Tested-by: Krishna Kurapati Link: https://lore.kernel.org/r/20221215052906.8993-2-quic_ugoswami@quicinc.com Signed-off-by: Greg Kroah-Hartman Bug: 265837309 (cherry picked from commit 6a19da111057f69214b97c62fb0ac59023970850 https://git.kernel.org/pub/scm/linux/kernel/git/gregkh/usb.git/ usb-linus) Change-Id: I1d71b96709a557caee72d1c84e3ee43969a89c13 Signed-off-by: Prashanth K --- drivers/usb/gadget/function/f_fs.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/usb/gadget/function/f_fs.c b/drivers/usb/gadget/function/f_fs.c index ecf298bc49ed..42a37a803849 100644 --- a/drivers/usb/gadget/function/f_fs.c +++ b/drivers/usb/gadget/function/f_fs.c @@ -279,6 +279,9 @@ static int __ffs_ep0_queue_wait(struct ffs_data *ffs, char *data, size_t len) struct usb_request *req = ffs->ep0req; int ret; + if (!req) + return -EINVAL; + req->zero = len < le16_to_cpu(ffs->ev.setup.wLength); spin_unlock_irq(&ffs->ev.waitq.lock); @@ -1891,10 +1894,12 @@ static void functionfs_unbind(struct ffs_data *ffs) ENTER(); if (!WARN_ON(!ffs->gadget)) { + mutex_lock(&ffs->mutex); usb_ep_free_request(ffs->gadget->ep0, ffs->ep0req); ffs->ep0req = NULL; ffs->gadget = NULL; clear_bit(FFS_FL_BOUND, &ffs->flags); + mutex_unlock(&ffs->mutex); ffs_data_put(ffs); } 
} From 56583c9472979ca4388313085f921f79047b51d8 Mon Sep 17 00:00:00 2001 From: Udipto Goswami Date: Thu, 15 Dec 2022 10:59:06 +0530 Subject: [PATCH 181/186] UPSTREAM: usb: gadget: f_fs: Ensure ep0req is dequeued before free_request As per the documentation, function usb_ep_free_request guarantees the request will not be queued or no longer be re-queued (or otherwise used). However, with the current implementation it doesn't make sure that the request in ep0 isn't reused. Fix this by dequeuing the ep0req on functionfs_unbind before freeing the request to align with the definition. Fixes: ddf8abd25994 ("USB: f_fs: the FunctionFS driver") Signed-off-by: Udipto Goswami Tested-by: Krishna Kurapati Link: https://lore.kernel.org/r/20221215052906.8993-3-quic_ugoswami@quicinc.com Signed-off-by: Greg Kroah-Hartman Bug: 265837309 (cherry picked from commit ce405d561b020e5a46340eb5146805a625dcacee https://git.kernel.org/pub/scm/linux/kernel/git/gregkh/usb.git/ usb-linus) Change-Id: Idbc5f13cedac59cbc893d229383b01662f4e4a03 Signed-off-by: Prashanth K --- drivers/usb/gadget/function/f_fs.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/usb/gadget/function/f_fs.c b/drivers/usb/gadget/function/f_fs.c index 42a37a803849..f49f8706fbd3 100644 --- a/drivers/usb/gadget/function/f_fs.c +++ b/drivers/usb/gadget/function/f_fs.c @@ -1894,6 +1894,8 @@ static void functionfs_unbind(struct ffs_data *ffs) ENTER(); if (!WARN_ON(!ffs->gadget)) { + /* dequeue before freeing ep0req */ + usb_ep_dequeue(ffs->gadget->ep0, ffs->ep0req); mutex_lock(&ffs->mutex); usb_ep_free_request(ffs->gadget->ep0, ffs->ep0req); ffs->ep0req = NULL; From 3519fc246df0ea4f3f53053b41213591d5490313 Mon Sep 17 00:00:00 2001 From: Peter Collingbourne Date: Fri, 18 Feb 2022 17:29:44 -0800 Subject: [PATCH 182/186] BACKPORT: kasan: split kasan_*enabled() functions into a separate header In an upcoming commit we are going to need to call kasan_hw_tags_enabled() from arch/arm64/include/asm/mte.h. 
This would create a circular dependency between headers if KASAN_GENERIC or KASAN_SW_TAGS is enabled: linux/kasan.h -> linux/pgtable.h -> asm/pgtable.h -> asm/mte.h -> linux/kasan.h. Break the cycle by introducing a new header linux/kasan-enabled.h with the kasan_*enabled() functions that can be included from asm/mte.h. Link: https://linux-review.googlesource.com/id/I5b0d96c6ed0026fc790899e14d42b2fac6ab568e Signed-off-by: Peter Collingbourne Reviewed-by: Andrey Konovalov Link: https://lore.kernel.org/r/20220219012945.894950-1-pcc@google.com Signed-off-by: Will Deacon Change-Id: I984365534f5ed76ed0ff79fce62b09afbbc92982 Bug: 265364138 (cherry picked from commit f9b5e46f4097eb298f68e5b02f70697a90a44739) [Zhenhua: Resolved minor conflicts in include/linux/kasan.h] Change-Id: I631ae7d32ed560dd22917a4691c498e234b4f51d Signed-off-by: Jaewon Kim Signed-off-by: Zhenhua Huang --- include/linux/kasan-enabled.h | 33 +++++++++++++++++++++++++++++++++ include/linux/kasan.h | 24 ++---------------------- 2 files changed, 35 insertions(+), 22 deletions(-) create mode 100644 include/linux/kasan-enabled.h diff --git a/include/linux/kasan-enabled.h b/include/linux/kasan-enabled.h new file mode 100644 index 000000000000..4b6615375022 --- /dev/null +++ b/include/linux/kasan-enabled.h @@ -0,0 +1,33 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _LINUX_KASAN_ENABLED_H +#define _LINUX_KASAN_ENABLED_H + +#ifdef CONFIG_KASAN_HW_TAGS + +DECLARE_STATIC_KEY_FALSE(kasan_flag_enabled); + +static __always_inline bool kasan_enabled(void) +{ + return static_branch_likely(&kasan_flag_enabled); +} + +static inline bool kasan_hw_tags_enabled(void) +{ + return kasan_enabled(); +} + +#else /* CONFIG_KASAN_HW_TAGS */ + +static inline bool kasan_enabled(void) +{ + return IS_ENABLED(CONFIG_KASAN); +} + +static inline bool kasan_hw_tags_enabled(void) +{ + return false; +} + +#endif /* CONFIG_KASAN_HW_TAGS */ + +#endif /* LINUX_KASAN_ENABLED_H */ diff --git a/include/linux/kasan.h 
b/include/linux/kasan.h index b59675cc19b7..99dc14b5ad44 100644 --- a/include/linux/kasan.h +++ b/include/linux/kasan.h @@ -3,6 +3,8 @@ #define _LINUX_KASAN_H #include +#include +#include #include #include @@ -82,33 +84,11 @@ static inline void kasan_disable_current(void) {} #ifdef CONFIG_KASAN_HW_TAGS -DECLARE_STATIC_KEY_FALSE(kasan_flag_enabled); - -static __always_inline bool kasan_enabled(void) -{ - return static_branch_likely(&kasan_flag_enabled); -} - -static inline bool kasan_hw_tags_enabled(void) -{ - return kasan_enabled(); -} - void kasan_alloc_pages(struct page *page, unsigned int order, gfp_t flags); void kasan_free_pages(struct page *page, unsigned int order); #else /* CONFIG_KASAN_HW_TAGS */ -static inline bool kasan_enabled(void) -{ - return IS_ENABLED(CONFIG_KASAN); -} - -static inline bool kasan_hw_tags_enabled(void) -{ - return false; -} - static __always_inline void kasan_alloc_pages(struct page *page, unsigned int order, gfp_t flags) { From ce6cc743caf2cbfbc3f9747cfe2de84f0f527466 Mon Sep 17 00:00:00 2001 From: Joey Gouly Date: Tue, 1 Mar 2022 15:45:18 +0000 Subject: [PATCH 183/186] UPSTREAM: kasan: fix a missing header include of static_keys.h The kasan-enabled.h header relies on static keys, so make sure to include the header to avoid compilation errors (with JUMP_LABEL=n). 
It fixes the following: ./include/linux/kasan-enabled.h:9:1: warning: data definition has no type or storage class 9 | DECLARE_STATIC_KEY_FALSE(kasan_flag_enabled); | ^~~~~~~~~~~~~~~~~~~~~~~~ error: type defaults to 'int' in declaration of 'DECLARE_STATIC_KEY_FALSE' [-Werror=implicit-int] Fixes: f9b5e46f4097eb29 ("kasan: split kasan_*enabled() functions into a separate header") Cc: Peter Collingbourne Cc: Mark Rutland Cc: Catalin Marinas Cc: Will Deacon Acked-by: Andrey Konovalov Signed-off-by: Joey Gouly Link: https://lore.kernel.org/r/20220301154518.19456-1-joey.gouly@arm.com Signed-off-by: Will Deacon Bug: 265364138 (cherry picked from commit d8fd5a1e78db375f2246d43df7833fec07a221cd) Change-Id: Id33a67919113839503630b7364af1bdea3cfcedf Signed-off-by: Jaewon Kim Signed-off-by: Zhenhua Huang --- include/linux/kasan-enabled.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/include/linux/kasan-enabled.h b/include/linux/kasan-enabled.h index 4b6615375022..6f612d69ea0c 100644 --- a/include/linux/kasan-enabled.h +++ b/include/linux/kasan-enabled.h @@ -2,6 +2,8 @@ #ifndef _LINUX_KASAN_ENABLED_H #define _LINUX_KASAN_ENABLED_H +#include + #ifdef CONFIG_KASAN_HW_TAGS DECLARE_STATIC_KEY_FALSE(kasan_flag_enabled); From b10e8ec405d8fd60fd4cc80b9108a758b196a0c2 Mon Sep 17 00:00:00 2001 From: Peter Collingbourne Date: Mon, 9 May 2022 18:20:53 -0700 Subject: [PATCH 184/186] BACKPORT: printk: stop including cache.h from printk.h An inclusion of cache.h in printk.h was added in 2014 in commit c28aa1f0a847 ("printk/cache: mark printk_once test variable __read_mostly") in order to bring in the definition of __read_mostly. The usage of __read_mostly was later removed in commit 3ec25826ae33 ("printk: Tie printk_once / printk_deferred_once into .data.once for reset") which made the inclusion of cache.h unnecessary, so remove it. We have a small amount of code that depended on the inclusion of cache.h from printk.h; fix that code to include the appropriate header. 
This fixes a circular inclusion on arm64 (linux/printk.h -> linux/cache.h -> asm/cache.h -> linux/kasan-enabled.h -> linux/static_key.h -> linux/jump_label.h -> linux/bug.h -> asm/bug.h -> linux/printk.h) that would otherwise be introduced by the next patch. Build tested using {allyesconfig,defconfig} x {arm64,x86_64}. Link: https://linux-review.googlesource.com/id/I8fd51f72c9ef1f2d6afd3b2cbc875aa4792c1fba Link: https://lkml.kernel.org/r/20220427195820.1716975-1-pcc@google.com Signed-off-by: Peter Collingbourne Cc: Alexander Potapenko Cc: Andrey Konovalov Cc: Andrey Ryabinin Cc: Catalin Marinas Cc: David Rientjes Cc: Dmitry Vyukov Cc: Eric W. Biederman Cc: Herbert Xu Cc: Hyeonggon Yoo <42.hyeyoo@gmail.com> Cc: Joonsoo Kim Cc: Kees Cook Cc: Pekka Enberg Cc: Roman Gushchin Cc: Vlastimil Babka Signed-off-by: Andrew Morton Bug: 265364138 (cherry picked from commit 534aa1dc975ac883ad89110534585a96630802a0) [Zhenhua: Resolved minor compile issue in include/linux/debug_locks.h] Change-Id: I46182e781b64561a1ebd5405628a317d4f6cb789 Signed-off-by: Jaewon Kim Signed-off-by: Zhenhua Huang --- arch/arm64/include/asm/mte-kasan.h | 1 + arch/arm64/include/asm/percpu.h | 1 + arch/csky/include/asm/processor.h | 2 +- include/linux/debug_locks.h | 1 + include/linux/jump_label.h | 2 ++ include/linux/printk.h | 1 - kernel/bpf/bpf_lru_list.h | 1 + 7 files changed, 7 insertions(+), 2 deletions(-) diff --git a/arch/arm64/include/asm/mte-kasan.h b/arch/arm64/include/asm/mte-kasan.h index 30ef96cc168e..6537d2deb728 100644 --- a/arch/arm64/include/asm/mte-kasan.h +++ b/arch/arm64/include/asm/mte-kasan.h @@ -6,6 +6,7 @@ #define __ASM_MTE_KASAN_H #include +#include #include #ifndef __ASSEMBLY__ diff --git a/arch/arm64/include/asm/percpu.h b/arch/arm64/include/asm/percpu.h index 8f1661603b78..b9ba19dbdb69 100644 --- a/arch/arm64/include/asm/percpu.h +++ b/arch/arm64/include/asm/percpu.h @@ -10,6 +10,7 @@ #include #include #include +#include static inline void set_my_cpu_offset(unsigned long off) 
{ diff --git a/arch/csky/include/asm/processor.h b/arch/csky/include/asm/processor.h index 4800f6563abb..baf9388d5952 100644 --- a/arch/csky/include/asm/processor.h +++ b/arch/csky/include/asm/processor.h @@ -6,9 +6,9 @@ #include #include +#include #include #include -#include #include #include #include diff --git a/include/linux/debug_locks.h b/include/linux/debug_locks.h index edb5c186b0b7..ad06852cbd5d 100644 --- a/include/linux/debug_locks.h +++ b/include/linux/debug_locks.h @@ -5,6 +5,7 @@ #include #include #include +#include struct task_struct; diff --git a/include/linux/jump_label.h b/include/linux/jump_label.h index e203fdb020c4..a1b067f0c6ad 100644 --- a/include/linux/jump_label.h +++ b/include/linux/jump_label.h @@ -190,6 +190,8 @@ struct module; #ifdef BUILD_FIPS140_KO +#include + static inline int static_key_count(struct static_key *key) { return atomic_read(&key->enabled); diff --git a/include/linux/printk.h b/include/linux/printk.h index f589b8b60806..14d13ecaa8f8 100644 --- a/include/linux/printk.h +++ b/include/linux/printk.h @@ -6,7 +6,6 @@ #include #include #include -#include #include extern const char linux_banner[]; diff --git a/kernel/bpf/bpf_lru_list.h b/kernel/bpf/bpf_lru_list.h index 6b12f06ee18c..4ea227c9c1ad 100644 --- a/kernel/bpf/bpf_lru_list.h +++ b/kernel/bpf/bpf_lru_list.h @@ -4,6 +4,7 @@ #ifndef __BPF_LRU_LIST_H_ #define __BPF_LRU_LIST_H_ +#include #include #include From ca53b8f1b44ac5a5a9eec34e4c88c8c2cba0d49d Mon Sep 17 00:00:00 2001 From: Peter Collingbourne Date: Mon, 9 May 2022 18:20:53 -0700 Subject: [PATCH 185/186] BACKPORT: mm: make minimum slab alignment a runtime property When CONFIG_KASAN_HW_TAGS is enabled we currently increase the minimum slab alignment to 16. This happens even if MTE is not supported in hardware or disabled via kasan=off, which creates an unnecessary memory overhead in those cases. 
Eliminate this overhead by making the minimum slab alignment a runtime property and only aligning to 16 if KASAN is enabled at runtime. On a DragonBoard 845c (non-MTE hardware) with a kernel built with CONFIG_KASAN_HW_TAGS, waiting for quiescence after a full Android boot I see the following Slab measurements in /proc/meminfo (median of 3 reboots): Before: 169020 kB After: 167304 kB [akpm@linux-foundation.org: make slab alignment type `unsigned int' to avoid casting] Link: https://linux-review.googlesource.com/id/I752e725179b43b144153f4b6f584ceb646473ead Link: https://lkml.kernel.org/r/20220427195820.1716975-2-pcc@google.com Signed-off-by: Peter Collingbourne Reviewed-by: Andrey Konovalov Reviewed-by: Hyeonggon Yoo <42.hyeyoo@gmail.com> Tested-by: Hyeonggon Yoo <42.hyeyoo@gmail.com> Acked-by: David Rientjes Reviewed-by: Catalin Marinas Acked-by: Vlastimil Babka Cc: Pekka Enberg Cc: Roman Gushchin Cc: Joonsoo Kim Cc: Herbert Xu Cc: Andrey Ryabinin Cc: Alexander Potapenko Cc: Dmitry Vyukov Cc: Eric W. 
Biederman Cc: Kees Cook Signed-off-by: Andrew Morton Bug: 265364138 (cherry picked from commit d949a8155d139aa890795b802004a196b7f00598) [Zhenhua: fold 587cfd8e66df3515 ("ANDROID: fix alignment of struct shash_desc member") into this change, to keep ABI compatibility] Change-Id: I3749f8de65ef3619724e68a9affb4eefd1ebe737 Signed-off-by: Jaewon Kim Signed-off-by: Zhenhua Huang --- arch/arm64/include/asm/cache.h | 19 +++++++++++++------ include/crypto/hash.h | 2 +- include/linux/slab.h | 12 ++++++++++++ mm/slab.c | 7 +++---- mm/slab_common.c | 3 +-- mm/slob.c | 16 +++++++++++----- 6 files changed, 41 insertions(+), 18 deletions(-) diff --git a/arch/arm64/include/asm/cache.h b/arch/arm64/include/asm/cache.h index a074459f8f2f..7c2181c72116 100644 --- a/arch/arm64/include/asm/cache.h +++ b/arch/arm64/include/asm/cache.h @@ -6,6 +6,7 @@ #define __ASM_CACHE_H #include +#include #define CTR_L1IP_SHIFT 14 #define CTR_L1IP_MASK 3 @@ -49,15 +50,21 @@ */ #define ARCH_DMA_MINALIGN (128) -#ifdef CONFIG_KASAN_SW_TAGS -#define ARCH_SLAB_MINALIGN (1ULL << KASAN_SHADOW_SCALE_SHIFT) -#elif defined(CONFIG_KASAN_HW_TAGS) -#define ARCH_SLAB_MINALIGN MTE_GRANULE_SIZE -#endif - #ifndef __ASSEMBLY__ #include +#include + +#ifdef CONFIG_KASAN_SW_TAGS +#define ARCH_SLAB_MINALIGN (1ULL << KASAN_SHADOW_SCALE_SHIFT) +#elif defined(CONFIG_KASAN_HW_TAGS) +static inline unsigned int arch_slab_minalign(void) +{ + return kasan_hw_tags_enabled() ? 
MTE_GRANULE_SIZE : + __alignof__(unsigned long long); +} +#define arch_slab_minalign() arch_slab_minalign() +#endif #define ICACHEF_ALIASING 0 #define ICACHEF_VPIPT 1 diff --git a/include/crypto/hash.h b/include/crypto/hash.h index b2bc1e46e86a..14d75caa233d 100644 --- a/include/crypto/hash.h +++ b/include/crypto/hash.h @@ -149,7 +149,7 @@ struct ahash_alg { struct shash_desc { struct crypto_shash *tfm; - void *__ctx[] __aligned(ARCH_SLAB_MINALIGN); + void *__ctx[] __aligned(UL(16)); }; #define HASH_MAX_DIGESTSIZE 64 diff --git a/include/linux/slab.h b/include/linux/slab.h index dd6897f62010..36217130c3ae 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h @@ -217,6 +217,18 @@ static inline void __check_heap_object(const void *ptr, unsigned long n, #define ARCH_SLAB_MINALIGN __alignof__(unsigned long long) #endif +/* + * Arches can define this function if they want to decide the minimum slab + * alignment at runtime. The value returned by the function must be a power + * of two and >= ARCH_SLAB_MINALIGN. + */ +#ifndef arch_slab_minalign +static inline unsigned int arch_slab_minalign(void) +{ + return ARCH_SLAB_MINALIGN; +} +#endif + /* * kmalloc and friends return ARCH_KMALLOC_MINALIGN aligned * pointers. 
kmem_cache_alloc and friends return ARCH_SLAB_MINALIGN diff --git a/mm/slab.c b/mm/slab.c index 731b868a65c8..aa4ef18ddfb6 100644 --- a/mm/slab.c +++ b/mm/slab.c @@ -3016,10 +3016,9 @@ static void *cache_alloc_debugcheck_after(struct kmem_cache *cachep, objp += obj_offset(cachep); if (cachep->ctor && cachep->flags & SLAB_POISON) cachep->ctor(objp); - if (ARCH_SLAB_MINALIGN && - ((unsigned long)objp & (ARCH_SLAB_MINALIGN-1))) { - pr_err("0x%px: not aligned to ARCH_SLAB_MINALIGN=%d\n", - objp, (int)ARCH_SLAB_MINALIGN); + if ((unsigned long)objp & (arch_slab_minalign() - 1)) { + pr_err("0x%px: not aligned to arch_slab_minalign()=%u\n", objp, + arch_slab_minalign()); } return objp; } diff --git a/mm/slab_common.c b/mm/slab_common.c index c751b18f7e60..05135ebb6159 100644 --- a/mm/slab_common.c +++ b/mm/slab_common.c @@ -155,8 +155,7 @@ static unsigned int calculate_alignment(slab_flags_t flags, align = max(align, ralign); } - if (align < ARCH_SLAB_MINALIGN) - align = ARCH_SLAB_MINALIGN; + align = max(align, arch_slab_minalign()); return ALIGN(align, sizeof(void *)); } diff --git a/mm/slob.c b/mm/slob.c index 7cc9805c8091..37072a7d2f42 100644 --- a/mm/slob.c +++ b/mm/slob.c @@ -469,9 +469,11 @@ static __always_inline void * __do_kmalloc_node(size_t size, gfp_t gfp, int node, unsigned long caller) { unsigned int *m; - int minalign = max_t(size_t, ARCH_KMALLOC_MINALIGN, ARCH_SLAB_MINALIGN); + unsigned int minalign; void *ret; + minalign = max_t(unsigned int, ARCH_KMALLOC_MINALIGN, + arch_slab_minalign()); gfp &= gfp_allowed_mask; fs_reclaim_acquire(gfp); @@ -485,7 +487,7 @@ __do_kmalloc_node(size_t size, gfp_t gfp, int node, unsigned long caller) * kmalloc()'d objects. 
*/ if (is_power_of_2(size)) - align = max(minalign, (int) size); + align = max_t(unsigned int, minalign, size); if (!size) return ZERO_SIZE_PTR; @@ -547,8 +549,11 @@ void kfree(const void *block) sp = virt_to_page(block); if (PageSlab(sp)) { - int align = max_t(size_t, ARCH_KMALLOC_MINALIGN, ARCH_SLAB_MINALIGN); + unsigned int align = max_t(unsigned int, + ARCH_KMALLOC_MINALIGN, + arch_slab_minalign()); unsigned int *m = (unsigned int *)(block - align); + slob_free(m, *m + align); } else { unsigned int order = compound_order(sp); @@ -564,7 +569,7 @@ EXPORT_SYMBOL(kfree); size_t __ksize(const void *block) { struct page *sp; - int align; + unsigned int align; unsigned int *m; BUG_ON(!block); @@ -575,7 +580,8 @@ size_t __ksize(const void *block) if (unlikely(!PageSlab(sp))) return page_size(sp); - align = max_t(size_t, ARCH_KMALLOC_MINALIGN, ARCH_SLAB_MINALIGN); + align = max_t(unsigned int, ARCH_KMALLOC_MINALIGN, + arch_slab_minalign()); m = (unsigned int *)(block - align); return SLOB_UNITS(*m) * SLOB_UNIT; } From e12e360999d84e5e56952c6c027803f662567d76 Mon Sep 17 00:00:00 2001 From: Lee Jones Date: Wed, 18 Jan 2023 15:49:24 +0000 Subject: [PATCH 186/186] ANDROID: usb: f_accessory: Check buffer size when initialised via composite When communicating with accessory devices via USBFS, the initialisation call-stack looks like: ConfigFS > Gadget ConfigFS > UDC > Gadget ConfigFS > Composite Eventually ending up in composite_dev_prepare() where memory for the data buffer is allocated and initialised. The default size used for the allocation is USB_COMP_EP0_BUFSIZ (4k). When handling bulk transfers, acc_ctrlrequest() needs to be able to handle buffers up to BULK_BUFFER_SIZE (16k). Instead of adding new generic attributes to 'struct usb_request' to track the size of the allocated buffer, we can simply split off the affected thread of execution to travel via a knowledgeable abstracted function acc_ctrlrequest_composite() where we can complete the necessary specific checks. 
Bug: 264029575 Signed-off-by: Lee Jones Change-Id: Ia1280f85499621d3fa57f7262b4a2c80f4be7773 --- drivers/usb/gadget/configfs.c | 4 ++-- drivers/usb/gadget/function/f_accessory.c | 20 ++++++++++++++++++++ 2 files changed, 22 insertions(+), 2 deletions(-) diff --git a/drivers/usb/gadget/configfs.c b/drivers/usb/gadget/configfs.c index 21dd268175c6..b6f86463a26e 100644 --- a/drivers/usb/gadget/configfs.c +++ b/drivers/usb/gadget/configfs.c @@ -16,7 +16,7 @@ #include #ifdef CONFIG_USB_CONFIGFS_F_ACC -extern int acc_ctrlrequest(struct usb_composite_dev *cdev, +extern int acc_ctrlrequest_composite(struct usb_composite_dev *cdev, const struct usb_ctrlrequest *ctrl); void acc_disconnect(void); #endif @@ -1572,7 +1572,7 @@ static int android_setup(struct usb_gadget *gadget, #ifdef CONFIG_USB_CONFIGFS_F_ACC if (value < 0) - value = acc_ctrlrequest(cdev, c); + value = acc_ctrlrequest_composite(cdev, c); #endif if (value < 0) diff --git a/drivers/usb/gadget/function/f_accessory.c b/drivers/usb/gadget/function/f_accessory.c index 3510f6d39f0c..7d35d6c58e47 100644 --- a/drivers/usb/gadget/function/f_accessory.c +++ b/drivers/usb/gadget/function/f_accessory.c @@ -1085,6 +1085,26 @@ err: } EXPORT_SYMBOL_GPL(acc_ctrlrequest); +int acc_ctrlrequest_composite(struct usb_composite_dev *cdev, + const struct usb_ctrlrequest *ctrl) +{ + u16 w_length = le16_to_cpu(ctrl->wLength); + + if (w_length > USB_COMP_EP0_BUFSIZ) { + if (ctrl->bRequestType & USB_DIR_IN) { + /* Cast away the const, we are going to overwrite on purpose. */ + __le16 *temp = (__le16 *)&ctrl->wLength; + + *temp = cpu_to_le16(USB_COMP_EP0_BUFSIZ); + w_length = USB_COMP_EP0_BUFSIZ; + } else { + return -EINVAL; + } + } + return acc_ctrlrequest(cdev, ctrl); +} +EXPORT_SYMBOL_GPL(acc_ctrlrequest_composite); + static int __acc_function_bind(struct usb_configuration *c, struct usb_function *f, bool configfs)