From 5b2ed6e5382a313b0856ba676595ba35067219bc Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Wed, 23 Nov 2016 17:43:17 -0800 Subject: [PATCH 001/199] ARC: Don't use "+l" inline asm constraint commit 3c7c7a2fc8811bc7097479f69acf2527693d7562 upstream. Apparenty this is coming in the way of gcc fix which inhibits the usage of LP_COUNT as a gpr. Signed-off-by: Vineet Gupta Signed-off-by: Greg Kroah-Hartman --- arch/arc/include/asm/delay.h | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/arch/arc/include/asm/delay.h b/arch/arc/include/asm/delay.h index 08e7e2a16ac1..a36e8601114d 100644 --- a/arch/arc/include/asm/delay.h +++ b/arch/arc/include/asm/delay.h @@ -22,10 +22,11 @@ static inline void __delay(unsigned long loops) { __asm__ __volatile__( - " lp 1f \n" - " nop \n" - "1: \n" - : "+l"(loops)); + " mov lp_count, %0 \n" + " lp 1f \n" + " nop \n" + "1: \n" + : : "r"(loops)); } extern void __bad_udelay(void); From 61ab6247366349aca80330db1c708c798b1c9774 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Wed, 30 Nov 2016 15:54:08 -0800 Subject: [PATCH 002/199] zram: fix unbalanced idr management at hot removal commit 529e71e16403830ae0d737a66c55c5f360f3576b upstream. The zram hot removal code calls idr_remove() even when zram_remove() returns an error (typically -EBUSY). This results in a leftover at the device release, eventually leading to a crash when the module is reloaded. As described in the bug report below, the following procedure would cause an Oops with zram: - provision three zram devices via modprobe zram num_devices=3 - configure a size for each device + echo "1G" > /sys/block/$zram_name/disksize - mkfs and mount zram0 only - attempt to hot remove all three devices + echo 2 > /sys/class/zram-control/hot_remove + echo 1 > /sys/class/zram-control/hot_remove + echo 0 > /sys/class/zram-control/hot_remove - zram0 removal fails with EBUSY, as expected - unmount zram0 - try zram0 hot remove again + echo 0 > /sys/class/zram-control/hot_remove - fails with ENODEV (unexpected) - unload zram kernel module + completes successfully - zram0 device node still exists - attempt to mount /dev/zram0 + mount command is killed + following BUG is encountered BUG: unable to handle kernel paging request at ffffffffa0002ba0 IP: get_disk+0x16/0x50 Oops: 0000 [#1] SMP CPU: 0 PID: 252 Comm: mount Not tainted 4.9.0-rc6 #176 Call Trace: exact_lock+0xc/0x20 kobj_lookup+0xdc/0x160 get_gendisk+0x2f/0x110 __blkdev_get+0x10c/0x3c0 blkdev_get+0x19d/0x2e0 blkdev_open+0x56/0x70 do_dentry_open.isra.19+0x1ff/0x310 vfs_open+0x43/0x60 path_openat+0x2c9/0xf30 do_filp_open+0x79/0xd0 do_sys_open+0x114/0x1e0 SyS_open+0x19/0x20 entry_SYSCALL_64_fastpath+0x13/0x94 This patch adds the proper error check in hot_remove_store() not to call idr_remove() unconditionally. Fixes: 17ec4cd98578 ("zram: don't call idr_remove() from zram_remove()") Bugzilla: https://bugzilla.opensuse.org/show_bug.cgi?id=1010970 Link: http://lkml.kernel.org/r/20161121132140.12683-1-tiwai@suse.de Signed-off-by: Takashi Iwai Reviewed-by: David Disseldorp Reported-by: David Disseldorp Tested-by: David Disseldorp Acked-by: Minchan Kim Acked-by: Sergey Senozhatsky Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- drivers/block/zram/zram_drv.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c index 370c2f76016d..1770c455dfdd 100644 --- a/drivers/block/zram/zram_drv.c +++ b/drivers/block/zram/zram_drv.c @@ -1368,7 +1368,8 @@ static ssize_t hot_remove_store(struct class *class, zram = idr_find(&zram_index_idr, dev_id); if (zram) { ret = zram_remove(zram); - idr_remove(&zram_index_idr, dev_id); + if (!ret) + idr_remove(&zram_index_idr, dev_id); } else { ret = -ENODEV; } From 995761627d97cc374f14203589b3a6385019bf0b Mon Sep 17 00:00:00 2001 From: Dmitry Vyukov Date: Wed, 30 Nov 2016 15:54:13 -0800 Subject: [PATCH 003/199] kasan: update kasan_global for gcc 7 commit 045d599a286bc01daa3510d59272440a17b23c2e upstream. kasan_global struct is part of compiler/runtime ABI. gcc revision 241983 has added a new field to kasan_global struct. Update kernel definition of kasan_global struct to include the new field. Without this patch KASAN is broken with gcc 7. Link: http://lkml.kernel.org/r/1479219743-28682-1-git-send-email-dvyukov@google.com Signed-off-by: Dmitry Vyukov Acked-by: Andrey Ryabinin Cc: Alexander Potapenko Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- include/linux/compiler-gcc.h | 4 +++- mm/kasan/kasan.h | 3 +++ 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h index eeae401a2412..287e698c28de 100644 --- a/include/linux/compiler-gcc.h +++ b/include/linux/compiler-gcc.h @@ -251,7 +251,9 @@ #endif #endif /* CONFIG_ARCH_USE_BUILTIN_BSWAP */ -#if GCC_VERSION >= 50000 +#if GCC_VERSION >= 70000 +#define KASAN_ABI_VERSION 5 +#elif GCC_VERSION >= 50000 #define KASAN_ABI_VERSION 4 #elif GCC_VERSION >= 40902 #define KASAN_ABI_VERSION 3 diff --git a/mm/kasan/kasan.h b/mm/kasan/kasan.h index 4f6c62e5c21e..37ff0ab6a8ff 100644 --- a/mm/kasan/kasan.h +++ b/mm/kasan/kasan.h @@ -52,6 +52,9 @@ struct kasan_global { #if KASAN_ABI_VERSION >= 4 struct kasan_source_location *location; #endif +#if KASAN_ABI_VERSION >= 5 + char *odr_indicator; +#endif }; static inline const void *kasan_shadow_to_mem(const void *shadow_addr) From 5a5f7030194da0c47201c04b4831f1ca623779a9 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Sat, 19 Nov 2016 18:42:40 -0800 Subject: [PATCH 004/199] x86/traps: Ignore high word of regs->cs in early_fixup_exception() commit fc0e81b2bea0ebceb71889b61d2240856141c9ee upstream. On the 80486 DX, it seems that some exceptions may leave garbage in the high bits of CS. This causes sporadic failures in which early_fixup_exception() refuses to fix up an exception. As far as I can tell, this has been buggy for a long time, but the problem seems to have been exacerbated by commits: 1e02ce4cccdc ("x86: Store a per-cpu shadow copy of CR4") e1bfc11c5a6f ("x86/init: Fix cr4_init_shadow() on CR4-less machines") This appears to have broken for as long as we've had early exception handling. [ This backport should apply to kernels from 3.4 - 4.5. ] Fixes: 4c5023a3fa2e ("x86-32: Handle exception table entries during early boot") Cc: H. Peter Anvin Reported-by: Matthew Whitehead Signed-off-by: Andy Lutomirski Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/head_32.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S index 6bc9ae24b6d2..8f1a3f443f7d 100644 --- a/arch/x86/kernel/head_32.S +++ b/arch/x86/kernel/head_32.S @@ -571,7 +571,7 @@ early_idt_handler_common: movl %eax,%ds movl %eax,%es - cmpl $(__KERNEL_CS),32(%esp) + cmpw $(__KERNEL_CS),32(%esp) jne 10f leal 28(%esp),%eax # Pointer to %eip From 58cebd1a08ed114e05cc9d16dee9e5423f564c82 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Thu, 14 Apr 2016 18:02:37 +0200 Subject: [PATCH 005/199] ALSA: pcm : Call kill_fasync() in stream lock commit 3aa02cb664c5fb1042958c8d1aa8c35055a2ebc4 upstream. Currently kill_fasync() is called outside the stream lock in snd_pcm_period_elapsed(). This is potentially racy, since the stream may get released even during the irq handler is running. Although snd_pcm_release_substream() calls snd_pcm_drop(), this doesn't guarantee that the irq handler finishes, thus the kill_fasync() call outside the stream spin lock may be invoked after the substream is detached, as recently reported by KASAN. As a quick workaround, move kill_fasync() call inside the stream lock. The fasync is rarely used interface, so this shouldn't have a big impact from the performance POV. Ideally, we should implement some sync mechanism for the proper finish of stream and irq handler. But this oneliner should suffice for most cases, so far. Reported-by: Baozeng Ding Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/core/pcm_lib.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/core/pcm_lib.c b/sound/core/pcm_lib.c index 3a9b66c6e09c..0aca39762ed0 100644 --- a/sound/core/pcm_lib.c +++ b/sound/core/pcm_lib.c @@ -1886,8 +1886,8 @@ void snd_pcm_period_elapsed(struct snd_pcm_substream *substream) snd_timer_interrupt(substream->timer, 1); #endif _end: - snd_pcm_stream_unlock_irqrestore(substream, flags); kill_fasync(&runtime->fasync, SIGIO, POLL_IN); + snd_pcm_stream_unlock_irqrestore(substream, flags); } EXPORT_SYMBOL(snd_pcm_period_elapsed); From dfb704f96cd190239e7b86b09810a136cf25506e Mon Sep 17 00:00:00 2001 From: Ding Tianhong Date: Wed, 15 Jun 2016 15:27:36 +0800 Subject: [PATCH 006/199] rcu: Fix soft lockup for rcu_nocb_kthread commit bedc1969150d480c462cdac320fa944b694a7162 upstream. Carrying out the following steps results in a softlockup in the RCU callback-offload (rcuo) kthreads: 1. Connect to ixgbevf, and set the speed to 10Gb/s. 2. Use ifconfig to bring the nic up and down repeatedly. [ 317.005148] IPv6: ADDRCONF(NETDEV_CHANGE): eth2: link becomes ready [ 368.106005] BUG: soft lockup - CPU#1 stuck for 22s! [rcuos/1:15] [ 368.106005] Hardware name: Bochs Bochs, BIOS Bochs 01/01/2011 [ 368.106005] task: ffff88057dd8a220 ti: ffff88057dd9c000 task.ti: ffff88057dd9c000 [ 368.106005] RIP: 0010:[] [] fib_table_lookup+0x14/0x390 [ 368.106005] RSP: 0018:ffff88061fc83ce8 EFLAGS: 00000286 [ 368.106005] RAX: 0000000000000001 RBX: 00000000020155c0 RCX: 0000000000000001 [ 368.106005] RDX: ffff88061fc83d50 RSI: ffff88061fc83d70 RDI: ffff880036d11a00 [ 368.106005] RBP: ffff88061fc83d08 R08: 0000000000000001 R09: 0000000000000000 [ 368.106005] R10: ffff880036d11a00 R11: ffffffff819e0900 R12: ffff88061fc83c58 [ 368.106005] R13: ffffffff816154dd R14: ffff88061fc83d08 R15: 00000000020155c0 [ 368.106005] FS: 0000000000000000(0000) GS:ffff88061fc80000(0000) knlGS:0000000000000000 [ 368.106005] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 368.106005] CR2: 00007f8c2aee9c40 CR3: 000000057b222000 CR4: 00000000000407e0 [ 368.106005] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 368.106005] DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400 [ 368.106005] Stack: [ 368.106005] 00000000010000c0 ffff88057b766000 ffff8802e380b000 ffff88057af03e00 [ 368.106005] ffff88061fc83dc0 ffffffff815349a6 ffff88061fc83d40 ffffffff814ee146 [ 368.106005] ffff8802e380af00 00000000e380af00 ffffffff819e0900 020155c0010000c0 [ 368.106005] Call Trace: [ 368.106005] [ 368.106005] [ 368.106005] [] ip_route_input_noref+0x516/0xbd0 [ 368.106005] [] ? skb_release_data+0xd6/0x110 [ 368.106005] [] ? kfree_skb+0x3a/0xa0 [ 368.106005] [] ip_rcv_finish+0x29f/0x350 [ 368.106005] [] ip_rcv+0x234/0x380 [ 368.106005] [] __netif_receive_skb_core+0x676/0x870 [ 368.106005] [] __netif_receive_skb+0x18/0x60 [ 368.106005] [] process_backlog+0xae/0x180 [ 368.106005] [] net_rx_action+0x152/0x240 [ 368.106005] [] __do_softirq+0xef/0x280 [ 368.106005] [] call_softirq+0x1c/0x30 [ 368.106005] [ 368.106005] [ 368.106005] [] do_softirq+0x65/0xa0 [ 368.106005] [] local_bh_enable+0x94/0xa0 [ 368.106005] [] rcu_nocb_kthread+0x232/0x370 [ 368.106005] [] ? wake_up_bit+0x30/0x30 [ 368.106005] [] ? rcu_start_gp+0x40/0x40 [ 368.106005] [] kthread+0xcf/0xe0 [ 368.106005] [] ? kthread_create_on_node+0x140/0x140 [ 368.106005] [] ret_from_fork+0x58/0x90 [ 368.106005] [] ? kthread_create_on_node+0x140/0x140 ==================================cut here============================== It turns out that the rcuos callback-offload kthread is busy processing a very large quantity of RCU callbacks, and it is not reliquishing the CPU while doing so. This commit therefore adds an cond_resched_rcu_qs() within the loop to allow other tasks to run. Signed-off-by: Ding Tianhong [ paulmck: Substituted cond_resched_rcu_qs for cond_resched. ] Signed-off-by: Paul E. McKenney Cc: Dhaval Giani Signed-off-by: Greg Kroah-Hartman --- kernel/rcu/tree_plugin.h | 1 + 1 file changed, 1 insertion(+) diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h index 630c19772630..32cbe72bf545 100644 --- a/kernel/rcu/tree_plugin.h +++ b/kernel/rcu/tree_plugin.h @@ -2275,6 +2275,7 @@ static int rcu_nocb_kthread(void *arg) cl++; c++; local_bh_enable(); + cond_resched_rcu_qs(); list = next; } trace_rcu_batch_end(rdp->rsp->name, c, !!list, 0, 0, 1); From 140ff0a348970430e4eb06a63afbbe5510f9c8d1 Mon Sep 17 00:00:00 2001 From: Johannes Thumshirn Date: Wed, 2 Nov 2016 16:35:51 -0600 Subject: [PATCH 007/199] PCI: Export pcie_find_root_port commit e784930bd645e7df78c66e7872fec282b0620075 upstream. Export pcie_find_root_port() so we can use it outside of PCIe-AER error injection. Signed-off-by: Johannes Thumshirn Signed-off-by: Bjorn Helgaas Signed-off-by: Greg Kroah-Hartman --- drivers/pci/pcie/aer/aer_inject.c | 14 -------------- include/linux/pci.h | 14 ++++++++++++++ 2 files changed, 14 insertions(+), 14 deletions(-) diff --git a/drivers/pci/pcie/aer/aer_inject.c b/drivers/pci/pcie/aer/aer_inject.c index 182224acedbe..58f1419a68ae 100644 --- a/drivers/pci/pcie/aer/aer_inject.c +++ b/drivers/pci/pcie/aer/aer_inject.c @@ -283,20 +283,6 @@ out: return 0; } -static struct pci_dev *pcie_find_root_port(struct pci_dev *dev) -{ - while (1) { - if (!pci_is_pcie(dev)) - break; - if (pci_pcie_type(dev) == PCI_EXP_TYPE_ROOT_PORT) - return dev; - if (!dev->bus->self) - break; - dev = dev->bus->self; - } - return NULL; -} - static int find_aer_device_iter(struct device *device, void *data) { struct pcie_device **result = data; diff --git a/include/linux/pci.h b/include/linux/pci.h index e89c7ee7e803..5f37614f2451 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -1802,6 +1802,20 @@ static inline int pci_pcie_type(const struct pci_dev *dev) return (pcie_caps_reg(dev) & PCI_EXP_FLAGS_TYPE) >> 4; } +static inline struct pci_dev *pcie_find_root_port(struct pci_dev *dev) +{ + while (1) { + if (!pci_is_pcie(dev)) + break; + if (pci_pcie_type(dev) == PCI_EXP_TYPE_ROOT_PORT) + return dev; + if (!dev->bus->self) + break; + dev = dev->bus->self; + } + return NULL; +} + void pci_request_acs(void); bool pci_acs_enabled(struct pci_dev *pdev, u16 acs_flags); bool pci_acs_path_enabled(struct pci_dev *start, From ac6e42d7a7201245e9a8db922ad795ef2a257568 Mon Sep 17 00:00:00 2001 From: Johannes Thumshirn Date: Wed, 23 Nov 2016 10:56:28 -0600 Subject: [PATCH 008/199] PCI: Set Read Completion Boundary to 128 iff Root Port supports it (_HPX) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit e42010d8207f9d15a605ceb8e321bcd9648071b0 upstream. Per PCIe spec r3.0, sec 2.3.1.1, the Read Completion Boundary (RCB) determines the naturally aligned address boundaries on which a Read Request may be serviced with multiple Completions: - For a Root Complex, RCB is 64 bytes or 128 bytes This value is reported in the Link Control Register Note: Bridges and Endpoints may implement a corresponding command bit which may be set by system software to indicate the RCB value for the Root Complex, allowing the Bridge/Endpoint to optimize its behavior when the Root Complex’s RCB is 128 bytes. - For all other system elements, RCB is 128 bytes Per sec 7.8.7, if a Root Port only supports a 64-byte RCB, the RCB of all downstream devices must be clear, indicating an RCB of 64 bytes. If the Root Port supports a 128-byte RCB, we may optionally set the RCB of downstream devices so they know they can generate larger Completions. Some BIOSes supply an _HPX that tells us to set RCB, even though the Root Port doesn't have RCB set, which may lead to Malformed TLP errors if the Endpoint generates completions larger than the Root Port can handle. The IBM x3850 X6 with BIOS version -[A8E120CUS-1.30]- 08/22/2016 supplies such an _HPX and a Mellanox MT27500 ConnectX-3 device fails to initialize: mlx4_core 0000:41:00.0: command 0xfff timed out (go bit not cleared) mlx4_core 0000:41:00.0: device is going to be reset mlx4_core 0000:41:00.0: Failed to obtain HW semaphore, aborting mlx4_core 0000:41:00.0: Fail to reset HCA ------------[ cut here ]------------ kernel BUG at drivers/net/ethernet/mellanox/mlx4/catas.c:193! After 6cd33649fa83 ("PCI: Add pci_configure_device() during enumeration") and 7a1562d4f2d0 ("PCI: Apply _HPX Link Control settings to all devices with a link"), we apply _HPX settings to *all* devices, not just those hot-added after boot. Before 7a1562d4f2d0, we didn't touch the Mellanox RCB, and the device worked. After 7a1562d4f2d0, we set its RCB to 128, and it failed. Set the RCB to 128 iff the Root Port supports a 128-byte RCB. Otherwise, set RCB to 64 bytes. This effectively ignores what _HPX tells us about RCB. Note that this change only affects _HPX handling. If we have no _HPX, this does nothing with RCB. [bhelgaas: changelog, clear RCB if not set for Root Port] Fixes: 6cd33649fa83 ("PCI: Add pci_configure_device() during enumeration") Fixes: 7a1562d4f2d0 ("PCI: Apply _HPX Link Control settings to all devices with a link") Link: https://bugzilla.kernel.org/show_bug.cgi?id=187781 Tested-by: Frank Danapfel Signed-off-by: Johannes Thumshirn Signed-off-by: Bjorn Helgaas Acked-by: Myron Stowe Signed-off-by: Greg Kroah-Hartman --- drivers/pci/probe.c | 28 +++++++++++++++++++++++++++- 1 file changed, 27 insertions(+), 1 deletion(-) diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c index 9757cf9037a2..b5843c255263 100644 --- a/drivers/pci/probe.c +++ b/drivers/pci/probe.c @@ -1415,6 +1415,21 @@ static void program_hpp_type1(struct pci_dev *dev, struct hpp_type1 *hpp) dev_warn(&dev->dev, "PCI-X settings not supported\n"); } +static bool pcie_root_rcb_set(struct pci_dev *dev) +{ + struct pci_dev *rp = pcie_find_root_port(dev); + u16 lnkctl; + + if (!rp) + return false; + + pcie_capability_read_word(rp, PCI_EXP_LNKCTL, &lnkctl); + if (lnkctl & PCI_EXP_LNKCTL_RCB) + return true; + + return false; +} + static void program_hpp_type2(struct pci_dev *dev, struct hpp_type2 *hpp) { int pos; @@ -1444,9 +1459,20 @@ static void program_hpp_type2(struct pci_dev *dev, struct hpp_type2 *hpp) ~hpp->pci_exp_devctl_and, hpp->pci_exp_devctl_or); /* Initialize Link Control Register */ - if (pcie_cap_has_lnkctl(dev)) + if (pcie_cap_has_lnkctl(dev)) { + + /* + * If the Root Port supports Read Completion Boundary of + * 128, set RCB to 128. Otherwise, clear it. + */ + hpp->pci_exp_lnkctl_and |= PCI_EXP_LNKCTL_RCB; + hpp->pci_exp_lnkctl_or &= ~PCI_EXP_LNKCTL_RCB; + if (pcie_root_rcb_set(dev)) + hpp->pci_exp_lnkctl_or |= PCI_EXP_LNKCTL_RCB; + pcie_capability_clear_and_set_word(dev, PCI_EXP_LNKCTL, ~hpp->pci_exp_lnkctl_and, hpp->pci_exp_lnkctl_or); + } /* Find Advanced Error Reporting Enhanced Capability */ pos = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_ERR); From d7a2c259fc1f41c3c030868b0344112c92f06c34 Mon Sep 17 00:00:00 2001 From: Brian Norris Date: Tue, 8 Nov 2016 18:28:24 -0800 Subject: [PATCH 009/199] mwifiex: printk() overflow with 32-byte SSIDs commit fcd2042e8d36cf644bd2d69c26378d17158b17df upstream. SSIDs aren't guaranteed to be 0-terminated. Let's cap the max length when we print them out. This can be easily noticed by connecting to a network with a 32-octet SSID: [ 3903.502925] mwifiex_pcie 0000:01:00.0: info: trying to associate to '0123456789abcdef0123456789abcdef ' bssid xx:xx:xx:xx:xx:xx Fixes: 5e6e3a92b9a4 ("wireless: mwifiex: initial commit for Marvell mwifiex driver") Signed-off-by: Brian Norris Acked-by: Amitkumar Karwar Signed-off-by: Kalle Valo Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/mwifiex/cfg80211.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/drivers/net/wireless/mwifiex/cfg80211.c b/drivers/net/wireless/mwifiex/cfg80211.c index 4073116e6e9f..c3331d6201c3 100644 --- a/drivers/net/wireless/mwifiex/cfg80211.c +++ b/drivers/net/wireless/mwifiex/cfg80211.c @@ -2144,8 +2144,9 @@ done: is_scanning_required = 1; } else { mwifiex_dbg(priv->adapter, MSG, - "info: trying to associate to '%s' bssid %pM\n", - (char *)req_ssid.ssid, bss->bssid); + "info: trying to associate to '%.*s' bssid %pM\n", + req_ssid.ssid_len, (char *)req_ssid.ssid, + bss->bssid); memcpy(&priv->cfg_bssid, bss->bssid, ETH_ALEN); break; } @@ -2202,8 +2203,8 @@ mwifiex_cfg80211_connect(struct wiphy *wiphy, struct net_device *dev, } mwifiex_dbg(adapter, INFO, - "info: Trying to associate to %s and bssid %pM\n", - (char *)sme->ssid, sme->bssid); + "info: Trying to associate to %.*s and bssid %pM\n", + (int)sme->ssid_len, (char *)sme->ssid, sme->bssid); ret = mwifiex_cfg80211_assoc(priv, sme->ssid_len, sme->ssid, sme->bssid, priv->bss_mode, sme->channel, sme, 0); @@ -2333,8 +2334,8 @@ mwifiex_cfg80211_join_ibss(struct wiphy *wiphy, struct net_device *dev, } mwifiex_dbg(priv->adapter, MSG, - "info: trying to join to %s and bssid %pM\n", - (char *)params->ssid, params->bssid); + "info: trying to join to %.*s and bssid %pM\n", + params->ssid_len, (char *)params->ssid, params->bssid); mwifiex_set_ibss_params(priv, params); From 4fd108fa1a7438566e2b16bee74228c3227c9597 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Tue, 1 Nov 2016 11:46:39 +0100 Subject: [PATCH 010/199] pwm: Fix device reference leak commit 0e1614ac84f1719d87bed577963bb8140d0c9ce8 upstream. Make sure to drop the reference to the parent device taken by class_find_device() after "unexporting" any children when deregistering a PWM chip. Fixes: 0733424c9ba9 ("pwm: Unexport children before chip removal") Signed-off-by: Johan Hovold Signed-off-by: Thierry Reding Signed-off-by: Greg Kroah-Hartman --- drivers/pwm/sysfs.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/pwm/sysfs.c b/drivers/pwm/sysfs.c index c20163b92991..375008e2be20 100644 --- a/drivers/pwm/sysfs.c +++ b/drivers/pwm/sysfs.c @@ -366,6 +366,8 @@ void pwmchip_sysfs_unexport_children(struct pwm_chip *chip) if (test_bit(PWMF_EXPORTED, &pwm->flags)) pwm_unexport_child(parent, pwm); } + + put_device(parent); } static int __init pwm_sysfs_init(void) From da643dc17f20e04c089c93a3fe7d89e5be80d1af Mon Sep 17 00:00:00 2001 From: James Morse Date: Tue, 18 Oct 2016 11:27:46 +0100 Subject: [PATCH 011/199] arm64: cpufeature: Schedule enable() calls instead of calling them via IPI commit 2a6dcb2b5f3e21592ca8dfa198dcce7bec09b020 upstream. The enable() call for a cpufeature/errata is called using on_each_cpu(). This issues a cross-call IPI to get the work done. Implicitly, this stashes the running PSTATE in SPSR when the CPU receives the IPI, and restores it when we return. This means an enable() call can never modify PSTATE. To allow PAN to do this, change the on_each_cpu() call to use stop_machine(). This schedules the work on each CPU which allows us to modify PSTATE. This involves changing the protype of all the enable() functions. enable_cpu_capabilities() is called during boot and enables the feature on all online CPUs. This path now uses stop_machine(). CPU features for hotplug'd CPUs are enabled by verify_local_cpu_features() which only acts on the local CPU, and can already modify the running PSTATE as it is called from secondary_start_kernel(). Reported-by: Tony Thompson Reported-by: Vladimir Murzin Signed-off-by: James Morse Cc: Suzuki K Poulose Signed-off-by: Will Deacon [Removed enable() hunks for features/errata v4.4. doesn't have. Changed caps->enable arg in enable_cpu_capabilities()] Signed-off-by: James Morse Signed-off-by: Greg Kroah-Hartman --- arch/arm64/include/asm/cpufeature.h | 2 +- arch/arm64/include/asm/processor.h | 2 +- arch/arm64/kernel/cpufeature.c | 10 +++++++++- arch/arm64/mm/fault.c | 3 ++- 4 files changed, 13 insertions(+), 4 deletions(-) diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h index 8136afc9df0d..8884b5d5f48c 100644 --- a/arch/arm64/include/asm/cpufeature.h +++ b/arch/arm64/include/asm/cpufeature.h @@ -77,7 +77,7 @@ struct arm64_cpu_capabilities { const char *desc; u16 capability; bool (*matches)(const struct arm64_cpu_capabilities *); - void (*enable)(void *); /* Called on all active CPUs */ + int (*enable)(void *); /* Called on all active CPUs */ union { struct { /* To be used for erratum handling only */ u32 midr_model; diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h index 4acb7ca94fcd..d08559528927 100644 --- a/arch/arm64/include/asm/processor.h +++ b/arch/arm64/include/asm/processor.h @@ -186,6 +186,6 @@ static inline void spin_lock_prefetch(const void *x) #endif -void cpu_enable_pan(void *__unused); +int cpu_enable_pan(void *__unused); #endif /* __ASM_PROCESSOR_H */ diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index 0669c63281ea..2735bf814592 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -19,7 +19,9 @@ #define pr_fmt(fmt) "CPU features: " fmt #include +#include #include +#include #include #include #include @@ -764,7 +766,13 @@ static void enable_cpu_capabilities(const struct arm64_cpu_capabilities *caps) for (i = 0; caps[i].desc; i++) if (caps[i].enable && cpus_have_cap(caps[i].capability)) - on_each_cpu(caps[i].enable, NULL, true); + /* + * Use stop_machine() as it schedules the work allowing + * us to modify PSTATE, instead of on_each_cpu() which + * uses an IPI, giving us a PSTATE that disappears when + * we return. + */ + stop_machine(caps[i].enable, NULL, cpu_online_mask); } #ifdef CONFIG_HOTPLUG_CPU diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c index 4c1a118c1d09..d4634e6942ca 100644 --- a/arch/arm64/mm/fault.c +++ b/arch/arm64/mm/fault.c @@ -606,8 +606,9 @@ asmlinkage int __exception do_debug_exception(unsigned long addr, } #ifdef CONFIG_ARM64_PAN -void cpu_enable_pan(void *__unused) +int cpu_enable_pan(void *__unused) { config_sctlr_el1(SCTLR_EL1_SPAN, 0); + return 0; } #endif /* CONFIG_ARM64_PAN */ From d24207a0d81cee81d0f06397074db99e12cc87c1 Mon Sep 17 00:00:00 2001 From: James Morse Date: Tue, 18 Oct 2016 11:27:47 +0100 Subject: [PATCH 012/199] arm64: mm: Set PSTATE.PAN from the cpu_enable_pan() call commit 7209c868600bd8926e37c10b9aae83124ccc1dd8 upstream. Commit 338d4f49d6f7 ("arm64: kernel: Add support for Privileged Access Never") enabled PAN by enabling the 'SPAN' feature-bit in SCTLR_EL1. This means the PSTATE.PAN bit won't be set until the next return to the kernel from userspace. On a preemptible kernel we may schedule work that accesses userspace on a CPU before it has done this. Now that cpufeature enable() calls are scheduled via stop_machine(), we can set PSTATE.PAN from the cpu_enable_pan() call. Add WARN_ON_ONCE(in_interrupt()) to check the PSTATE value we updated is not immediately discarded. Reported-by: Tony Thompson Reported-by: Vladimir Murzin Signed-off-by: James Morse [will: fixed typo in comment] Signed-off-by: Will Deacon Signed-off-by: Greg Kroah-Hartman --- arch/arm64/mm/fault.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c index d4634e6942ca..247bae758e1e 100644 --- a/arch/arm64/mm/fault.c +++ b/arch/arm64/mm/fault.c @@ -29,7 +29,9 @@ #include #include #include +#include +#include #include #include #include @@ -608,7 +610,14 @@ asmlinkage int __exception do_debug_exception(unsigned long addr, #ifdef CONFIG_ARM64_PAN int cpu_enable_pan(void *__unused) { + /* + * We modify PSTATE. This won't work from irq context as the PSTATE + * is discarded once we return from the exception. + */ + WARN_ON_ONCE(in_interrupt()); + config_sctlr_el1(SCTLR_EL1_SPAN, 0); + asm(SET_PSTATE_PAN(1)); return 0; } #endif /* CONFIG_ARM64_PAN */ From 71710cd35a554b1f0a60f035642455beeaec0f06 Mon Sep 17 00:00:00 2001 From: James Morse Date: Tue, 18 Oct 2016 11:27:48 +0100 Subject: [PATCH 013/199] arm64: suspend: Reconfigure PSTATE after resume from idle commit d08544127d9fb4505635e3cb6871fd50a42947bd upstream. The suspend/resume path in kernel/sleep.S, as used by cpu-idle, does not save/restore PSTATE. As a result of this cpufeatures that were detected and have bits in PSTATE get lost when we resume from idle. UAO gets set appropriately on the next context switch. PAN will be re-enabled next time we return from user-space, but on a preemptible kernel we may run work accessing user space before this point. Add code to re-enable theses two features in __cpu_suspend_exit(). We re-use uao_thread_switch() passing current. Signed-off-by: James Morse Cc: Lorenzo Pieralisi Signed-off-by: Will Deacon [Removed UAO hooks and commit-message references: this feature is not present in v4.4] Signed-off-by: James Morse Signed-off-by: Greg Kroah-Hartman --- arch/arm64/kernel/suspend.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/arch/arm64/kernel/suspend.c b/arch/arm64/kernel/suspend.c index 1095aa483a1c..00c1372bf57b 100644 --- a/arch/arm64/kernel/suspend.c +++ b/arch/arm64/kernel/suspend.c @@ -1,7 +1,9 @@ #include #include #include +#include #include +#include #include #include #include @@ -110,6 +112,13 @@ int cpu_suspend(unsigned long arg, int (*fn)(unsigned long)) */ set_my_cpu_offset(per_cpu_offset(smp_processor_id())); + /* + * PSTATE was not saved over suspend/resume, re-enable any + * detected features that might not have been set correctly. + */ + asm(ALTERNATIVE("nop", SET_PSTATE_PAN(1), ARM64_HAS_PAN, + CONFIG_ARM64_PAN)); + /* * Restore HW breakpoint registers to sane values * before debug exceptions are possibly reenabled From e5f84c1444ae59c85bd25ba05393c6bc87067ddb Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Thu, 8 Dec 2016 07:15:46 +0100 Subject: [PATCH 014/199] Linux 4.4.37 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 705eb9e38fce..b57ec79b4941 100644 --- a/Makefile +++ b/Makefile @@ -1,6 +1,6 @@ VERSION = 4 PATCHLEVEL = 4 -SUBLEVEL = 36 +SUBLEVEL = 37 EXTRAVERSION = NAME = Blurry Fish Butt From 790fd11f9ed6d2558a516280c3bd1e25b0c64c90 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 15 Nov 2016 22:24:12 -0800 Subject: [PATCH 015/199] virtio-net: add a missing synchronize_net() [ Upstream commit 963abe5c8a0273a1cf5913556da1b1189de0e57a ] It seems many drivers do not respect napi_hash_del() contract. When napi_hash_del() is used before netif_napi_del(), an RCU grace period is needed before freeing NAPI object. Fixes: 91815639d880 ("virtio-net: rx busy polling support") Signed-off-by: Eric Dumazet Cc: Jason Wang Cc: Michael S. Tsirkin Acked-by: Michael S. Tsirkin Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/virtio_net.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index f94ab786088f..0e2a19e58923 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -1465,6 +1465,11 @@ static void virtnet_free_queues(struct virtnet_info *vi) netif_napi_del(&vi->rq[i].napi); } + /* We called napi_hash_del() before netif_napi_del(), + * we need to respect an RCU grace period before freeing vi->rq + */ + synchronize_net(); + kfree(vi->rq); kfree(vi->sq); } From 2b54505c877f71155c7aba26550015bd1e30d3dc Mon Sep 17 00:00:00 2001 From: WANG Cong Date: Wed, 16 Nov 2016 10:27:02 -0800 Subject: [PATCH 016/199] net: check dead netns for peernet2id_alloc() [ Upstream commit cfc44a4d147ea605d66ccb917cc24467d15ff867 ] Andrei reports we still allocate netns ID from idr after we destroy it in cleanup_net(). cleanup_net(): ... idr_destroy(&net->netns_ids); ... list_for_each_entry_reverse(ops, &pernet_list, list) ops_exit_list(ops, &net_exit_list); -> rollback_registered_many() -> rtmsg_ifinfo_build_skb() -> rtnl_fill_ifinfo() -> peernet2id_alloc() After that point we should not even access net->netns_ids, we should check the death of the current netns as early as we can in peernet2id_alloc(). For net-next we can consider to avoid sending rtmsg totally, it is a good optimization for netns teardown path. Fixes: 0c7aecd4bde4 ("netns: add rtnl cmd to add and get peer netns ids") Reported-by: Andrei Vagin Cc: Nicolas Dichtel Signed-off-by: Cong Wang Acked-by: Andrei Vagin Signed-off-by: Nicolas Dichtel Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/core/net_namespace.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c index 2c2eb1b629b1..2e9a1c2818c7 100644 --- a/net/core/net_namespace.c +++ b/net/core/net_namespace.c @@ -217,6 +217,8 @@ int peernet2id_alloc(struct net *net, struct net *peer) bool alloc; int id; + if (atomic_read(&net->count) == 0) + return NETNSA_NSID_NOT_ASSIGNED; spin_lock_irqsave(&net->nsid_lock, flags); alloc = atomic_read(&peer->count) == 0 ? false : true; id = __peernet2id_alloc(net, peer, &alloc); From 49695d1e3b817743a155e63d923e1e14b97e14eb Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Wed, 16 Nov 2016 16:26:46 +0100 Subject: [PATCH 017/199] ip6_tunnel: disable caching when the traffic class is inherited [ Upstream commit b5c2d49544e5930c96e2632a7eece3f4325a1888 ] If an ip6 tunnel is configured to inherit the traffic class from the inner header, the dst_cache must be disabled or it will foul the policy routing. The issue is apprently there since at leat Linux-2.6.12-rc2. Reported-by: Liam McBirnie Cc: Liam McBirnie Acked-by: Hannes Frederic Sowa Signed-off-by: Paolo Abeni Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv6/ip6_tunnel.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index e8878886eba4..2994d1f1a661 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -1043,6 +1043,7 @@ static int ip6_tnl_xmit2(struct sk_buff *skb, struct ipv6_tel_txoption opt; struct dst_entry *dst = NULL, *ndst = NULL; struct net_device *tdev; + bool use_cache = false; int mtu; unsigned int max_headroom = sizeof(struct ipv6hdr); u8 proto; @@ -1070,7 +1071,15 @@ static int ip6_tnl_xmit2(struct sk_buff *skb, memcpy(&fl6->daddr, addr6, sizeof(fl6->daddr)); neigh_release(neigh); - } else if (!fl6->flowi6_mark) + } else if (!(t->parms.flags & + (IP6_TNL_F_USE_ORIG_TCLASS | IP6_TNL_F_USE_ORIG_FWMARK))) { + /* enable the cache only only if the routing decision does + * not depend on the current inner header value + */ + use_cache = true; + } + + if (use_cache) dst = ip6_tnl_dst_get(t); if (!ip6_tnl_xmit_ctl(t, &fl6->saddr, &fl6->daddr)) @@ -1134,7 +1143,7 @@ static int ip6_tnl_xmit2(struct sk_buff *skb, skb = new_skb; } - if (!fl6->flowi6_mark && ndst) + if (use_cache && ndst) ip6_tnl_dst_set(t, ndst); skb_dst_set(skb, dst); From acf9504ae220acbd1caa8ee4668c8a3a3b51d73a Mon Sep 17 00:00:00 2001 From: Jeremy Linton Date: Thu, 17 Nov 2016 09:14:25 -0600 Subject: [PATCH 018/199] net: sky2: Fix shutdown crash [ Upstream commit 06ba3b2133dc203e1e9bc36cee7f0839b79a9e8b ] The sky2 frequently crashes during machine shutdown with: sky2_get_stats+0x60/0x3d8 [sky2] dev_get_stats+0x68/0xd8 rtnl_fill_stats+0x54/0x140 rtnl_fill_ifinfo+0x46c/0xc68 rtmsg_ifinfo_build_skb+0x7c/0xf0 rtmsg_ifinfo.part.22+0x3c/0x70 rtmsg_ifinfo+0x50/0x5c netdev_state_change+0x4c/0x58 linkwatch_do_dev+0x50/0x88 __linkwatch_run_queue+0x104/0x1a4 linkwatch_event+0x30/0x3c process_one_work+0x140/0x3e0 worker_thread+0x60/0x44c kthread+0xdc/0xf0 ret_from_fork+0x10/0x50 This is caused by the sky2 being called after it has been shutdown. A previous thread about this can be found here: https://lkml.org/lkml/2016/4/12/410 An alternative fix is to assure that IFF_UP gets cleared by calling dev_close() during shutdown. This is similar to what the bnx2/tg3/xgene and maybe others are doing to assure that the driver isn't being called following _shutdown(). Signed-off-by: Jeremy Linton Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/marvell/sky2.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/drivers/net/ethernet/marvell/sky2.c b/drivers/net/ethernet/marvell/sky2.c index 5606a043063e..4b62aa1f9ff8 100644 --- a/drivers/net/ethernet/marvell/sky2.c +++ b/drivers/net/ethernet/marvell/sky2.c @@ -5220,6 +5220,19 @@ static SIMPLE_DEV_PM_OPS(sky2_pm_ops, sky2_suspend, sky2_resume); static void sky2_shutdown(struct pci_dev *pdev) { + struct sky2_hw *hw = pci_get_drvdata(pdev); + int port; + + for (port = 0; port < hw->ports; port++) { + struct net_device *ndev = hw->dev[port]; + + rtnl_lock(); + if (netif_running(ndev)) { + dev_close(ndev); + netif_device_detach(ndev); + } + rtnl_unlock(); + } sky2_suspend(&pdev->dev); pci_wake_from_d3(pdev, device_may_wakeup(&pdev->dev)); pci_set_power_state(pdev, PCI_D3hot); From 6ef59b986190f07386b062884ab48006c16cf152 Mon Sep 17 00:00:00 2001 From: WANG Cong Date: Thu, 17 Nov 2016 15:55:26 -0800 Subject: [PATCH 019/199] af_unix: conditionally use freezable blocking calls in read [ Upstream commit 06a77b07e3b44aea2b3c0e64de420ea2cfdcbaa9 ] Commit 2b15af6f95 ("af_unix: use freezable blocking calls in read") converts schedule_timeout() to its freezable version, it was probably correct at that time, but later, commit 2b514574f7e8 ("net: af_unix: implement splice for stream af_unix sockets") breaks the strong requirement for a freezable sleep, according to commit 0f9548ca1091: We shouldn't try_to_freeze if locks are held. Holding a lock can cause a deadlock if the lock is later acquired in the suspend or hibernate path (e.g. by dpm). Holding a lock can also cause a deadlock in the case of cgroup_freezer if a lock is held inside a frozen cgroup that is later acquired by a process outside that group. The pipe_lock is still held at that point. So use freezable version only for the recvmsg call path, avoid impact for Android. Fixes: 2b514574f7e8 ("net: af_unix: implement splice for stream af_unix sockets") Reported-by: Dmitry Vyukov Cc: Tejun Heo Cc: Colin Cross Cc: Rafael J. Wysocki Cc: Hannes Frederic Sowa Signed-off-by: Cong Wang Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/unix/af_unix.c | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 824cc1e160bc..73f75258ce46 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -2194,7 +2194,8 @@ out: * Sleep until more data has arrived. But check for races.. */ static long unix_stream_data_wait(struct sock *sk, long timeo, - struct sk_buff *last, unsigned int last_len) + struct sk_buff *last, unsigned int last_len, + bool freezable) { struct sk_buff *tail; DEFINE_WAIT(wait); @@ -2215,7 +2216,10 @@ static long unix_stream_data_wait(struct sock *sk, long timeo, sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk); unix_state_unlock(sk); - timeo = freezable_schedule_timeout(timeo); + if (freezable) + timeo = freezable_schedule_timeout(timeo); + else + timeo = schedule_timeout(timeo); unix_state_lock(sk); if (sock_flag(sk, SOCK_DEAD)) @@ -2245,7 +2249,8 @@ struct unix_stream_read_state { unsigned int splice_flags; }; -static int unix_stream_read_generic(struct unix_stream_read_state *state) +static int unix_stream_read_generic(struct unix_stream_read_state *state, + bool freezable) { struct scm_cookie scm; struct socket *sock = state->socket; @@ -2324,7 +2329,7 @@ again: mutex_unlock(&u->iolock); timeo = unix_stream_data_wait(sk, timeo, last, - last_len); + last_len, freezable); if (signal_pending(current)) { err = sock_intr_errno(timeo); @@ -2466,7 +2471,7 @@ static int unix_stream_recvmsg(struct socket *sock, struct msghdr *msg, .flags = flags }; - return unix_stream_read_generic(&state); + return unix_stream_read_generic(&state, true); } static ssize_t skb_unix_socket_splice(struct sock *sk, @@ -2512,7 +2517,7 @@ static ssize_t unix_stream_splice_read(struct socket *sock, loff_t *ppos, flags & SPLICE_F_NONBLOCK) state.flags = MSG_DONTWAIT; - return unix_stream_read_generic(&state); + return unix_stream_read_generic(&state, false); } static int unix_shutdown(struct socket *sock, int mode) From aece024e38cb38eb5307e35862c5876e817efd33 Mon Sep 17 00:00:00 2001 From: Sabrina Dubroca Date: Fri, 18 Nov 2016 15:50:39 +0100 Subject: [PATCH 020/199] rtnetlink: fix FDB size computation [ Upstream commit f82ef3e10a870acc19fa04f80ef5877eaa26f41e ] Add missing NDA_VLAN attribute's size. Fixes: 1e53d5bb8878 ("net: Pass VLAN ID to rtnl_fdb_notify.") Signed-off-by: Sabrina Dubroca Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/core/rtnetlink.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 87b91ffbdec3..b94e165a4f79 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -2600,7 +2600,10 @@ nla_put_failure: static inline size_t rtnl_fdb_nlmsg_size(void) { - return NLMSG_ALIGN(sizeof(struct ndmsg)) + nla_total_size(ETH_ALEN); + return NLMSG_ALIGN(sizeof(struct ndmsg)) + + nla_total_size(ETH_ALEN) + /* NDA_LLADDR */ + nla_total_size(sizeof(u16)) + /* NDA_VLAN */ + 0; } static void rtnl_fdb_notify(struct net_device *dev, u8 *addr, u16 vid, int type) From 56366fa0ad46a59abe2460b8acb775f7f84fbf16 Mon Sep 17 00:00:00 2001 From: Guillaume Nault Date: Fri, 18 Nov 2016 22:13:00 +0100 Subject: [PATCH 021/199] l2tp: fix racy SOCK_ZAPPED flag check in l2tp_ip{,6}_bind() [ Upstream commit 32c231164b762dddefa13af5a0101032c70b50ef ] Lock socket before checking the SOCK_ZAPPED flag in l2tp_ip6_bind(). Without lock, a concurrent call could modify the socket flags between the sock_flag(sk, SOCK_ZAPPED) test and the lock_sock() call. This way, a socket could be inserted twice in l2tp_ip6_bind_table. Releasing it would then leave a stale pointer there, generating use-after-free errors when walking through the list or modifying adjacent entries. BUG: KASAN: use-after-free in l2tp_ip6_close+0x22e/0x290 at addr ffff8800081b0ed8 Write of size 8 by task syz-executor/10987 CPU: 0 PID: 10987 Comm: syz-executor Not tainted 4.8.0+ #39 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.8.2-0-g33fbe13 by qemu-project.org 04/01/2014 ffff880031d97838 ffffffff829f835b ffff88001b5a1640 ffff8800081b0ec0 ffff8800081b15a0 ffff8800081b6d20 ffff880031d97860 ffffffff8174d3cc ffff880031d978f0 ffff8800081b0e80 ffff88001b5a1640 ffff880031d978e0 Call Trace: [] dump_stack+0xb3/0x118 lib/dump_stack.c:15 [] kasan_object_err+0x1c/0x70 mm/kasan/report.c:156 [< inline >] print_address_description mm/kasan/report.c:194 [] kasan_report_error+0x1f6/0x4d0 mm/kasan/report.c:283 [< inline >] kasan_report mm/kasan/report.c:303 [] __asan_report_store8_noabort+0x3e/0x40 mm/kasan/report.c:329 [< inline >] __write_once_size ./include/linux/compiler.h:249 [< inline >] __hlist_del ./include/linux/list.h:622 [< inline >] hlist_del_init ./include/linux/list.h:637 [] l2tp_ip6_close+0x22e/0x290 net/l2tp/l2tp_ip6.c:239 [] inet_release+0xed/0x1c0 net/ipv4/af_inet.c:415 [] inet6_release+0x50/0x70 net/ipv6/af_inet6.c:422 [] sock_release+0x8d/0x1d0 net/socket.c:570 [] sock_close+0x16/0x20 net/socket.c:1017 [] __fput+0x28c/0x780 fs/file_table.c:208 [] ____fput+0x15/0x20 fs/file_table.c:244 [] task_work_run+0xf9/0x170 [] do_exit+0x85e/0x2a00 [] do_group_exit+0x108/0x330 [] get_signal+0x617/0x17a0 kernel/signal.c:2307 [] do_signal+0x7f/0x18f0 [] exit_to_usermode_loop+0xbf/0x150 arch/x86/entry/common.c:156 [< inline >] prepare_exit_to_usermode arch/x86/entry/common.c:190 [] syscall_return_slowpath+0x1a0/0x1e0 arch/x86/entry/common.c:259 [] entry_SYSCALL_64_fastpath+0xc4/0xc6 Object at ffff8800081b0ec0, in cache L2TP/IPv6 size: 1448 Allocated: PID = 10987 [ 1116.897025] [] save_stack_trace+0x16/0x20 [ 1116.897025] [] save_stack+0x46/0xd0 [ 1116.897025] [] kasan_kmalloc+0xad/0xe0 [ 1116.897025] [] kasan_slab_alloc+0x12/0x20 [ 1116.897025] [< inline >] slab_post_alloc_hook mm/slab.h:417 [ 1116.897025] [< inline >] slab_alloc_node mm/slub.c:2708 [ 1116.897025] [< inline >] slab_alloc mm/slub.c:2716 [ 1116.897025] [] kmem_cache_alloc+0xc8/0x2b0 mm/slub.c:2721 [ 1116.897025] [] sk_prot_alloc+0x69/0x2b0 net/core/sock.c:1326 [ 1116.897025] [] sk_alloc+0x38/0xae0 net/core/sock.c:1388 [ 1116.897025] [] inet6_create+0x2d7/0x1000 net/ipv6/af_inet6.c:182 [ 1116.897025] [] __sock_create+0x37b/0x640 net/socket.c:1153 [ 1116.897025] [< inline >] sock_create net/socket.c:1193 [ 1116.897025] [< inline >] SYSC_socket net/socket.c:1223 [ 1116.897025] [] SyS_socket+0xef/0x1b0 net/socket.c:1203 [ 1116.897025] [] entry_SYSCALL_64_fastpath+0x23/0xc6 Freed: PID = 10987 [ 1116.897025] [] save_stack_trace+0x16/0x20 [ 1116.897025] [] save_stack+0x46/0xd0 [ 1116.897025] [] kasan_slab_free+0x71/0xb0 [ 1116.897025] [< inline >] slab_free_hook mm/slub.c:1352 [ 1116.897025] [< inline >] slab_free_freelist_hook mm/slub.c:1374 [ 1116.897025] [< inline >] slab_free mm/slub.c:2951 [ 1116.897025] [] kmem_cache_free+0xc8/0x330 mm/slub.c:2973 [ 1116.897025] [< inline >] sk_prot_free net/core/sock.c:1369 [ 1116.897025] [] __sk_destruct+0x32b/0x4f0 net/core/sock.c:1444 [ 1116.897025] [] sk_destruct+0x44/0x80 net/core/sock.c:1452 [ 1116.897025] [] __sk_free+0x53/0x220 net/core/sock.c:1460 [ 1116.897025] [] sk_free+0x23/0x30 net/core/sock.c:1471 [ 1116.897025] [] sk_common_release+0x28c/0x3e0 ./include/net/sock.h:1589 [ 1116.897025] [] l2tp_ip6_close+0x1fe/0x290 net/l2tp/l2tp_ip6.c:243 [ 1116.897025] [] inet_release+0xed/0x1c0 net/ipv4/af_inet.c:415 [ 1116.897025] [] inet6_release+0x50/0x70 net/ipv6/af_inet6.c:422 [ 1116.897025] [] sock_release+0x8d/0x1d0 net/socket.c:570 [ 1116.897025] [] sock_close+0x16/0x20 net/socket.c:1017 [ 1116.897025] [] __fput+0x28c/0x780 fs/file_table.c:208 [ 1116.897025] [] ____fput+0x15/0x20 fs/file_table.c:244 [ 1116.897025] [] task_work_run+0xf9/0x170 [ 1116.897025] [] do_exit+0x85e/0x2a00 [ 1116.897025] [] do_group_exit+0x108/0x330 [ 1116.897025] [] get_signal+0x617/0x17a0 kernel/signal.c:2307 [ 1116.897025] [] do_signal+0x7f/0x18f0 [ 1116.897025] [] exit_to_usermode_loop+0xbf/0x150 arch/x86/entry/common.c:156 [ 1116.897025] [< inline >] prepare_exit_to_usermode arch/x86/entry/common.c:190 [ 1116.897025] [] syscall_return_slowpath+0x1a0/0x1e0 arch/x86/entry/common.c:259 [ 1116.897025] [] entry_SYSCALL_64_fastpath+0xc4/0xc6 Memory state around the buggy address: ffff8800081b0d80: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc ffff8800081b0e00: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc >ffff8800081b0e80: fc fc fc fc fc fc fc fc fb fb fb fb fb fb fb fb ^ ffff8800081b0f00: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ffff8800081b0f80: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ================================================================== The same issue exists with l2tp_ip_bind() and l2tp_ip_bind_table. Fixes: c51ce49735c1 ("l2tp: fix oops in L2TP IP sockets for connect() AF_UNSPEC case") Reported-by: Baozeng Ding Reported-by: Andrey Konovalov Tested-by: Baozeng Ding Signed-off-by: Guillaume Nault Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/l2tp/l2tp_ip.c | 5 +++-- net/l2tp/l2tp_ip6.c | 5 +++-- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/net/l2tp/l2tp_ip.c b/net/l2tp/l2tp_ip.c index 42de4ccd159f..d0e906d39642 100644 --- a/net/l2tp/l2tp_ip.c +++ b/net/l2tp/l2tp_ip.c @@ -251,8 +251,6 @@ static int l2tp_ip_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len) int ret; int chk_addr_ret; - if (!sock_flag(sk, SOCK_ZAPPED)) - return -EINVAL; if (addr_len < sizeof(struct sockaddr_l2tpip)) return -EINVAL; if (addr->l2tp_family != AF_INET) @@ -267,6 +265,9 @@ static int l2tp_ip_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len) read_unlock_bh(&l2tp_ip_lock); lock_sock(sk); + if (!sock_flag(sk, SOCK_ZAPPED)) + goto out; + if (sk->sk_state != TCP_CLOSE || addr_len < sizeof(struct sockaddr_l2tpip)) goto out; diff --git a/net/l2tp/l2tp_ip6.c b/net/l2tp/l2tp_ip6.c index 9ee4ddb6b397..3c4f867d3633 100644 --- a/net/l2tp/l2tp_ip6.c +++ b/net/l2tp/l2tp_ip6.c @@ -266,8 +266,6 @@ static int l2tp_ip6_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len) int addr_type; int err; - if (!sock_flag(sk, SOCK_ZAPPED)) - return -EINVAL; if (addr->l2tp_family != AF_INET6) return -EINVAL; if (addr_len < sizeof(*addr)) @@ -293,6 +291,9 @@ static int l2tp_ip6_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len) lock_sock(sk); err = -EINVAL; + if (!sock_flag(sk, SOCK_ZAPPED)) + goto out_unlock; + if (sk->sk_state != TCP_CLOSE) goto out_unlock; From 94de6f2ffb3de28f4a6c92a52e8f8cdcb7046847 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Tue, 22 Nov 2016 11:40:58 -0800 Subject: [PATCH 022/199] net: dsa: bcm_sf2: Ensure we re-negotiate EEE during after link change [ Upstream commit 76da8706d90d8641eeb9b8e579942ed80b6c0880 ] In case the link change and EEE is enabled or disabled, always try to re-negotiate this with the link partner. Fixes: 450b05c15f9c ("net: dsa: bcm_sf2: add support for controlling EEE") Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/dsa/bcm_sf2.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/net/dsa/bcm_sf2.c b/drivers/net/dsa/bcm_sf2.c index 6f946fedbb77..0864f05633a2 100644 --- a/drivers/net/dsa/bcm_sf2.c +++ b/drivers/net/dsa/bcm_sf2.c @@ -1137,6 +1137,7 @@ static void bcm_sf2_sw_adjust_link(struct dsa_switch *ds, int port, struct phy_device *phydev) { struct bcm_sf2_priv *priv = ds_to_priv(ds); + struct ethtool_eee *p = &priv->port_sts[port].eee; u32 id_mode_dis = 0, port_mode; const char *str = NULL; u32 reg; @@ -1211,6 +1212,9 @@ force_link: reg |= DUPLX_MODE; core_writel(priv, reg, CORE_STS_OVERRIDE_GMIIP_PORT(port)); + + if (!phydev->is_pseudo_fixed_link) + p->eee_enabled = bcm_sf2_eee_init(ds, port, phydev); } static void bcm_sf2_sw_fixed_link_update(struct dsa_switch *ds, int port, From cfa7c16d4577a6e738bbd2bc83c6c8d7344e2d1a Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Sun, 27 Nov 2016 01:18:01 +0100 Subject: [PATCH 023/199] net, sched: respect rcu grace period on cls destruction [ Upstream commit d936377414fadbafb4d17148d222fe45ca5442d4 ] Roi reported a crash in flower where tp->root was NULL in ->classify() callbacks. Reason is that in ->destroy() tp->root is set to NULL via RCU_INIT_POINTER(). It's problematic for some of the classifiers, because this doesn't respect RCU grace period for them, and as a result, still outstanding readers from tc_classify() will try to blindly dereference a NULL tp->root. The tp->root object is strictly private to the classifier implementation and holds internal data the core such as tc_ctl_tfilter() doesn't know about. Within some classifiers, such as cls_bpf, cls_basic, etc, tp->root is only checked for NULL in ->get() callback, but nowhere else. This is misleading and seemed to be copied from old classifier code that was not cleaned up properly. For example, d3fa76ee6b4a ("[NET_SCHED]: cls_basic: fix NULL pointer dereference") moved tp->root initialization into ->init() routine, where before it was part of ->change(), so ->get() had to deal with tp->root being NULL back then, so that was indeed a valid case, after d3fa76ee6b4a, not really anymore. We used to set tp->root to NULL long ago in ->destroy(), see 47a1a1d4be29 ("pkt_sched: remove unnecessary xchg() in packet classifiers"); but the NULLifying was reintroduced with the RCUification, but it's not correct for every classifier implementation. In the cases that are fixed here with one exception of cls_cgroup, tp->root object is allocated and initialized inside ->init() callback, which is always performed at a point in time after we allocate a new tp, which means tp and thus tp->root was not globally visible in the tp chain yet (see tc_ctl_tfilter()). Also, on destruction tp->root is strictly kfree_rcu()'ed in ->destroy() handler, same for the tp which is kfree_rcu()'ed right when we return from ->destroy() in tcf_destroy(). This means, the head object's lifetime for such classifiers is always tied to the tp lifetime. The RCU callback invocation for the two kfree_rcu() could be out of order, but that's fine since both are independent. Dropping the RCU_INIT_POINTER(tp->root, NULL) for these classifiers here means that 1) we don't need a useless NULL check in fast-path and, 2) that outstanding readers of that tp in tc_classify() can still execute under respect with RCU grace period as it is actually expected. Things that haven't been touched here: cls_fw and cls_route. They each handle tp->root being NULL in ->classify() path for historic reasons, so their ->destroy() implementation can stay as is. If someone actually cares, they could get cleaned up at some point to avoid the test in fast path. cls_u32 doesn't set tp->root to NULL. For cls_rsvp, I just added a !head should anyone actually be using/testing it, so it at least aligns with cls_fw and cls_route. For cls_flower we additionally need to defer rhashtable destruction (to a sleepable context) after RCU grace period as concurrent readers might still access it. (Note that in this case we need to hold module reference to keep work callback address intact, since we only wait on module unload for all call_rcu()s to finish.) This fixes one race to bring RCU grace period guarantees back. Next step as worked on by Cong however is to fix 1e052be69d04 ("net_sched: destroy proto tp when all filters are gone") to get the order of unlinking the tp in tc_ctl_tfilter() for the RTM_DELTFILTER case right by moving RCU_INIT_POINTER() before tcf_destroy() and let the notification for removal be done through the prior ->delete() callback. Both are independant issues. Once we have that right, we can then clean tp->root up for a number of classifiers by not making them RCU pointers, which requires a new callback (->uninit) that is triggered from tp's RCU callback, where we just kfree() tp->root from there. Fixes: 1f947bf151e9 ("net: sched: rcu'ify cls_bpf") Fixes: 9888faefe132 ("net: sched: cls_basic use RCU") Fixes: 70da9f0bf999 ("net: sched: cls_flow use RCU") Fixes: 77b9900ef53a ("tc: introduce Flower classifier") Fixes: bf3994d2ed31 ("net/sched: introduce Match-all classifier") Fixes: 952313bd6258 ("net: sched: cls_cgroup use RCU") Reported-by: Roi Dayan Signed-off-by: Daniel Borkmann Cc: Cong Wang Cc: John Fastabend Cc: Roi Dayan Cc: Jiri Pirko Acked-by: John Fastabend Acked-by: Cong Wang Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/sched/cls_basic.c | 4 ---- net/sched/cls_bpf.c | 4 ---- net/sched/cls_cgroup.c | 7 +++---- net/sched/cls_flow.c | 1 - net/sched/cls_flower.c | 31 ++++++++++++++++++++++++++----- net/sched/cls_rsvp.h | 3 ++- net/sched/cls_tcindex.c | 1 - 7 files changed, 31 insertions(+), 20 deletions(-) diff --git a/net/sched/cls_basic.c b/net/sched/cls_basic.c index 0b8c3ace671f..1bf1f4517db6 100644 --- a/net/sched/cls_basic.c +++ b/net/sched/cls_basic.c @@ -62,9 +62,6 @@ static unsigned long basic_get(struct tcf_proto *tp, u32 handle) struct basic_head *head = rtnl_dereference(tp->root); struct basic_filter *f; - if (head == NULL) - return 0UL; - list_for_each_entry(f, &head->flist, link) { if (f->handle == handle) { l = (unsigned long) f; @@ -109,7 +106,6 @@ static bool basic_destroy(struct tcf_proto *tp, bool force) tcf_unbind_filter(tp, &f->res); call_rcu(&f->rcu, basic_delete_filter); } - RCU_INIT_POINTER(tp->root, NULL); kfree_rcu(head, rcu); return true; } diff --git a/net/sched/cls_bpf.c b/net/sched/cls_bpf.c index 5faaa5425f7b..3eef0215e53f 100644 --- a/net/sched/cls_bpf.c +++ b/net/sched/cls_bpf.c @@ -199,7 +199,6 @@ static bool cls_bpf_destroy(struct tcf_proto *tp, bool force) call_rcu(&prog->rcu, __cls_bpf_delete_prog); } - RCU_INIT_POINTER(tp->root, NULL); kfree_rcu(head, rcu); return true; } @@ -210,9 +209,6 @@ static unsigned long cls_bpf_get(struct tcf_proto *tp, u32 handle) struct cls_bpf_prog *prog; unsigned long ret = 0UL; - if (head == NULL) - return 0UL; - list_for_each_entry(prog, &head->plist, link) { if (prog->handle == handle) { ret = (unsigned long) prog; diff --git a/net/sched/cls_cgroup.c b/net/sched/cls_cgroup.c index 4c85bd3a750c..c104c2019feb 100644 --- a/net/sched/cls_cgroup.c +++ b/net/sched/cls_cgroup.c @@ -130,11 +130,10 @@ static bool cls_cgroup_destroy(struct tcf_proto *tp, bool force) if (!force) return false; - - if (head) { - RCU_INIT_POINTER(tp->root, NULL); + /* Head can still be NULL due to cls_cgroup_init(). */ + if (head) call_rcu(&head->rcu, cls_cgroup_destroy_rcu); - } + return true; } diff --git a/net/sched/cls_flow.c b/net/sched/cls_flow.c index fbfec6a18839..d7ba2b4ff0f3 100644 --- a/net/sched/cls_flow.c +++ b/net/sched/cls_flow.c @@ -583,7 +583,6 @@ static bool flow_destroy(struct tcf_proto *tp, bool force) list_del_rcu(&f->list); call_rcu(&f->rcu, flow_destroy_filter); } - RCU_INIT_POINTER(tp->root, NULL); kfree_rcu(head, rcu); return true; } diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c index 95b021243233..e5a58c82728a 100644 --- a/net/sched/cls_flower.c +++ b/net/sched/cls_flower.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include @@ -55,7 +56,10 @@ struct cls_fl_head { bool mask_assigned; struct list_head filters; struct rhashtable_params ht_params; - struct rcu_head rcu; + union { + struct work_struct work; + struct rcu_head rcu; + }; }; struct cls_fl_filter { @@ -165,6 +169,24 @@ static void fl_destroy_filter(struct rcu_head *head) kfree(f); } +static void fl_destroy_sleepable(struct work_struct *work) +{ + struct cls_fl_head *head = container_of(work, struct cls_fl_head, + work); + if (head->mask_assigned) + rhashtable_destroy(&head->ht); + kfree(head); + module_put(THIS_MODULE); +} + +static void fl_destroy_rcu(struct rcu_head *rcu) +{ + struct cls_fl_head *head = container_of(rcu, struct cls_fl_head, rcu); + + INIT_WORK(&head->work, fl_destroy_sleepable); + schedule_work(&head->work); +} + static bool fl_destroy(struct tcf_proto *tp, bool force) { struct cls_fl_head *head = rtnl_dereference(tp->root); @@ -177,10 +199,9 @@ static bool fl_destroy(struct tcf_proto *tp, bool force) list_del_rcu(&f->list); call_rcu(&f->rcu, fl_destroy_filter); } - RCU_INIT_POINTER(tp->root, NULL); - if (head->mask_assigned) - rhashtable_destroy(&head->ht); - kfree_rcu(head, rcu); + + __module_get(THIS_MODULE); + call_rcu(&head->rcu, fl_destroy_rcu); return true; } diff --git a/net/sched/cls_rsvp.h b/net/sched/cls_rsvp.h index f9c9fc075fe6..9992dfac6938 100644 --- a/net/sched/cls_rsvp.h +++ b/net/sched/cls_rsvp.h @@ -152,7 +152,8 @@ static int rsvp_classify(struct sk_buff *skb, const struct tcf_proto *tp, return -1; nhptr = ip_hdr(skb); #endif - + if (unlikely(!head)) + return -1; restart: #if RSVP_DST_LEN == 4 diff --git a/net/sched/cls_tcindex.c b/net/sched/cls_tcindex.c index 944c8ff45055..403746b20263 100644 --- a/net/sched/cls_tcindex.c +++ b/net/sched/cls_tcindex.c @@ -503,7 +503,6 @@ static bool tcindex_destroy(struct tcf_proto *tp, bool force) walker.fn = tcindex_destroy_element; tcindex_walk(tp, &walker); - RCU_INIT_POINTER(tp->root, NULL); call_rcu(&p->rcu, __tcindex_destroy); return true; } From 6c42bd6a393c3c7b3be24f9bf57a26608df14739 Mon Sep 17 00:00:00 2001 From: Amir Vadai Date: Mon, 28 Nov 2016 12:56:40 +0200 Subject: [PATCH 024/199] net/sched: pedit: make sure that offset is valid [ Upstream commit 95c2027bfeda21a28eb245121e6a249f38d0788e ] Add a validation function to make sure offset is valid: 1. Not below skb head (could happen when offset is negative). 2. Validate both 'offset' and 'at'. Signed-off-by: Amir Vadai Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/sched/act_pedit.c | 24 ++++++++++++++++++++---- 1 file changed, 20 insertions(+), 4 deletions(-) diff --git a/net/sched/act_pedit.c b/net/sched/act_pedit.c index e38a7701f154..c3434e902445 100644 --- a/net/sched/act_pedit.c +++ b/net/sched/act_pedit.c @@ -104,6 +104,17 @@ static void tcf_pedit_cleanup(struct tc_action *a, int bind) kfree(keys); } +static bool offset_valid(struct sk_buff *skb, int offset) +{ + if (offset > 0 && offset > skb->len) + return false; + + if (offset < 0 && -offset > skb_headroom(skb)) + return false; + + return true; +} + static int tcf_pedit(struct sk_buff *skb, const struct tc_action *a, struct tcf_result *res) { @@ -130,6 +141,11 @@ static int tcf_pedit(struct sk_buff *skb, const struct tc_action *a, if (tkey->offmask) { char *d, _d; + if (!offset_valid(skb, off + tkey->at)) { + pr_info("tc filter pedit 'at' offset %d out of bounds\n", + off + tkey->at); + goto bad; + } d = skb_header_pointer(skb, off + tkey->at, 1, &_d); if (!d) @@ -142,10 +158,10 @@ static int tcf_pedit(struct sk_buff *skb, const struct tc_action *a, " offset must be on 32 bit boundaries\n"); goto bad; } - if (offset > 0 && offset > skb->len) { - pr_info("tc filter pedit" - " offset %d can't exceed pkt length %d\n", - offset, skb->len); + + if (!offset_valid(skb, off + offset)) { + pr_info("tc filter pedit offset %d out of bounds\n", + offset); goto bad; } From d1ed9c1dba632199c59bb8b1245c2f86e28c7380 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Mon, 28 Nov 2016 19:22:12 +0800 Subject: [PATCH 025/199] netlink: Call cb->done from a worker thread [ Upstream commit 707693c8a498697aa8db240b93eb76ec62e30892 ] The cb->done interface expects to be called in process context. This was broken by the netlink RCU conversion. This patch fixes it by adding a worker struct to make the cb->done call where necessary. Fixes: 21e4902aea80 ("netlink: Lockless lookup with RCU grace...") Reported-by: Subash Abhinov Kasiviswanathan Signed-off-by: Herbert Xu Acked-by: Cong Wang Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/netlink/af_netlink.c | 27 +++++++++++++++++++++++---- net/netlink/af_netlink.h | 2 ++ 2 files changed, 25 insertions(+), 4 deletions(-) diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 28fc283c1ec1..bbf7d2e3fe17 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -924,14 +924,11 @@ static void netlink_skb_set_owner_r(struct sk_buff *skb, struct sock *sk) sk_mem_charge(sk, skb->truesize); } -static void netlink_sock_destruct(struct sock *sk) +static void __netlink_sock_destruct(struct sock *sk) { struct netlink_sock *nlk = nlk_sk(sk); if (nlk->cb_running) { - if (nlk->cb.done) - nlk->cb.done(&nlk->cb); - module_put(nlk->cb.module); kfree_skb(nlk->cb.skb); } @@ -960,6 +957,28 @@ static void netlink_sock_destruct(struct sock *sk) WARN_ON(nlk_sk(sk)->groups); } +static void netlink_sock_destruct_work(struct work_struct *work) +{ + struct netlink_sock *nlk = container_of(work, struct netlink_sock, + work); + + nlk->cb.done(&nlk->cb); + __netlink_sock_destruct(&nlk->sk); +} + +static void netlink_sock_destruct(struct sock *sk) +{ + struct netlink_sock *nlk = nlk_sk(sk); + + if (nlk->cb_running && nlk->cb.done) { + INIT_WORK(&nlk->work, netlink_sock_destruct_work); + schedule_work(&nlk->work); + return; + } + + __netlink_sock_destruct(sk); +} + /* This lock without WQ_FLAG_EXCLUSIVE is good on UP and it is _very_ bad on * SMP. Look, when several writers sleep and reader wakes them up, all but one * immediately hit write lock and grab all the cpus. Exclusive sleep solves diff --git a/net/netlink/af_netlink.h b/net/netlink/af_netlink.h index 14437d9b1965..df32cb92d9fc 100644 --- a/net/netlink/af_netlink.h +++ b/net/netlink/af_netlink.h @@ -3,6 +3,7 @@ #include #include +#include #include #define NLGRPSZ(x) (ALIGN(x, sizeof(unsigned long) * 8) / 8) @@ -53,6 +54,7 @@ struct netlink_sock { struct rhash_head node; struct rcu_head rcu; + struct work_struct work; }; static inline struct netlink_sock *nlk_sk(struct sock *sk) From baaf0c65bc8ea9c7a404b09bc8cc3b8a1e4f18df Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Mon, 5 Dec 2016 15:28:21 +0800 Subject: [PATCH 026/199] netlink: Do not schedule work from sk_destruct [ Upstream commit ed5d7788a934a4b6d6d025e948ed4da496b4f12e ] It is wrong to schedule a work from sk_destruct using the socket as the memory reserve because the socket will be freed immediately after the return from sk_destruct. Instead we should do the deferral prior to sk_free. This patch does just that. Fixes: 707693c8a498 ("netlink: Call cb->done from a worker thread") Signed-off-by: Herbert Xu Tested-by: Andrey Konovalov Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/netlink/af_netlink.c | 32 +++++++++++++++----------------- 1 file changed, 15 insertions(+), 17 deletions(-) diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index bbf7d2e3fe17..360700a2f46c 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -924,11 +924,13 @@ static void netlink_skb_set_owner_r(struct sk_buff *skb, struct sock *sk) sk_mem_charge(sk, skb->truesize); } -static void __netlink_sock_destruct(struct sock *sk) +static void netlink_sock_destruct(struct sock *sk) { struct netlink_sock *nlk = nlk_sk(sk); if (nlk->cb_running) { + if (nlk->cb.done) + nlk->cb.done(&nlk->cb); module_put(nlk->cb.module); kfree_skb(nlk->cb.skb); } @@ -962,21 +964,7 @@ static void netlink_sock_destruct_work(struct work_struct *work) struct netlink_sock *nlk = container_of(work, struct netlink_sock, work); - nlk->cb.done(&nlk->cb); - __netlink_sock_destruct(&nlk->sk); -} - -static void netlink_sock_destruct(struct sock *sk) -{ - struct netlink_sock *nlk = nlk_sk(sk); - - if (nlk->cb_running && nlk->cb.done) { - INIT_WORK(&nlk->work, netlink_sock_destruct_work); - schedule_work(&nlk->work); - return; - } - - __netlink_sock_destruct(sk); + sk_free(&nlk->sk); } /* This lock without WQ_FLAG_EXCLUSIVE is good on UP and it is _very_ bad on @@ -1284,8 +1272,18 @@ out_module: static void deferred_put_nlk_sk(struct rcu_head *head) { struct netlink_sock *nlk = container_of(head, struct netlink_sock, rcu); + struct sock *sk = &nlk->sk; - sock_put(&nlk->sk); + if (!atomic_dec_and_test(&sk->sk_refcnt)) + return; + + if (nlk->cb_running && nlk->cb.done) { + INIT_WORK(&nlk->work, netlink_sock_destruct_work); + schedule_work(&nlk->work); + return; + } + + sk_free(sk); } static int netlink_release(struct socket *sock) From 1a15519fdcdb0f908ad1a3785178f9e18a2ebedb Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 28 Nov 2016 06:26:49 -0800 Subject: [PATCH 027/199] net/dccp: fix use-after-free in dccp_invalid_packet [ Upstream commit 648f0c28df282636c0c8a7a19ca3ce5fc80a39c3 ] pskb_may_pull() can reallocate skb->head, we need to reload dh pointer in dccp_invalid_packet() or risk use after free. Bug found by Andrey Konovalov using syzkaller. Signed-off-by: Eric Dumazet Reported-by: Andrey Konovalov Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/dccp/ipv4.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index 861e1fa25d5e..0759f5b9180e 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -698,6 +698,7 @@ int dccp_invalid_packet(struct sk_buff *skb) { const struct dccp_hdr *dh; unsigned int cscov; + u8 dccph_doff; if (skb->pkt_type != PACKET_HOST) return 1; @@ -719,18 +720,19 @@ int dccp_invalid_packet(struct sk_buff *skb) /* * If P.Data Offset is too small for packet type, drop packet and return */ - if (dh->dccph_doff < dccp_hdr_len(skb) / sizeof(u32)) { - DCCP_WARN("P.Data Offset(%u) too small\n", dh->dccph_doff); + dccph_doff = dh->dccph_doff; + if (dccph_doff < dccp_hdr_len(skb) / sizeof(u32)) { + DCCP_WARN("P.Data Offset(%u) too small\n", dccph_doff); return 1; } /* * If P.Data Offset is too too large for packet, drop packet and return */ - if (!pskb_may_pull(skb, dh->dccph_doff * sizeof(u32))) { - DCCP_WARN("P.Data Offset(%u) too large\n", dh->dccph_doff); + if (!pskb_may_pull(skb, dccph_doff * sizeof(u32))) { + DCCP_WARN("P.Data Offset(%u) too large\n", dccph_doff); return 1; } - + dh = dccp_hdr(skb); /* * If P.type is not Data, Ack, or DataAck and P.X == 0 (the packet * has short sequence numbers), drop packet and return From 5a01eaf19858278cc22525be118fe9c6a3c86e83 Mon Sep 17 00:00:00 2001 From: Philip Pettersson Date: Wed, 30 Nov 2016 14:55:36 -0800 Subject: [PATCH 028/199] packet: fix race condition in packet_set_ring [ Upstream commit 84ac7260236a49c79eede91617700174c2c19b0c ] When packet_set_ring creates a ring buffer it will initialize a struct timer_list if the packet version is TPACKET_V3. This value can then be raced by a different thread calling setsockopt to set the version to TPACKET_V1 before packet_set_ring has finished. This leads to a use-after-free on a function pointer in the struct timer_list when the socket is closed as the previously initialized timer will not be deleted. The bug is fixed by taking lock_sock(sk) in packet_setsockopt when changing the packet version while also taking the lock at the start of packet_set_ring. Fixes: f6fb8f100b80 ("af-packet: TPACKET_V3 flexible buffer implementation.") Signed-off-by: Philip Pettersson Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/packet/af_packet.c | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 34e4fcfd240b..f223d1c80ccf 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -3572,19 +3572,25 @@ packet_setsockopt(struct socket *sock, int level, int optname, char __user *optv if (optlen != sizeof(val)) return -EINVAL; - if (po->rx_ring.pg_vec || po->tx_ring.pg_vec) - return -EBUSY; if (copy_from_user(&val, optval, sizeof(val))) return -EFAULT; switch (val) { case TPACKET_V1: case TPACKET_V2: case TPACKET_V3: - po->tp_version = val; - return 0; + break; default: return -EINVAL; } + lock_sock(sk); + if (po->rx_ring.pg_vec || po->tx_ring.pg_vec) { + ret = -EBUSY; + } else { + po->tp_version = val; + ret = 0; + } + release_sock(sk); + return ret; } case PACKET_RESERVE: { @@ -4067,6 +4073,7 @@ static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u, /* Added to avoid minimal code churn */ struct tpacket_req *req = &req_u->req; + lock_sock(sk); /* Opening a Tx-ring is NOT supported in TPACKET_V3 */ if (!closing && tx_ring && (po->tp_version > TPACKET_V2)) { WARN(1, "Tx-ring is not supported.\n"); @@ -4148,7 +4155,6 @@ static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u, goto out; } - lock_sock(sk); /* Detach socket from network */ spin_lock(&po->bind_lock); @@ -4197,11 +4203,11 @@ static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u, if (!tx_ring) prb_shutdown_retire_blk_timer(po, rb_queue); } - release_sock(sk); if (pg_vec) free_pg_vec(pg_vec, order, req->tp_block_nr); out: + release_sock(sk); return err; } From c36a2a14f26dd3ed95ce75ba69fb7435d7da9bf0 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Thu, 1 Dec 2016 09:45:45 -0800 Subject: [PATCH 029/199] net: bcmgenet: Utilize correct struct device for all DMA operations [ Upstream commit 8c4799ac799665065f9bf1364fd71bf4f7dc6a4a ] __bcmgenet_tx_reclaim() and bcmgenet_free_rx_buffers() are not using the same struct device during unmap that was used for the map operation, which makes DMA-API debugging warn about it. Fix this by always using &priv->pdev->dev throughout the driver, using an identical device reference for all map/unmap calls. Fixes: 1c1008c793fa ("net: bcmgenet: add main driver file") Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/broadcom/genet/bcmgenet.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.c b/drivers/net/ethernet/broadcom/genet/bcmgenet.c index 0fb3f8de88e9..91627561c58d 100644 --- a/drivers/net/ethernet/broadcom/genet/bcmgenet.c +++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.c @@ -1168,6 +1168,7 @@ static unsigned int __bcmgenet_tx_reclaim(struct net_device *dev, struct bcmgenet_tx_ring *ring) { struct bcmgenet_priv *priv = netdev_priv(dev); + struct device *kdev = &priv->pdev->dev; struct enet_cb *tx_cb_ptr; struct netdev_queue *txq; unsigned int pkts_compl = 0; @@ -1195,7 +1196,7 @@ static unsigned int __bcmgenet_tx_reclaim(struct net_device *dev, pkts_compl++; dev->stats.tx_packets++; dev->stats.tx_bytes += tx_cb_ptr->skb->len; - dma_unmap_single(&dev->dev, + dma_unmap_single(kdev, dma_unmap_addr(tx_cb_ptr, dma_addr), dma_unmap_len(tx_cb_ptr, dma_len), DMA_TO_DEVICE); @@ -1203,7 +1204,7 @@ static unsigned int __bcmgenet_tx_reclaim(struct net_device *dev, } else if (dma_unmap_addr(tx_cb_ptr, dma_addr)) { dev->stats.tx_bytes += dma_unmap_len(tx_cb_ptr, dma_len); - dma_unmap_page(&dev->dev, + dma_unmap_page(kdev, dma_unmap_addr(tx_cb_ptr, dma_addr), dma_unmap_len(tx_cb_ptr, dma_len), DMA_TO_DEVICE); @@ -1754,6 +1755,7 @@ static int bcmgenet_alloc_rx_buffers(struct bcmgenet_priv *priv, static void bcmgenet_free_rx_buffers(struct bcmgenet_priv *priv) { + struct device *kdev = &priv->pdev->dev; struct enet_cb *cb; int i; @@ -1761,7 +1763,7 @@ static void bcmgenet_free_rx_buffers(struct bcmgenet_priv *priv) cb = &priv->rx_cbs[i]; if (dma_unmap_addr(cb, dma_addr)) { - dma_unmap_single(&priv->dev->dev, + dma_unmap_single(kdev, dma_unmap_addr(cb, dma_addr), priv->rx_buf_len, DMA_FROM_DEVICE); dma_unmap_addr_set(cb, dma_addr, 0); From a89e2ff894bc317a49b9b46ef70eb62e05a8bf7c Mon Sep 17 00:00:00 2001 From: Chris Brandt Date: Thu, 1 Dec 2016 13:32:14 -0500 Subject: [PATCH 030/199] sh_eth: remove unchecked interrupts for RZ/A1 [ Upstream commit 33d446dbba4d4d6a77e1e900d434fa99e0f02c86 ] When streaming a lot of data and the RZ/A1 can't keep up, some status bits will get set that are not being checked or cleared which cause the following messages and the Ethernet driver to stop working. This patch fixes that issue. irq 21: nobody cared (try booting with the "irqpoll" option) handlers: [] sh_eth_interrupt Disabling IRQ #21 Fixes: db893473d313a4ad ("sh_eth: Add support for r7s72100") Signed-off-by: Chris Brandt Acked-by: Sergei Shtylyov Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/renesas/sh_eth.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/renesas/sh_eth.c b/drivers/net/ethernet/renesas/sh_eth.c index 36fc9427418f..480f3dae0780 100644 --- a/drivers/net/ethernet/renesas/sh_eth.c +++ b/drivers/net/ethernet/renesas/sh_eth.c @@ -832,7 +832,7 @@ static struct sh_eth_cpu_data r7s72100_data = { .ecsr_value = ECSR_ICD, .ecsipr_value = ECSIPR_ICDIP, - .eesipr_value = 0xff7f009f, + .eesipr_value = 0xe77f009f, .tx_check = EESR_TC1 | EESR_FTC, .eesr_err_check = EESR_TWB1 | EESR_TWB | EESR_TABT | EESR_RABT | From 6e682c528b3ef046a03994b076d832b2ac029042 Mon Sep 17 00:00:00 2001 From: Sabrina Dubroca Date: Fri, 2 Dec 2016 16:49:29 +0100 Subject: [PATCH 031/199] geneve: avoid use-after-free of skb->data [ Upstream commit 5b01014759991887b1e450c9def01e58c02ab81b ] geneve{,6}_build_skb can end up doing a pskb_expand_head(), which makes the ip_hdr(skb) reference we stashed earlier stale. Since it's only needed as an argument to ip_tunnel_ecn_encap(), move this directly in the function call. Fixes: 08399efc6319 ("geneve: ensure ECN info is handled properly in all tx/rx paths") Signed-off-by: Sabrina Dubroca Reviewed-by: John W. Linville Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/geneve.c | 14 ++++---------- 1 file changed, 4 insertions(+), 10 deletions(-) diff --git a/drivers/net/geneve.c b/drivers/net/geneve.c index 4827c6987ac3..f0961cbaf87e 100644 --- a/drivers/net/geneve.c +++ b/drivers/net/geneve.c @@ -815,7 +815,6 @@ static netdev_tx_t geneve_xmit_skb(struct sk_buff *skb, struct net_device *dev, struct geneve_dev *geneve = netdev_priv(dev); struct geneve_sock *gs4 = geneve->sock4; struct rtable *rt = NULL; - const struct iphdr *iip; /* interior IP header */ int err = -EINVAL; struct flowi4 fl4; __u8 tos, ttl; @@ -842,8 +841,6 @@ static netdev_tx_t geneve_xmit_skb(struct sk_buff *skb, struct net_device *dev, sport = udp_flow_src_port(geneve->net, skb, 1, USHRT_MAX, true); skb_reset_mac_header(skb); - iip = ip_hdr(skb); - if (info) { const struct ip_tunnel_key *key = &info->key; u8 *opts = NULL; @@ -859,7 +856,7 @@ static netdev_tx_t geneve_xmit_skb(struct sk_buff *skb, struct net_device *dev, if (unlikely(err)) goto err; - tos = ip_tunnel_ecn_encap(key->tos, iip, skb); + tos = ip_tunnel_ecn_encap(key->tos, ip_hdr(skb), skb); ttl = key->ttl; df = key->tun_flags & TUNNEL_DONT_FRAGMENT ? htons(IP_DF) : 0; } else { @@ -869,7 +866,7 @@ static netdev_tx_t geneve_xmit_skb(struct sk_buff *skb, struct net_device *dev, if (unlikely(err)) goto err; - tos = ip_tunnel_ecn_encap(fl4.flowi4_tos, iip, skb); + tos = ip_tunnel_ecn_encap(fl4.flowi4_tos, ip_hdr(skb), skb); ttl = geneve->ttl; if (!ttl && IN_MULTICAST(ntohl(fl4.daddr))) ttl = 1; @@ -903,7 +900,6 @@ static netdev_tx_t geneve6_xmit_skb(struct sk_buff *skb, struct net_device *dev, struct geneve_dev *geneve = netdev_priv(dev); struct geneve_sock *gs6 = geneve->sock6; struct dst_entry *dst = NULL; - const struct iphdr *iip; /* interior IP header */ int err = -EINVAL; struct flowi6 fl6; __u8 prio, ttl; @@ -927,8 +923,6 @@ static netdev_tx_t geneve6_xmit_skb(struct sk_buff *skb, struct net_device *dev, sport = udp_flow_src_port(geneve->net, skb, 1, USHRT_MAX, true); skb_reset_mac_header(skb); - iip = ip_hdr(skb); - if (info) { const struct ip_tunnel_key *key = &info->key; u8 *opts = NULL; @@ -945,7 +939,7 @@ static netdev_tx_t geneve6_xmit_skb(struct sk_buff *skb, struct net_device *dev, if (unlikely(err)) goto err; - prio = ip_tunnel_ecn_encap(key->tos, iip, skb); + prio = ip_tunnel_ecn_encap(key->tos, ip_hdr(skb), skb); ttl = key->ttl; } else { udp_csum = false; @@ -954,7 +948,7 @@ static netdev_tx_t geneve6_xmit_skb(struct sk_buff *skb, struct net_device *dev, if (unlikely(err)) goto err; - prio = ip_tunnel_ecn_encap(fl6.flowi6_tos, iip, skb); + prio = ip_tunnel_ecn_encap(fl6.flowi6_tos, ip_hdr(skb), skb); ttl = geneve->ttl; if (!ttl && ipv6_addr_is_multicast(&fl6.daddr)) ttl = 1; From 77125815f058d587cac9217ac2c468038a7285c4 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 2 Dec 2016 09:44:53 -0800 Subject: [PATCH 032/199] net: avoid signed overflows for SO_{SND|RCV}BUFFORCE [ Upstream commit b98b0bc8c431e3ceb4b26b0dfc8db509518fb290 ] CAP_NET_ADMIN users should not be allowed to set negative sk_sndbuf or sk_rcvbuf values, as it can lead to various memory corruptions, crashes, OOM... Note that before commit 82981930125a ("net: cleanups in sock_setsockopt()"), the bug was even more serious, since SO_SNDBUF and SO_RCVBUF were vulnerable. This needs to be backported to all known linux kernels. Again, many thanks to syzkaller team for discovering this gem. Signed-off-by: Eric Dumazet Reported-by: Andrey Konovalov Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/core/sock.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/core/sock.c b/net/core/sock.c index 88f017854509..f4c0917e66b5 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -745,7 +745,7 @@ int sock_setsockopt(struct socket *sock, int level, int optname, val = min_t(u32, val, sysctl_wmem_max); set_sndbuf: sk->sk_userlocks |= SOCK_SNDBUF_LOCK; - sk->sk_sndbuf = max_t(u32, val * 2, SOCK_MIN_SNDBUF); + sk->sk_sndbuf = max_t(int, val * 2, SOCK_MIN_SNDBUF); /* Wake up sending tasks if we upped the value. */ sk->sk_write_space(sk); break; @@ -781,7 +781,7 @@ set_rcvbuf: * returning the value we actually used in getsockopt * is the most desirable behavior. */ - sk->sk_rcvbuf = max_t(u32, val * 2, SOCK_MIN_RCVBUF); + sk->sk_rcvbuf = max_t(int, val * 2, SOCK_MIN_RCVBUF); break; case SO_RCVBUFFORCE: From 06cdad2b6d921dee33c8efc84922533dfb1458c6 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Mon, 5 Dec 2016 10:34:38 -0800 Subject: [PATCH 033/199] net: ping: check minimum size on ICMP header length [ Upstream commit 0eab121ef8750a5c8637d51534d5e9143fb0633f ] Prior to commit c0371da6047a ("put iov_iter into msghdr") in v3.19, there was no check that the iovec contained enough bytes for an ICMP header, and the read loop would walk across neighboring stack contents. Since the iov_iter conversion, bad arguments are noticed, but the returned error is EFAULT. Returning EINVAL is a clearer error and also solves the problem prior to v3.19. This was found using trinity with KASAN on v3.18: BUG: KASAN: stack-out-of-bounds in memcpy_fromiovec+0x60/0x114 at addr ffffffc071077da0 Read of size 8 by task trinity-c2/9623 page:ffffffbe034b9a08 count:0 mapcount:0 mapping: (null) index:0x0 flags: 0x0() page dumped because: kasan: bad access detected CPU: 0 PID: 9623 Comm: trinity-c2 Tainted: G BU 3.18.0-dirty #15 Hardware name: Google Tegra210 Smaug Rev 1,3+ (DT) Call trace: [] dump_backtrace+0x0/0x1ac arch/arm64/kernel/traps.c:90 [] show_stack+0x10/0x1c arch/arm64/kernel/traps.c:171 [< inline >] __dump_stack lib/dump_stack.c:15 [] dump_stack+0x7c/0xd0 lib/dump_stack.c:50 [< inline >] print_address_description mm/kasan/report.c:147 [< inline >] kasan_report_error mm/kasan/report.c:236 [] kasan_report+0x380/0x4b8 mm/kasan/report.c:259 [< inline >] check_memory_region mm/kasan/kasan.c:264 [] __asan_load8+0x20/0x70 mm/kasan/kasan.c:507 [] memcpy_fromiovec+0x5c/0x114 lib/iovec.c:15 [< inline >] memcpy_from_msg include/linux/skbuff.h:2667 [] ping_common_sendmsg+0x50/0x108 net/ipv4/ping.c:674 [] ping_v4_sendmsg+0xd8/0x698 net/ipv4/ping.c:714 [] inet_sendmsg+0xe0/0x12c net/ipv4/af_inet.c:749 [< inline >] __sock_sendmsg_nosec net/socket.c:624 [< inline >] __sock_sendmsg net/socket.c:632 [] sock_sendmsg+0x124/0x164 net/socket.c:643 [< inline >] SYSC_sendto net/socket.c:1797 [] SyS_sendto+0x178/0x1d8 net/socket.c:1761 CVE-2016-8399 Reported-by: Qidan He Fixes: c319b4d76b9e ("net: ipv4: add IPPROTO_ICMP socket kind") Cc: stable@vger.kernel.org Signed-off-by: Kees Cook Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/ping.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c index aa67e0e64b69..23160d2b3f71 100644 --- a/net/ipv4/ping.c +++ b/net/ipv4/ping.c @@ -660,6 +660,10 @@ int ping_common_sendmsg(int family, struct msghdr *msg, size_t len, if (len > 0xFFFF) return -EMSGSIZE; + /* Must have at least a full ICMP header. */ + if (len < icmph_len) + return -EINVAL; + /* * Check the flags. */ From 438e91da24fa63d3b52faafd7302081e9e79207a Mon Sep 17 00:00:00 2001 From: Andreas Larsson Date: Wed, 9 Nov 2016 10:43:05 +0100 Subject: [PATCH 034/199] sparc32: Fix inverted invalid_frame_pointer checks on sigreturns [ Upstream commit 07b5ab3f71d318e52c18cc3b73c1d44c908aacfa ] Signed-off-by: Andreas Larsson Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- arch/sparc/kernel/signal_32.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/sparc/kernel/signal_32.c b/arch/sparc/kernel/signal_32.c index c3c12efe0bc0..9c0c8fd0b292 100644 --- a/arch/sparc/kernel/signal_32.c +++ b/arch/sparc/kernel/signal_32.c @@ -89,7 +89,7 @@ asmlinkage void do_sigreturn(struct pt_regs *regs) sf = (struct signal_frame __user *) regs->u_regs[UREG_FP]; /* 1. Make sure we are not getting garbage from the user */ - if (!invalid_frame_pointer(sf, sizeof(*sf))) + if (invalid_frame_pointer(sf, sizeof(*sf))) goto segv_and_exit; if (get_user(ufp, &sf->info.si_regs.u_regs[UREG_FP])) @@ -150,7 +150,7 @@ asmlinkage void do_rt_sigreturn(struct pt_regs *regs) synchronize_user_stack(); sf = (struct rt_signal_frame __user *) regs->u_regs[UREG_FP]; - if (!invalid_frame_pointer(sf, sizeof(*sf))) + if (invalid_frame_pointer(sf, sizeof(*sf))) goto segv; if (get_user(ufp, &sf->regs.u_regs[UREG_FP])) From ed7b60db00a17a3bc4705014ebec4c035577b9e5 Mon Sep 17 00:00:00 2001 From: Thomas Tai Date: Thu, 3 Nov 2016 09:19:01 -0700 Subject: [PATCH 035/199] sparc64: Fix find_node warning if numa node cannot be found [ Upstream commit 74a5ed5c4f692df2ff0a2313ea71e81243525519 ] When booting up LDOM, find_node() warns that a physical address doesn't match a NUMA node. WARNING: CPU: 0 PID: 0 at arch/sparc/mm/init_64.c:835 find_node+0xf4/0x120 find_node: A physical address doesn't match a NUMA node rule. Some physical memory will be owned by node 0.Modules linked in: CPU: 0 PID: 0 Comm: swapper Not tainted 4.9.0-rc3 #4 Call Trace: [0000000000468ba0] __warn+0xc0/0xe0 [0000000000468c74] warn_slowpath_fmt+0x34/0x60 [00000000004592f4] find_node+0xf4/0x120 [0000000000dd0774] add_node_ranges+0x38/0xe4 [0000000000dd0b1c] numa_parse_mdesc+0x268/0x2e4 [0000000000dd0e9c] bootmem_init+0xb8/0x160 [0000000000dd174c] paging_init+0x808/0x8fc [0000000000dcb0d0] setup_arch+0x2c8/0x2f0 [0000000000dc68a0] start_kernel+0x48/0x424 [0000000000dcb374] start_early_boot+0x27c/0x28c [0000000000a32c08] tlb_fixup_done+0x4c/0x64 [0000000000027f08] 0x27f08 It is because linux use an internal structure node_masks[] to keep the best memory latency node only. However, LDOM mdesc can contain single latency-group with multiple memory latency nodes. If the address doesn't match the best latency node within node_masks[], it should check for an alternative via mdesc. The warning message should only be printed if the address doesn't match any node_masks[] nor within mdesc. To minimize the impact of searching mdesc every time, the last matched mask and index is stored in a variable. Signed-off-by: Thomas Tai Reviewed-by: Chris Hyser Reviewed-by: Liam Merwick Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- arch/sparc/mm/init_64.c | 65 ++++++++++++++++++++++++++++++++++++++--- 1 file changed, 61 insertions(+), 4 deletions(-) diff --git a/arch/sparc/mm/init_64.c b/arch/sparc/mm/init_64.c index a5331c336b2a..83bf3418fb56 100644 --- a/arch/sparc/mm/init_64.c +++ b/arch/sparc/mm/init_64.c @@ -800,6 +800,8 @@ struct mdesc_mblock { }; static struct mdesc_mblock *mblocks; static int num_mblocks; +static int find_numa_node_for_addr(unsigned long pa, + struct node_mem_mask *pnode_mask); static unsigned long ra_to_pa(unsigned long addr) { @@ -819,6 +821,9 @@ static unsigned long ra_to_pa(unsigned long addr) static int find_node(unsigned long addr) { + static bool search_mdesc = true; + static struct node_mem_mask last_mem_mask = { ~0UL, ~0UL }; + static int last_index; int i; addr = ra_to_pa(addr); @@ -828,10 +833,27 @@ static int find_node(unsigned long addr) if ((addr & p->mask) == p->val) return i; } - /* The following condition has been observed on LDOM guests.*/ - WARN_ONCE(1, "find_node: A physical address doesn't match a NUMA node" - " rule. Some physical memory will be owned by node 0."); - return 0; + /* The following condition has been observed on LDOM guests because + * node_masks only contains the best latency mask and value. + * LDOM guest's mdesc can contain a single latency group to + * cover multiple address range. Print warning message only if the + * address cannot be found in node_masks nor mdesc. + */ + if ((search_mdesc) && + ((addr & last_mem_mask.mask) != last_mem_mask.val)) { + /* find the available node in the mdesc */ + last_index = find_numa_node_for_addr(addr, &last_mem_mask); + numadbg("find_node: latency group for address 0x%lx is %d\n", + addr, last_index); + if ((last_index < 0) || (last_index >= num_node_masks)) { + /* WARN_ONCE() and use default group 0 */ + WARN_ONCE(1, "find_node: A physical address doesn't match a NUMA node rule. Some physical memory will be owned by node 0."); + search_mdesc = false; + last_index = 0; + } + } + + return last_index; } static u64 memblock_nid_range(u64 start, u64 end, int *nid) @@ -1158,6 +1180,41 @@ int __node_distance(int from, int to) return numa_latency[from][to]; } +static int find_numa_node_for_addr(unsigned long pa, + struct node_mem_mask *pnode_mask) +{ + struct mdesc_handle *md = mdesc_grab(); + u64 node, arc; + int i = 0; + + node = mdesc_node_by_name(md, MDESC_NODE_NULL, "latency-groups"); + if (node == MDESC_NODE_NULL) + goto out; + + mdesc_for_each_node_by_name(md, node, "group") { + mdesc_for_each_arc(arc, md, node, MDESC_ARC_TYPE_FWD) { + u64 target = mdesc_arc_target(md, arc); + struct mdesc_mlgroup *m = find_mlgroup(target); + + if (!m) + continue; + if ((pa & m->mask) == m->match) { + if (pnode_mask) { + pnode_mask->mask = m->mask; + pnode_mask->val = m->match; + } + mdesc_release(md); + return i; + } + } + i++; + } + +out: + mdesc_release(md); + return -1; +} + static int find_best_numa_node_for_mlgroup(struct mdesc_mlgroup *grp) { int i; From 899b60535a2af978cdd798932f688b887decf424 Mon Sep 17 00:00:00 2001 From: Thomas Tai Date: Fri, 11 Nov 2016 16:41:00 -0800 Subject: [PATCH 036/199] sparc64: fix compile warning section mismatch in find_node() [ Upstream commit 87a349f9cc0908bc0cfac0c9ece3179f650ae95a ] A compile warning is introduced by a commit to fix the find_node(). This patch fix the compile warning by moving find_node() into __init section. Because find_node() is only used by memblock_nid_range() which is only used by a __init add_node_ranges(). find_node() and memblock_nid_range() should also be inside __init section. Signed-off-by: Thomas Tai Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- arch/sparc/mm/init_64.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/sparc/mm/init_64.c b/arch/sparc/mm/init_64.c index 83bf3418fb56..3d3414c14792 100644 --- a/arch/sparc/mm/init_64.c +++ b/arch/sparc/mm/init_64.c @@ -803,7 +803,7 @@ static int num_mblocks; static int find_numa_node_for_addr(unsigned long pa, struct node_mem_mask *pnode_mask); -static unsigned long ra_to_pa(unsigned long addr) +static unsigned long __init ra_to_pa(unsigned long addr) { int i; @@ -819,7 +819,7 @@ static unsigned long ra_to_pa(unsigned long addr) return addr; } -static int find_node(unsigned long addr) +static int __init find_node(unsigned long addr) { static bool search_mdesc = true; static struct node_mem_mask last_mem_mask = { ~0UL, ~0UL }; @@ -856,7 +856,7 @@ static int find_node(unsigned long addr) return last_index; } -static u64 memblock_nid_range(u64 start, u64 end, int *nid) +static u64 __init memblock_nid_range(u64 start, u64 end, int *nid) { *nid = find_node(start); start += PAGE_SIZE; From fd1aa12c63400ca7d5b6d189ae98570f47735180 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Mon, 10 Oct 2016 13:57:37 -0400 Subject: [PATCH 037/199] constify iov_iter_count() and iter_is_iovec() commit b57332b4105abf1d518d93886e547ee2f98cd414 upstream. [stable note, need this to prevent build warning in commit a0ac402cfcdc904f9772e1762b3fda112dcc56a0] Signed-off-by: Al Viro Signed-off-by: Greg Kroah-Hartman --- include/linux/uio.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/linux/uio.h b/include/linux/uio.h index 5f9c59da978b..e2225109b816 100644 --- a/include/linux/uio.h +++ b/include/linux/uio.h @@ -101,12 +101,12 @@ int iov_iter_npages(const struct iov_iter *i, int maxpages); const void *dup_iter(struct iov_iter *new, struct iov_iter *old, gfp_t flags); -static inline size_t iov_iter_count(struct iov_iter *i) +static inline size_t iov_iter_count(const struct iov_iter *i) { return i->count; } -static inline bool iter_is_iovec(struct iov_iter *i) +static inline bool iter_is_iovec(const struct iov_iter *i) { return !(i->type & (ITER_BVEC | ITER_KVEC)); } From d41fb2fbb28d3a1085960edada6c046becd1fafd Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Tue, 6 Dec 2016 16:18:14 -0800 Subject: [PATCH 038/199] Don't feed anything but regular iovec's to blk_rq_map_user_iov commit a0ac402cfcdc904f9772e1762b3fda112dcc56a0 upstream. In theory we could map other things, but there's a reason that function is called "user_iov". Using anything else (like splice can do) just confuses it. Reported-and-tested-by: Johannes Thumshirn Cc: Al Viro Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- block/blk-map.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/block/blk-map.c b/block/blk-map.c index f565e11f465a..69953bd97e65 100644 --- a/block/blk-map.c +++ b/block/blk-map.c @@ -90,6 +90,9 @@ int blk_rq_map_user_iov(struct request_queue *q, struct request *rq, if (!iter || !iter->count) return -EINVAL; + if (!iter_is_iovec(iter)) + return -EINVAL; + iov_for_each(iov, i, *iter) { unsigned long uaddr = (unsigned long) iov.iov_base; From 25d8b7c105e13df4a0b14159a85c448158570909 Mon Sep 17 00:00:00 2001 From: Eli Cooper Date: Thu, 1 Dec 2016 10:05:11 +0800 Subject: [PATCH 039/199] ipv6: Set skb->protocol properly for local output commit b4e479a96fc398ccf83bb1cffb4ffef8631beaf1 upstream. When xfrm is applied to TSO/GSO packets, it follows this path: xfrm_output() -> xfrm_output_gso() -> skb_gso_segment() where skb_gso_segment() relies on skb->protocol to function properly. This patch sets skb->protocol to ETH_P_IPV6 before dst_output() is called, fixing a bug where GSO packets sent through an ipip6 tunnel are dropped when xfrm is involved. Signed-off-by: Eli Cooper Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv6/output_core.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/ipv6/output_core.c b/net/ipv6/output_core.c index 462f2a76b5c2..1d184322a7b1 100644 --- a/net/ipv6/output_core.c +++ b/net/ipv6/output_core.c @@ -148,6 +148,8 @@ int __ip6_local_out(struct net *net, struct sock *sk, struct sk_buff *skb) ipv6_hdr(skb)->payload_len = htons(len); IP6CB(skb)->nhoff = offsetof(struct ipv6hdr, nexthdr); + skb->protocol = htons(ETH_P_IPV6); + return nf_hook(NFPROTO_IPV6, NF_INET_LOCAL_OUT, net, sk, skb, NULL, skb_dst(skb)->dev, dst_output); From 2176ec1c82eae70a9d43a974455d9bfe3dcd7bd2 Mon Sep 17 00:00:00 2001 From: Eli Cooper Date: Thu, 1 Dec 2016 10:05:10 +0800 Subject: [PATCH 040/199] ipv4: Set skb->protocol properly for local output commit f4180439109aa720774baafdd798b3234ab1a0d2 upstream. When xfrm is applied to TSO/GSO packets, it follows this path: xfrm_output() -> xfrm_output_gso() -> skb_gso_segment() where skb_gso_segment() relies on skb->protocol to function properly. This patch sets skb->protocol to ETH_P_IP before dst_output() is called, fixing a bug where GSO packets sent through a sit tunnel are dropped when xfrm is involved. Signed-off-by: Eli Cooper Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/ip_output.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index f2ad5216c438..2b7283303650 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -102,6 +102,9 @@ int __ip_local_out(struct net *net, struct sock *sk, struct sk_buff *skb) iph->tot_len = htons(skb->len); ip_send_check(iph); + + skb->protocol = htons(ETH_P_IP); + return nf_hook(NFPROTO_IPV4, NF_INET_LOCAL_OUT, net, sk, skb, NULL, skb_dst(skb)->dev, dst_output); From 3bf28ce9c7499477aff4e6ecf07071978c2cabcf Mon Sep 17 00:00:00 2001 From: Tobias Brunner Date: Tue, 29 Nov 2016 17:05:20 +0100 Subject: [PATCH 041/199] esp4: Fix integrity verification when ESN are used commit 7c7fedd51c02f4418e8b2eed64bdab601f882aa4 upstream. When handling inbound packets, the two halves of the sequence number stored on the skb are already in network order. Fixes: 7021b2e1cddd ("esp4: Switch to new AEAD interface") Signed-off-by: Tobias Brunner Acked-by: Herbert Xu Signed-off-by: Steffen Klassert Signed-off-by: Greg Kroah-Hartman --- net/ipv4/esp4.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c index d95631d09248..20fb25e3027b 100644 --- a/net/ipv4/esp4.c +++ b/net/ipv4/esp4.c @@ -476,7 +476,7 @@ static int esp_input(struct xfrm_state *x, struct sk_buff *skb) esph = (void *)skb_push(skb, 4); *seqhi = esph->spi; esph->spi = esph->seq_no; - esph->seq_no = htonl(XFRM_SKB_CB(skb)->seq.input.hi); + esph->seq_no = XFRM_SKB_CB(skb)->seq.input.hi; aead_request_set_callback(req, 0, esp_input_done_esn, skb); } From 52783ada69b4741cf0122bd593493cc6c3217b9b Mon Sep 17 00:00:00 2001 From: Tobias Brunner Date: Tue, 29 Nov 2016 17:05:25 +0100 Subject: [PATCH 042/199] esp6: Fix integrity verification when ESN are used commit a55e23864d381c5a4ef110df94b00b2fe121a70d upstream. When handling inbound packets, the two halves of the sequence number stored on the skb are already in network order. Fixes: 000ae7b2690e ("esp6: Switch to new AEAD interface") Signed-off-by: Tobias Brunner Acked-by: Herbert Xu Signed-off-by: Steffen Klassert Signed-off-by: Greg Kroah-Hartman --- net/ipv6/esp6.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c index 060a60b2f8a6..111ba55fd512 100644 --- a/net/ipv6/esp6.c +++ b/net/ipv6/esp6.c @@ -418,7 +418,7 @@ static int esp6_input(struct xfrm_state *x, struct sk_buff *skb) esph = (void *)skb_push(skb, 4); *seqhi = esph->spi; esph->spi = esph->seq_no; - esph->seq_no = htonl(XFRM_SKB_CB(skb)->seq.input.hi); + esph->seq_no = XFRM_SKB_CB(skb)->seq.input.hi; aead_request_set_callback(req, 0, esp_input_done_esn, skb); } From c95b7f1fab0c76882764a5196119237c8ad436ee Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Sat, 10 Dec 2016 19:08:05 +0100 Subject: [PATCH 043/199] Linux 4.4.38 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index b57ec79b4941..6876efe0d735 100644 --- a/Makefile +++ b/Makefile @@ -1,6 +1,6 @@ VERSION = 4 PATCHLEVEL = 4 -SUBLEVEL = 37 +SUBLEVEL = 38 EXTRAVERSION = NAME = Blurry Fish Butt From 4bcea472de100311614c442f7236d0bcc4a413b3 Mon Sep 17 00:00:00 2001 From: Andrew Donnellan Date: Thu, 1 Dec 2016 11:23:05 +1100 Subject: [PATCH 044/199] powerpc/eeh: Fix deadlock when PE frozen state can't be cleared commit 409bf7f8a02ef88db5a0f2cdcf9489914f4b8508 upstream. In eeh_reset_device(), we take the pci_rescan_remove_lock immediately after after we call eeh_reset_pe() to reset the PCI controller. We then call eeh_clear_pe_frozen_state(), which can return an error. In this case, we bail out of eeh_reset_device() without calling pci_unlock_rescan_remove(). Add a call to pci_unlock_rescan_remove() in the eeh_clear_pe_frozen_state() error path so that we don't cause a deadlock later on. Reported-by: Pradipta Ghosh Fixes: 78954700631f ("powerpc/eeh: Avoid I/O access during PE reset") Signed-off-by: Andrew Donnellan Acked-by: Russell Currey Signed-off-by: Michael Ellerman Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/kernel/eeh_driver.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/kernel/eeh_driver.c b/arch/powerpc/kernel/eeh_driver.c index c07bfb52275e..300382e5a2cc 100644 --- a/arch/powerpc/kernel/eeh_driver.c +++ b/arch/powerpc/kernel/eeh_driver.c @@ -612,8 +612,10 @@ static int eeh_reset_device(struct eeh_pe *pe, struct pci_bus *bus) /* Clear frozen state */ rc = eeh_clear_pe_frozen_state(pe, false); - if (rc) + if (rc) { + pci_unlock_rescan_remove(); return rc; + } /* Give the system 5 seconds to finish running the user-space * hotplug shutdown scripts, e.g. ifdown for ethernet. Yes, From db95986057686ecd7d615ede326d63e8b24a5e92 Mon Sep 17 00:00:00 2001 From: John David Anglin Date: Tue, 6 Dec 2016 21:47:04 -0500 Subject: [PATCH 045/199] parisc: Purge TLB before setting PTE commit c78e710c1c9fbeff43dddc0aa3d0ff458e70b0cc upstream. The attached change interchanges the order of purging the TLB and setting the corresponding page table entry. TLB purges are strongly ordered. It occurred to me one night that setting the PTE first might have subtle ordering issues on SMP machines and cause random memory corruption. A TLB lock guards the insertion of user TLB entries. So after the TLB is purged, a new entry can't be inserted until the lock is released. This ensures that the new PTE value is used when the lock is released. Since making this change, no random segmentation faults have been observed on the Debian hppa buildd servers. Signed-off-by: John David Anglin Signed-off-by: Helge Deller Signed-off-by: Greg Kroah-Hartman --- arch/parisc/include/asm/pgtable.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/parisc/include/asm/pgtable.h b/arch/parisc/include/asm/pgtable.h index c2c43f714684..3a4ed9f91d57 100644 --- a/arch/parisc/include/asm/pgtable.h +++ b/arch/parisc/include/asm/pgtable.h @@ -65,9 +65,9 @@ static inline void purge_tlb_entries(struct mm_struct *mm, unsigned long addr) unsigned long flags; \ spin_lock_irqsave(&pa_tlb_lock, flags); \ old_pte = *ptep; \ - set_pte(ptep, pteval); \ if (pte_inserted(old_pte)) \ purge_tlb_entries(mm, addr); \ + set_pte(ptep, pteval); \ spin_unlock_irqrestore(&pa_tlb_lock, flags); \ } while (0) @@ -478,8 +478,8 @@ static inline int ptep_test_and_clear_young(struct vm_area_struct *vma, unsigned spin_unlock_irqrestore(&pa_tlb_lock, flags); return 0; } - set_pte(ptep, pte_mkold(pte)); purge_tlb_entries(vma->vm_mm, addr); + set_pte(ptep, pte_mkold(pte)); spin_unlock_irqrestore(&pa_tlb_lock, flags); return 1; } @@ -492,9 +492,9 @@ static inline pte_t ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, spin_lock_irqsave(&pa_tlb_lock, flags); old_pte = *ptep; - set_pte(ptep, __pte(0)); if (pte_inserted(old_pte)) purge_tlb_entries(mm, addr); + set_pte(ptep, __pte(0)); spin_unlock_irqrestore(&pa_tlb_lock, flags); return old_pte; @@ -504,8 +504,8 @@ static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr, { unsigned long flags; spin_lock_irqsave(&pa_tlb_lock, flags); - set_pte(ptep, pte_wrprotect(*ptep)); purge_tlb_entries(mm, addr); + set_pte(ptep, pte_wrprotect(*ptep)); spin_unlock_irqrestore(&pa_tlb_lock, flags); } From 605f315c5a8340e61fbe627907701377d27b7510 Mon Sep 17 00:00:00 2001 From: John David Anglin Date: Tue, 6 Dec 2016 22:02:01 -0500 Subject: [PATCH 046/199] parisc: Remove unnecessary TLB purges from flush_dcache_page_asm and flush_icache_page_asm commit febe42964fe182281859b3d43d844bb25ca49367 upstream. We have four routines in pacache.S that use temporary alias pages: copy_user_page_asm(), clear_user_page_asm(), flush_dcache_page_asm() and flush_icache_page_asm(). copy_user_page_asm() and clear_user_page_asm() don't purge the TLB entry used for the operation. flush_dcache_page_asm() and flush_icache_page_asm do purge the entry. Presumably, this was thought to optimize TLB use. However, the operation is quite heavy weight on PA 1.X processors as we need to take the TLB lock and a TLB broadcast is sent to all processors. This patch removes the purges from flush_dcache_page_asm() and flush_icache_page_asm. Signed-off-by: John David Anglin Signed-off-by: Helge Deller Signed-off-by: Greg Kroah-Hartman --- arch/parisc/kernel/pacache.S | 22 +--------------------- 1 file changed, 1 insertion(+), 21 deletions(-) diff --git a/arch/parisc/kernel/pacache.S b/arch/parisc/kernel/pacache.S index 675521919229..a4761b772406 100644 --- a/arch/parisc/kernel/pacache.S +++ b/arch/parisc/kernel/pacache.S @@ -886,19 +886,10 @@ ENTRY(flush_dcache_page_asm) fdc,m r31(%r28) fdc,m r31(%r28) fdc,m r31(%r28) - cmpb,COND(<<) %r28, %r25,1b + cmpb,COND(<<) %r28, %r25,1b fdc,m r31(%r28) sync - -#ifdef CONFIG_PA20 - pdtlb,l %r0(%r25) -#else - tlb_lock %r20,%r21,%r22 - pdtlb %r0(%r25) - tlb_unlock %r20,%r21,%r22 -#endif - bv %r0(%r2) nop .exit @@ -973,17 +964,6 @@ ENTRY(flush_icache_page_asm) fic,m %r31(%sr4,%r28) sync - -#ifdef CONFIG_PA20 - pdtlb,l %r0(%r28) - pitlb,l %r0(%sr4,%r25) -#else - tlb_lock %r20,%r21,%r22 - pdtlb %r0(%r28) - pitlb %r0(%sr4,%r25) - tlb_unlock %r20,%r21,%r22 -#endif - bv %r0(%r2) nop .exit From a0bd6aa097a4c9cf920b66686515bcb174bed531 Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Thu, 8 Dec 2016 21:00:46 +0100 Subject: [PATCH 047/199] parisc: Fix TLB related boot crash on SMP machines commit 24d0492b7d5d321a9c5846c8c974eba9823ffaa0 upstream. At bootup we run measurements to calculate the best threshold for when we should be using full TLB flushes instead of just flushing a specific amount of TLB entries. This performance test is run over the kernel text segment. But running this TLB performance test on the kernel text segment turned out to crash some SMP machines when the kernel text pages were mapped as huge pages. To avoid those crashes this patch simply skips this test on some SMP machines and calculates an optimal threshold based on the maximum number of available TLB entries and number of online CPUs. On a technical side, this seems to happen: The TLB measurement code uses flush_tlb_kernel_range() to flush specific TLB entries with a page size of 4k (pdtlb 0(sr1,addr)). On UP systems this purge instruction seems to work without problems even if the pages were mapped as huge pages. But on SMP systems the TLB purge instruction is broadcasted to other CPUs. Those CPUs then crash the machine because the page size is not as expected. C8000 machines with PA8800/PA8900 CPUs were not affected by this problem, because the required cache coherency prohibits to use huge pages at all. Sadly I didn't found any documentation about this behaviour, so this finding is purely based on testing with phyiscal SMP machines (A500-44 and J5000, both were 2-way boxes). Signed-off-by: Helge Deller Signed-off-by: Greg Kroah-Hartman --- arch/parisc/kernel/cache.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/arch/parisc/kernel/cache.c b/arch/parisc/kernel/cache.c index fd5979f28ada..6857a104b2f9 100644 --- a/arch/parisc/kernel/cache.c +++ b/arch/parisc/kernel/cache.c @@ -375,6 +375,15 @@ void __init parisc_setup_cache_timing(void) /* calculate TLB flush threshold */ + /* On SMP machines, skip the TLB measure of kernel text which + * has been mapped as huge pages. */ + if (num_online_cpus() > 1 && !parisc_requires_coherency()) { + threshold = max(cache_info.it_size, cache_info.dt_size); + threshold *= PAGE_SIZE; + threshold /= num_online_cpus(); + goto set_tlb_threshold; + } + alltime = mfctl(16); flush_tlb_all(); alltime = mfctl(16) - alltime; @@ -393,6 +402,8 @@ void __init parisc_setup_cache_timing(void) alltime, size, rangetime); threshold = PAGE_ALIGN(num_online_cpus() * size * alltime / rangetime); + +set_tlb_threshold: if (threshold) parisc_tlb_flush_threshold = threshold; printk(KERN_INFO "TLB flush threshold set to %lu KiB\n", From e286b6c16758adc77e960dd679073200b539803a Mon Sep 17 00:00:00 2001 From: Sergey Senozhatsky Date: Wed, 7 Dec 2016 14:44:31 -0800 Subject: [PATCH 048/199] zram: restrict add/remove attributes to root only commit 5c7e9ccd91b90d87029261f8856294ee51934cab upstream. zram hot_add sysfs attribute is a very 'special' attribute - reading from it creates a new uninitialized zram device. This file, by a mistake, can be read by a 'normal' user at the moment, while only root must be able to create a new zram device, therefore hot_add attribute must have S_IRUSR mode, not S_IRUGO. [akpm@linux-foundation.org: s/sence/sense/, reflow comment to use 80 cols] Fixes: 6566d1a32bf72 ("zram: add dynamic device add/remove functionality") Link: http://lkml.kernel.org/r/20161205155845.20129-1-sergey.senozhatsky@gmail.com Signed-off-by: Sergey Senozhatsky Reported-by: Steven Allen Acked-by: Greg Kroah-Hartman Cc: Minchan Kim Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- drivers/block/zram/zram_drv.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c index 1770c455dfdd..1648de80e230 100644 --- a/drivers/block/zram/zram_drv.c +++ b/drivers/block/zram/zram_drv.c @@ -1378,8 +1378,14 @@ static ssize_t hot_remove_store(struct class *class, return ret ? ret : count; } +/* + * NOTE: hot_add attribute is not the usual read-only sysfs attribute. In a + * sense that reading from this file does alter the state of your system -- it + * creates a new un-initialized zram device and returns back this device's + * device_id (or an error code if it fails to create a new device). + */ static struct class_attribute zram_control_class_attrs[] = { - __ATTR_RO(hot_add), + __ATTR(hot_add, 0400, hot_add_show, NULL), __ATTR_WO(hot_remove), __ATTR_NULL, }; From b27d9147f24a501d632916964f77c1221246fae9 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 30 Nov 2016 21:04:41 +0000 Subject: [PATCH 049/199] locking/rtmutex: Prevent dequeue vs. unlock race commit dbb26055defd03d59f678cb5f2c992abe05b064a upstream. David reported a futex/rtmutex state corruption. It's caused by the following problem: CPU0 CPU1 CPU2 l->owner=T1 rt_mutex_lock(l) lock(l->wait_lock) l->owner = T1 | HAS_WAITERS; enqueue(T2) boost() unlock(l->wait_lock) schedule() rt_mutex_lock(l) lock(l->wait_lock) l->owner = T1 | HAS_WAITERS; enqueue(T3) boost() unlock(l->wait_lock) schedule() signal(->T2) signal(->T3) lock(l->wait_lock) dequeue(T2) deboost() unlock(l->wait_lock) lock(l->wait_lock) dequeue(T3) ===> wait list is now empty deboost() unlock(l->wait_lock) lock(l->wait_lock) fixup_rt_mutex_waiters() if (wait_list_empty(l)) { owner = l->owner & ~HAS_WAITERS; l->owner = owner ==> l->owner = T1 } lock(l->wait_lock) rt_mutex_unlock(l) fixup_rt_mutex_waiters() if (wait_list_empty(l)) { owner = l->owner & ~HAS_WAITERS; cmpxchg(l->owner, T1, NULL) ===> Success (l->owner = NULL) l->owner = owner ==> l->owner = T1 } That means the problem is caused by fixup_rt_mutex_waiters() which does the RMW to clear the waiters bit unconditionally when there are no waiters in the rtmutexes rbtree. This can be fatal: A concurrent unlock can release the rtmutex in the fastpath because the waiters bit is not set. If the cmpxchg() gets in the middle of the RMW operation then the previous owner, which just unlocked the rtmutex is set as the owner again when the write takes place after the successfull cmpxchg(). The solution is rather trivial: verify that the owner member of the rtmutex has the waiters bit set before clearing it. This does not require a cmpxchg() or other atomic operations because the waiters bit can only be set and cleared with the rtmutex wait_lock held. It's also safe against the fast path unlock attempt. The unlock attempt via cmpxchg() will either see the bit set and take the slowpath or see the bit cleared and release it atomically in the fastpath. It's remarkable that the test program provided by David triggers on ARM64 and MIPS64 really quick, but it refuses to reproduce on x86-64, while the problem exists there as well. That refusal might explain that this got not discovered earlier despite the bug existing from day one of the rtmutex implementation more than 10 years ago. Thanks to David for meticulously instrumenting the code and providing the information which allowed to decode this subtle problem. Reported-by: David Daney Tested-by: David Daney Signed-off-by: Thomas Gleixner Reviewed-by: Steven Rostedt Acked-by: Peter Zijlstra (Intel) Cc: Linus Torvalds Cc: Mark Rutland Cc: Peter Zijlstra Cc: Sebastian Siewior Cc: Will Deacon Fixes: 23f78d4a03c5 ("[PATCH] pi-futex: rt mutex core") Link: http://lkml.kernel.org/r/20161130210030.351136722@linutronix.de Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- kernel/locking/rtmutex.c | 68 ++++++++++++++++++++++++++++++++++++++-- 1 file changed, 66 insertions(+), 2 deletions(-) diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c index 8251e75dd9c0..b066724d7a5b 100644 --- a/kernel/locking/rtmutex.c +++ b/kernel/locking/rtmutex.c @@ -65,8 +65,72 @@ static inline void clear_rt_mutex_waiters(struct rt_mutex *lock) static void fixup_rt_mutex_waiters(struct rt_mutex *lock) { - if (!rt_mutex_has_waiters(lock)) - clear_rt_mutex_waiters(lock); + unsigned long owner, *p = (unsigned long *) &lock->owner; + + if (rt_mutex_has_waiters(lock)) + return; + + /* + * The rbtree has no waiters enqueued, now make sure that the + * lock->owner still has the waiters bit set, otherwise the + * following can happen: + * + * CPU 0 CPU 1 CPU2 + * l->owner=T1 + * rt_mutex_lock(l) + * lock(l->lock) + * l->owner = T1 | HAS_WAITERS; + * enqueue(T2) + * boost() + * unlock(l->lock) + * block() + * + * rt_mutex_lock(l) + * lock(l->lock) + * l->owner = T1 | HAS_WAITERS; + * enqueue(T3) + * boost() + * unlock(l->lock) + * block() + * signal(->T2) signal(->T3) + * lock(l->lock) + * dequeue(T2) + * deboost() + * unlock(l->lock) + * lock(l->lock) + * dequeue(T3) + * ==> wait list is empty + * deboost() + * unlock(l->lock) + * lock(l->lock) + * fixup_rt_mutex_waiters() + * if (wait_list_empty(l) { + * l->owner = owner + * owner = l->owner & ~HAS_WAITERS; + * ==> l->owner = T1 + * } + * lock(l->lock) + * rt_mutex_unlock(l) fixup_rt_mutex_waiters() + * if (wait_list_empty(l) { + * owner = l->owner & ~HAS_WAITERS; + * cmpxchg(l->owner, T1, NULL) + * ===> Success (l->owner = NULL) + * + * l->owner = owner + * ==> l->owner = T1 + * } + * + * With the check for the waiter bit in place T3 on CPU2 will not + * overwrite. All tasks fiddling with the waiters bit are + * serialized by l->lock, so nothing else can modify the waiters + * bit. If the bit is set then nothing can change l->owner either + * so the simple RMW is safe. The cmpxchg() will simply fail if it + * happens in the middle of the RMW because the waiters bit is + * still set. + */ + owner = READ_ONCE(*p); + if (owner & RT_MUTEX_HAS_WAITERS) + WRITE_ONCE(*p, owner & ~RT_MUTEX_HAS_WAITERS); } /* From c6a5bf4cda12e4a3c097036ce7044ee65e4cf6d9 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 30 Nov 2016 21:04:42 +0000 Subject: [PATCH 050/199] locking/rtmutex: Use READ_ONCE() in rt_mutex_owner() commit 1be5d4fa0af34fb7bafa205aeb59f5c7cc7a089d upstream. While debugging the rtmutex unlock vs. dequeue race Will suggested to use READ_ONCE() in rt_mutex_owner() as it might race against the cmpxchg_release() in unlock_rt_mutex_safe(). Will: "It's a minor thing which will most likely not matter in practice" Careful search did not unearth an actual problem in todays code, but it's better to be safe than surprised. Suggested-by: Will Deacon Signed-off-by: Thomas Gleixner Acked-by: Peter Zijlstra (Intel) Cc: David Daney Cc: Linus Torvalds Cc: Mark Rutland Cc: Peter Zijlstra Cc: Sebastian Siewior Cc: Steven Rostedt Link: http://lkml.kernel.org/r/20161130210030.431379999@linutronix.de Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- kernel/locking/rtmutex_common.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/kernel/locking/rtmutex_common.h b/kernel/locking/rtmutex_common.h index 4f5f83c7d2d3..e317e1cbb3eb 100644 --- a/kernel/locking/rtmutex_common.h +++ b/kernel/locking/rtmutex_common.h @@ -75,8 +75,9 @@ task_top_pi_waiter(struct task_struct *p) static inline struct task_struct *rt_mutex_owner(struct rt_mutex *lock) { - return (struct task_struct *) - ((unsigned long)lock->owner & ~RT_MUTEX_OWNER_MASKALL); + unsigned long owner = (unsigned long) READ_ONCE(lock->owner); + + return (struct task_struct *) (owner & ~RT_MUTEX_OWNER_MASKALL); } /* From c4db8a7d1e0c16287642414fe16d85d1eaeb8923 Mon Sep 17 00:00:00 2001 From: "Peter Zijlstra (Intel)" Date: Tue, 29 Nov 2016 20:33:28 +0000 Subject: [PATCH 051/199] perf/x86: Fix full width counter, counter overflow commit 7f612a7f0bc13a2361a152862435b7941156b6af upstream. Lukasz reported that perf stat counters overflow handling is broken on KNL/SLM. Both these parts have full_width_write set, and that does indeed have a problem. In order to deal with counter wrap, we must sample the counter at at least half the counter period (see also the sampling theorem) such that we can unambiguously reconstruct the count. However commit: 069e0c3c4058 ("perf/x86/intel: Support full width counting") sets the sampling interval to the full period, not half. Fixing that exposes another issue, in that we must not sign extend the delta value when we shift it right; the counter cannot have decremented after all. With both these issues fixed, counter overflow functions correctly again. Reported-by: Lukasz Odzioba Tested-by: Liang, Kan Tested-by: Odzioba, Lukasz Signed-off-by: Peter Zijlstra (Intel) Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Fixes: 069e0c3c4058 ("perf/x86/intel: Support full width counting") Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/cpu/perf_event.c | 2 +- arch/x86/kernel/cpu/perf_event_intel.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index a3aeb2cc361e..1a8256dd6729 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -67,7 +67,7 @@ u64 x86_perf_event_update(struct perf_event *event) int shift = 64 - x86_pmu.cntval_bits; u64 prev_raw_count, new_raw_count; int idx = hwc->idx; - s64 delta; + u64 delta; if (idx == INTEL_PMC_IDX_FIXED_BTS) return 0; diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c index 5f82cd59f0e5..5cc2242d77c6 100644 --- a/arch/x86/kernel/cpu/perf_event_intel.c +++ b/arch/x86/kernel/cpu/perf_event_intel.c @@ -3636,7 +3636,7 @@ __init int intel_pmu_init(void) /* Support full width counters using alternative MSR range */ if (x86_pmu.intel_cap.full_width_write) { - x86_pmu.max_period = x86_pmu.cntval_mask; + x86_pmu.max_period = x86_pmu.cntval_mask >> 1; x86_pmu.perfctr = MSR_IA32_PMC0; pr_cont("full-width counters, "); } From 9a3baed9103bc413a5e98e13e31cd8ae7c0b5563 Mon Sep 17 00:00:00 2001 From: tim Date: Mon, 5 Dec 2016 11:46:31 -0800 Subject: [PATCH 052/199] crypto: mcryptd - Check mcryptd algorithm compatibility commit 48a992727d82cb7db076fa15d372178743b1f4cd upstream. Algorithms not compatible with mcryptd could be spawned by mcryptd with a direct crypto_alloc_tfm invocation using a "mcryptd(alg)" name construct. This causes mcryptd to crash the kernel if an arbitrary "alg" is incompatible and not intended to be used with mcryptd. It is an issue if AF_ALG tries to spawn mcryptd(alg) to expose it externally. But such algorithms must be used internally and not be exposed. We added a check to enforce that only internal algorithms are allowed with mcryptd at the time mcryptd is spawning an algorithm. Link: http://marc.info/?l=linux-crypto-vger&m=148063683310477&w=2 Reported-by: Mikulas Patocka Signed-off-by: Tim Chen Signed-off-by: Herbert Xu Signed-off-by: Greg Kroah-Hartman --- crypto/mcryptd.c | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/crypto/mcryptd.c b/crypto/mcryptd.c index fe5b495a434d..a0ceb41d5ccc 100644 --- a/crypto/mcryptd.c +++ b/crypto/mcryptd.c @@ -258,18 +258,22 @@ out_free_inst: goto out; } -static inline void mcryptd_check_internal(struct rtattr **tb, u32 *type, +static inline bool mcryptd_check_internal(struct rtattr **tb, u32 *type, u32 *mask) { struct crypto_attr_type *algt; algt = crypto_get_attr_type(tb); if (IS_ERR(algt)) - return; - if ((algt->type & CRYPTO_ALG_INTERNAL)) - *type |= CRYPTO_ALG_INTERNAL; - if ((algt->mask & CRYPTO_ALG_INTERNAL)) - *mask |= CRYPTO_ALG_INTERNAL; + return false; + + *type |= algt->type & CRYPTO_ALG_INTERNAL; + *mask |= algt->mask & CRYPTO_ALG_INTERNAL; + + if (*type & *mask & CRYPTO_ALG_INTERNAL) + return true; + else + return false; } static int mcryptd_hash_init_tfm(struct crypto_tfm *tfm) @@ -498,7 +502,8 @@ static int mcryptd_create_hash(struct crypto_template *tmpl, struct rtattr **tb, u32 mask = 0; int err; - mcryptd_check_internal(tb, &type, &mask); + if (!mcryptd_check_internal(tb, &type, &mask)) + return -EINVAL; salg = shash_attr_alg(tb[1], type, mask); if (IS_ERR(salg)) From 083021bdba1e6d480170a4ae86cddc10f88c54d5 Mon Sep 17 00:00:00 2001 From: Marc Kleine-Budde Date: Mon, 5 Dec 2016 11:44:23 +0100 Subject: [PATCH 053/199] can: raw: raw_setsockopt: limit number of can_filter that can be set commit 332b05ca7a438f857c61a3c21a88489a21532364 upstream. This patch adds a check to limit the number of can_filters that can be set via setsockopt on CAN_RAW sockets. Otherwise allocations > MAX_ORDER are not prevented resulting in a warning. Reference: https://lkml.org/lkml/2016/12/2/230 Reported-by: Andrey Konovalov Tested-by: Andrey Konovalov Signed-off-by: Marc Kleine-Budde Signed-off-by: Greg Kroah-Hartman --- include/uapi/linux/can.h | 1 + net/can/raw.c | 3 +++ 2 files changed, 4 insertions(+) diff --git a/include/uapi/linux/can.h b/include/uapi/linux/can.h index 9692cda5f8fc..c48d93a28d1a 100644 --- a/include/uapi/linux/can.h +++ b/include/uapi/linux/can.h @@ -196,5 +196,6 @@ struct can_filter { }; #define CAN_INV_FILTER 0x20000000U /* to be set in can_filter.can_id */ +#define CAN_RAW_FILTER_MAX 512 /* maximum number of can_filter set via setsockopt() */ #endif /* !_UAPI_CAN_H */ diff --git a/net/can/raw.c b/net/can/raw.c index 2e67b1423cd3..56af689ca999 100644 --- a/net/can/raw.c +++ b/net/can/raw.c @@ -499,6 +499,9 @@ static int raw_setsockopt(struct socket *sock, int level, int optname, if (optlen % sizeof(struct can_filter) != 0) return -EINVAL; + if (optlen > CAN_RAW_FILTER_MAX * sizeof(struct can_filter)) + return -EINVAL; + count = optlen / sizeof(struct can_filter); if (count > 1) { From e29949ed3903930e38dead204b01e7912c98fe33 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=EC=B6=94=EC=A7=80=ED=98=B8?= Date: Thu, 8 Dec 2016 12:01:13 +0000 Subject: [PATCH 054/199] can: peak: fix bad memory access and free sequence commit b67d0dd7d0dc9e456825447bbeb935d8ef43ea7c upstream. Fix for bad memory access while disconnecting. netdev is freed before private data free, and dev is accessed after freeing netdev. This makes a slub problem, and it raise kernel oops with slub debugger config. Signed-off-by: Jiho Chu Signed-off-by: Marc Kleine-Budde Signed-off-by: Greg Kroah-Hartman --- drivers/net/can/usb/peak_usb/pcan_usb_core.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/net/can/usb/peak_usb/pcan_usb_core.c b/drivers/net/can/usb/peak_usb/pcan_usb_core.c index 5a2e341a6d1e..91be4575b524 100644 --- a/drivers/net/can/usb/peak_usb/pcan_usb_core.c +++ b/drivers/net/can/usb/peak_usb/pcan_usb_core.c @@ -872,23 +872,25 @@ lbl_free_candev: static void peak_usb_disconnect(struct usb_interface *intf) { struct peak_usb_device *dev; + struct peak_usb_device *dev_prev_siblings; /* unregister as many netdev devices as siblings */ - for (dev = usb_get_intfdata(intf); dev; dev = dev->prev_siblings) { + for (dev = usb_get_intfdata(intf); dev; dev = dev_prev_siblings) { struct net_device *netdev = dev->netdev; char name[IFNAMSIZ]; + dev_prev_siblings = dev->prev_siblings; dev->state &= ~PCAN_USB_STATE_CONNECTED; strncpy(name, netdev->name, IFNAMSIZ); unregister_netdev(netdev); - free_candev(netdev); kfree(dev->cmd_buf); dev->next_siblings = NULL; if (dev->adapter->dev_free) dev->adapter->dev_free(dev); + free_candev(netdev); dev_info(&intf->dev, "%s removed\n", name); } From 55e15b2f44d7f7d9350b70890b0e39a8ee713504 Mon Sep 17 00:00:00 2001 From: James Morse Date: Tue, 2 Feb 2016 15:53:59 +0000 Subject: [PATCH 055/199] arm64: futex.h: Add missing PAN toggling commit 811d61e384e24759372bb3f01772f3744b0a8327 upstream. futex.h's futex_atomic_cmpxchg_inatomic() does not use the __futex_atomic_op() macro and needs its own PAN toggling. This was missed when the feature was implemented. Fixes: 338d4f49d6f ("arm64: kernel: Add support for Privileged Access Never") Signed-off-by: James Morse Signed-off-by: Will Deacon Cc: Mian Yousaf Kaukab Signed-off-by: Greg Kroah-Hartman --- arch/arm64/include/asm/futex.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/arm64/include/asm/futex.h b/arch/arm64/include/asm/futex.h index 007a69fc4f40..5f3ab8c1db55 100644 --- a/arch/arm64/include/asm/futex.h +++ b/arch/arm64/include/asm/futex.h @@ -121,6 +121,7 @@ futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr, return -EFAULT; asm volatile("// futex_atomic_cmpxchg_inatomic\n" +ALTERNATIVE("nop", SET_PSTATE_PAN(0), ARM64_HAS_PAN, CONFIG_ARM64_PAN) " prfm pstl1strm, %2\n" "1: ldxr %w1, %2\n" " sub %w3, %w1, %w4\n" @@ -137,6 +138,7 @@ futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr, " .align 3\n" " .quad 1b, 4b, 2b, 4b\n" " .popsection\n" +ALTERNATIVE("nop", SET_PSTATE_PAN(1), ARM64_HAS_PAN, CONFIG_ARM64_PAN) : "+r" (ret), "=&r" (val), "+Q" (*uaddr), "=&r" (tmp) : "r" (oldval), "r" (newval), "Ir" (-EFAULT) : "memory"); From f03531d091255156cf8c865b969377f01750e97a Mon Sep 17 00:00:00 2001 From: Boris Brezillon Date: Fri, 28 Oct 2016 17:12:28 +0200 Subject: [PATCH 056/199] m68k: Fix ndelay() macro commit 7e251bb21ae08ca2e4fb28cc0981fac2685a8efa upstream. The current ndelay() macro definition has an extra semi-colon at the end of the line thus leading to a compilation error when ndelay is used in a conditional block without curly braces like this one: if (cond) ndelay(t); else ... which, after the preprocessor pass gives: if (cond) m68k_ndelay(t);; else ... thus leading to the following gcc error: error: 'else' without a previous 'if' Remove this extra semi-colon. Signed-off-by: Boris Brezillon Fixes: c8ee038bd1488 ("m68k: Implement ndelay() based on the existing udelay() logic") Signed-off-by: Geert Uytterhoeven Signed-off-by: Greg Kroah-Hartman --- arch/m68k/include/asm/delay.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/m68k/include/asm/delay.h b/arch/m68k/include/asm/delay.h index d28fa8fe26fe..c598d847d56b 100644 --- a/arch/m68k/include/asm/delay.h +++ b/arch/m68k/include/asm/delay.h @@ -114,6 +114,6 @@ static inline void __udelay(unsigned long usecs) */ #define HZSCALE (268435456 / (1000000 / HZ)) -#define ndelay(n) __delay(DIV_ROUND_UP((n) * ((((HZSCALE) >> 11) * (loops_per_jiffy >> 11)) >> 6), 1000)); +#define ndelay(n) __delay(DIV_ROUND_UP((n) * ((((HZSCALE) >> 11) * (loops_per_jiffy >> 11)) >> 6), 1000)) #endif /* defined(_M68K_DELAY_H) */ From 537e42d759aeb833bfc6dbb3711bc45e5fddb308 Mon Sep 17 00:00:00 2001 From: Sven Eckelmann Date: Wed, 30 Nov 2016 21:47:09 +0100 Subject: [PATCH 057/199] batman-adv: Check for alloc errors when preparing TT local data commit c2d0f48a13e53b4747704c9e692f5e765e52041a upstream. batadv_tt_prepare_tvlv_local_data can fail to allocate the memory for the new TVLV block. The caller is informed about this problem with the returned length of 0. Not checking this value results in an invalid memory access when either tt_data or tt_change is accessed. Reported-by: Dan Carpenter Fixes: 7ea7b4a14275 ("batman-adv: make the TT CRC logic VLAN specific") Signed-off-by: Sven Eckelmann Signed-off-by: Simon Wunderlich Signed-off-by: Greg Kroah-Hartman --- net/batman-adv/translation-table.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/batman-adv/translation-table.c b/net/batman-adv/translation-table.c index 83b0ca27a45e..f2079acb555d 100644 --- a/net/batman-adv/translation-table.c +++ b/net/batman-adv/translation-table.c @@ -2764,7 +2764,7 @@ static bool batadv_send_my_tt_response(struct batadv_priv *bat_priv, &tvlv_tt_data, &tt_change, &tt_len); - if (!tt_len) + if (!tt_len || !tvlv_len) goto unlock; /* Copy the last orig_node's OGM buffer */ @@ -2782,7 +2782,7 @@ static bool batadv_send_my_tt_response(struct batadv_priv *bat_priv, &tvlv_tt_data, &tt_change, &tt_len); - if (!tt_len) + if (!tt_len || !tvlv_len) goto out; /* fill the rest of the tvlv with the real TT entries */ From 1c0f4e0ebb791cad8fee77d8de1ceb684e631698 Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Wed, 7 Dec 2016 14:54:38 +0100 Subject: [PATCH 058/199] hotplug: Make register and unregister notifier API symmetric commit 777c6e0daebb3fcefbbd6f620410a946b07ef6d0 upstream. Yu Zhao has noticed that __unregister_cpu_notifier only unregisters its notifiers when HOTPLUG_CPU=y while the registration might succeed even when HOTPLUG_CPU=n if MODULE is enabled. This means that e.g. zswap might keep a stale notifier on the list on the manual clean up during the pool tear down and thus corrupt the list. Resulting in the following [ 144.964346] BUG: unable to handle kernel paging request at ffff880658a2be78 [ 144.971337] IP: [] raw_notifier_chain_register+0x1b/0x40 [ 145.122628] Call Trace: [ 145.125086] [] __register_cpu_notifier+0x18/0x20 [ 145.131350] [] zswap_pool_create+0x273/0x400 [ 145.137268] [] __zswap_param_set+0x1fc/0x300 [ 145.143188] [] ? trace_hardirqs_on+0xd/0x10 [ 145.149018] [] ? kernel_param_lock+0x28/0x30 [ 145.154940] [] ? __might_fault+0x4f/0xa0 [ 145.160511] [] zswap_compressor_param_set+0x17/0x20 [ 145.167035] [] param_attr_store+0x5c/0xb0 [ 145.172694] [] module_attr_store+0x1d/0x30 [ 145.178443] [] sysfs_kf_write+0x4f/0x70 [ 145.183925] [] kernfs_fop_write+0x149/0x180 [ 145.189761] [] __vfs_write+0x18/0x40 [ 145.194982] [] vfs_write+0xb2/0x1a0 [ 145.200122] [] SyS_write+0x52/0xa0 [ 145.205177] [] entry_SYSCALL_64_fastpath+0x12/0x17 This can be even triggered manually by changing /sys/module/zswap/parameters/compressor multiple times. Fix this issue by making unregister APIs symmetric to the register so there are no surprises. Fixes: 47e627bc8c9a ("[PATCH] hotplug: Allow modules to use the cpu hotplug notifiers even if !CONFIG_HOTPLUG_CPU") Reported-and-tested-by: Yu Zhao Signed-off-by: Michal Hocko Cc: linux-mm@kvack.org Cc: Andrew Morton Cc: Dan Streetman Link: http://lkml.kernel.org/r/20161207135438.4310-1-mhocko@kernel.org Signed-off-by: Thomas Gleixner Signed-off-by: Greg Kroah-Hartman --- include/linux/cpu.h | 15 ++++----------- kernel/cpu.c | 3 +-- 2 files changed, 5 insertions(+), 13 deletions(-) diff --git a/include/linux/cpu.h b/include/linux/cpu.h index d2ca8c38f9c4..3ea9aae2387d 100644 --- a/include/linux/cpu.h +++ b/include/linux/cpu.h @@ -131,22 +131,16 @@ enum { { .notifier_call = fn, .priority = pri }; \ __register_cpu_notifier(&fn##_nb); \ } -#else /* #if defined(CONFIG_HOTPLUG_CPU) || !defined(MODULE) */ -#define cpu_notifier(fn, pri) do { (void)(fn); } while (0) -#define __cpu_notifier(fn, pri) do { (void)(fn); } while (0) -#endif /* #else #if defined(CONFIG_HOTPLUG_CPU) || !defined(MODULE) */ -#ifdef CONFIG_HOTPLUG_CPU extern int register_cpu_notifier(struct notifier_block *nb); extern int __register_cpu_notifier(struct notifier_block *nb); extern void unregister_cpu_notifier(struct notifier_block *nb); extern void __unregister_cpu_notifier(struct notifier_block *nb); -#else -#ifndef MODULE -extern int register_cpu_notifier(struct notifier_block *nb); -extern int __register_cpu_notifier(struct notifier_block *nb); -#else +#else /* #if defined(CONFIG_HOTPLUG_CPU) || !defined(MODULE) */ +#define cpu_notifier(fn, pri) do { (void)(fn); } while (0) +#define __cpu_notifier(fn, pri) do { (void)(fn); } while (0) + static inline int register_cpu_notifier(struct notifier_block *nb) { return 0; @@ -156,7 +150,6 @@ static inline int __register_cpu_notifier(struct notifier_block *nb) { return 0; } -#endif static inline void unregister_cpu_notifier(struct notifier_block *nb) { diff --git a/kernel/cpu.c b/kernel/cpu.c index 85ff5e26e23b..cd6d1258554e 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -223,8 +223,6 @@ static int cpu_notify(unsigned long val, void *v) return __cpu_notify(val, v, -1, NULL); } -#ifdef CONFIG_HOTPLUG_CPU - static void cpu_notify_nofail(unsigned long val, void *v) { BUG_ON(cpu_notify(val, v)); @@ -246,6 +244,7 @@ void __unregister_cpu_notifier(struct notifier_block *nb) } EXPORT_SYMBOL(__unregister_cpu_notifier); +#ifdef CONFIG_HOTPLUG_CPU /** * clear_tasks_mm_cpumask - Safely clear tasks' mm_cpumask for a CPU * @cpu: a CPU id From 5d488dee9236466a23220ad9a881cae7e45efe89 Mon Sep 17 00:00:00 2001 From: David Michael Date: Tue, 29 Nov 2016 11:15:12 -0800 Subject: [PATCH 059/199] crypto: rsa - Add Makefile dependencies to fix parallel builds commit 57891633eeef60e732e045731cf20e50ee80acb4 upstream. Both asn1 headers are included by rsa_helper.c, so rsa_helper.o should explicitly depend on them. Signed-off-by: David Michael Signed-off-by: Herbert Xu Cc: Tuomas Tynkkynen Signed-off-by: Greg Kroah-Hartman --- crypto/Makefile | 1 + 1 file changed, 1 insertion(+) diff --git a/crypto/Makefile b/crypto/Makefile index f7aba923458d..82fbff180ad3 100644 --- a/crypto/Makefile +++ b/crypto/Makefile @@ -33,6 +33,7 @@ obj-$(CONFIG_CRYPTO_AKCIPHER2) += akcipher.o $(obj)/rsapubkey-asn1.o: $(obj)/rsapubkey-asn1.c $(obj)/rsapubkey-asn1.h $(obj)/rsaprivkey-asn1.o: $(obj)/rsaprivkey-asn1.c $(obj)/rsaprivkey-asn1.h +$(obj)/rsa_helper.o: $(obj)/rsapubkey-asn1.h $(obj)/rsaprivkey-asn1.h clean-files += rsapubkey-asn1.c rsapubkey-asn1.h clean-files += rsaprivkey-asn1.c rsaprivkey-asn1.h From a34f0e8a2f8976f0bae4962edf2af4dd16cb85f7 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Thu, 15 Dec 2016 08:49:34 -0800 Subject: [PATCH 060/199] Linux 4.4.39 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 6876efe0d735..88d26a632bef 100644 --- a/Makefile +++ b/Makefile @@ -1,6 +1,6 @@ VERSION = 4 PATCHLEVEL = 4 -SUBLEVEL = 38 +SUBLEVEL = 39 EXTRAVERSION = NAME = Blurry Fish Butt From 26a380b3ce97e6c3919f7ea576b91b23f7e9728c Mon Sep 17 00:00:00 2001 From: Keun-young Park Date: Mon, 14 Nov 2016 18:25:15 -0800 Subject: [PATCH 061/199] ANDROID: dm verity: add minimum prefetch size - For device like eMMC, it gives better performance to read more hash blocks at a time. - For android, set it to default 128. For other devices, set it to 1 which is the same as now. - saved boot-up time by 300ms in tested device bug: 32246564 Cc: Sami Tolvanen Signed-off-by: Keun-young Park --- drivers/md/Kconfig | 16 ++++++++++++++++ drivers/md/dm-verity-target.c | 9 ++++++++- 2 files changed, 24 insertions(+), 1 deletion(-) diff --git a/drivers/md/Kconfig b/drivers/md/Kconfig index 6035794bc1f2..3d237a03dab3 100644 --- a/drivers/md/Kconfig +++ b/drivers/md/Kconfig @@ -458,6 +458,21 @@ config DM_VERITY If unsure, say N. +config DM_VERITY_HASH_PREFETCH_MIN_SIZE_128 + bool "Prefetch size 128" + +config DM_VERITY_HASH_PREFETCH_MIN_SIZE + int "Verity hash prefetch minimum size" + depends on DM_VERITY + range 1 4096 + default 128 if DM_VERITY_HASH_PREFETCH_MIN_SIZE_128 + default 1 + ---help--- + This sets minimum number of hash blocks to prefetch for dm-verity. + For devices like eMMC, having larger prefetch size like 128 can improve + performance with increased memory consumption for keeping more hashes + in RAM. + config DM_VERITY_FEC bool "Verity forward error correction support" depends on DM_VERITY @@ -510,6 +525,7 @@ config DM_ANDROID_VERITY depends on ASYMMETRIC_KEY_TYPE depends on ASYMMETRIC_PUBLIC_KEY_SUBTYPE depends on MD_LINEAR + select DM_VERITY_HASH_PREFETCH_MIN_SIZE_128 ---help--- This device-mapper target is virtually a VERITY target. This target is setup by reading the metadata contents piggybacked diff --git a/drivers/md/dm-verity-target.c b/drivers/md/dm-verity-target.c index 9d3d4b297201..c7e97cf6e7fb 100644 --- a/drivers/md/dm-verity-target.c +++ b/drivers/md/dm-verity-target.c @@ -501,6 +501,7 @@ static void verity_prefetch_io(struct work_struct *work) container_of(work, struct dm_verity_prefetch_work, work); struct dm_verity *v = pw->v; int i; + sector_t prefetch_size; for (i = v->levels - 2; i >= 0; i--) { sector_t hash_block_start; @@ -523,8 +524,14 @@ static void verity_prefetch_io(struct work_struct *work) hash_block_end = v->hash_blocks - 1; } no_prefetch_cluster: + // for emmc, it is more efficient to send bigger read + prefetch_size = max((sector_t)CONFIG_DM_VERITY_HASH_PREFETCH_MIN_SIZE, + hash_block_end - hash_block_start + 1); + if ((hash_block_start + prefetch_size) >= (v->hash_start + v->hash_blocks)) { + prefetch_size = hash_block_end - hash_block_start + 1; + } dm_bufio_prefetch(v->bufio, hash_block_start, - hash_block_end - hash_block_start + 1); + prefetch_size); } kfree(pw); From 3f5ec85c09a09411ccd133c4e0b84527502dba52 Mon Sep 17 00:00:00 2001 From: Ke Wang Date: Thu, 8 Dec 2016 14:02:10 +0800 Subject: [PATCH 062/199] sched: fix wrong truncation of walt_avg The result of "__entry->walt_avg = (__entry->demand << 10)" will exceed the range of "unsigned int", which will be truncated and make the trace looks like as follows: UnityMain-4588 [004] 6029.645672: walt_update_history: 4588(UnityMain): runtime 9928307 samples 1 event 4 demand 9928307 walt 157 pelt 870 (hist: 9928307 9604307 8440077 87392 34144328) cpu 4 UnityMain-4588 [004] 6029.653658: walt_update_history: 4588(UnityMain): runtime 10000000 samples 1 event 4 demand 10000000 walt 165 pelt 886 (hist: 10000000 9955691 6549308 64000 34144328) cpu 4 Fix this by using a u64 type instead of unsgined int type and make the trace as below: UnityMain-4617 [004] 117.613558: walt_update_history: 4617(UnityMain): runtime 5770597 samples 1 event 4 demand 7038739 walt 720 pelt 680 (hist: 5770597 7680001 8904509 65596 156) cpu 4 UnityMain-4617 [004] 117.633560: walt_update_history: 4617(UnityMain): runtime 9911238 samples 1 event 4 demand 9911238 walt 1014 pelt 769 (hist: 9911238 5770597 7680001 0 1664188058) cpu 4 Signed-off-by: Ke Wang --- include/trace/events/sched.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/trace/events/sched.h b/include/trace/events/sched.h index dffaffab4bc8..c18d8c89bd12 100644 --- a/include/trace/events/sched.h +++ b/include/trace/events/sched.h @@ -1044,7 +1044,7 @@ TRACE_EVENT(walt_update_history, __field( int, samples ) __field( int, evt ) __field( u64, demand ) - __field(unsigned int, walt_avg ) + __field( u64, walt_avg ) __field(unsigned int, pelt_avg ) __array( u32, hist, RAVG_HIST_SIZE_MAX) __field( int, cpu ) @@ -1066,7 +1066,7 @@ TRACE_EVENT(walt_update_history, ), TP_printk("%d (%s): runtime %u samples %d event %d demand %llu" - " walt %u pelt %u (hist: %u %u %u %u %u) cpu %d", + " walt %llu pelt %u (hist: %u %u %u %u %u) cpu %d", __entry->pid, __entry->comm, __entry->runtime, __entry->samples, __entry->evt, __entry->demand, From ede03b5d80a61a01217635de00b805c53d87e474 Mon Sep 17 00:00:00 2001 From: Juri Lelli Date: Tue, 6 Dec 2016 11:50:53 +0000 Subject: [PATCH 063/199] sched/walt: kill {min,max}_capacity {min,max}_capacity are static variables that are only updated from __update_min_max_capacity(), but not used anywhere else. Remove them together with the function updating them. This has also the nice side effect of fixing a LOCKDEP warning related to locking all CPUs in update_min_max_capacity(), as reported by Ke Wang: [ 2.853595] c0 ============================================= [ 2.859219] c0 [ INFO: possible recursive locking detected ] [ 2.864852] c0 4.4.6+ #5 Tainted: G W [ 2.869604] c0 --------------------------------------------- [ 2.875230] c0 swapper/0/1 is trying to acquire lock: [ 2.880248] (&rq->lock){-.-.-.}, at: [] cpufreq_notifier_policy+0x2e8/0x37c [ 2.888815] c0 [ 2.888815] c0 but task is already holding lock: [ 2.895132] (&rq->lock){-.-.-.}, at: [] cpufreq_notifier_policy+0x2e8/0x37c [ 2.903700] c0 [ 2.903700] c0 other info that might help us debug this: [ 2.910710] c0 Possible unsafe locking scenario: [ 2.910710] c0 [ 2.917112] c0 CPU0 [ 2.919795] c0 ---- [ 2.922478] lock(&rq->lock); [ 2.925507] lock(&rq->lock); [ 2.928536] c0 [ 2.928536] c0 *** DEADLOCK *** [ 2.928536] c0 [ 2.935200] c0 May be due to missing lock nesting notation [ 2.935200] c0 [ 2.942471] c0 7 locks held by swapper/0/1: [ 2.946623] #0: (&dev->mutex){......}, at: [] __driver_attach+0x64/0xb8 [ 2.954931] #1: (&dev->mutex){......}, at: [] __driver_attach+0x74/0xb8 [ 2.963239] #2: (cpu_hotplug.lock){++++++}, at: [] get_online_cpus+0x48/0xa8 [ 2.971979] #3: (subsys mutex#6){+.+.+.}, at: [] subsys_interface_register+0x44/0xc0 [ 2.981411] #4: (&policy->rwsem){+.+.+.}, at: [] cpufreq_online+0x330/0x76c [ 2.990065] #5: ((cpufreq_policy_notifier_list).rwsem){.+.+..}, at: [] blocking_notifier_call_chain+0x38/0xc4 [ 3.001661] #6: (&rq->lock){-.-.-.}, at: [] cpufreq_notifier_policy+0x2e8/0x37c [ 3.010661] c0 [ 3.010661] c0 stack backtrace: [ 3.015514] c0 CPU: 0 PID: 1 Comm: swapper/0 Tainted: G W 4.4.6+ #5 [ 3.022864] c0 Hardware name: Spreadtrum SP9860g Board (DT) [ 3.028402] c0 Call trace: [ 3.031092] c0 [] dump_backtrace+0x0/0x210 [ 3.036716] c0 [] show_stack+0x20/0x28 [ 3.041994] c0 [] dump_stack+0xa8/0xe0 [ 3.047273] c0 [] __lock_acquire+0x1e0c/0x2218 [ 3.053243] c0 [] lock_acquire+0xe0/0x280 [ 3.058784] c0 [] _raw_spin_lock+0x44/0x58 [ 3.064407] c0 [] cpufreq_notifier_policy+0x2e8/0x37c [ 3.070983] c0 [] blocking_notifier_call_chain+0x78/0xc4 [ 3.077820] c0 [] cpufreq_online+0x28c/0x76c [ 3.083618] c0 [] cpufreq_add_dev+0x98/0xdc [ 3.089331] c0 [] subsys_interface_register+0x84/0xc0 [ 3.095907] c0 [] cpufreq_register_driver+0x168/0x28c [ 3.102486] c0 [] sprd_cpufreq_probe+0x134/0x19c [ 3.108629] c0 [] platform_drv_probe+0x58/0xd0 [ 3.114599] c0 [] driver_probe_device+0x1e8/0x470 [ 3.120830] c0 [] __driver_attach+0xb4/0xb8 [ 3.126541] c0 [] bus_for_each_dev+0x6c/0xac [ 3.132339] c0 [] driver_attach+0x2c/0x34 [ 3.137877] c0 [] bus_add_driver+0x210/0x298 [ 3.143676] c0 [] driver_register+0x7c/0x114 [ 3.149476] c0 [] __platform_driver_register+0x60/0x6c [ 3.156139] c0 [] sprd_cpufreq_platdrv_init+0x18/0x20 [ 3.162714] c0 [] do_one_initcall+0xd0/0x1d8 [ 3.168514] c0 [] kernel_init_freeable+0x1fc/0x29c [ 3.174834] c0 [] kernel_init+0x20/0x12c [ 3.180281] c0 [] ret_from_fork+0x10/0x40 Reported-by: Ke Wang Signed-off-by: Juri Lelli --- kernel/sched/walt.c | 45 +-------------------------------------------- 1 file changed, 1 insertion(+), 44 deletions(-) diff --git a/kernel/sched/walt.c b/kernel/sched/walt.c index 2ffb1680b380..6e053bd9830c 100644 --- a/kernel/sched/walt.c +++ b/kernel/sched/walt.c @@ -62,8 +62,6 @@ static unsigned int max_possible_freq = 1; */ static unsigned int min_max_freq = 1; -static unsigned int max_capacity = 1024; -static unsigned int min_capacity = 1024; static unsigned int max_load_scale_factor = 1024; static unsigned int max_possible_capacity = 1024; @@ -869,39 +867,6 @@ void walt_fixup_busy_time(struct task_struct *p, int new_cpu) double_rq_unlock(src_rq, dest_rq); } -/* Keep track of max/min capacity possible across CPUs "currently" */ -static void __update_min_max_capacity(void) -{ - int i; - int max = 0, min = INT_MAX; - - for_each_online_cpu(i) { - if (cpu_rq(i)->capacity > max) - max = cpu_rq(i)->capacity; - if (cpu_rq(i)->capacity < min) - min = cpu_rq(i)->capacity; - } - - max_capacity = max; - min_capacity = min; -} - -static void update_min_max_capacity(void) -{ - unsigned long flags; - int i; - - local_irq_save(flags); - for_each_possible_cpu(i) - raw_spin_lock(&cpu_rq(i)->lock); - - __update_min_max_capacity(); - - for_each_possible_cpu(i) - raw_spin_unlock(&cpu_rq(i)->lock); - local_irq_restore(flags); -} - /* * Return 'capacity' of a cpu in reference to "least" efficient cpu, such that * least efficient cpu gets capacity of 1024 @@ -984,15 +949,9 @@ static int cpufreq_notifier_policy(struct notifier_block *nb, /* Initialized to policy->max in case policy->related_cpus is empty! */ unsigned int orig_max_freq = policy->max; - if (val != CPUFREQ_NOTIFY && val != CPUFREQ_REMOVE_POLICY && - val != CPUFREQ_CREATE_POLICY) + if (val != CPUFREQ_NOTIFY) return 0; - if (val == CPUFREQ_REMOVE_POLICY || val == CPUFREQ_CREATE_POLICY) { - update_min_max_capacity(); - return 0; - } - for_each_cpu(i, policy->related_cpus) { cpumask_copy(&cpu_rq(i)->freq_domain_cpumask, policy->related_cpus); @@ -1082,8 +1041,6 @@ static int cpufreq_notifier_policy(struct notifier_block *nb, max_load_scale_factor = highest_mplsf; } - __update_min_max_capacity(); - return 0; } From e5159b936f7237870f6e111b4409181d973a4890 Mon Sep 17 00:00:00 2001 From: Jin Qian Date: Thu, 8 Dec 2016 17:06:03 -0800 Subject: [PATCH 064/199] goldfish: enable CONFIG_INET_DIAG_DESTROY Bug: 31648368 Change-Id: I3715cc6474129ba2176be62ed2c0a7d09a6f2ac7 --- arch/arm/configs/ranchu_defconfig | 1 + arch/arm64/configs/ranchu64_defconfig | 1 + arch/x86/configs/i386_ranchu_defconfig | 2 +- arch/x86/configs/x86_64_ranchu_defconfig | 2 +- 4 files changed, 4 insertions(+), 2 deletions(-) diff --git a/arch/arm/configs/ranchu_defconfig b/arch/arm/configs/ranchu_defconfig index 35a90af941a4..49e7bbd5825a 100644 --- a/arch/arm/configs/ranchu_defconfig +++ b/arch/arm/configs/ranchu_defconfig @@ -48,6 +48,7 @@ CONFIG_UNIX=y CONFIG_XFRM_USER=y CONFIG_NET_KEY=y CONFIG_INET=y +CONFIG_INET_DIAG_DESTROY=y CONFIG_IP_MULTICAST=y CONFIG_IP_ADVANCED_ROUTER=y CONFIG_IP_MULTIPLE_TABLES=y diff --git a/arch/arm64/configs/ranchu64_defconfig b/arch/arm64/configs/ranchu64_defconfig index 00eb346e0928..fc55008d8c4c 100644 --- a/arch/arm64/configs/ranchu64_defconfig +++ b/arch/arm64/configs/ranchu64_defconfig @@ -50,6 +50,7 @@ CONFIG_UNIX=y CONFIG_XFRM_USER=y CONFIG_NET_KEY=y CONFIG_INET=y +CONFIG_INET_DIAG_DESTROY=y CONFIG_IP_MULTICAST=y CONFIG_IP_ADVANCED_ROUTER=y CONFIG_IP_MULTIPLE_TABLES=y diff --git a/arch/x86/configs/i386_ranchu_defconfig b/arch/x86/configs/i386_ranchu_defconfig index 0206eb8cfb61..65ed8c8f8444 100644 --- a/arch/x86/configs/i386_ranchu_defconfig +++ b/arch/x86/configs/i386_ranchu_defconfig @@ -89,7 +89,7 @@ CONFIG_SYN_COOKIES=y CONFIG_INET_ESP=y # CONFIG_INET_XFRM_MODE_BEET is not set # CONFIG_INET_LRO is not set -# CONFIG_INET_DIAG is not set +CONFIG_INET_DIAG_DESTROY=y CONFIG_IPV6_ROUTER_PREF=y CONFIG_IPV6_ROUTE_INFO=y CONFIG_IPV6_OPTIMISTIC_DAD=y diff --git a/arch/x86/configs/x86_64_ranchu_defconfig b/arch/x86/configs/x86_64_ranchu_defconfig index dd389774bacb..d977bd91e390 100644 --- a/arch/x86/configs/x86_64_ranchu_defconfig +++ b/arch/x86/configs/x86_64_ranchu_defconfig @@ -87,7 +87,7 @@ CONFIG_SYN_COOKIES=y CONFIG_INET_ESP=y # CONFIG_INET_XFRM_MODE_BEET is not set # CONFIG_INET_LRO is not set -# CONFIG_INET_DIAG is not set +CONFIG_INET_DIAG_DESTROY=y CONFIG_IPV6_ROUTER_PREF=y CONFIG_IPV6_ROUTE_INFO=y CONFIG_IPV6_OPTIMISTIC_DAD=y From 4e4b5185e3b9ba6e4f081ec8320c5f7008c62ac3 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Thu, 3 Mar 2016 15:10:59 +0100 Subject: [PATCH 065/199] UPSTREAM: arm64: enable CONFIG_DEBUG_RODATA by default (Cherry picked from commit 57efac2f7108e3255d0dfe512290c9896f4ed55f) In spite of its name, CONFIG_DEBUG_RODATA is an important hardening feature for production kernels, and distros all enable it by default in their kernel configs. However, since enabling it used to result in more granular, and thus less efficient kernel mappings, it is not enabled by default for performance reasons. However, since commit 2f39b5f91eb4 ("arm64: mm: Mark .rodata as RO"), the various kernel segments (.text, .rodata, .init and .data) are already mapped individually, and the only effect of setting CONFIG_DEBUG_RODATA is that the existing .text and .rodata mappings are updated late in the boot sequence to have their read-only attributes set, which means that any performance concerns related to enabling CONFIG_DEBUG_RODATA are no longer valid. So from now on, make CONFIG_DEBUG_RODATA default to 'y' Signed-off-by: Ard Biesheuvel Acked-by: Mark Rutland Acked-by: Kees Cook Signed-off-by: Catalin Marinas Signed-off-by: Amit Pundir --- arch/arm64/Kconfig.debug | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/arm64/Kconfig.debug b/arch/arm64/Kconfig.debug index ab1cb1fc4e3d..d33041614206 100644 --- a/arch/arm64/Kconfig.debug +++ b/arch/arm64/Kconfig.debug @@ -64,13 +64,13 @@ config DEBUG_SET_MODULE_RONX config DEBUG_RODATA bool "Make kernel text and rodata read-only" + default y help If this is set, kernel text and rodata will be made read-only. This is to help catch accidental or malicious attempts to change the - kernel's executable code. Additionally splits rodata from kernel - text so it can be made explicitly non-executable. + kernel's executable code. - If in doubt, say Y + If in doubt, say Y config DEBUG_ALIGN_RODATA depends on DEBUG_RODATA From 988801b7ebcad81196d1119722ed827ac2bc3208 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Wed, 30 Mar 2016 09:46:23 +0200 Subject: [PATCH 066/199] UPSTREAM: efi/arm64: Don't apply MEMBLOCK_NOMAP to UEFI memory map mapping (Cherry picked from commit 7cc8cbcf82d165dd658d89a7a287140948e76413) Commit 4dffbfc48d65 ("arm64/efi: mark UEFI reserved regions as MEMBLOCK_NOMAP") updated the mapping logic of both the RuntimeServices regions as well as the kernel's copy of the UEFI memory map to set the MEMBLOCK_NOMAP flag, which causes these regions to be omitted from the kernel direct mapping, and from being covered by a struct page. For the RuntimeServices regions, this is an obvious win, since the contents of these regions have significance to the firmware executable code itself, and are mapped in the EFI page tables using attributes that are described in the UEFI memory map, and which may differ from the attributes we use for mapping system RAM. It also prevents the contents from being modified inadvertently, since the EFI page tables are only live during runtime service invocations. None of these concerns apply to the allocation that covers the UEFI memory map, since it is entirely owned by the kernel. Setting the MEMBLOCK_NOMAP on the region did allow us to use ioremap_cache() to map it both on arm64 and on ARM, since the latter does not allow ioremap_cache() to be used on regions that are covered by a struct page. The ioremap_cache() on ARM restriction will be lifted in the v4.7 timeframe, but in the mean time, it has been reported that commit 4dffbfc48d65 causes a regression on 64k granule kernels. This is due to the fact that, given the 64 KB page size, the region that we end up removing from the kernel direct mapping is rounded up to 64 KB, and this 64 KB page frame may be shared with the initrd when booting via GRUB (which does not align its EFI_LOADER_DATA allocations to 64 KB like the stub does). This will crash the kernel as soon as it tries to access the initrd. Since the issue is specific to arm64, revert back to memblock_reserve()'ing the UEFI memory map when running on arm64. This is a temporary fix for v4.5 and v4.6, and will be superseded in the v4.7 timeframe when we will be able to move back to memblock_reserve() unconditionally. Fixes: 4dffbfc48d65 ("arm64/efi: mark UEFI reserved regions as MEMBLOCK_NOMAP") Reported-by: Mark Salter Signed-off-by: Ard Biesheuvel Acked-by: Will Deacon Cc: Leif Lindholm Cc: Mark Rutland Cc: Jeremy Linton Cc: Mark Langsdorf Cc: # v4.5 Signed-off-by: Matt Fleming Fixes: Change-Id: Ia3ce78f40f8d41a9afdd42238fe9cbfd81bbff08 ("UPSTREAM: arm64/efi: mark UEFI reserved regions as MEMBLOCK_NOMAP") Signed-off-by: Amit Pundir --- drivers/firmware/efi/arm-init.c | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/drivers/firmware/efi/arm-init.c b/drivers/firmware/efi/arm-init.c index 9e15d571b53c..a76c35fc0b92 100644 --- a/drivers/firmware/efi/arm-init.c +++ b/drivers/firmware/efi/arm-init.c @@ -203,7 +203,19 @@ void __init efi_init(void) reserve_regions(); early_memunmap(memmap.map, params.mmap_size); - memblock_mark_nomap(params.mmap & PAGE_MASK, - PAGE_ALIGN(params.mmap_size + - (params.mmap & ~PAGE_MASK))); + + if (IS_ENABLED(CONFIG_ARM)) { + /* + * ARM currently does not allow ioremap_cache() to be called on + * memory regions that are covered by struct page. So remove the + * UEFI memory map from the linear mapping. + */ + memblock_mark_nomap(params.mmap & PAGE_MASK, + PAGE_ALIGN(params.mmap_size + + (params.mmap & ~PAGE_MASK))); + } else { + memblock_reserve(params.mmap & PAGE_MASK, + PAGE_ALIGN(params.mmap_size + + (params.mmap & ~PAGE_MASK))); + } } From 7d69300b5199a80f9d5ca6a8a60b5152d602fee0 Mon Sep 17 00:00:00 2001 From: Amit Pundir Date: Mon, 2 Jan 2017 13:54:18 +0530 Subject: [PATCH 067/199] Revert "ANDROID: net: core: fix UID-based routing" This reverts commit 6acba5b6a1507f6282cbe985da7c0c3276d247d7. Bug: 16355602 Signed-off-by: Amit Pundir --- include/uapi/linux/rtnetlink.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/uapi/linux/rtnetlink.h b/include/uapi/linux/rtnetlink.h index a2fad11894ff..78e6e348626b 100644 --- a/include/uapi/linux/rtnetlink.h +++ b/include/uapi/linux/rtnetlink.h @@ -306,12 +306,12 @@ enum rtattr_type_t { RTA_TABLE, RTA_MARK, RTA_MFC_STATS, - RTA_UID, RTA_VIA, RTA_NEWDST, RTA_PREF, RTA_ENCAP_TYPE, RTA_ENCAP, + RTA_UID, __RTA_MAX }; From 9392770aa38bd8dce937c49440d161168794b55a Mon Sep 17 00:00:00 2001 From: Amit Pundir Date: Mon, 2 Jan 2017 13:54:49 +0530 Subject: [PATCH 068/199] Revert "ANDROID: net: fib: remove duplicate assignment" This reverts commit 439bce3122de3963d6911c8514fd49b0fadfe989. Bug: 16355602 Signed-off-by: Amit Pundir --- include/net/fib_rules.h | 1 + 1 file changed, 1 insertion(+) diff --git a/include/net/fib_rules.h b/include/net/fib_rules.h index bdd985f41022..55b5419cb6a7 100644 --- a/include/net/fib_rules.h +++ b/include/net/fib_rules.h @@ -89,6 +89,7 @@ struct fib_rules_ops { [FRA_FWMARK] = { .type = NLA_U32 }, \ [FRA_FWMASK] = { .type = NLA_U32 }, \ [FRA_TABLE] = { .type = NLA_U32 }, \ + [FRA_GOTO] = { .type = NLA_U32 }, \ [FRA_UID_START] = { .type = NLA_U32 }, \ [FRA_UID_END] = { .type = NLA_U32 }, \ [FRA_SUPPRESS_PREFIXLEN] = { .type = NLA_U32 }, \ From 82518b9a3f2778f83583cab8facf19f886b71e1a Mon Sep 17 00:00:00 2001 From: Amit Pundir Date: Mon, 2 Jan 2017 14:02:23 +0530 Subject: [PATCH 069/199] Revert "ANDROID: net: fix 'const' warnings" This reverts commit 4f27b251f23643c115fce7e36f31268deed6e71b. Bug: 16355602 Signed-off-by: Amit Pundir --- net/ipv4/inet_connection_sock.c | 4 ++-- net/ipv6/inet6_connection_sock.c | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index 030cd09dd2a2..759f90e1e499 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -420,7 +420,7 @@ struct dst_entry *inet_csk_route_req(const struct sock *sk, sk->sk_protocol, inet_sk_flowi_flags(sk), (opt && opt->opt.srr) ? opt->opt.faddr : ireq->ir_rmt_addr, ireq->ir_loc_addr, ireq->ir_rmt_port, - htons(ireq->ir_num), sock_i_uid((struct sock *)sk)); + htons(ireq->ir_num), sock_i_uid(sk)); security_req_classify_flow(req, flowi4_to_flowi(fl4)); rt = ip_route_output_flow(net, fl4, sk); if (IS_ERR(rt)) @@ -457,7 +457,7 @@ struct dst_entry *inet_csk_route_child_sock(const struct sock *sk, sk->sk_protocol, inet_sk_flowi_flags(sk), (opt && opt->opt.srr) ? opt->opt.faddr : ireq->ir_rmt_addr, ireq->ir_loc_addr, ireq->ir_rmt_port, - htons(ireq->ir_num), sock_i_uid((struct sock *)sk)); + htons(ireq->ir_num), sock_i_uid(sk)); security_req_classify_flow(req, flowi4_to_flowi(fl4)); rt = ip_route_output_flow(net, fl4, sk); if (IS_ERR(rt)) diff --git a/net/ipv6/inet6_connection_sock.c b/net/ipv6/inet6_connection_sock.c index 897bb6eb5751..7b214652768e 100644 --- a/net/ipv6/inet6_connection_sock.c +++ b/net/ipv6/inet6_connection_sock.c @@ -86,7 +86,7 @@ struct dst_entry *inet6_csk_route_req(const struct sock *sk, fl6->flowi6_mark = ireq->ir_mark; fl6->fl6_dport = ireq->ir_rmt_port; fl6->fl6_sport = htons(ireq->ir_num); - fl6->flowi6_uid = sock_i_uid((struct sock *)sk); + fl6->flowi6_uid = sock_i_uid(sk); security_req_classify_flow(req, flowi6_to_flowi(fl6)); dst = ip6_dst_lookup_flow(sk, fl6, final_p); From 3048a60ec721f64d09906295ec6f11b0b063d737 Mon Sep 17 00:00:00 2001 From: Amit Pundir Date: Mon, 2 Jan 2017 14:04:56 +0530 Subject: [PATCH 070/199] Revert "net: core: Handle 'sk' being NULL in UID-based routing" This reverts commit ad493510385ee040516bf83a60e6c4921fcdfdac. Bug: 16355602 Signed-off-by: Amit Pundir --- include/net/route.h | 3 +-- net/ipv4/route.c | 2 +- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/include/net/route.h b/include/net/route.h index d016a8cb45cf..bf6fe1a6527e 100644 --- a/include/net/route.h +++ b/include/net/route.h @@ -154,8 +154,7 @@ static inline struct rtable *ip_route_output_ports(struct net *net, struct flowi flowi4_init_output(fl4, oif, sk ? sk->sk_mark : 0, tos, RT_SCOPE_UNIVERSE, proto, sk ? inet_sk_flowi_flags(sk) : 0, - daddr, saddr, dport, sport, - sk ? sock_i_uid(sk) : GLOBAL_ROOT_UID); + daddr, saddr, dport, sport, sock_i_uid(sk)); if (sk) security_sk_classify_flow(sk, flowi4_to_flowi(fl4)); return ip_route_output_flow(net, fl4, sk); diff --git a/net/ipv4/route.c b/net/ipv4/route.c index f75b5658a3a0..de51d32d6af8 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -518,7 +518,7 @@ static void __build_flow_key(struct flowi4 *fl4, struct sock *sk, RT_SCOPE_UNIVERSE, prot, flow_flags, iph->daddr, iph->saddr, 0, 0, - sk ? sock_i_uid(sk) : GLOBAL_ROOT_UID); + sock_i_uid(sk)); } static void build_skb_flow_key(struct flowi4 *fl4, const struct sk_buff *skb, From 823e9056c3126b809f0e7764951ee8be08212c2e Mon Sep 17 00:00:00 2001 From: Amit Pundir Date: Mon, 2 Jan 2017 14:05:24 +0530 Subject: [PATCH 071/199] Revert "net: core: Support UID-based routing." This reverts commit fd2cf795f3ab193752781be7372949ac1780d0ed. Bug: 16355602 Change-Id: I1ec2d1eb3d53f4186b60c6ca5d6a20fcca46d442 Signed-off-by: Amit Pundir --- include/net/fib_rules.h | 5 --- include/net/flow.h | 9 +----- include/net/ip.h | 1 - include/net/ip6_route.h | 2 +- include/net/route.h | 5 ++- include/uapi/linux/fib_rules.h | 2 -- include/uapi/linux/rtnetlink.h | 1 - net/core/fib_rules.c | 53 ++------------------------------ net/ipv4/fib_frontend.c | 1 - net/ipv4/inet_connection_sock.c | 4 +-- net/ipv4/ip_output.c | 3 +- net/ipv4/ping.c | 3 +- net/ipv4/raw.c | 3 +- net/ipv4/route.c | 25 ++++----------- net/ipv4/syncookies.c | 5 ++- net/ipv4/udp.c | 3 +- net/ipv6/af_inet6.c | 1 - net/ipv6/ah6.c | 2 +- net/ipv6/datagram.c | 1 - net/ipv6/esp6.c | 2 +- net/ipv6/icmp.c | 2 +- net/ipv6/inet6_connection_sock.c | 2 -- net/ipv6/ipcomp6.c | 2 +- net/ipv6/ping.c | 1 - net/ipv6/raw.c | 1 - net/ipv6/route.c | 11 ++----- net/ipv6/syncookies.c | 1 - net/ipv6/tcp_ipv6.c | 1 - net/ipv6/udp.c | 1 - 29 files changed, 26 insertions(+), 127 deletions(-) diff --git a/include/net/fib_rules.h b/include/net/fib_rules.h index 55b5419cb6a7..59160de702b6 100644 --- a/include/net/fib_rules.h +++ b/include/net/fib_rules.h @@ -29,8 +29,6 @@ struct fib_rule { int suppress_prefixlen; char iifname[IFNAMSIZ]; char oifname[IFNAMSIZ]; - kuid_t uid_start; - kuid_t uid_end; struct rcu_head rcu; }; @@ -89,9 +87,6 @@ struct fib_rules_ops { [FRA_FWMARK] = { .type = NLA_U32 }, \ [FRA_FWMASK] = { .type = NLA_U32 }, \ [FRA_TABLE] = { .type = NLA_U32 }, \ - [FRA_GOTO] = { .type = NLA_U32 }, \ - [FRA_UID_START] = { .type = NLA_U32 }, \ - [FRA_UID_END] = { .type = NLA_U32 }, \ [FRA_SUPPRESS_PREFIXLEN] = { .type = NLA_U32 }, \ [FRA_SUPPRESS_IFGROUP] = { .type = NLA_U32 }, \ [FRA_GOTO] = { .type = NLA_U32 } diff --git a/include/net/flow.h b/include/net/flow.h index 833080732dec..83969eebebf3 100644 --- a/include/net/flow.h +++ b/include/net/flow.h @@ -11,7 +11,6 @@ #include #include #include -#include /* * ifindex generation is per-net namespace, and loopback is @@ -39,7 +38,6 @@ struct flowi_common { #define FLOWI_FLAG_SKIP_NH_OIF 0x08 __u32 flowic_secid; struct flowi_tunnel flowic_tun_key; - kuid_t flowic_uid; }; union flowi_uli { @@ -77,7 +75,6 @@ struct flowi4 { #define flowi4_flags __fl_common.flowic_flags #define flowi4_secid __fl_common.flowic_secid #define flowi4_tun_key __fl_common.flowic_tun_key -#define flowi4_uid __fl_common.flowic_uid /* (saddr,daddr) must be grouped, same order as in IP header */ __be32 saddr; @@ -97,8 +94,7 @@ static inline void flowi4_init_output(struct flowi4 *fl4, int oif, __u32 mark, __u8 tos, __u8 scope, __u8 proto, __u8 flags, __be32 daddr, __be32 saddr, - __be16 dport, __be16 sport, - kuid_t uid) + __be16 dport, __be16 sport) { fl4->flowi4_oif = oif; fl4->flowi4_iif = LOOPBACK_IFINDEX; @@ -109,7 +105,6 @@ static inline void flowi4_init_output(struct flowi4 *fl4, int oif, fl4->flowi4_flags = flags; fl4->flowi4_secid = 0; fl4->flowi4_tun_key.tun_id = 0; - fl4->flowi4_uid = uid; fl4->daddr = daddr; fl4->saddr = saddr; fl4->fl4_dport = dport; @@ -138,7 +133,6 @@ struct flowi6 { #define flowi6_flags __fl_common.flowic_flags #define flowi6_secid __fl_common.flowic_secid #define flowi6_tun_key __fl_common.flowic_tun_key -#define flowi6_uid __fl_common.flowic_uid struct in6_addr daddr; struct in6_addr saddr; __be32 flowlabel; @@ -183,7 +177,6 @@ struct flowi { #define flowi_flags u.__fl_common.flowic_flags #define flowi_secid u.__fl_common.flowic_secid #define flowi_tun_key u.__fl_common.flowic_tun_key -#define flowi_uid u.__fl_common.flowic_uid } __attribute__((__aligned__(BITS_PER_LONG/8))); static inline struct flowi *flowi4_to_flowi(struct flowi4 *fl4) diff --git a/include/net/ip.h b/include/net/ip.h index f78c3a52529b..b450d8653b30 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -170,7 +170,6 @@ struct ip_reply_arg { /* -1 if not needed */ int bound_dev_if; u8 tos; - kuid_t uid; }; #define IP_REPLY_ARG_NOSRCCHECK 1 diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h index ba82feec2590..295d291269e2 100644 --- a/include/net/ip6_route.h +++ b/include/net/ip6_route.h @@ -116,7 +116,7 @@ int rt6_route_rcv(struct net_device *dev, u8 *opt, int len, const struct in6_addr *gwaddr); void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu, int oif, - u32 mark, kuid_t uid); + u32 mark); void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, __be32 mtu); void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark); void ip6_redirect_no_header(struct sk_buff *skb, struct net *net, int oif, diff --git a/include/net/route.h b/include/net/route.h index bf6fe1a6527e..a3b9ef74a389 100644 --- a/include/net/route.h +++ b/include/net/route.h @@ -154,7 +154,7 @@ static inline struct rtable *ip_route_output_ports(struct net *net, struct flowi flowi4_init_output(fl4, oif, sk ? sk->sk_mark : 0, tos, RT_SCOPE_UNIVERSE, proto, sk ? inet_sk_flowi_flags(sk) : 0, - daddr, saddr, dport, sport, sock_i_uid(sk)); + daddr, saddr, dport, sport); if (sk) security_sk_classify_flow(sk, flowi4_to_flowi(fl4)); return ip_route_output_flow(net, fl4, sk); @@ -267,8 +267,7 @@ static inline void ip_route_connect_init(struct flowi4 *fl4, __be32 dst, __be32 flow_flags |= FLOWI_FLAG_ANYSRC; flowi4_init_output(fl4, oif, sk->sk_mark, tos, RT_SCOPE_UNIVERSE, - protocol, flow_flags, dst, src, dport, sport, - sock_i_uid(sk)); + protocol, flow_flags, dst, src, dport, sport); } static inline struct rtable *ip_route_connect(struct flowi4 *fl4, diff --git a/include/uapi/linux/fib_rules.h b/include/uapi/linux/fib_rules.h index ce19c5bf51f7..96161b8202b5 100644 --- a/include/uapi/linux/fib_rules.h +++ b/include/uapi/linux/fib_rules.h @@ -49,8 +49,6 @@ enum { FRA_TABLE, /* Extended table id */ FRA_FWMASK, /* mask for netfilter mark */ FRA_OIFNAME, - FRA_UID_START, /* UID range */ - FRA_UID_END, __FRA_MAX }; diff --git a/include/uapi/linux/rtnetlink.h b/include/uapi/linux/rtnetlink.h index 78e6e348626b..fa3b34365560 100644 --- a/include/uapi/linux/rtnetlink.h +++ b/include/uapi/linux/rtnetlink.h @@ -311,7 +311,6 @@ enum rtattr_type_t { RTA_PREF, RTA_ENCAP_TYPE, RTA_ENCAP, - RTA_UID, __RTA_MAX }; diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c index 3fbd839f6d20..365de66436ac 100644 --- a/net/core/fib_rules.c +++ b/net/core/fib_rules.c @@ -33,8 +33,6 @@ int fib_default_rule_add(struct fib_rules_ops *ops, r->table = table; r->flags = flags; r->fr_net = ops->fro_net; - r->uid_start = INVALID_UID; - r->uid_end = INVALID_UID; r->suppress_prefixlen = -1; r->suppress_ifgroup = -1; @@ -174,23 +172,6 @@ void fib_rules_unregister(struct fib_rules_ops *ops) } EXPORT_SYMBOL_GPL(fib_rules_unregister); -static inline kuid_t fib_nl_uid(struct nlattr *nla) -{ - return make_kuid(current_user_ns(), nla_get_u32(nla)); -} - -static int nla_put_uid(struct sk_buff *skb, int idx, kuid_t uid) -{ - return nla_put_u32(skb, idx, from_kuid_munged(current_user_ns(), uid)); -} - -static int fib_uid_range_match(struct flowi *fl, struct fib_rule *rule) -{ - return (!uid_valid(rule->uid_start) && !uid_valid(rule->uid_end)) || - (uid_gte(fl->flowi_uid, rule->uid_start) && - uid_lte(fl->flowi_uid, rule->uid_end)); -} - static int fib_rule_match(struct fib_rule *rule, struct fib_rules_ops *ops, struct flowi *fl, int flags) { @@ -208,9 +189,6 @@ static int fib_rule_match(struct fib_rule *rule, struct fib_rules_ops *ops, if (rule->tun_id && (rule->tun_id != fl->flowi_tun_key.tun_id)) goto out; - if (!fib_uid_range_match(fl, rule)) - goto out; - ret = ops->match(rule, fl, flags); out: return (rule->flags & FIB_RULE_INVERT) ? !ret : ret; @@ -393,19 +371,6 @@ static int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr* nlh) } else if (rule->action == FR_ACT_GOTO) goto errout_free; - /* UID start and end must either both be valid or both unspecified. */ - rule->uid_start = rule->uid_end = INVALID_UID; - if (tb[FRA_UID_START] || tb[FRA_UID_END]) { - if (tb[FRA_UID_START] && tb[FRA_UID_END]) { - rule->uid_start = fib_nl_uid(tb[FRA_UID_START]); - rule->uid_end = fib_nl_uid(tb[FRA_UID_END]); - } - if (!uid_valid(rule->uid_start) || - !uid_valid(rule->uid_end) || - !uid_lte(rule->uid_start, rule->uid_end)) - goto errout_free; - } - err = ops->configure(rule, skb, frh, tb); if (err < 0) goto errout_free; @@ -518,14 +483,6 @@ static int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr* nlh) (rule->tun_id != nla_get_be64(tb[FRA_TUN_ID]))) continue; - if (tb[FRA_UID_START] && - !uid_eq(rule->uid_start, fib_nl_uid(tb[FRA_UID_START]))) - continue; - - if (tb[FRA_UID_END] && - !uid_eq(rule->uid_end, fib_nl_uid(tb[FRA_UID_END]))) - continue; - if (!ops->compare(rule, frh, tb)) continue; @@ -592,9 +549,7 @@ static inline size_t fib_rule_nlmsg_size(struct fib_rules_ops *ops, + nla_total_size(4) /* FRA_SUPPRESS_IFGROUP */ + nla_total_size(4) /* FRA_FWMARK */ + nla_total_size(4) /* FRA_FWMASK */ - + nla_total_size(8) /* FRA_TUN_ID */ - + nla_total_size(4) /* FRA_UID_START */ - + nla_total_size(4); /* FRA_UID_END */ + + nla_total_size(8); /* FRA_TUN_ID */ if (ops->nlmsg_payload) payload += ops->nlmsg_payload(rule); @@ -652,11 +607,7 @@ static int fib_nl_fill_rule(struct sk_buff *skb, struct fib_rule *rule, (rule->target && nla_put_u32(skb, FRA_GOTO, rule->target)) || (rule->tun_id && - nla_put_be64(skb, FRA_TUN_ID, rule->tun_id)) || - (uid_valid(rule->uid_start) && - nla_put_uid(skb, FRA_UID_START, rule->uid_start)) || - (uid_valid(rule->uid_end) && - nla_put_uid(skb, FRA_UID_END, rule->uid_end))) + nla_put_be64(skb, FRA_TUN_ID, rule->tun_id))) goto nla_put_failure; if (rule->suppress_ifgroup != -1) { diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index 98c754e61024..63566ec54794 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -627,7 +627,6 @@ const struct nla_policy rtm_ipv4_policy[RTA_MAX + 1] = { [RTA_FLOW] = { .type = NLA_U32 }, [RTA_ENCAP_TYPE] = { .type = NLA_U16 }, [RTA_ENCAP] = { .type = NLA_NESTED }, - [RTA_UID] = { .type = NLA_U32 }, }; static int rtm_to_fib_config(struct net *net, struct sk_buff *skb, diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index 759f90e1e499..64148914803a 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -420,7 +420,7 @@ struct dst_entry *inet_csk_route_req(const struct sock *sk, sk->sk_protocol, inet_sk_flowi_flags(sk), (opt && opt->opt.srr) ? opt->opt.faddr : ireq->ir_rmt_addr, ireq->ir_loc_addr, ireq->ir_rmt_port, - htons(ireq->ir_num), sock_i_uid(sk)); + htons(ireq->ir_num)); security_req_classify_flow(req, flowi4_to_flowi(fl4)); rt = ip_route_output_flow(net, fl4, sk); if (IS_ERR(rt)) @@ -457,7 +457,7 @@ struct dst_entry *inet_csk_route_child_sock(const struct sock *sk, sk->sk_protocol, inet_sk_flowi_flags(sk), (opt && opt->opt.srr) ? opt->opt.faddr : ireq->ir_rmt_addr, ireq->ir_loc_addr, ireq->ir_rmt_port, - htons(ireq->ir_num), sock_i_uid(sk)); + htons(ireq->ir_num)); security_req_classify_flow(req, flowi4_to_flowi(fl4)); rt = ip_route_output_flow(net, fl4, sk); if (IS_ERR(rt)) diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 661bda968594..2b7283303650 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -1580,8 +1580,7 @@ void ip_send_unicast_reply(struct sock *sk, struct sk_buff *skb, RT_SCOPE_UNIVERSE, ip_hdr(skb)->protocol, ip_reply_arg_flowi_flags(arg), daddr, saddr, - tcp_hdr(skb)->source, tcp_hdr(skb)->dest, - arg->uid); + tcp_hdr(skb)->source, tcp_hdr(skb)->dest); security_skb_classify_flow(skb, flowi4_to_flowi(&fl4)); rt = ip_route_output_key(net, &fl4); if (IS_ERR(rt)) diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c index cd2cf3515c66..23160d2b3f71 100644 --- a/net/ipv4/ping.c +++ b/net/ipv4/ping.c @@ -795,8 +795,7 @@ static int ping_v4_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) flowi4_init_output(&fl4, ipc.oif, sk->sk_mark, tos, RT_SCOPE_UNIVERSE, sk->sk_protocol, - inet_sk_flowi_flags(sk), faddr, saddr, 0, 0, - sock_i_uid(sk)); + inet_sk_flowi_flags(sk), faddr, saddr, 0, 0); security_sk_classify_flow(sk, flowi4_to_flowi(&fl4)); rt = ip_route_output_flow(net, &fl4, sk); diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index a9b479a1c4a0..7113bae4e6a0 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -601,8 +601,7 @@ static int raw_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) inet->hdrincl ? IPPROTO_RAW : sk->sk_protocol, inet_sk_flowi_flags(sk) | (inet->hdrincl ? FLOWI_FLAG_KNOWN_NH : 0), - daddr, saddr, 0, 0, - sock_i_uid(sk)); + daddr, saddr, 0, 0); if (!saddr && ipc.oif) { err = l3mdev_get_saddr(net, ipc.oif, &fl4); diff --git a/net/ipv4/route.c b/net/ipv4/route.c index de51d32d6af8..7ceb8a574a50 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -501,7 +501,7 @@ void __ip_select_ident(struct net *net, struct iphdr *iph, int segs) } EXPORT_SYMBOL(__ip_select_ident); -static void __build_flow_key(struct flowi4 *fl4, struct sock *sk, +static void __build_flow_key(struct flowi4 *fl4, const struct sock *sk, const struct iphdr *iph, int oif, u8 tos, u8 prot, u32 mark, int flow_flags) @@ -517,12 +517,11 @@ static void __build_flow_key(struct flowi4 *fl4, struct sock *sk, flowi4_init_output(fl4, oif, mark, tos, RT_SCOPE_UNIVERSE, prot, flow_flags, - iph->daddr, iph->saddr, 0, 0, - sock_i_uid(sk)); + iph->daddr, iph->saddr, 0, 0); } static void build_skb_flow_key(struct flowi4 *fl4, const struct sk_buff *skb, - struct sock *sk) + const struct sock *sk) { const struct iphdr *iph = ip_hdr(skb); int oif = skb->dev->ifindex; @@ -533,7 +532,7 @@ static void build_skb_flow_key(struct flowi4 *fl4, const struct sk_buff *skb, __build_flow_key(fl4, sk, iph, oif, tos, prot, mark, 0); } -static void build_sk_flow_key(struct flowi4 *fl4, struct sock *sk) +static void build_sk_flow_key(struct flowi4 *fl4, const struct sock *sk) { const struct inet_sock *inet = inet_sk(sk); const struct ip_options_rcu *inet_opt; @@ -547,12 +546,11 @@ static void build_sk_flow_key(struct flowi4 *fl4, struct sock *sk) RT_CONN_FLAGS(sk), RT_SCOPE_UNIVERSE, inet->hdrincl ? IPPROTO_RAW : sk->sk_protocol, inet_sk_flowi_flags(sk), - daddr, inet->inet_saddr, 0, 0, - sock_i_uid(sk)); + daddr, inet->inet_saddr, 0, 0); rcu_read_unlock(); } -static void ip_rt_build_flow_key(struct flowi4 *fl4, struct sock *sk, +static void ip_rt_build_flow_key(struct flowi4 *fl4, const struct sock *sk, const struct sk_buff *skb) { if (skb) @@ -2488,11 +2486,6 @@ static int rt_fill_info(struct net *net, __be32 dst, __be32 src, u32 table_id, nla_put_u32(skb, RTA_MARK, fl4->flowi4_mark)) goto nla_put_failure; - if (!uid_eq(fl4->flowi4_uid, INVALID_UID) && - nla_put_u32(skb, RTA_UID, - from_kuid_munged(current_user_ns(), fl4->flowi4_uid))) - goto nla_put_failure; - error = rt->dst.error; if (rt_is_input_route(rt)) { @@ -2545,7 +2538,6 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh) int mark; struct sk_buff *skb; u32 table_id = RT_TABLE_MAIN; - kuid_t uid; err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv4_policy); if (err < 0) @@ -2573,10 +2565,6 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh) dst = tb[RTA_DST] ? nla_get_in_addr(tb[RTA_DST]) : 0; iif = tb[RTA_IIF] ? nla_get_u32(tb[RTA_IIF]) : 0; mark = tb[RTA_MARK] ? nla_get_u32(tb[RTA_MARK]) : 0; - if (tb[RTA_UID]) - uid = make_kuid(current_user_ns(), nla_get_u32(tb[RTA_UID])); - else - uid = (iif ? INVALID_UID : current_uid()); memset(&fl4, 0, sizeof(fl4)); fl4.daddr = dst; @@ -2584,7 +2572,6 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh) fl4.flowi4_tos = rtm->rtm_tos; fl4.flowi4_oif = tb[RTA_OIF] ? nla_get_u32(tb[RTA_OIF]) : 0; fl4.flowi4_mark = mark; - fl4.flowi4_uid = uid; if (netif_index_is_l3_master(net, fl4.flowi4_oif)) fl4.flowi4_flags = FLOWI_FLAG_L3MDEV_SRC | FLOWI_FLAG_SKIP_NH_OIF; diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c index 31b6a4c9db32..4cbe9f0a4281 100644 --- a/net/ipv4/syncookies.c +++ b/net/ipv4/syncookies.c @@ -374,9 +374,8 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb) flowi4_init_output(&fl4, sk->sk_bound_dev_if, ireq->ir_mark, RT_CONN_FLAGS(sk), RT_SCOPE_UNIVERSE, IPPROTO_TCP, inet_sk_flowi_flags(sk), - (opt && opt->srr) ? opt->faddr : ireq->ir_rmt_addr, - ireq->ir_loc_addr, th->source, th->dest, - sock_i_uid(sk)); + opt->srr ? opt->faddr : ireq->ir_rmt_addr, + ireq->ir_loc_addr, th->source, th->dest); security_req_classify_flow(req, flowi4_to_flowi(&fl4)); rt = ip_route_output_key(sock_net(sk), &fl4); if (IS_ERR(rt)) { diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 381a035fcfa1..91108c67fdbc 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -1025,8 +1025,7 @@ int udp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) flowi4_init_output(fl4, ipc.oif, sk->sk_mark, tos, RT_SCOPE_UNIVERSE, sk->sk_protocol, flow_flags, - faddr, saddr, dport, inet->inet_sport, - sock_i_uid(sk)); + faddr, saddr, dport, inet->inet_sport); if (!saddr && ipc.oif) { err = l3mdev_get_saddr(net, ipc.oif, fl4); diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index d9b25bd17bf1..9dbfacb6e0d9 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -678,7 +678,6 @@ int inet6_sk_rebuild_header(struct sock *sk) fl6.flowi6_mark = sk->sk_mark; fl6.fl6_dport = inet->inet_dport; fl6.fl6_sport = inet->inet_sport; - fl6.flowi6_uid = sock_i_uid(sk); security_sk_classify_flow(sk, flowi6_to_flowi(&fl6)); rcu_read_lock(); diff --git a/net/ipv6/ah6.c b/net/ipv6/ah6.c index c52b8fc904c9..0630a4d5daaa 100644 --- a/net/ipv6/ah6.c +++ b/net/ipv6/ah6.c @@ -664,7 +664,7 @@ static int ah6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, if (type == NDISC_REDIRECT) ip6_redirect(skb, net, skb->dev->ifindex, 0); else - ip6_update_pmtu(skb, net, info, 0, 0, INVALID_UID); + ip6_update_pmtu(skb, net, info, 0, 0); xfrm_state_put(x); return 0; diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c index 183ff87dacf3..428162155280 100644 --- a/net/ipv6/datagram.c +++ b/net/ipv6/datagram.c @@ -161,7 +161,6 @@ ipv4_connected: fl6.flowi6_mark = sk->sk_mark; fl6.fl6_dport = inet->inet_dport; fl6.fl6_sport = inet->inet_sport; - fl6.flowi6_uid = sock_i_uid(sk); if (!fl6.flowi6_oif) fl6.flowi6_oif = np->sticky_pktinfo.ipi6_ifindex; diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c index 17df6bbebcff..111ba55fd512 100644 --- a/net/ipv6/esp6.c +++ b/net/ipv6/esp6.c @@ -476,7 +476,7 @@ static int esp6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, if (type == NDISC_REDIRECT) ip6_redirect(skb, net, skb->dev->ifindex, 0); else - ip6_update_pmtu(skb, net, info, 0, 0, INVALID_UID); + ip6_update_pmtu(skb, net, info, 0, 0); xfrm_state_put(x); return 0; diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index 41e5c9520c7d..3697cd08c515 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c @@ -92,7 +92,7 @@ static void icmpv6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, struct net *net = dev_net(skb->dev); if (type == ICMPV6_PKT_TOOBIG) - ip6_update_pmtu(skb, net, info, 0, 0, INVALID_UID); + ip6_update_pmtu(skb, net, info, 0, 0); else if (type == NDISC_REDIRECT) ip6_redirect(skb, net, skb->dev->ifindex, 0); diff --git a/net/ipv6/inet6_connection_sock.c b/net/ipv6/inet6_connection_sock.c index 7b214652768e..a7ca2cde2ecb 100644 --- a/net/ipv6/inet6_connection_sock.c +++ b/net/ipv6/inet6_connection_sock.c @@ -86,7 +86,6 @@ struct dst_entry *inet6_csk_route_req(const struct sock *sk, fl6->flowi6_mark = ireq->ir_mark; fl6->fl6_dport = ireq->ir_rmt_port; fl6->fl6_sport = htons(ireq->ir_num); - fl6->flowi6_uid = sock_i_uid(sk); security_req_classify_flow(req, flowi6_to_flowi(fl6)); dst = ip6_dst_lookup_flow(sk, fl6, final_p); @@ -135,7 +134,6 @@ static struct dst_entry *inet6_csk_route_socket(struct sock *sk, fl6->flowi6_mark = sk->sk_mark; fl6->fl6_sport = inet->inet_sport; fl6->fl6_dport = inet->inet_dport; - fl6->flowi6_uid = sock_i_uid(sk); security_sk_classify_flow(sk, flowi6_to_flowi(fl6)); rcu_read_lock(); diff --git a/net/ipv6/ipcomp6.c b/net/ipv6/ipcomp6.c index b247baceb797..1b9316e1386a 100644 --- a/net/ipv6/ipcomp6.c +++ b/net/ipv6/ipcomp6.c @@ -76,7 +76,7 @@ static int ipcomp6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, if (type == NDISC_REDIRECT) ip6_redirect(skb, net, skb->dev->ifindex, 0); else - ip6_update_pmtu(skb, net, info, 0, 0, INVALID_UID); + ip6_update_pmtu(skb, net, info, 0, 0); xfrm_state_put(x); return 0; diff --git a/net/ipv6/ping.c b/net/ipv6/ping.c index fa65e92e9510..265bcdcf29c7 100644 --- a/net/ipv6/ping.c +++ b/net/ipv6/ping.c @@ -141,7 +141,6 @@ int ping_v6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) fl6.daddr = *daddr; fl6.flowi6_oif = oif; fl6.flowi6_mark = sk->sk_mark; - fl6.flowi6_uid = sock_i_uid(sk); fl6.fl6_icmp_type = user_icmph.icmp6_type; fl6.fl6_icmp_code = user_icmph.icmp6_code; security_sk_classify_flow(sk, flowi6_to_flowi(&fl6)); diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index d9ad71a01b4c..99140986e887 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -768,7 +768,6 @@ static int rawv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) memset(&fl6, 0, sizeof(fl6)); fl6.flowi6_mark = sk->sk_mark; - fl6.flowi6_uid = sock_i_uid(sk); if (sin6) { if (addr_len < SIN6_LEN_RFC2133) diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 46476a3af2ad..614e6a771887 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -1382,7 +1382,7 @@ static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk, } void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu, - int oif, u32 mark, kuid_t uid) + int oif, u32 mark) { const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data; struct dst_entry *dst; @@ -1394,7 +1394,6 @@ void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu, fl6.daddr = iph->daddr; fl6.saddr = iph->saddr; fl6.flowlabel = ip6_flowinfo(iph); - fl6.flowi6_uid = uid; dst = ip6_route_output(net, NULL, &fl6); if (!dst->error) @@ -1406,7 +1405,7 @@ EXPORT_SYMBOL_GPL(ip6_update_pmtu); void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, __be32 mtu) { ip6_update_pmtu(skb, sock_net(sk), mtu, - sk->sk_bound_dev_if, sk->sk_mark, sock_i_uid(sk)); + sk->sk_bound_dev_if, sk->sk_mark); } EXPORT_SYMBOL_GPL(ip6_sk_update_pmtu); @@ -2687,7 +2686,6 @@ static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = { [RTA_PREF] = { .type = NLA_U8 }, [RTA_ENCAP_TYPE] = { .type = NLA_U16 }, [RTA_ENCAP] = { .type = NLA_NESTED }, - [RTA_UID] = { .type = NLA_U32 }, }; static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh, @@ -3251,11 +3249,6 @@ static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh) if (tb[RTA_MARK]) fl6.flowi6_mark = nla_get_u32(tb[RTA_MARK]); - if (tb[RTA_UID]) - fl6.flowi6_uid = make_kuid(current_user_ns(), - nla_get_u32(tb[RTA_UID])); - else - fl6.flowi6_uid = iif ? INVALID_UID : current_uid(); if (iif) { struct net_device *dev; int flags = 0; diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c index a22015fab95e..eaf7ac496d50 100644 --- a/net/ipv6/syncookies.c +++ b/net/ipv6/syncookies.c @@ -228,7 +228,6 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) fl6.flowi6_mark = ireq->ir_mark; fl6.fl6_dport = ireq->ir_rmt_port; fl6.fl6_sport = inet_sk(sk)->inet_sport; - fl6.flowi6_uid = sock_i_uid(sk); security_req_classify_flow(req, flowi6_to_flowi(&fl6)); dst = ip6_dst_lookup_flow(sk, &fl6, final_p); diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 50e71e784e85..7c09f3df537c 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -234,7 +234,6 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, fl6.flowi6_mark = sk->sk_mark; fl6.fl6_dport = usin->sin6_port; fl6.fl6_sport = inet->inet_sport; - fl6.flowi6_uid = sock_i_uid(sk); opt = rcu_dereference_protected(np->opt, sock_owned_by_user(sk)); final_p = fl6_update_dst(&fl6, opt, &final); diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 1207379c1cce..7888468906c1 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -1244,7 +1244,6 @@ do_udp_sendmsg: fl6.flowi6_oif = np->sticky_pktinfo.ipi6_ifindex; fl6.flowi6_mark = sk->sk_mark; - fl6.flowi6_uid = sock_i_uid(sk); if (msg->msg_controllen) { opt = &opt_space; From 04ac0fa0d1b557a15aa51fd1dad004236b28c470 Mon Sep 17 00:00:00 2001 From: Lorenzo Colitti Date: Fri, 4 Nov 2016 02:23:41 +0900 Subject: [PATCH 072/199] net: core: Add a UID field to struct sock. Protocol sockets (struct sock) don't have UIDs, but most of the time, they map 1:1 to userspace sockets (struct socket) which do. Various operations such as the iptables xt_owner match need access to the "UID of a socket", and do so by following the backpointer to the struct socket. This involves taking sk_callback_lock and doesn't work when there is no socket because userspace has already called close(). Simplify this by adding a sk_uid field to struct sock whose value matches the UID of the corresponding struct socket. The semantics are as follows: 1. Whenever sk_socket is non-null: sk_uid is the same as the UID in sk_socket, i.e., matches the return value of sock_i_uid. Specifically, the UID is set when userspace calls socket(), fchown(), or accept(). 2. When sk_socket is NULL, sk_uid is defined as follows: - For a socket that no longer has a sk_socket because userspace has called close(): the previous UID. - For a cloned socket (e.g., an incoming connection that is established but on which userspace has not yet called accept): the UID of the socket it was cloned from. - For a socket that has never had an sk_socket: UID 0 inside the user namespace corresponding to the network namespace the socket belongs to. Kernel sockets created by sock_create_kern are a special case of #1 and sk_uid is the user that created them. For kernel sockets created at network namespace creation time, such as the per-processor ICMP and TCP sockets, this is the user that created the network namespace. Bug: 16355602 Change-Id: Idbc3e9a0cec91c4c6e01916b967b6237645ebe59 Signed-off-by: Lorenzo Colitti Signed-off-by: David S. Miller --- include/net/sock.h | 7 +++++++ net/core/sock.c | 5 ++++- net/socket.c | 14 ++++++++++++++ 3 files changed, 25 insertions(+), 1 deletion(-) diff --git a/include/net/sock.h b/include/net/sock.h index 8f77df63a8f4..62c9e2268d63 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -446,6 +446,7 @@ struct sock { void *sk_security; #endif __u32 sk_mark; + kuid_t sk_uid; #ifdef CONFIG_CGROUP_NET_CLASSID u32 sk_classid; #endif @@ -1692,6 +1693,7 @@ static inline void sock_graft(struct sock *sk, struct socket *parent) sk->sk_wq = parent->wq; parent->sk = sk; sk_set_socket(sk, parent); + sk->sk_uid = SOCK_INODE(parent)->i_uid; security_sock_graft(sk, parent); write_unlock_bh(&sk->sk_callback_lock); } @@ -1699,6 +1701,11 @@ static inline void sock_graft(struct sock *sk, struct socket *parent) kuid_t sock_i_uid(struct sock *sk); unsigned long sock_i_ino(struct sock *sk); +static inline kuid_t sock_net_uid(const struct net *net, const struct sock *sk) +{ + return sk ? sk->sk_uid : make_kuid(net->user_ns, 0); +} + static inline u32 net_tx_rndhash(void) { u32 v = prandom_u32(); diff --git a/net/core/sock.c b/net/core/sock.c index f4c0917e66b5..f367df38c264 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -2384,8 +2384,11 @@ void sock_init_data(struct socket *sock, struct sock *sk) sk->sk_type = sock->type; sk->sk_wq = sock->wq; sock->sk = sk; - } else + sk->sk_uid = SOCK_INODE(sock)->i_uid; + } else { sk->sk_wq = NULL; + sk->sk_uid = make_kuid(sock_net(sk)->user_ns, 0); + } rwlock_init(&sk->sk_callback_lock); lockdep_set_class_and_name(&sk->sk_callback_lock, diff --git a/net/socket.c b/net/socket.c index 0090225eeb1e..66d984ac2991 100644 --- a/net/socket.c +++ b/net/socket.c @@ -520,9 +520,23 @@ static ssize_t sockfs_listxattr(struct dentry *dentry, char *buffer, return used; } +int sockfs_setattr(struct dentry *dentry, struct iattr *iattr) +{ + int err = simple_setattr(dentry, iattr); + + if (!err) { + struct socket *sock = SOCKET_I(d_inode(dentry)); + + sock->sk->sk_uid = iattr->ia_uid; + } + + return err; +} + static const struct inode_operations sockfs_inode_ops = { .getxattr = sockfs_getxattr, .listxattr = sockfs_listxattr, + .setattr = sockfs_setattr, }; /** From a16373f493e5d32f63188a9781a904bd1741e628 Mon Sep 17 00:00:00 2001 From: Lorenzo Colitti Date: Fri, 4 Nov 2016 02:23:42 +0900 Subject: [PATCH 073/199] net: core: add UID to flows, rules, and routes - Define a new FIB rule attributes, FRA_UID_RANGE, to describe a range of UIDs. - Define a RTA_UID attribute for per-UID route lookups and dumps. - Support passing these attributes to and from userspace via rtnetlink. The value INVALID_UID indicates no UID was specified. - Add a UID field to the flow structures. Bug: 16355602 Change-Id: Iea98e6fedd0fd4435a1f4efa3deb3629505619ab Signed-off-by: Lorenzo Colitti Signed-off-by: David S. Miller --- include/net/fib_rules.h | 9 ++++- include/net/flow.h | 5 +++ include/uapi/linux/fib_rules.h | 8 ++++ include/uapi/linux/rtnetlink.h | 3 ++ net/core/fib_rules.c | 72 +++++++++++++++++++++++++++++++++- net/ipv4/fib_frontend.c | 1 + net/ipv4/route.c | 11 ++++++ net/ipv6/route.c | 7 ++++ 8 files changed, 114 insertions(+), 2 deletions(-) diff --git a/include/net/fib_rules.h b/include/net/fib_rules.h index 59160de702b6..bd2b5c007561 100644 --- a/include/net/fib_rules.h +++ b/include/net/fib_rules.h @@ -8,6 +8,11 @@ #include #include +struct fib_kuid_range { + kuid_t start; + kuid_t end; +}; + struct fib_rule { struct list_head list; int iifindex; @@ -29,6 +34,7 @@ struct fib_rule { int suppress_prefixlen; char iifname[IFNAMSIZ]; char oifname[IFNAMSIZ]; + struct fib_kuid_range uid_range; struct rcu_head rcu; }; @@ -89,7 +95,8 @@ struct fib_rules_ops { [FRA_TABLE] = { .type = NLA_U32 }, \ [FRA_SUPPRESS_PREFIXLEN] = { .type = NLA_U32 }, \ [FRA_SUPPRESS_IFGROUP] = { .type = NLA_U32 }, \ - [FRA_GOTO] = { .type = NLA_U32 } + [FRA_GOTO] = { .type = NLA_U32 }, \ + [FRA_UID_RANGE] = { .len = sizeof(struct fib_rule_uid_range) } static inline void fib_rule_get(struct fib_rule *rule) { diff --git a/include/net/flow.h b/include/net/flow.h index 83969eebebf3..8913962d7d25 100644 --- a/include/net/flow.h +++ b/include/net/flow.h @@ -11,6 +11,7 @@ #include #include #include +#include /* * ifindex generation is per-net namespace, and loopback is @@ -38,6 +39,7 @@ struct flowi_common { #define FLOWI_FLAG_SKIP_NH_OIF 0x08 __u32 flowic_secid; struct flowi_tunnel flowic_tun_key; + kuid_t flowic_uid; }; union flowi_uli { @@ -75,6 +77,7 @@ struct flowi4 { #define flowi4_flags __fl_common.flowic_flags #define flowi4_secid __fl_common.flowic_secid #define flowi4_tun_key __fl_common.flowic_tun_key +#define flowi4_uid __fl_common.flowic_uid /* (saddr,daddr) must be grouped, same order as in IP header */ __be32 saddr; @@ -133,6 +136,7 @@ struct flowi6 { #define flowi6_flags __fl_common.flowic_flags #define flowi6_secid __fl_common.flowic_secid #define flowi6_tun_key __fl_common.flowic_tun_key +#define flowi6_uid __fl_common.flowic_uid struct in6_addr daddr; struct in6_addr saddr; __be32 flowlabel; @@ -177,6 +181,7 @@ struct flowi { #define flowi_flags u.__fl_common.flowic_flags #define flowi_secid u.__fl_common.flowic_secid #define flowi_tun_key u.__fl_common.flowic_tun_key +#define flowi_uid u.__fl_common.flowic_uid } __attribute__((__aligned__(BITS_PER_LONG/8))); static inline struct flowi *flowi4_to_flowi(struct flowi4 *fl4) diff --git a/include/uapi/linux/fib_rules.h b/include/uapi/linux/fib_rules.h index 96161b8202b5..bbf02a63a011 100644 --- a/include/uapi/linux/fib_rules.h +++ b/include/uapi/linux/fib_rules.h @@ -29,6 +29,11 @@ struct fib_rule_hdr { __u32 flags; }; +struct fib_rule_uid_range { + __u32 start; + __u32 end; +}; + enum { FRA_UNSPEC, FRA_DST, /* destination address */ @@ -49,6 +54,9 @@ enum { FRA_TABLE, /* Extended table id */ FRA_FWMASK, /* mask for netfilter mark */ FRA_OIFNAME, + FRA_PAD, + FRA_L3MDEV, /* iif or oif is l3mdev goto its table */ + FRA_UID_RANGE, /* UID range */ __FRA_MAX }; diff --git a/include/uapi/linux/rtnetlink.h b/include/uapi/linux/rtnetlink.h index fa3b34365560..d7d7599b112a 100644 --- a/include/uapi/linux/rtnetlink.h +++ b/include/uapi/linux/rtnetlink.h @@ -311,6 +311,9 @@ enum rtattr_type_t { RTA_PREF, RTA_ENCAP_TYPE, RTA_ENCAP, + RTA_EXPIRES, + RTA_PAD, + RTA_UID, __RTA_MAX }; diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c index 365de66436ac..cb744a352167 100644 --- a/net/core/fib_rules.c +++ b/net/core/fib_rules.c @@ -18,6 +18,11 @@ #include #include +static const struct fib_kuid_range fib_kuid_range_unset = { + KUIDT_INIT(0), + KUIDT_INIT(~0), +}; + int fib_default_rule_add(struct fib_rules_ops *ops, u32 pref, u32 table, u32 flags) { @@ -33,6 +38,7 @@ int fib_default_rule_add(struct fib_rules_ops *ops, r->table = table; r->flags = flags; r->fr_net = ops->fro_net; + r->uid_range = fib_kuid_range_unset; r->suppress_prefixlen = -1; r->suppress_ifgroup = -1; @@ -172,6 +178,34 @@ void fib_rules_unregister(struct fib_rules_ops *ops) } EXPORT_SYMBOL_GPL(fib_rules_unregister); +static int uid_range_set(struct fib_kuid_range *range) +{ + return uid_valid(range->start) && uid_valid(range->end); +} + +static struct fib_kuid_range nla_get_kuid_range(struct nlattr **tb) +{ + struct fib_rule_uid_range *in; + struct fib_kuid_range out; + + in = (struct fib_rule_uid_range *)nla_data(tb[FRA_UID_RANGE]); + + out.start = make_kuid(current_user_ns(), in->start); + out.end = make_kuid(current_user_ns(), in->end); + + return out; +} + +static int nla_put_uid_range(struct sk_buff *skb, struct fib_kuid_range *range) +{ + struct fib_rule_uid_range out = { + from_kuid_munged(current_user_ns(), range->start), + from_kuid_munged(current_user_ns(), range->end) + }; + + return nla_put(skb, FRA_UID_RANGE, sizeof(out), &out); +} + static int fib_rule_match(struct fib_rule *rule, struct fib_rules_ops *ops, struct flowi *fl, int flags) { @@ -189,6 +223,10 @@ static int fib_rule_match(struct fib_rule *rule, struct fib_rules_ops *ops, if (rule->tun_id && (rule->tun_id != fl->flowi_tun_key.tun_id)) goto out; + if (uid_lt(fl->flowi_uid, rule->uid_range.start) || + uid_gt(fl->flowi_uid, rule->uid_range.end)) + goto out; + ret = ops->match(rule, fl, flags); out: return (rule->flags & FIB_RULE_INVERT) ? !ret : ret; @@ -371,6 +409,21 @@ static int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr* nlh) } else if (rule->action == FR_ACT_GOTO) goto errout_free; + if (tb[FRA_UID_RANGE]) { + if (current_user_ns() != net->user_ns) { + err = -EPERM; + goto errout_free; + } + + rule->uid_range = nla_get_kuid_range(tb); + + if (!uid_range_set(&rule->uid_range) || + !uid_lte(rule->uid_range.start, rule->uid_range.end)) + goto errout_free; + } else { + rule->uid_range = fib_kuid_range_unset; + } + err = ops->configure(rule, skb, frh, tb); if (err < 0) goto errout_free; @@ -432,6 +485,7 @@ static int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr* nlh) struct fib_rules_ops *ops = NULL; struct fib_rule *rule, *tmp; struct nlattr *tb[FRA_MAX+1]; + struct fib_kuid_range range; int err = -EINVAL; if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*frh))) @@ -451,6 +505,14 @@ static int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr* nlh) if (err < 0) goto errout; + if (tb[FRA_UID_RANGE]) { + range = nla_get_kuid_range(tb); + if (!uid_range_set(&range)) + goto errout; + } else { + range = fib_kuid_range_unset; + } + list_for_each_entry(rule, &ops->rules_list, list) { if (frh->action && (frh->action != rule->action)) continue; @@ -483,6 +545,11 @@ static int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr* nlh) (rule->tun_id != nla_get_be64(tb[FRA_TUN_ID]))) continue; + if (uid_range_set(&range) && + (!uid_eq(rule->uid_range.start, range.start) || + !uid_eq(rule->uid_range.end, range.end))) + continue; + if (!ops->compare(rule, frh, tb)) continue; @@ -550,6 +617,7 @@ static inline size_t fib_rule_nlmsg_size(struct fib_rules_ops *ops, + nla_total_size(4) /* FRA_FWMARK */ + nla_total_size(4) /* FRA_FWMASK */ + nla_total_size(8); /* FRA_TUN_ID */ + + nla_total_size(sizeof(struct fib_kuid_range)); if (ops->nlmsg_payload) payload += ops->nlmsg_payload(rule); @@ -607,7 +675,9 @@ static int fib_nl_fill_rule(struct sk_buff *skb, struct fib_rule *rule, (rule->target && nla_put_u32(skb, FRA_GOTO, rule->target)) || (rule->tun_id && - nla_put_be64(skb, FRA_TUN_ID, rule->tun_id))) + nla_put_be64(skb, FRA_TUN_ID, rule->tun_id)) || + (uid_range_set(&rule->uid_range) && + nla_put_uid_range(skb, &rule->uid_range))) goto nla_put_failure; if (rule->suppress_ifgroup != -1) { diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index 63566ec54794..98c754e61024 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -627,6 +627,7 @@ const struct nla_policy rtm_ipv4_policy[RTA_MAX + 1] = { [RTA_FLOW] = { .type = NLA_U32 }, [RTA_ENCAP_TYPE] = { .type = NLA_U16 }, [RTA_ENCAP] = { .type = NLA_NESTED }, + [RTA_UID] = { .type = NLA_U32 }, }; static int rtm_to_fib_config(struct net *net, struct sk_buff *skb, diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 7ceb8a574a50..d1cff08f1580 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -2486,6 +2486,11 @@ static int rt_fill_info(struct net *net, __be32 dst, __be32 src, u32 table_id, nla_put_u32(skb, RTA_MARK, fl4->flowi4_mark)) goto nla_put_failure; + if (!uid_eq(fl4->flowi4_uid, INVALID_UID) && + nla_put_u32(skb, RTA_UID, + from_kuid_munged(current_user_ns(), fl4->flowi4_uid))) + goto nla_put_failure; + error = rt->dst.error; if (rt_is_input_route(rt)) { @@ -2538,6 +2543,7 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh) int mark; struct sk_buff *skb; u32 table_id = RT_TABLE_MAIN; + kuid_t uid; err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv4_policy); if (err < 0) @@ -2565,6 +2571,10 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh) dst = tb[RTA_DST] ? nla_get_in_addr(tb[RTA_DST]) : 0; iif = tb[RTA_IIF] ? nla_get_u32(tb[RTA_IIF]) : 0; mark = tb[RTA_MARK] ? nla_get_u32(tb[RTA_MARK]) : 0; + if (tb[RTA_UID]) + uid = make_kuid(current_user_ns(), nla_get_u32(tb[RTA_UID])); + else + uid = (iif ? INVALID_UID : current_uid()); memset(&fl4, 0, sizeof(fl4)); fl4.daddr = dst; @@ -2572,6 +2582,7 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh) fl4.flowi4_tos = rtm->rtm_tos; fl4.flowi4_oif = tb[RTA_OIF] ? nla_get_u32(tb[RTA_OIF]) : 0; fl4.flowi4_mark = mark; + fl4.flowi4_uid = uid; if (netif_index_is_l3_master(net, fl4.flowi4_oif)) fl4.flowi4_flags = FLOWI_FLAG_L3MDEV_SRC | FLOWI_FLAG_SKIP_NH_OIF; diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 614e6a771887..4bca900f6299 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -2686,6 +2686,7 @@ static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = { [RTA_PREF] = { .type = NLA_U8 }, [RTA_ENCAP_TYPE] = { .type = NLA_U16 }, [RTA_ENCAP] = { .type = NLA_NESTED }, + [RTA_UID] = { .type = NLA_U32 }, }; static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh, @@ -3249,6 +3250,12 @@ static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh) if (tb[RTA_MARK]) fl6.flowi6_mark = nla_get_u32(tb[RTA_MARK]); + if (tb[RTA_UID]) + fl6.flowi6_uid = make_kuid(current_user_ns(), + nla_get_u32(tb[RTA_UID])); + else + fl6.flowi6_uid = iif ? INVALID_UID : current_uid(); + if (iif) { struct net_device *dev; int flags = 0; From 06d600f737420eb3e3a6f2e6e2eb9a9ea7d3a783 Mon Sep 17 00:00:00 2001 From: Amit Pundir Date: Mon, 2 Jan 2017 14:09:58 +0530 Subject: [PATCH 074/199] Revert "net: ipv6: fix virtual tunneling build" This reverts commit 04f86f74f217b48cf3c24bbfdf66dec38d4d1e23. Bug: 16355602 Signed-off-by: Amit Pundir --- net/ipv6/ip6_vti.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c index c76ebc7fc52d..0a8610b33d79 100644 --- a/net/ipv6/ip6_vti.c +++ b/net/ipv6/ip6_vti.c @@ -599,7 +599,7 @@ static int vti6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, if (type == NDISC_REDIRECT) ip6_redirect(skb, net, skb->dev->ifindex, 0); else - ip6_update_pmtu(skb, net, info, 0, 0, INVALID_UID); + ip6_update_pmtu(skb, net, info, 0, 0); xfrm_state_put(x); return 0; From a8e959ae3047521fb94a01c2443cd52e64b51e22 Mon Sep 17 00:00:00 2001 From: Lorenzo Colitti Date: Fri, 4 Nov 2016 02:23:43 +0900 Subject: [PATCH 075/199] net: inet: Support UID-based routing in IP protocols. - Use the UID in routing lookups made by protocol connect() and sendmsg() functions. - Make sure that routing lookups triggered by incoming packets (e.g., Path MTU discovery) take the UID of the socket into account. - For packets not associated with a userspace socket, (e.g., ping replies) use UID 0 inside the user namespace corresponding to the network namespace the socket belongs to. This allows all namespaces to apply routing and iptables rules to kernel-originated traffic in that namespaces by matching UID 0. This is better than using the UID of the kernel socket that is sending the traffic, because the UID of kernel sockets created at namespace creation time (e.g., the per-processor ICMP and TCP sockets) is the UID of the user that created the socket, which might not be mapped in the namespace. Bug: 16355602 Change-Id: I910504b508948057912bc188fd1e8aca28294de3 Tested: compiles allnoconfig, allyesconfig, allmodconfig Tested: https://android-review.googlesource.com/253302 Signed-off-by: Lorenzo Colitti Signed-off-by: David S. Miller [AmitP: A little bit of refactoring because of LTS cherry-pick commit e5abc10d19b9 ("tcp: fix NULL deref in tcp_v4_send_ack()") Signed-off-by: Amit Pundir --- include/net/flow.h | 4 +++- include/net/ip.h | 1 + include/net/ip6_route.h | 5 +++-- include/net/route.h | 5 +++-- net/ipv4/icmp.c | 2 ++ net/ipv4/inet_connection_sock.c | 4 ++-- net/ipv4/ip_output.c | 3 ++- net/ipv4/ping.c | 3 ++- net/ipv4/raw.c | 2 +- net/ipv4/route.c | 26 +++++++++++++++----------- net/ipv4/syncookies.c | 2 +- net/ipv4/tcp_ipv4.c | 9 ++++++--- net/ipv4/udp.c | 3 ++- net/ipv6/af_inet6.c | 1 + net/ipv6/ah6.c | 5 +++-- net/ipv6/datagram.c | 1 + net/ipv6/esp6.c | 5 +++-- net/ipv6/icmp.c | 7 +++++-- net/ipv6/inet6_connection_sock.c | 2 ++ net/ipv6/ip6_gre.c | 4 ++++ net/ipv6/ip6_tunnel.c | 3 +++ net/ipv6/ip6_vti.c | 5 +++-- net/ipv6/ipcomp6.c | 5 +++-- net/ipv6/netfilter.c | 1 + net/ipv6/ping.c | 1 + net/ipv6/raw.c | 1 + net/ipv6/route.c | 13 +++++++++---- net/ipv6/syncookies.c | 1 + net/ipv6/tcp_ipv6.c | 2 ++ net/ipv6/udp.c | 1 + net/l2tp/l2tp_ip6.c | 1 + 31 files changed, 88 insertions(+), 40 deletions(-) diff --git a/include/net/flow.h b/include/net/flow.h index 8913962d7d25..833080732dec 100644 --- a/include/net/flow.h +++ b/include/net/flow.h @@ -97,7 +97,8 @@ static inline void flowi4_init_output(struct flowi4 *fl4, int oif, __u32 mark, __u8 tos, __u8 scope, __u8 proto, __u8 flags, __be32 daddr, __be32 saddr, - __be16 dport, __be16 sport) + __be16 dport, __be16 sport, + kuid_t uid) { fl4->flowi4_oif = oif; fl4->flowi4_iif = LOOPBACK_IFINDEX; @@ -108,6 +109,7 @@ static inline void flowi4_init_output(struct flowi4 *fl4, int oif, fl4->flowi4_flags = flags; fl4->flowi4_secid = 0; fl4->flowi4_tun_key.tun_id = 0; + fl4->flowi4_uid = uid; fl4->daddr = daddr; fl4->saddr = saddr; fl4->fl4_dport = dport; diff --git a/include/net/ip.h b/include/net/ip.h index b450d8653b30..f78c3a52529b 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -170,6 +170,7 @@ struct ip_reply_arg { /* -1 if not needed */ int bound_dev_if; u8 tos; + kuid_t uid; }; #define IP_REPLY_ARG_NOSRCCHECK 1 diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h index 295d291269e2..af0e8c081191 100644 --- a/include/net/ip6_route.h +++ b/include/net/ip6_route.h @@ -116,9 +116,10 @@ int rt6_route_rcv(struct net_device *dev, u8 *opt, int len, const struct in6_addr *gwaddr); void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu, int oif, - u32 mark); + u32 mark, kuid_t uid); void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, __be32 mtu); -void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark); +void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark, + kuid_t uid); void ip6_redirect_no_header(struct sk_buff *skb, struct net *net, int oif, u32 mark); void ip6_sk_redirect(struct sk_buff *skb, struct sock *sk); diff --git a/include/net/route.h b/include/net/route.h index a3b9ef74a389..3adb9c724818 100644 --- a/include/net/route.h +++ b/include/net/route.h @@ -154,7 +154,7 @@ static inline struct rtable *ip_route_output_ports(struct net *net, struct flowi flowi4_init_output(fl4, oif, sk ? sk->sk_mark : 0, tos, RT_SCOPE_UNIVERSE, proto, sk ? inet_sk_flowi_flags(sk) : 0, - daddr, saddr, dport, sport); + daddr, saddr, dport, sport, sock_net_uid(net, sk)); if (sk) security_sk_classify_flow(sk, flowi4_to_flowi(fl4)); return ip_route_output_flow(net, fl4, sk); @@ -267,7 +267,8 @@ static inline void ip_route_connect_init(struct flowi4 *fl4, __be32 dst, __be32 flow_flags |= FLOWI_FLAG_ANYSRC; flowi4_init_output(fl4, oif, sk->sk_mark, tos, RT_SCOPE_UNIVERSE, - protocol, flow_flags, dst, src, dport, sport); + protocol, flow_flags, dst, src, dport, sport, + sk->sk_uid); } static inline struct rtable *ip_route_connect(struct flowi4 *fl4, diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index 36e26977c908..ef2d4322aba7 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -425,6 +425,7 @@ static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb) fl4.daddr = daddr; fl4.saddr = saddr; fl4.flowi4_mark = mark; + fl4.flowi4_uid = sock_net_uid(net, NULL); fl4.flowi4_tos = RT_TOS(ip_hdr(skb)->tos); fl4.flowi4_proto = IPPROTO_ICMP; fl4.flowi4_oif = l3mdev_master_ifindex(skb->dev); @@ -473,6 +474,7 @@ static struct rtable *icmp_route_lookup(struct net *net, param->replyopts.opt.opt.faddr : iph->saddr); fl4->saddr = saddr; fl4->flowi4_mark = mark; + fl4->flowi4_uid = sock_net_uid(net, NULL); fl4->flowi4_tos = RT_TOS(tos); fl4->flowi4_proto = IPPROTO_ICMP; fl4->fl4_icmp_type = type; diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index 64148914803a..c97a2108cd61 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -420,7 +420,7 @@ struct dst_entry *inet_csk_route_req(const struct sock *sk, sk->sk_protocol, inet_sk_flowi_flags(sk), (opt && opt->opt.srr) ? opt->opt.faddr : ireq->ir_rmt_addr, ireq->ir_loc_addr, ireq->ir_rmt_port, - htons(ireq->ir_num)); + htons(ireq->ir_num), sk->sk_uid); security_req_classify_flow(req, flowi4_to_flowi(fl4)); rt = ip_route_output_flow(net, fl4, sk); if (IS_ERR(rt)) @@ -457,7 +457,7 @@ struct dst_entry *inet_csk_route_child_sock(const struct sock *sk, sk->sk_protocol, inet_sk_flowi_flags(sk), (opt && opt->opt.srr) ? opt->opt.faddr : ireq->ir_rmt_addr, ireq->ir_loc_addr, ireq->ir_rmt_port, - htons(ireq->ir_num)); + htons(ireq->ir_num), sk->sk_uid); security_req_classify_flow(req, flowi4_to_flowi(fl4)); rt = ip_route_output_flow(net, fl4, sk); if (IS_ERR(rt)) diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 2b7283303650..661bda968594 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -1580,7 +1580,8 @@ void ip_send_unicast_reply(struct sock *sk, struct sk_buff *skb, RT_SCOPE_UNIVERSE, ip_hdr(skb)->protocol, ip_reply_arg_flowi_flags(arg), daddr, saddr, - tcp_hdr(skb)->source, tcp_hdr(skb)->dest); + tcp_hdr(skb)->source, tcp_hdr(skb)->dest, + arg->uid); security_skb_classify_flow(skb, flowi4_to_flowi(&fl4)); rt = ip_route_output_key(net, &fl4); if (IS_ERR(rt)) diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c index 23160d2b3f71..495fefe6a898 100644 --- a/net/ipv4/ping.c +++ b/net/ipv4/ping.c @@ -795,7 +795,8 @@ static int ping_v4_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) flowi4_init_output(&fl4, ipc.oif, sk->sk_mark, tos, RT_SCOPE_UNIVERSE, sk->sk_protocol, - inet_sk_flowi_flags(sk), faddr, saddr, 0, 0); + inet_sk_flowi_flags(sk), faddr, saddr, 0, 0, + sk->sk_uid); security_sk_classify_flow(sk, flowi4_to_flowi(&fl4)); rt = ip_route_output_flow(net, &fl4, sk); diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index 7113bae4e6a0..6287418c1dfe 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -601,7 +601,7 @@ static int raw_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) inet->hdrincl ? IPPROTO_RAW : sk->sk_protocol, inet_sk_flowi_flags(sk) | (inet->hdrincl ? FLOWI_FLAG_KNOWN_NH : 0), - daddr, saddr, 0, 0); + daddr, saddr, 0, 0, sk->sk_uid); if (!saddr && ipc.oif) { err = l3mdev_get_saddr(net, ipc.oif, &fl4); diff --git a/net/ipv4/route.c b/net/ipv4/route.c index d1cff08f1580..3c798d01780f 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -501,7 +501,8 @@ void __ip_select_ident(struct net *net, struct iphdr *iph, int segs) } EXPORT_SYMBOL(__ip_select_ident); -static void __build_flow_key(struct flowi4 *fl4, const struct sock *sk, +static void __build_flow_key(const struct net *net, struct flowi4 *fl4, + const struct sock *sk, const struct iphdr *iph, int oif, u8 tos, u8 prot, u32 mark, int flow_flags) @@ -517,7 +518,8 @@ static void __build_flow_key(struct flowi4 *fl4, const struct sock *sk, flowi4_init_output(fl4, oif, mark, tos, RT_SCOPE_UNIVERSE, prot, flow_flags, - iph->daddr, iph->saddr, 0, 0); + iph->daddr, iph->saddr, 0, 0, + sock_net_uid(net, sk)); } static void build_skb_flow_key(struct flowi4 *fl4, const struct sk_buff *skb, @@ -529,7 +531,7 @@ static void build_skb_flow_key(struct flowi4 *fl4, const struct sk_buff *skb, u8 prot = iph->protocol; u32 mark = skb->mark; - __build_flow_key(fl4, sk, iph, oif, tos, prot, mark, 0); + __build_flow_key(sock_net(sk), fl4, sk, iph, oif, tos, prot, mark, 0); } static void build_sk_flow_key(struct flowi4 *fl4, const struct sock *sk) @@ -546,7 +548,7 @@ static void build_sk_flow_key(struct flowi4 *fl4, const struct sock *sk) RT_CONN_FLAGS(sk), RT_SCOPE_UNIVERSE, inet->hdrincl ? IPPROTO_RAW : sk->sk_protocol, inet_sk_flowi_flags(sk), - daddr, inet->inet_saddr, 0, 0); + daddr, inet->inet_saddr, 0, 0, sk->sk_uid); rcu_read_unlock(); } @@ -796,7 +798,7 @@ static void ip_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buf rt = (struct rtable *) dst; - __build_flow_key(&fl4, sk, iph, oif, tos, prot, mark, 0); + __build_flow_key(sock_net(sk), &fl4, sk, iph, oif, tos, prot, mark, 0); __ip_do_redirect(rt, skb, &fl4, true); } @@ -1014,7 +1016,7 @@ void ipv4_update_pmtu(struct sk_buff *skb, struct net *net, u32 mtu, if (!mark) mark = IP4_REPLY_MARK(net, skb->mark); - __build_flow_key(&fl4, NULL, iph, oif, + __build_flow_key(net, &fl4, NULL, iph, oif, RT_TOS(iph->tos), protocol, mark, flow_flags); rt = __ip_route_output_key(net, &fl4); if (!IS_ERR(rt)) { @@ -1030,7 +1032,7 @@ static void __ipv4_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, u32 mtu) struct flowi4 fl4; struct rtable *rt; - __build_flow_key(&fl4, sk, iph, 0, 0, 0, 0, 0); + __build_flow_key(sock_net(sk), &fl4, sk, iph, 0, 0, 0, 0, 0); if (!fl4.flowi4_mark) fl4.flowi4_mark = IP4_REPLY_MARK(sock_net(sk), skb->mark); @@ -1049,6 +1051,7 @@ void ipv4_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, u32 mtu) struct rtable *rt; struct dst_entry *odst = NULL; bool new = false; + struct net *net = sock_net(sk); bh_lock_sock(sk); @@ -1062,7 +1065,7 @@ void ipv4_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, u32 mtu) goto out; } - __build_flow_key(&fl4, sk, iph, 0, 0, 0, 0, 0); + __build_flow_key(net, &fl4, sk, iph, 0, 0, 0, 0, 0); rt = (struct rtable *)odst; if (odst->obsolete && !odst->ops->check(odst, 0)) { @@ -1102,7 +1105,7 @@ void ipv4_redirect(struct sk_buff *skb, struct net *net, struct flowi4 fl4; struct rtable *rt; - __build_flow_key(&fl4, NULL, iph, oif, + __build_flow_key(net, &fl4, NULL, iph, oif, RT_TOS(iph->tos), protocol, mark, flow_flags); rt = __ip_route_output_key(net, &fl4); if (!IS_ERR(rt)) { @@ -1117,9 +1120,10 @@ void ipv4_sk_redirect(struct sk_buff *skb, struct sock *sk) const struct iphdr *iph = (const struct iphdr *) skb->data; struct flowi4 fl4; struct rtable *rt; + struct net *net = sock_net(sk); - __build_flow_key(&fl4, sk, iph, 0, 0, 0, 0, 0); - rt = __ip_route_output_key(sock_net(sk), &fl4); + __build_flow_key(net, &fl4, sk, iph, 0, 0, 0, 0, 0); + rt = __ip_route_output_key(net, &fl4); if (!IS_ERR(rt)) { __ip_do_redirect(rt, skb, &fl4, false); ip_rt_put(rt); diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c index 4cbe9f0a4281..2dc982b15df8 100644 --- a/net/ipv4/syncookies.c +++ b/net/ipv4/syncookies.c @@ -375,7 +375,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb) RT_CONN_FLAGS(sk), RT_SCOPE_UNIVERSE, IPPROTO_TCP, inet_sk_flowi_flags(sk), opt->srr ? opt->faddr : ireq->ir_rmt_addr, - ireq->ir_loc_addr, th->source, th->dest); + ireq->ir_loc_addr, th->source, th->dest, sk->sk_uid); security_req_classify_flow(req, flowi4_to_flowi(&fl4)); rt = ip_route_output_key(sock_net(sk), &fl4); if (IS_ERR(rt)) { diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index b6a48d4a3794..25309b137c43 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -687,6 +687,7 @@ static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb) arg.bound_dev_if = sk->sk_bound_dev_if; arg.tos = ip_hdr(skb)->tos; + arg.uid = sock_net_uid(net, sk && sk_fullsock(sk) ? sk : NULL); ip_send_unicast_reply(*this_cpu_ptr(net->ipv4.tcp_sk), skb, &TCP_SKB_CB(skb)->header.h4.opt, ip_hdr(skb)->saddr, ip_hdr(skb)->daddr, @@ -708,7 +709,7 @@ release_sk1: outside socket context is ugly, certainly. What can I do? */ -static void tcp_v4_send_ack(struct net *net, +static void tcp_v4_send_ack(const struct sock *sk, struct sk_buff *skb, u32 seq, u32 ack, u32 win, u32 tsval, u32 tsecr, int oif, struct tcp_md5sig_key *key, @@ -723,6 +724,7 @@ static void tcp_v4_send_ack(struct net *net, #endif ]; } rep; + struct net *net = sock_net(sk); struct ip_reply_arg arg; memset(&rep.th, 0, sizeof(struct tcphdr)); @@ -772,6 +774,7 @@ static void tcp_v4_send_ack(struct net *net, if (oif) arg.bound_dev_if = oif; arg.tos = tos; + arg.uid = sock_net_uid(net, sk_fullsock(sk) ? sk : NULL); ip_send_unicast_reply(*this_cpu_ptr(net->ipv4.tcp_sk), skb, &TCP_SKB_CB(skb)->header.h4.opt, ip_hdr(skb)->saddr, ip_hdr(skb)->daddr, @@ -785,7 +788,7 @@ static void tcp_v4_timewait_ack(struct sock *sk, struct sk_buff *skb) struct inet_timewait_sock *tw = inet_twsk(sk); struct tcp_timewait_sock *tcptw = tcp_twsk(sk); - tcp_v4_send_ack(sock_net(sk), skb, + tcp_v4_send_ack(sk, skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt, tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale, tcp_time_stamp + tcptw->tw_ts_offset, @@ -813,7 +816,7 @@ static void tcp_v4_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb, * exception of segments, MUST be right-shifted by * Rcv.Wind.Shift bits: */ - tcp_v4_send_ack(sock_net(sk), skb, seq, + tcp_v4_send_ack(sk, skb, seq, tcp_rsk(req)->rcv_nxt, req->rsk_rcv_wnd >> inet_rsk(req)->rcv_wscale, tcp_time_stamp, diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 91108c67fdbc..ad3d1534c524 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -1025,7 +1025,8 @@ int udp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) flowi4_init_output(fl4, ipc.oif, sk->sk_mark, tos, RT_SCOPE_UNIVERSE, sk->sk_protocol, flow_flags, - faddr, saddr, dport, inet->inet_sport); + faddr, saddr, dport, inet->inet_sport, + sk->sk_uid); if (!saddr && ipc.oif) { err = l3mdev_get_saddr(net, ipc.oif, fl4); diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index 9dbfacb6e0d9..1604163c2850 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -678,6 +678,7 @@ int inet6_sk_rebuild_header(struct sock *sk) fl6.flowi6_mark = sk->sk_mark; fl6.fl6_dport = inet->inet_dport; fl6.fl6_sport = inet->inet_sport; + fl6.flowi6_uid = sk->sk_uid; security_sk_classify_flow(sk, flowi6_to_flowi(&fl6)); rcu_read_lock(); diff --git a/net/ipv6/ah6.c b/net/ipv6/ah6.c index 0630a4d5daaa..189eb10b742d 100644 --- a/net/ipv6/ah6.c +++ b/net/ipv6/ah6.c @@ -662,9 +662,10 @@ static int ah6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, return 0; if (type == NDISC_REDIRECT) - ip6_redirect(skb, net, skb->dev->ifindex, 0); + ip6_redirect(skb, net, skb->dev->ifindex, 0, + sock_net_uid(net, NULL)); else - ip6_update_pmtu(skb, net, info, 0, 0); + ip6_update_pmtu(skb, net, info, 0, 0, sock_net_uid(net, NULL)); xfrm_state_put(x); return 0; diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c index 428162155280..cf2dfb222230 100644 --- a/net/ipv6/datagram.c +++ b/net/ipv6/datagram.c @@ -161,6 +161,7 @@ ipv4_connected: fl6.flowi6_mark = sk->sk_mark; fl6.fl6_dport = inet->inet_dport; fl6.fl6_sport = inet->inet_sport; + fl6.flowi6_uid = sk->sk_uid; if (!fl6.flowi6_oif) fl6.flowi6_oif = np->sticky_pktinfo.ipi6_ifindex; diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c index 111ba55fd512..cbcdd5db31f4 100644 --- a/net/ipv6/esp6.c +++ b/net/ipv6/esp6.c @@ -474,9 +474,10 @@ static int esp6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, return 0; if (type == NDISC_REDIRECT) - ip6_redirect(skb, net, skb->dev->ifindex, 0); + ip6_redirect(skb, net, skb->dev->ifindex, 0, + sock_net_uid(net, NULL)); else - ip6_update_pmtu(skb, net, info, 0, 0); + ip6_update_pmtu(skb, net, info, 0, 0, sock_net_uid(net, NULL)); xfrm_state_put(x); return 0; diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index 3697cd08c515..3ae2fbe07b25 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c @@ -92,9 +92,10 @@ static void icmpv6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, struct net *net = dev_net(skb->dev); if (type == ICMPV6_PKT_TOOBIG) - ip6_update_pmtu(skb, net, info, 0, 0); + ip6_update_pmtu(skb, net, info, 0, 0, sock_net_uid(net, NULL)); else if (type == NDISC_REDIRECT) - ip6_redirect(skb, net, skb->dev->ifindex, 0); + ip6_redirect(skb, net, skb->dev->ifindex, 0, + sock_net_uid(net, NULL)); if (!(type & ICMPV6_INFOMSG_MASK)) if (icmp6->icmp6_type == ICMPV6_ECHO_REQUEST) @@ -478,6 +479,7 @@ static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info) fl6.flowi6_oif = iif; fl6.fl6_icmp_type = type; fl6.fl6_icmp_code = code; + fl6.flowi6_uid = sock_net_uid(net, NULL); security_skb_classify_flow(skb, flowi6_to_flowi(&fl6)); sk = icmpv6_xmit_lock(net); @@ -585,6 +587,7 @@ static void icmpv6_echo_reply(struct sk_buff *skb) fl6.flowi6_oif = l3mdev_fib_oif(skb->dev); fl6.fl6_icmp_type = ICMPV6_ECHO_REPLY; fl6.flowi6_mark = mark; + fl6.flowi6_uid = sock_net_uid(net, NULL); security_skb_classify_flow(skb, flowi6_to_flowi(&fl6)); sk = icmpv6_xmit_lock(net); diff --git a/net/ipv6/inet6_connection_sock.c b/net/ipv6/inet6_connection_sock.c index a7ca2cde2ecb..dc79ebc14189 100644 --- a/net/ipv6/inet6_connection_sock.c +++ b/net/ipv6/inet6_connection_sock.c @@ -86,6 +86,7 @@ struct dst_entry *inet6_csk_route_req(const struct sock *sk, fl6->flowi6_mark = ireq->ir_mark; fl6->fl6_dport = ireq->ir_rmt_port; fl6->fl6_sport = htons(ireq->ir_num); + fl6->flowi6_uid = sk->sk_uid; security_req_classify_flow(req, flowi6_to_flowi(fl6)); dst = ip6_dst_lookup_flow(sk, fl6, final_p); @@ -134,6 +135,7 @@ static struct dst_entry *inet6_csk_route_socket(struct sock *sk, fl6->flowi6_mark = sk->sk_mark; fl6->fl6_sport = inet->inet_sport; fl6->fl6_dport = inet->inet_dport; + fl6->flowi6_uid = sk->sk_uid; security_sk_classify_flow(sk, flowi6_to_flowi(fl6)); rcu_read_lock(); diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index 17430f341073..eba61b42cd42 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@ -794,6 +794,8 @@ static inline int ip6gre_xmit_ipv4(struct sk_buff *skb, struct net_device *dev) if (t->parms.flags & IP6_TNL_F_USE_ORIG_FWMARK) fl6.flowi6_mark = skb->mark; + fl6.flowi6_uid = sock_net_uid(dev_net(dev), NULL); + err = ip6gre_xmit2(skb, dev, dsfield, &fl6, encap_limit, &mtu); if (err != 0) { /* XXX: send ICMP error even if DF is not set. */ @@ -844,6 +846,8 @@ static inline int ip6gre_xmit_ipv6(struct sk_buff *skb, struct net_device *dev) if (t->parms.flags & IP6_TNL_F_USE_ORIG_FWMARK) fl6.flowi6_mark = skb->mark; + fl6.flowi6_uid = sock_net_uid(dev_net(dev), NULL); + err = ip6gre_xmit2(skb, dev, dsfield, &fl6, encap_limit, &mtu); if (err != 0) { if (err == -EMSGSIZE) diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index 2994d1f1a661..c9bd1ee1f145 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -1203,6 +1203,8 @@ ip4ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev) memcpy(&fl6, &t->fl.u.ip6, sizeof(fl6)); fl6.flowi6_proto = IPPROTO_IPIP; + fl6.flowi6_uid = sock_net_uid(dev_net(dev), NULL); + dsfield = ipv4_get_dsfield(iph); if (t->parms.flags & IP6_TNL_F_USE_ORIG_TCLASS) @@ -1256,6 +1258,7 @@ ip6ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev) memcpy(&fl6, &t->fl.u.ip6, sizeof(fl6)); fl6.flowi6_proto = IPPROTO_IPV6; + fl6.flowi6_uid = sock_net_uid(dev_net(dev), NULL); dsfield = ipv6_get_dsfield(ipv6h); if (t->parms.flags & IP6_TNL_F_USE_ORIG_TCLASS) diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c index 0a8610b33d79..24fb9c0efd00 100644 --- a/net/ipv6/ip6_vti.c +++ b/net/ipv6/ip6_vti.c @@ -597,9 +597,10 @@ static int vti6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, return 0; if (type == NDISC_REDIRECT) - ip6_redirect(skb, net, skb->dev->ifindex, 0); + ip6_redirect(skb, net, skb->dev->ifindex, 0, + sock_net_uid(net, NULL)); else - ip6_update_pmtu(skb, net, info, 0, 0); + ip6_update_pmtu(skb, net, info, 0, 0, sock_net_uid(net, NULL)); xfrm_state_put(x); return 0; diff --git a/net/ipv6/ipcomp6.c b/net/ipv6/ipcomp6.c index 1b9316e1386a..54d165b9845a 100644 --- a/net/ipv6/ipcomp6.c +++ b/net/ipv6/ipcomp6.c @@ -74,9 +74,10 @@ static int ipcomp6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, return 0; if (type == NDISC_REDIRECT) - ip6_redirect(skb, net, skb->dev->ifindex, 0); + ip6_redirect(skb, net, skb->dev->ifindex, 0, + sock_net_uid(net, NULL)); else - ip6_update_pmtu(skb, net, info, 0, 0); + ip6_update_pmtu(skb, net, info, 0, 0, sock_net_uid(net, NULL)); xfrm_state_put(x); return 0; diff --git a/net/ipv6/netfilter.c b/net/ipv6/netfilter.c index d11c46833d61..39970e212ad5 100644 --- a/net/ipv6/netfilter.c +++ b/net/ipv6/netfilter.c @@ -26,6 +26,7 @@ int ip6_route_me_harder(struct net *net, struct sk_buff *skb) struct flowi6 fl6 = { .flowi6_oif = skb->sk ? skb->sk->sk_bound_dev_if : 0, .flowi6_mark = skb->mark, + .flowi6_uid = sock_net_uid(net, skb->sk), .daddr = iph->daddr, .saddr = iph->saddr, }; diff --git a/net/ipv6/ping.c b/net/ipv6/ping.c index 265bcdcf29c7..1737fc0f2988 100644 --- a/net/ipv6/ping.c +++ b/net/ipv6/ping.c @@ -141,6 +141,7 @@ int ping_v6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) fl6.daddr = *daddr; fl6.flowi6_oif = oif; fl6.flowi6_mark = sk->sk_mark; + fl6.flowi6_uid = sk->sk_uid; fl6.fl6_icmp_type = user_icmph.icmp6_type; fl6.fl6_icmp_code = user_icmph.icmp6_code; security_sk_classify_flow(sk, flowi6_to_flowi(&fl6)); diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index 99140986e887..58eb8ee19f34 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -768,6 +768,7 @@ static int rawv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) memset(&fl6, 0, sizeof(fl6)); fl6.flowi6_mark = sk->sk_mark; + fl6.flowi6_uid = sk->sk_uid; if (sin6) { if (addr_len < SIN6_LEN_RFC2133) diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 4bca900f6299..3c5d6bb3b850 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -1382,7 +1382,7 @@ static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk, } void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu, - int oif, u32 mark) + int oif, u32 mark, kuid_t uid) { const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data; struct dst_entry *dst; @@ -1394,6 +1394,7 @@ void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu, fl6.daddr = iph->daddr; fl6.saddr = iph->saddr; fl6.flowlabel = ip6_flowinfo(iph); + fl6.flowi6_uid = uid; dst = ip6_route_output(net, NULL, &fl6); if (!dst->error) @@ -1405,7 +1406,7 @@ EXPORT_SYMBOL_GPL(ip6_update_pmtu); void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, __be32 mtu) { ip6_update_pmtu(skb, sock_net(sk), mtu, - sk->sk_bound_dev_if, sk->sk_mark); + sk->sk_bound_dev_if, sk->sk_mark, sk->sk_uid); } EXPORT_SYMBOL_GPL(ip6_sk_update_pmtu); @@ -1486,7 +1487,8 @@ static struct dst_entry *ip6_route_redirect(struct net *net, flags, __ip6_route_redirect); } -void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark) +void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark, + kuid_t uid) { const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data; struct dst_entry *dst; @@ -1499,6 +1501,7 @@ void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark) fl6.daddr = iph->daddr; fl6.saddr = iph->saddr; fl6.flowlabel = ip6_flowinfo(iph); + fl6.flowi6_uid = uid; dst = ip6_route_redirect(net, &fl6, &ipv6_hdr(skb)->saddr); rt6_do_redirect(dst, NULL, skb); @@ -1520,6 +1523,7 @@ void ip6_redirect_no_header(struct sk_buff *skb, struct net *net, int oif, fl6.flowi6_mark = mark; fl6.daddr = msg->dest; fl6.saddr = iph->daddr; + fl6.flowi6_uid = sock_net_uid(net, NULL); dst = ip6_route_redirect(net, &fl6, &iph->saddr); rt6_do_redirect(dst, NULL, skb); @@ -1528,7 +1532,8 @@ void ip6_redirect_no_header(struct sk_buff *skb, struct net *net, int oif, void ip6_sk_redirect(struct sk_buff *skb, struct sock *sk) { - ip6_redirect(skb, sock_net(sk), sk->sk_bound_dev_if, sk->sk_mark); + ip6_redirect(skb, sock_net(sk), sk->sk_bound_dev_if, sk->sk_mark, + sk->sk_uid); } EXPORT_SYMBOL_GPL(ip6_sk_redirect); diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c index eaf7ac496d50..336843ca4e6b 100644 --- a/net/ipv6/syncookies.c +++ b/net/ipv6/syncookies.c @@ -228,6 +228,7 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) fl6.flowi6_mark = ireq->ir_mark; fl6.fl6_dport = ireq->ir_rmt_port; fl6.fl6_sport = inet_sk(sk)->inet_sport; + fl6.flowi6_uid = sk->sk_uid; security_req_classify_flow(req, flowi6_to_flowi(&fl6)); dst = ip6_dst_lookup_flow(sk, &fl6, final_p); diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 7c09f3df537c..8ed00c8a128a 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -234,6 +234,7 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, fl6.flowi6_mark = sk->sk_mark; fl6.fl6_dport = usin->sin6_port; fl6.fl6_sport = inet->inet_sport; + fl6.flowi6_uid = sk->sk_uid; opt = rcu_dereference_protected(np->opt, sock_owned_by_user(sk)); final_p = fl6_update_dst(&fl6, opt, &final); @@ -813,6 +814,7 @@ static void tcp_v6_send_response(const struct sock *sk, struct sk_buff *skb, u32 fl6.flowi6_mark = IP6_REPLY_MARK(net, skb->mark); fl6.fl6_dport = t1->dest; fl6.fl6_sport = t1->source; + fl6.flowi6_uid = sock_net_uid(net, sk && sk_fullsock(sk) ? sk : NULL); security_skb_classify_flow(skb, flowi6_to_flowi(&fl6)); /* Pass a socket to ip6_dst_lookup either it is for RST diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 7888468906c1..156a13c7ada8 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -1244,6 +1244,7 @@ do_udp_sendmsg: fl6.flowi6_oif = np->sticky_pktinfo.ipi6_ifindex; fl6.flowi6_mark = sk->sk_mark; + fl6.flowi6_uid = sk->sk_uid; if (msg->msg_controllen) { opt = &opt_space; diff --git a/net/l2tp/l2tp_ip6.c b/net/l2tp/l2tp_ip6.c index 3c4f867d3633..0289208b0346 100644 --- a/net/l2tp/l2tp_ip6.c +++ b/net/l2tp/l2tp_ip6.c @@ -518,6 +518,7 @@ static int l2tp_ip6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) memset(&fl6, 0, sizeof(fl6)); fl6.flowi6_mark = sk->sk_mark; + fl6.flowi6_uid = sk->sk_uid; if (lsa) { if (addr_len < SIN6_LEN_RFC2133) From 6eca888614710c57ea7a3327d15db218e4960af9 Mon Sep 17 00:00:00 2001 From: Lorenzo Colitti Date: Wed, 30 Nov 2016 02:56:47 +0900 Subject: [PATCH 076/199] net: ipv4: Don't crash if passing a null sk to ip_rt_update_pmtu. Commit e2d118a1cb5e ("net: inet: Support UID-based routing in IP protocols.") made __build_flow_key call sock_net(sk) to determine the network namespace of the passed-in socket. This crashes if sk is NULL. Fix this by getting the network namespace from the skb instead. Bug: 16355602 Change-Id: I27161b70f448bb95adce3994a97920d54987ce4e Fixes: e2d118a1cb5e ("net: inet: Support UID-based routing in IP protocols.") Reported-by: Erez Shitrit Signed-off-by: Lorenzo Colitti Signed-off-by: David S. Miller --- net/ipv4/route.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 3c798d01780f..3708ff083211 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -525,13 +525,14 @@ static void __build_flow_key(const struct net *net, struct flowi4 *fl4, static void build_skb_flow_key(struct flowi4 *fl4, const struct sk_buff *skb, const struct sock *sk) { + const struct net *net = dev_net(skb->dev); const struct iphdr *iph = ip_hdr(skb); int oif = skb->dev->ifindex; u8 tos = RT_TOS(iph->tos); u8 prot = iph->protocol; u32 mark = skb->mark; - __build_flow_key(sock_net(sk), fl4, sk, iph, oif, tos, prot, mark, 0); + __build_flow_key(net, fl4, sk, iph, oif, tos, prot, mark, 0); } static void build_sk_flow_key(struct flowi4 *fl4, const struct sock *sk) From af5c611fce52bc7c06ad3b1e45b9fd3139afd382 Mon Sep 17 00:00:00 2001 From: Jin Qian Date: Tue, 20 Dec 2016 11:08:34 -0800 Subject: [PATCH 077/199] ANDROID: android-base: Enable QUOTA related configs Bug: 33757366 Change-Id: Iec4f55c3ca4a16dbc8695054f481d9261c56d0f6 --- android/configs/android-base.cfg | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/android/configs/android-base.cfg b/android/configs/android-base.cfg index 8531a7a79e33..f10371a981b7 100644 --- a/android/configs/android-base.cfg +++ b/android/configs/android-base.cfg @@ -139,7 +139,11 @@ CONFIG_PPP_DEFLATE=y CONFIG_PPP_MPPE=y CONFIG_PREEMPT=y CONFIG_PROFILING=y +CONFIG_QFMT_V2=y CONFIG_QUOTA=y +CONFIG_QUOTA_NETLINK_INTERFACE=y +CONFIG_QUOTA_TREE=y +CONFIG_QUOTACTL=y CONFIG_RANDOMIZE_BASE=y CONFIG_RTC_CLASS=y CONFIG_RT_GROUP_SCHED=y From 282b308ad3b60ef0a55ff8b1ef6f16dadcede935 Mon Sep 17 00:00:00 2001 From: mukesh agrawal Date: Tue, 12 Jul 2016 11:28:05 -0700 Subject: [PATCH 078/199] ANDROID: trace: net: use %pK for kernel pointers We want to use network trace events in production builds, to help diagnose Wifi problems. However, we don't want to expose raw kernel pointers in such builds. Change the format specifier for the skbaddr field, so that, if kptr_restrict is enabled, the pointers will be reported as 0. Bug: 30090733 Change-Id: Ic4bd583d37af6637343601feca875ee24479ddff Signed-off-by: mukesh agrawal --- include/trace/events/net.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/include/trace/events/net.h b/include/trace/events/net.h index 49cc7c3de252..89d009e10938 100644 --- a/include/trace/events/net.h +++ b/include/trace/events/net.h @@ -57,7 +57,7 @@ TRACE_EVENT(net_dev_start_xmit, __entry->gso_type = skb_shinfo(skb)->gso_type; ), - TP_printk("dev=%s queue_mapping=%u skbaddr=%p vlan_tagged=%d vlan_proto=0x%04x vlan_tci=0x%04x protocol=0x%04x ip_summed=%d len=%u data_len=%u network_offset=%d transport_offset_valid=%d transport_offset=%d tx_flags=%d gso_size=%d gso_segs=%d gso_type=%#x", + TP_printk("dev=%s queue_mapping=%u skbaddr=%pK vlan_tagged=%d vlan_proto=0x%04x vlan_tci=0x%04x protocol=0x%04x ip_summed=%d len=%u data_len=%u network_offset=%d transport_offset_valid=%d transport_offset=%d tx_flags=%d gso_size=%d gso_segs=%d gso_type=%#x", __get_str(name), __entry->queue_mapping, __entry->skbaddr, __entry->vlan_tagged, __entry->vlan_proto, __entry->vlan_tci, __entry->protocol, __entry->ip_summed, __entry->len, @@ -90,7 +90,7 @@ TRACE_EVENT(net_dev_xmit, __assign_str(name, dev->name); ), - TP_printk("dev=%s skbaddr=%p len=%u rc=%d", + TP_printk("dev=%s skbaddr=%pK len=%u rc=%d", __get_str(name), __entry->skbaddr, __entry->len, __entry->rc) ); @@ -112,7 +112,7 @@ DECLARE_EVENT_CLASS(net_dev_template, __assign_str(name, skb->dev->name); ), - TP_printk("dev=%s skbaddr=%p len=%u", + TP_printk("dev=%s skbaddr=%pK len=%u", __get_str(name), __entry->skbaddr, __entry->len) ) @@ -191,7 +191,7 @@ DECLARE_EVENT_CLASS(net_dev_rx_verbose_template, __entry->gso_type = skb_shinfo(skb)->gso_type; ), - TP_printk("dev=%s napi_id=%#x queue_mapping=%u skbaddr=%p vlan_tagged=%d vlan_proto=0x%04x vlan_tci=0x%04x protocol=0x%04x ip_summed=%d hash=0x%08x l4_hash=%d len=%u data_len=%u truesize=%u mac_header_valid=%d mac_header=%d nr_frags=%d gso_size=%d gso_type=%#x", + TP_printk("dev=%s napi_id=%#x queue_mapping=%u skbaddr=%pK vlan_tagged=%d vlan_proto=0x%04x vlan_tci=0x%04x protocol=0x%04x ip_summed=%d hash=0x%08x l4_hash=%d len=%u data_len=%u truesize=%u mac_header_valid=%d mac_header=%d nr_frags=%d gso_size=%d gso_type=%#x", __get_str(name), __entry->napi_id, __entry->queue_mapping, __entry->skbaddr, __entry->vlan_tagged, __entry->vlan_proto, __entry->vlan_tci, __entry->protocol, __entry->ip_summed, From 0d619cf6f982e67631c583c1e1457c331d911519 Mon Sep 17 00:00:00 2001 From: Maxim Patlasov Date: Mon, 12 Dec 2016 14:32:44 -0800 Subject: [PATCH 079/199] btrfs: limit async_work allocation and worker func duration commit 2939e1a86f758b55cdba73e29397dd3d94df13bc upstream. Problem statement: unprivileged user who has read-write access to more than one btrfs subvolume may easily consume all kernel memory (eventually triggering oom-killer). Reproducer (./mkrmdir below essentially loops over mkdir/rmdir): [root@kteam1 ~]# cat prep.sh DEV=/dev/sdb mkfs.btrfs -f $DEV mount $DEV /mnt for i in `seq 1 16` do mkdir /mnt/$i btrfs subvolume create /mnt/SV_$i ID=`btrfs subvolume list /mnt |grep "SV_$i$" |cut -d ' ' -f 2` mount -t btrfs -o subvolid=$ID $DEV /mnt/$i chmod a+rwx /mnt/$i done [root@kteam1 ~]# sh prep.sh [maxim@kteam1 ~]$ for i in `seq 1 16`; do ./mkrmdir /mnt/$i 2000 2000 & done [root@kteam1 ~]# for i in `seq 1 4`; do grep "kmalloc-128" /proc/slabinfo | grep -v dma; sleep 60; done kmalloc-128 10144 10144 128 32 1 : tunables 0 0 0 : slabdata 317 317 0 kmalloc-128 9992352 9992352 128 32 1 : tunables 0 0 0 : slabdata 312261 312261 0 kmalloc-128 24226752 24226752 128 32 1 : tunables 0 0 0 : slabdata 757086 757086 0 kmalloc-128 42754240 42754240 128 32 1 : tunables 0 0 0 : slabdata 1336070 1336070 0 The huge numbers above come from insane number of async_work-s allocated and queued by btrfs_wq_run_delayed_node. The problem is caused by btrfs_wq_run_delayed_node() queuing more and more works if the number of delayed items is above BTRFS_DELAYED_BACKGROUND. The worker func (btrfs_async_run_delayed_root) processes at least BTRFS_DELAYED_BATCH items (if they are present in the list). So, the machinery works as expected while the list is almost empty. As soon as it is getting bigger, worker func starts to process more than one item at a time, it takes longer, and the chances to have async_works queued more than needed is getting higher. The problem above is worsened by another flaw of delayed-inode implementation: if async_work was queued in a throttling branch (number of items >= BTRFS_DELAYED_WRITEBACK), corresponding worker func won't quit until the number of items < BTRFS_DELAYED_BACKGROUND / 2. So, it is possible that the func occupies CPU infinitely (up to 30sec in my experiments): while the func is trying to drain the list, the user activity may add more and more items to the list. The patch fixes both problems in straightforward way: refuse queuing too many works in btrfs_wq_run_delayed_node and bail out of worker func if at least BTRFS_DELAYED_WRITEBACK items are processed. Changed in v2: remove support of thresh == NO_THRESHOLD. Signed-off-by: Maxim Patlasov Signed-off-by: Chris Mason Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/async-thread.c | 14 ++++++++++++++ fs/btrfs/async-thread.h | 1 + fs/btrfs/delayed-inode.c | 6 ++++-- 3 files changed, 19 insertions(+), 2 deletions(-) diff --git a/fs/btrfs/async-thread.c b/fs/btrfs/async-thread.c index 9aba42b78253..a09264d8b853 100644 --- a/fs/btrfs/async-thread.c +++ b/fs/btrfs/async-thread.c @@ -70,6 +70,20 @@ void btrfs_##name(struct work_struct *arg) \ normal_work_helper(work); \ } +bool btrfs_workqueue_normal_congested(struct btrfs_workqueue *wq) +{ + /* + * We could compare wq->normal->pending with num_online_cpus() + * to support "thresh == NO_THRESHOLD" case, but it requires + * moving up atomic_inc/dec in thresh_queue/exec_hook. Let's + * postpone it until someone needs the support of that case. + */ + if (wq->normal->thresh == NO_THRESHOLD) + return false; + + return atomic_read(&wq->normal->pending) > wq->normal->thresh * 2; +} + BTRFS_WORK_HELPER(worker_helper); BTRFS_WORK_HELPER(delalloc_helper); BTRFS_WORK_HELPER(flush_delalloc_helper); diff --git a/fs/btrfs/async-thread.h b/fs/btrfs/async-thread.h index ad4d0647d1a6..8e1d6576d764 100644 --- a/fs/btrfs/async-thread.h +++ b/fs/btrfs/async-thread.h @@ -80,4 +80,5 @@ void btrfs_queue_work(struct btrfs_workqueue *wq, void btrfs_destroy_workqueue(struct btrfs_workqueue *wq); void btrfs_workqueue_set_max(struct btrfs_workqueue *wq, int max); void btrfs_set_work_high_priority(struct btrfs_work *work); +bool btrfs_workqueue_normal_congested(struct btrfs_workqueue *wq); #endif diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c index 02b934d0ee65..09fa5af9782e 100644 --- a/fs/btrfs/delayed-inode.c +++ b/fs/btrfs/delayed-inode.c @@ -1375,7 +1375,8 @@ release_path: total_done++; btrfs_release_prepared_delayed_node(delayed_node); - if (async_work->nr == 0 || total_done < async_work->nr) + if ((async_work->nr == 0 && total_done < BTRFS_DELAYED_WRITEBACK) || + total_done < async_work->nr) goto again; free_path: @@ -1391,7 +1392,8 @@ static int btrfs_wq_run_delayed_node(struct btrfs_delayed_root *delayed_root, { struct btrfs_async_delayed_work *async_work; - if (atomic_read(&delayed_root->items) < BTRFS_DELAYED_BACKGROUND) + if (atomic_read(&delayed_root->items) < BTRFS_DELAYED_BACKGROUND || + btrfs_workqueue_normal_congested(fs_info->delayed_workers)) return 0; async_work = kmalloc(sizeof(*async_work), GFP_NOFS); From 919b74ba5ed5d7d27ff1e70b13e7952a0bc86f22 Mon Sep 17 00:00:00 2001 From: Robbie Ko Date: Fri, 7 Oct 2016 17:30:47 +0800 Subject: [PATCH 080/199] Btrfs: fix tree search logic when replaying directory entry deletes commit 2a7bf53f577e49c43de4ffa7776056de26db65d9 upstream. If a log tree has a layout like the following: leaf N: ... item 240 key (282 DIR_LOG_ITEM 0) itemoff 8189 itemsize 8 dir log end 1275809046 leaf N + 1: item 0 key (282 DIR_LOG_ITEM 3936149215) itemoff 16275 itemsize 8 dir log end 18446744073709551615 ... When we pass the value 1275809046 + 1 as the parameter start_ret to the function tree-log.c:find_dir_range() (done by replay_dir_deletes()), we end up with path->slots[0] having the value 239 (points to the last item of leaf N, item 240). Because the dir log item in that position has an offset value smaller than *start_ret (1275809046 + 1) we need to move on to the next leaf, however the logic for that is wrong since it compares the current slot to the number of items in the leaf, which is smaller and therefore we don't lookup for the next leaf but instead we set the slot to point to an item that does not exist, at slot 240, and we later operate on that slot which has unexpected content or in the worst case can result in an invalid memory access (accessing beyond the last page of leaf N's extent buffer). So fix the logic that checks when we need to lookup at the next leaf by first incrementing the slot and only after to check if that slot is beyond the last item of the current leaf. Signed-off-by: Robbie Ko Reviewed-by: Filipe Manana Fixes: e02119d5a7b4 (Btrfs: Add a write ahead tree log to optimize synchronous operations) Signed-off-by: Filipe Manana [Modified changelog for clarity and correctness] Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/tree-log.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index f7441193bf35..ee7832e2d39d 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -1923,12 +1923,11 @@ static noinline int find_dir_range(struct btrfs_root *root, next: /* check the next slot in the tree to see if it is a valid item */ nritems = btrfs_header_nritems(path->nodes[0]); + path->slots[0]++; if (path->slots[0] >= nritems) { ret = btrfs_next_leaf(root, path); if (ret) goto out; - } else { - path->slots[0]++; } btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]); From b5e715ed11ec7b759fe72c1ecc05a9b0850045ec Mon Sep 17 00:00:00 2001 From: David Sterba Date: Tue, 1 Nov 2016 14:21:23 +0100 Subject: [PATCH 081/199] btrfs: store and load values of stripes_min/stripes_max in balance status item commit ed0df618b1b06d7431ee4d985317fc5419a5d559 upstream. The balance status item contains currently known filter values, but the stripes filter was unintentionally not among them. This would mean, that interrupted and automatically restarted balance does not apply the stripe filters. Fixes: dee32d0ac3719ef8d640efaf0884111df444730f Signed-off-by: David Sterba Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/ctree.h | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 1391f72c28c3..e847573c6db0 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -3070,6 +3070,8 @@ btrfs_disk_balance_args_to_cpu(struct btrfs_balance_args *cpu, cpu->target = le64_to_cpu(disk->target); cpu->flags = le64_to_cpu(disk->flags); cpu->limit = le64_to_cpu(disk->limit); + cpu->stripes_min = le32_to_cpu(disk->stripes_min); + cpu->stripes_max = le32_to_cpu(disk->stripes_max); } static inline void @@ -3088,6 +3090,8 @@ btrfs_cpu_balance_args_to_disk(struct btrfs_disk_balance_args *disk, disk->target = cpu_to_le64(cpu->target); disk->flags = cpu_to_le64(cpu->flags); disk->limit = cpu_to_le64(cpu->limit); + disk->stripes_min = cpu_to_le32(cpu->stripes_min); + disk->stripes_max = cpu_to_le32(cpu->stripes_max); } /* struct btrfs_super_block */ From 1f5adadcd03266a461ebe752964c40a3dc95bf2f Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Thu, 24 Nov 2016 02:09:04 +0000 Subject: [PATCH 082/199] Btrfs: fix qgroup rescan worker initialization commit 8d9eddad19467b008e0c881bc3133d7da94b7ec1 upstream. We were setting the qgroup_rescan_running flag to true only after the rescan worker started (which is a task run by a queue). So if a user space task starts a rescan and immediately after asks to wait for the rescan worker to finish, this second call might happen before the rescan worker task starts running, in which case the rescan wait ioctl returns immediatley, not waiting for the rescan worker to finish. This was making the fstest btrfs/022 fail very often. Fixes: d2c609b834d6 (btrfs: properly track when rescan worker is running) Signed-off-by: Filipe Manana Reviewed-by: David Sterba Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/qgroup.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c index bcc965ed5fa1..88d9b66e2207 100644 --- a/fs/btrfs/qgroup.c +++ b/fs/btrfs/qgroup.c @@ -2283,10 +2283,6 @@ static void btrfs_qgroup_rescan_worker(struct btrfs_work *work) int err = -ENOMEM; int ret = 0; - mutex_lock(&fs_info->qgroup_rescan_lock); - fs_info->qgroup_rescan_running = true; - mutex_unlock(&fs_info->qgroup_rescan_lock); - path = btrfs_alloc_path(); if (!path) goto out; @@ -2397,6 +2393,7 @@ qgroup_rescan_init(struct btrfs_fs_info *fs_info, u64 progress_objectid, sizeof(fs_info->qgroup_rescan_progress)); fs_info->qgroup_rescan_progress.objectid = progress_objectid; init_completion(&fs_info->qgroup_rescan_completion); + fs_info->qgroup_rescan_running = true; spin_unlock(&fs_info->qgroup_lock); mutex_unlock(&fs_info->qgroup_rescan_lock); From 17907f291779d55bcbcf829061261c7d49be175a Mon Sep 17 00:00:00 2001 From: Daniele Palmas Date: Thu, 1 Dec 2016 16:38:39 +0100 Subject: [PATCH 083/199] USB: serial: option: add support for Telit LE922A PIDs 0x1040, 0x1041 commit 5b09eff0c379002527ad72ea5ea38f25da8a8650 upstream. This patch adds support for PIDs 0x1040, 0x1041 of Telit LE922A. Since the interface positions are the same than the ones used for other Telit compositions, previous defined blacklists are used. Signed-off-by: Daniele Palmas Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/option.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c index 9894e341c6ac..33ae20392398 100644 --- a/drivers/usb/serial/option.c +++ b/drivers/usb/serial/option.c @@ -268,6 +268,8 @@ static void option_instat_callback(struct urb *urb); #define TELIT_PRODUCT_CC864_SINGLE 0x1006 #define TELIT_PRODUCT_DE910_DUAL 0x1010 #define TELIT_PRODUCT_UE910_V2 0x1012 +#define TELIT_PRODUCT_LE922_USBCFG1 0x1040 +#define TELIT_PRODUCT_LE922_USBCFG2 0x1041 #define TELIT_PRODUCT_LE922_USBCFG0 0x1042 #define TELIT_PRODUCT_LE922_USBCFG3 0x1043 #define TELIT_PRODUCT_LE922_USBCFG5 0x1045 @@ -1210,6 +1212,10 @@ static const struct usb_device_id option_ids[] = { { USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_UE910_V2) }, { USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_LE922_USBCFG0), .driver_info = (kernel_ulong_t)&telit_le922_blacklist_usbcfg0 }, + { USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_LE922_USBCFG1), + .driver_info = (kernel_ulong_t)&telit_le910_blacklist }, + { USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_LE922_USBCFG2), + .driver_info = (kernel_ulong_t)&telit_le922_blacklist_usbcfg3 }, { USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_LE922_USBCFG3), .driver_info = (kernel_ulong_t)&telit_le922_blacklist_usbcfg3 }, { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, TELIT_PRODUCT_LE922_USBCFG5, 0xff), From 6a6e113cd9284ce58d4318d60dda212ee0dbda30 Mon Sep 17 00:00:00 2001 From: Giuseppe Lippolis Date: Tue, 6 Dec 2016 21:24:19 +0100 Subject: [PATCH 084/199] USB: serial: option: add dlink dwm-158 commit d8a12b7117b42fd708f1e908498350232bdbd5ff upstream. Adding registration for 3G modem DWM-158 in usb-serial-option Signed-off-by: Giuseppe Lippolis Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/option.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c index 33ae20392398..7ce31a4c7e7f 100644 --- a/drivers/usb/serial/option.c +++ b/drivers/usb/serial/option.c @@ -1995,6 +1995,7 @@ static const struct usb_device_id option_ids[] = { { USB_DEVICE_AND_INTERFACE_INFO(0x2001, 0x7d02, 0xff, 0x00, 0x00) }, { USB_DEVICE_AND_INTERFACE_INFO(0x2001, 0x7d03, 0xff, 0x02, 0x01) }, { USB_DEVICE_AND_INTERFACE_INFO(0x2001, 0x7d03, 0xff, 0x00, 0x00) }, + { USB_DEVICE_INTERFACE_CLASS(0x2001, 0x7d04, 0xff) }, /* D-Link DWM-158 */ { USB_DEVICE_INTERFACE_CLASS(0x2001, 0x7e19, 0xff), /* D-Link DWM-221 B1 */ .driver_info = (kernel_ulong_t)&net_intf4_blacklist }, { USB_DEVICE_AND_INTERFACE_INFO(0x07d1, 0x3e01, 0xff, 0xff, 0xff) }, /* D-Link DWM-152/C1 */ From 5d6a392b6d36537dfc7bb3399f5efb3af78a3358 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Tue, 29 Nov 2016 16:55:01 +0100 Subject: [PATCH 085/199] USB: serial: kl5kusb105: fix open error path commit 6774d5f53271d5f60464f824748995b71da401ab upstream. Kill urbs and disable read before returning from open on failure to retrieve the line state. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/kl5kusb105.c | 33 ++++++++++++++++++++++++--------- 1 file changed, 24 insertions(+), 9 deletions(-) diff --git a/drivers/usb/serial/kl5kusb105.c b/drivers/usb/serial/kl5kusb105.c index e020ad28a00c..53c90131764d 100644 --- a/drivers/usb/serial/kl5kusb105.c +++ b/drivers/usb/serial/kl5kusb105.c @@ -296,7 +296,7 @@ static int klsi_105_open(struct tty_struct *tty, struct usb_serial_port *port) rc = usb_serial_generic_open(tty, port); if (rc) { retval = rc; - goto exit; + goto err_free_cfg; } rc = usb_control_msg(port->serial->dev, @@ -315,17 +315,32 @@ static int klsi_105_open(struct tty_struct *tty, struct usb_serial_port *port) dev_dbg(&port->dev, "%s - enabled reading\n", __func__); rc = klsi_105_get_line_state(port, &line_state); - if (rc >= 0) { - spin_lock_irqsave(&priv->lock, flags); - priv->line_state = line_state; - spin_unlock_irqrestore(&priv->lock, flags); - dev_dbg(&port->dev, "%s - read line state 0x%lx\n", __func__, line_state); - retval = 0; - } else + if (rc < 0) { retval = rc; + goto err_disable_read; + } -exit: + spin_lock_irqsave(&priv->lock, flags); + priv->line_state = line_state; + spin_unlock_irqrestore(&priv->lock, flags); + dev_dbg(&port->dev, "%s - read line state 0x%lx\n", __func__, + line_state); + + return 0; + +err_disable_read: + usb_control_msg(port->serial->dev, + usb_sndctrlpipe(port->serial->dev, 0), + KL5KUSB105A_SIO_CONFIGURE, + USB_TYPE_VENDOR | USB_DIR_OUT, + KL5KUSB105A_SIO_CONFIGURE_READ_OFF, + 0, /* index */ + NULL, 0, + KLSI_TIMEOUT); + usb_serial_generic_close(port); +err_free_cfg: kfree(cfg); + return retval; } From acca3cf0d62bedc56e63485a018787317a644481 Mon Sep 17 00:00:00 2001 From: Nathaniel Quillin Date: Mon, 5 Dec 2016 06:53:00 -0800 Subject: [PATCH 086/199] USB: cdc-acm: add device id for GW Instek AFG-125 commit 301216044e4c27d5a7323c1fa766266fad00db5e upstream. Add device-id entry for GW Instek AFG-125, which has a byte swapped bInterfaceSubClass (0x20). Signed-off-by: Nathaniel Quillin Acked-by: Oliver Neukum Signed-off-by: Greg Kroah-Hartman --- drivers/usb/class/cdc-acm.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/usb/class/cdc-acm.c b/drivers/usb/class/cdc-acm.c index 4d77745f439f..96849e2e7435 100644 --- a/drivers/usb/class/cdc-acm.c +++ b/drivers/usb/class/cdc-acm.c @@ -1708,6 +1708,7 @@ static const struct usb_device_id acm_ids[] = { { USB_DEVICE(0x20df, 0x0001), /* Simtec Electronics Entropy Key */ .driver_info = QUIRK_CONTROL_LINE_STATE, }, { USB_DEVICE(0x2184, 0x001c) }, /* GW Instek AFG-2225 */ + { USB_DEVICE(0x2184, 0x0036) }, /* GW Instek AFG-125 */ { USB_DEVICE(0x22b8, 0x6425), /* Motorola MOTOMAGX phones */ }, /* Motorola H24 HSPA module: */ From 747b31526369e700feac40e0cbc7d9b610e29649 Mon Sep 17 00:00:00 2001 From: Mathias Nyman Date: Thu, 17 Nov 2016 11:14:14 +0200 Subject: [PATCH 087/199] usb: hub: Fix auto-remount of safely removed or ejected USB-3 devices commit 37be66767e3cae4fd16e064d8bb7f9f72bf5c045 upstream. USB-3 does not have any link state that will avoid negotiating a connection with a plugged-in cable but will signal the host when the cable is unplugged. For USB-3 we used to first set the link to Disabled, then to RxDdetect to be able to detect cable connects or disconnects. But in RxDetect the connected device is detected again and eventually enabled. Instead set the link into U3 and disable remote wakeups for the device. This is what Windows does, and what Alan Stern suggested. Cc: Alan Stern Acked-by: Alan Stern Signed-off-by: Mathias Nyman Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/hub.c | 101 +++++++++++++++-------------------------- 1 file changed, 36 insertions(+), 65 deletions(-) diff --git a/drivers/usb/core/hub.c b/drivers/usb/core/hub.c index bcc1e1b729ad..496d6a558793 100644 --- a/drivers/usb/core/hub.c +++ b/drivers/usb/core/hub.c @@ -101,6 +101,8 @@ EXPORT_SYMBOL_GPL(ehci_cf_port_reset_rwsem); static void hub_release(struct kref *kref); static int usb_reset_and_verify_device(struct usb_device *udev); +static void hub_usb3_port_prepare_disable(struct usb_hub *hub, + struct usb_port *port_dev); static inline char *portspeed(struct usb_hub *hub, int portstatus) { @@ -883,82 +885,28 @@ static int hub_set_port_link_state(struct usb_hub *hub, int port1, } /* - * If USB 3.0 ports are placed into the Disabled state, they will no longer - * detect any device connects or disconnects. This is generally not what the - * USB core wants, since it expects a disabled port to produce a port status - * change event when a new device connects. - * - * Instead, set the link state to Disabled, wait for the link to settle into - * that state, clear any change bits, and then put the port into the RxDetect - * state. + * USB-3 does not have a similar link state as USB-2 that will avoid negotiating + * a connection with a plugged-in cable but will signal the host when the cable + * is unplugged. Disable remote wake and set link state to U3 for USB-3 devices */ -static int hub_usb3_port_disable(struct usb_hub *hub, int port1) -{ - int ret; - int total_time; - u16 portchange, portstatus; - - if (!hub_is_superspeed(hub->hdev)) - return -EINVAL; - - ret = hub_port_status(hub, port1, &portstatus, &portchange); - if (ret < 0) - return ret; - - /* - * USB controller Advanced Micro Devices, Inc. [AMD] FCH USB XHCI - * Controller [1022:7814] will have spurious result making the following - * usb 3.0 device hotplugging route to the 2.0 root hub and recognized - * as high-speed device if we set the usb 3.0 port link state to - * Disabled. Since it's already in USB_SS_PORT_LS_RX_DETECT state, we - * check the state here to avoid the bug. - */ - if ((portstatus & USB_PORT_STAT_LINK_STATE) == - USB_SS_PORT_LS_RX_DETECT) { - dev_dbg(&hub->ports[port1 - 1]->dev, - "Not disabling port; link state is RxDetect\n"); - return ret; - } - - ret = hub_set_port_link_state(hub, port1, USB_SS_PORT_LS_SS_DISABLED); - if (ret) - return ret; - - /* Wait for the link to enter the disabled state. */ - for (total_time = 0; ; total_time += HUB_DEBOUNCE_STEP) { - ret = hub_port_status(hub, port1, &portstatus, &portchange); - if (ret < 0) - return ret; - - if ((portstatus & USB_PORT_STAT_LINK_STATE) == - USB_SS_PORT_LS_SS_DISABLED) - break; - if (total_time >= HUB_DEBOUNCE_TIMEOUT) - break; - msleep(HUB_DEBOUNCE_STEP); - } - if (total_time >= HUB_DEBOUNCE_TIMEOUT) - dev_warn(&hub->ports[port1 - 1]->dev, - "Could not disable after %d ms\n", total_time); - - return hub_set_port_link_state(hub, port1, USB_SS_PORT_LS_RX_DETECT); -} - static int hub_port_disable(struct usb_hub *hub, int port1, int set_state) { struct usb_port *port_dev = hub->ports[port1 - 1]; struct usb_device *hdev = hub->hdev; int ret = 0; - if (port_dev->child && set_state) - usb_set_device_state(port_dev->child, USB_STATE_NOTATTACHED); if (!hub->error) { - if (hub_is_superspeed(hub->hdev)) - ret = hub_usb3_port_disable(hub, port1); - else + if (hub_is_superspeed(hub->hdev)) { + hub_usb3_port_prepare_disable(hub, port_dev); + ret = hub_set_port_link_state(hub, port_dev->portnum, + USB_SS_PORT_LS_U3); + } else { ret = usb_clear_port_feature(hdev, port1, USB_PORT_FEAT_ENABLE); + } } + if (port_dev->child && set_state) + usb_set_device_state(port_dev->child, USB_STATE_NOTATTACHED); if (ret && ret != -ENODEV) dev_err(&port_dev->dev, "cannot disable (err = %d)\n", ret); return ret; @@ -4073,6 +4021,26 @@ void usb_unlocked_enable_lpm(struct usb_device *udev) } EXPORT_SYMBOL_GPL(usb_unlocked_enable_lpm); +/* usb3 devices use U3 for disabled, make sure remote wakeup is disabled */ +static void hub_usb3_port_prepare_disable(struct usb_hub *hub, + struct usb_port *port_dev) +{ + struct usb_device *udev = port_dev->child; + int ret; + + if (udev && udev->port_is_suspended && udev->do_remote_wakeup) { + ret = hub_set_port_link_state(hub, port_dev->portnum, + USB_SS_PORT_LS_U0); + if (!ret) { + msleep(USB_RESUME_TIMEOUT); + ret = usb_disable_remote_wakeup(udev); + } + if (ret) + dev_warn(&udev->dev, + "Port disable: can't disable remote wake\n"); + udev->do_remote_wakeup = 0; + } +} #else /* CONFIG_PM */ @@ -4080,6 +4048,9 @@ EXPORT_SYMBOL_GPL(usb_unlocked_enable_lpm); #define hub_resume NULL #define hub_reset_resume NULL +static inline void hub_usb3_port_prepare_disable(struct usb_hub *hub, + struct usb_port *port_dev) { } + int usb_disable_lpm(struct usb_device *udev) { return 0; From c67c2ed829f3854c1df95ca53bbfdfc9c906d510 Mon Sep 17 00:00:00 2001 From: Peter Chen Date: Tue, 8 Nov 2016 10:10:44 +0800 Subject: [PATCH 088/199] usb: gadget: f_uac2: fix error handling at afunc_bind commit f1d3861d63a5d79b8968a02eea1dcb01bb684e62 upstream. The current error handling flow uses incorrect goto label, fix it Fixes: d12a8727171c ("usb: gadget: function: Remove redundant usb_free_all_descriptors") Signed-off-by: Peter Chen Signed-off-by: Felipe Balbi Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/function/f_uac2.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/usb/gadget/function/f_uac2.c b/drivers/usb/gadget/function/f_uac2.c index 12628dd36e55..12064d3bddf6 100644 --- a/drivers/usb/gadget/function/f_uac2.c +++ b/drivers/usb/gadget/function/f_uac2.c @@ -1079,13 +1079,13 @@ afunc_bind(struct usb_configuration *cfg, struct usb_function *fn) agdev->out_ep = usb_ep_autoconfig(gadget, &fs_epout_desc); if (!agdev->out_ep) { dev_err(dev, "%s:%d Error!\n", __func__, __LINE__); - goto err; + return ret; } agdev->in_ep = usb_ep_autoconfig(gadget, &fs_epin_desc); if (!agdev->in_ep) { dev_err(dev, "%s:%d Error!\n", __func__, __LINE__); - goto err; + return ret; } uac2->p_prm.uac2 = uac2; @@ -1102,7 +1102,7 @@ afunc_bind(struct usb_configuration *cfg, struct usb_function *fn) ret = usb_assign_descriptors(fn, fs_audio_desc, hs_audio_desc, NULL); if (ret) - goto err; + return ret; prm = &agdev->uac2.c_prm; prm->max_psize = hs_epout_desc.wMaxPacketSize; @@ -1117,19 +1117,19 @@ afunc_bind(struct usb_configuration *cfg, struct usb_function *fn) prm->rbuf = kzalloc(prm->max_psize * USB_XFERS, GFP_KERNEL); if (!prm->rbuf) { prm->max_psize = 0; - goto err_free_descs; + goto err; } ret = alsa_uac2_init(agdev); if (ret) - goto err_free_descs; + goto err; return 0; -err_free_descs: - usb_free_all_descriptors(fn); err: kfree(agdev->uac2.p_prm.rbuf); kfree(agdev->uac2.c_prm.rbuf); +err_free_descs: + usb_free_all_descriptors(fn); return -EINVAL; } From 8ede2d7908aa9970979ef5e5fc95d3d69976d353 Mon Sep 17 00:00:00 2001 From: Felipe Balbi Date: Wed, 28 Sep 2016 10:38:11 +0300 Subject: [PATCH 089/199] usb: gadget: composite: correctly initialize ep->maxpacket commit e8f29bb719b47a234f33b0af62974d7a9521a52c upstream. usb_endpoint_maxp() returns wMaxPacketSize in its raw form. Without taking into consideration that it also contains other bits reserved for isochronous endpoints. This patch fixes one occasion where this is a problem by making sure that we initialize ep->maxpacket only with lower 10 bits of the value returned by usb_endpoint_maxp(). Note that seperate patches will be necessary to audit all call sites of usb_endpoint_maxp() and make sure that usb_endpoint_maxp() only returns lower 10 bits of wMaxPacketSize. Signed-off-by: Felipe Balbi Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/composite.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/gadget/composite.c b/drivers/usb/gadget/composite.c index 8b14c2a13ac5..e9bc1d392783 100644 --- a/drivers/usb/gadget/composite.c +++ b/drivers/usb/gadget/composite.c @@ -144,7 +144,7 @@ int config_ep_by_speed(struct usb_gadget *g, ep_found: /* commit results */ - _ep->maxpacket = usb_endpoint_maxp(chosen_desc); + _ep->maxpacket = usb_endpoint_maxp(chosen_desc) & 0x7ff; _ep->desc = chosen_desc; _ep->comp_desc = NULL; _ep->maxburst = 0; From cbb2a2563587fe344156b37b614d168d6faa4238 Mon Sep 17 00:00:00 2001 From: Alan Stern Date: Fri, 21 Oct 2016 16:49:07 -0400 Subject: [PATCH 090/199] USB: UHCI: report non-PME wakeup signalling for Intel hardware commit ccdb6be9ec6580ef69f68949ebe26e0fb58a6fb0 upstream. The UHCI controllers in Intel chipsets rely on a platform-specific non-PME mechanism for wakeup signalling. They can generate wakeup signals even though they don't support PME. We need to let the USB core know this so that it will enable runtime suspend for UHCI controllers. Signed-off-by: Alan Stern Signed-off-by: Bjorn Helgaas Acked-by: Greg Kroah-Hartman Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/uhci-pci.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/usb/host/uhci-pci.c b/drivers/usb/host/uhci-pci.c index 940304c33224..02260cfdedb1 100644 --- a/drivers/usb/host/uhci-pci.c +++ b/drivers/usb/host/uhci-pci.c @@ -129,6 +129,10 @@ static int uhci_pci_init(struct usb_hcd *hcd) if (to_pci_dev(uhci_dev(uhci))->vendor == PCI_VENDOR_ID_HP) uhci->wait_for_hp = 1; + /* Intel controllers use non-PME wakeup signalling */ + if (to_pci_dev(uhci_dev(uhci))->vendor == PCI_VENDOR_ID_INTEL) + device_set_run_wake(uhci_dev(uhci), 1); + /* Set up pointers to PCI-specific functions */ uhci->reset_hc = uhci_pci_reset_hc; uhci->check_and_reset_hc = uhci_pci_check_and_reset_hc; From d0ba0f6a82914e684e5d9ae2cae702bed75670fe Mon Sep 17 00:00:00 2001 From: Con Kolivas Date: Fri, 9 Dec 2016 15:15:57 +1100 Subject: [PATCH 091/199] ALSA: usb-audio: Add QuickCam Communicate Deluxe/S7500 to volume_control_quirks commit 82ffb6fc637150b279f49e174166d2aa3853eaf4 upstream. The Logitech QuickCam Communicate Deluxe/S7500 microphone fails with the following warning. [ 6.778995] usb 2-1.2.2.2: Warning! Unlikely big volume range (=3072), cval->res is probably wrong. [ 6.778996] usb 2-1.2.2.2: [5] FU [Mic Capture Volume] ch = 1, val = 4608/7680/1 Adding it to the list of devices in volume_control_quirks makes it work properly, fixing related typo. Signed-off-by: Con Kolivas Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/usb/mixer.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sound/usb/mixer.c b/sound/usb/mixer.c index 4f85757009b3..499b03c8281d 100644 --- a/sound/usb/mixer.c +++ b/sound/usb/mixer.c @@ -931,9 +931,10 @@ static void volume_control_quirks(struct usb_mixer_elem_info *cval, case USB_ID(0x046d, 0x0826): /* HD Webcam c525 */ case USB_ID(0x046d, 0x08ca): /* Logitech Quickcam Fusion */ case USB_ID(0x046d, 0x0991): + case USB_ID(0x046d, 0x09a2): /* QuickCam Communicate Deluxe/S7500 */ /* Most audio usb devices lie about volume resolution. * Most Logitech webcams have res = 384. - * Proboly there is some logitech magic behind this number --fishor + * Probably there is some logitech magic behind this number --fishor */ if (!strcmp(kctl->id.name, "Mic Capture Volume")) { usb_audio_info(chip, From 29348065c339eca935e90acaf70adbe8c7688930 Mon Sep 17 00:00:00 2001 From: Jussi Laako Date: Mon, 28 Nov 2016 11:27:45 +0200 Subject: [PATCH 092/199] ALSA: hiface: Fix M2Tech hiFace driver sampling rate change commit 995c6a7fd9b9212abdf01160f6ce3193176be503 upstream. Sampling rate changes after first set one are not reflected to the hardware, while driver and ALSA think the rate has been changed. Fix the problem by properly stopping the interface at the beginning of prepare call, allowing new rate to be set to the hardware. This keeps the hardware in sync with the driver. Signed-off-by: Jussi Laako Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/usb/hiface/pcm.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sound/usb/hiface/pcm.c b/sound/usb/hiface/pcm.c index 2c44139b4041..33db205dd12b 100644 --- a/sound/usb/hiface/pcm.c +++ b/sound/usb/hiface/pcm.c @@ -445,6 +445,8 @@ static int hiface_pcm_prepare(struct snd_pcm_substream *alsa_sub) mutex_lock(&rt->stream_mutex); + hiface_pcm_stream_stop(rt); + sub->dma_off = 0; sub->period_off = 0; From 518fca87c645392eb9020af970a86ece63046fe4 Mon Sep 17 00:00:00 2001 From: Sven Hahne Date: Fri, 25 Nov 2016 14:16:43 +0100 Subject: [PATCH 093/199] ALSA: hda/ca0132 - Add quirk for Alienware 15 R2 2016 commit b5337cfe067e96b8a98699da90c7dcd2bec21133 upstream. I'm using an Alienware 15 R2 and had to use the alienware quirks to get my headphone output working. I fixed it by adding, SND_PCI_QUIRK(0x1028, 0x0708, "Alienware 15 R2 2016", QUIRK_ALIENWARE) to the patch. Signed-off-by: Sven Hahne Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/patch_ca0132.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/pci/hda/patch_ca0132.c b/sound/pci/hda/patch_ca0132.c index 9ceb2bc36e68..c146d0de53d8 100644 --- a/sound/pci/hda/patch_ca0132.c +++ b/sound/pci/hda/patch_ca0132.c @@ -780,6 +780,7 @@ static const struct hda_pintbl alienware_pincfgs[] = { static const struct snd_pci_quirk ca0132_quirks[] = { SND_PCI_QUIRK(0x1028, 0x0685, "Alienware 15 2015", QUIRK_ALIENWARE), SND_PCI_QUIRK(0x1028, 0x0688, "Alienware 17 2015", QUIRK_ALIENWARE), + SND_PCI_QUIRK(0x1028, 0x0708, "Alienware 15 R2 2016", QUIRK_ALIENWARE), {} }; From faff777fb4560e839960fc8460e3735e26982e07 Mon Sep 17 00:00:00 2001 From: Hui Wang Date: Wed, 23 Nov 2016 16:05:37 +0800 Subject: [PATCH 094/199] ALSA: hda - ignore the assoc and seq when comparing pin configurations commit 64047d7f4912de1769d1bf0d34c6322494b13779 upstream. More and more pin configurations have been adding to the pin quirk table, lots of them are only different from assoc and seq, but they all apply to the same QUIRK_FIXUP, if we don't compare assoc and seq when matching pin configurations, it will greatly reduce the pin quirk table size. We have tested this change on a couple of Dell laptops, it worked well. Signed-off-by: Hui Wang Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/hda_auto_parser.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/pci/hda/hda_auto_parser.c b/sound/pci/hda/hda_auto_parser.c index 7f57a145a47e..4ad29f8d7a4a 100644 --- a/sound/pci/hda/hda_auto_parser.c +++ b/sound/pci/hda/hda_auto_parser.c @@ -901,7 +901,7 @@ static bool pin_config_match(struct hda_codec *codec, for (; t_pins->nid; t_pins++) { if (t_pins->nid == nid) { found = 1; - if (t_pins->val == cfg) + if ((t_pins->val & 0xfffffff0) == (cfg & 0xfffffff0)) break; else if ((cfg & 0xf0000000) == 0x40000000 && (t_pins->val & 0xf0000000) == 0x40000000) break; From fa695bdc17412df3c6ca5b7375c63b52915096d2 Mon Sep 17 00:00:00 2001 From: Hui Wang Date: Wed, 23 Nov 2016 16:05:38 +0800 Subject: [PATCH 095/199] ALSA: hda - fix headset-mic problem on a Dell laptop commit 989dbe4a30728c047316ab87e5fa8b609951ce7c upstream. This group of new pins is not in the pin quirk table yet, adding them to the pin quirk table to fix the headset-mic problem. Signed-off-by: Hui Wang Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/patch_realtek.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index f0986cac82f1..3b2687889cd5 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -5899,6 +5899,9 @@ static const struct snd_hda_pin_quirk alc269_pin_fixup_tbl[] = { {0x12, 0x90a60180}, {0x14, 0x90170120}, {0x21, 0x02211030}), + SND_HDA_PIN_QUIRK(0x10ec0255, 0x1028, "Dell", ALC255_FIXUP_DELL1_MIC_NO_PRESENCE, + {0x1b, 0x01011020}, + {0x21, 0x02211010}), SND_HDA_PIN_QUIRK(0x10ec0256, 0x1028, "Dell", ALC255_FIXUP_DELL1_MIC_NO_PRESENCE, {0x12, 0x90a60160}, {0x14, 0x90170120}, From f0ea0ade046e1d334ae7aff4a3a1ea508acd61e4 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 6 Dec 2016 11:55:17 +0100 Subject: [PATCH 096/199] ALSA: hda - Gate the mic jack on HP Z1 Gen3 AiO commit f73cd43ac3b41c0f09a126387f302bbc0d9c726d upstream. HP Z1 Gen3 AiO with Conexant codec doesn't give an unsolicited event to the headset mic pin upon the jack plugging, it reports only to the headphone pin. It results in the missing mic switching. Let's fix up by simply gating the jack event. Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/patch_conexant.c | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/sound/pci/hda/patch_conexant.c b/sound/pci/hda/patch_conexant.c index 36cd715986bc..46f7b023f69c 100644 --- a/sound/pci/hda/patch_conexant.c +++ b/sound/pci/hda/patch_conexant.c @@ -262,6 +262,7 @@ enum { CXT_FIXUP_CAP_MIX_AMP_5047, CXT_FIXUP_MUTE_LED_EAPD, CXT_FIXUP_HP_SPECTRE, + CXT_FIXUP_HP_GATE_MIC, }; /* for hda_fixup_thinkpad_acpi() */ @@ -633,6 +634,17 @@ static void cxt_fixup_cap_mix_amp_5047(struct hda_codec *codec, (1 << AC_AMPCAP_MUTE_SHIFT)); } +static void cxt_fixup_hp_gate_mic_jack(struct hda_codec *codec, + const struct hda_fixup *fix, + int action) +{ + /* the mic pin (0x19) doesn't give an unsolicited event; + * probe the mic pin together with the headphone pin (0x16) + */ + if (action == HDA_FIXUP_ACT_PROBE) + snd_hda_jack_set_gating_jack(codec, 0x19, 0x16); +} + /* ThinkPad X200 & co with cxt5051 */ static const struct hda_pintbl cxt_pincfg_lenovo_x200[] = { { 0x16, 0x042140ff }, /* HP (seq# overridden) */ @@ -774,6 +786,10 @@ static const struct hda_fixup cxt_fixups[] = { { } } }, + [CXT_FIXUP_HP_GATE_MIC] = { + .type = HDA_FIXUP_FUNC, + .v.func = cxt_fixup_hp_gate_mic_jack, + }, }; static const struct snd_pci_quirk cxt5045_fixups[] = { @@ -824,6 +840,7 @@ static const struct snd_pci_quirk cxt5066_fixups[] = { SND_PCI_QUIRK(0x1025, 0x054c, "Acer Aspire 3830TG", CXT_FIXUP_ASPIRE_DMIC), SND_PCI_QUIRK(0x1025, 0x054f, "Acer Aspire 4830T", CXT_FIXUP_ASPIRE_DMIC), SND_PCI_QUIRK(0x103c, 0x8174, "HP Spectre x360", CXT_FIXUP_HP_SPECTRE), + SND_PCI_QUIRK(0x103c, 0x8115, "HP Z1 Gen3", CXT_FIXUP_HP_GATE_MIC), SND_PCI_QUIRK(0x1043, 0x138d, "Asus", CXT_FIXUP_HEADPHONE_MIC_PIN), SND_PCI_QUIRK(0x152d, 0x0833, "OLPC XO-1.5", CXT_FIXUP_OLPC_XO), SND_PCI_QUIRK(0x17aa, 0x20f2, "Lenovo T400", CXT_PINCFG_LENOVO_TP410), From 119b6658ce050a40a89c12fa88cd9dc4932b1a9f Mon Sep 17 00:00:00 2001 From: Kai-Heng Feng Date: Tue, 6 Dec 2016 16:56:27 +0800 Subject: [PATCH 097/199] ALSA: hda: when comparing pin configurations, ignore assoc in addition to seq commit 5e0ad0d8747f3e4803a9c3d96d64dd7332506d3c upstream. Commit [64047d7f4912 ALSA: hda - ignore the assoc and seq when comparing pin configurations] intented to ignore both seq and assoc at pin comparing, but it only ignored seq. So that commit may still fail to match pins on some machines. Change the bitmask to also ignore assoc. v2: Use macro to do bit masking. Thanks to Hui Wang for the analysis. Fixes: 64047d7f4912 ("ALSA: hda - ignore the assoc and seq when comparing...") Signed-off-by: Kai-Heng Feng Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/hda_auto_parser.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/sound/pci/hda/hda_auto_parser.c b/sound/pci/hda/hda_auto_parser.c index 4ad29f8d7a4a..a03cf68d0bcd 100644 --- a/sound/pci/hda/hda_auto_parser.c +++ b/sound/pci/hda/hda_auto_parser.c @@ -884,6 +884,8 @@ void snd_hda_apply_fixup(struct hda_codec *codec, int action) } EXPORT_SYMBOL_GPL(snd_hda_apply_fixup); +#define IGNORE_SEQ_ASSOC (~(AC_DEFCFG_SEQUENCE | AC_DEFCFG_DEF_ASSOC)) + static bool pin_config_match(struct hda_codec *codec, const struct hda_pintbl *pins) { @@ -901,7 +903,7 @@ static bool pin_config_match(struct hda_codec *codec, for (; t_pins->nid; t_pins++) { if (t_pins->nid == nid) { found = 1; - if ((t_pins->val & 0xfffffff0) == (cfg & 0xfffffff0)) + if ((t_pins->val & IGNORE_SEQ_ASSOC) == (cfg & IGNORE_SEQ_ASSOC)) break; else if ((cfg & 0xf0000000) == 0x40000000 && (t_pins->val & 0xf0000000) == 0x40000000) break; From 590202271ffd18b63fefeaa902f1ee36b853cf91 Mon Sep 17 00:00:00 2001 From: Richard Watts Date: Fri, 2 Dec 2016 23:14:38 +0200 Subject: [PATCH 098/199] clk: ti: omap36xx: Work around sprz319 advisory 2.1 commit 035cd485a47dda64f25ccf8a90b11a07d0b7aa7a upstream. The OMAP36xx DPLL5, driving EHCI USB, can be subject to a long-term frequency drift. The frequency drift magnitude depends on the VCO update rate, which is inversely proportional to the PLL divider. The kernel DPLL configuration code results in a high value for the divider, leading to a long term drift high enough to cause USB transmission errors. In the worst case the USB PHY's ULPI interface can stop responding, breaking USB operation completely. This manifests itself on the Beagleboard xM by the LAN9514 reporting 'Cannot enable port 2. Maybe the cable is bad?' in the kernel log. Errata sprz319 advisory 2.1 documents PLL values that minimize the drift. Use them automatically when DPLL5 is used for USB operation, which we detect based on the requested clock rate. The clock framework will still compute the PLL parameters and resulting rate as usual, but the PLL M and N values will then be overridden. This can result in the effective clock rate being slightly different than the rate cached by the clock framework, but won't cause any adverse effect to USB operation. Signed-off-by: Richard Watts [Upported from v3.2 to v4.9] Signed-off-by: Laurent Pinchart Tested-by: Ladislav Michl Signed-off-by: Stephen Boyd Cc: Adam Ford Signed-off-by: Greg Kroah-Hartman --- drivers/clk/ti/clk-3xxx.c | 20 ++++++------ drivers/clk/ti/clock.h | 9 ++++++ drivers/clk/ti/dpll.c | 19 ++++++++++- drivers/clk/ti/dpll3xxx.c | 67 +++++++++++++++++++++++++++++++++++++++ 4 files changed, 104 insertions(+), 11 deletions(-) diff --git a/drivers/clk/ti/clk-3xxx.c b/drivers/clk/ti/clk-3xxx.c index 8831e1a05367..11d8aa3ec186 100644 --- a/drivers/clk/ti/clk-3xxx.c +++ b/drivers/clk/ti/clk-3xxx.c @@ -22,13 +22,6 @@ #include "clock.h" -/* - * DPLL5_FREQ_FOR_USBHOST: USBHOST and USBTLL are the only clocks - * that are sourced by DPLL5, and both of these require this clock - * to be at 120 MHz for proper operation. - */ -#define DPLL5_FREQ_FOR_USBHOST 120000000 - #define OMAP3430ES2_ST_DSS_IDLE_SHIFT 1 #define OMAP3430ES2_ST_HSOTGUSB_IDLE_SHIFT 5 #define OMAP3430ES2_ST_SSI_IDLE_SHIFT 8 @@ -546,14 +539,21 @@ void __init omap3_clk_lock_dpll5(void) struct clk *dpll5_clk; struct clk *dpll5_m2_clk; + /* + * Errata sprz319f advisory 2.1 documents a USB host clock drift issue + * that can be worked around using specially crafted dpll5 settings + * with a dpll5_m2 divider set to 8. Set the dpll5 rate to 8x the USB + * host clock rate, its .set_rate handler() will detect that frequency + * and use the errata settings. + */ dpll5_clk = clk_get(NULL, "dpll5_ck"); - clk_set_rate(dpll5_clk, DPLL5_FREQ_FOR_USBHOST); + clk_set_rate(dpll5_clk, OMAP3_DPLL5_FREQ_FOR_USBHOST * 8); clk_prepare_enable(dpll5_clk); - /* Program dpll5_m2_clk divider for no division */ + /* Program dpll5_m2_clk divider */ dpll5_m2_clk = clk_get(NULL, "dpll5_m2_ck"); clk_prepare_enable(dpll5_m2_clk); - clk_set_rate(dpll5_m2_clk, DPLL5_FREQ_FOR_USBHOST); + clk_set_rate(dpll5_m2_clk, OMAP3_DPLL5_FREQ_FOR_USBHOST); clk_disable_unprepare(dpll5_m2_clk); clk_disable_unprepare(dpll5_clk); diff --git a/drivers/clk/ti/clock.h b/drivers/clk/ti/clock.h index 90f3f472ae1c..13c37f48d9d6 100644 --- a/drivers/clk/ti/clock.h +++ b/drivers/clk/ti/clock.h @@ -257,11 +257,20 @@ long omap2_dpll_round_rate(struct clk_hw *hw, unsigned long target_rate, unsigned long omap3_clkoutx2_recalc(struct clk_hw *hw, unsigned long parent_rate); +/* + * OMAP3_DPLL5_FREQ_FOR_USBHOST: USBHOST and USBTLL are the only clocks + * that are sourced by DPLL5, and both of these require this clock + * to be at 120 MHz for proper operation. + */ +#define OMAP3_DPLL5_FREQ_FOR_USBHOST 120000000 + unsigned long omap3_dpll_recalc(struct clk_hw *hw, unsigned long parent_rate); int omap3_dpll4_set_rate(struct clk_hw *clk, unsigned long rate, unsigned long parent_rate); int omap3_dpll4_set_rate_and_parent(struct clk_hw *hw, unsigned long rate, unsigned long parent_rate, u8 index); +int omap3_dpll5_set_rate(struct clk_hw *hw, unsigned long rate, + unsigned long parent_rate); void omap3_clk_lock_dpll5(void); unsigned long omap4_dpll_regm4xen_recalc(struct clk_hw *hw, diff --git a/drivers/clk/ti/dpll.c b/drivers/clk/ti/dpll.c index 5519b386edc0..f9a5089ddc79 100644 --- a/drivers/clk/ti/dpll.c +++ b/drivers/clk/ti/dpll.c @@ -114,6 +114,18 @@ static const struct clk_ops omap3_dpll_ck_ops = { .round_rate = &omap2_dpll_round_rate, }; +static const struct clk_ops omap3_dpll5_ck_ops = { + .enable = &omap3_noncore_dpll_enable, + .disable = &omap3_noncore_dpll_disable, + .get_parent = &omap2_init_dpll_parent, + .recalc_rate = &omap3_dpll_recalc, + .set_rate = &omap3_dpll5_set_rate, + .set_parent = &omap3_noncore_dpll_set_parent, + .set_rate_and_parent = &omap3_noncore_dpll_set_rate_and_parent, + .determine_rate = &omap3_noncore_dpll_determine_rate, + .round_rate = &omap2_dpll_round_rate, +}; + static const struct clk_ops omap3_dpll_per_ck_ops = { .enable = &omap3_noncore_dpll_enable, .disable = &omap3_noncore_dpll_disable, @@ -461,7 +473,12 @@ static void __init of_ti_omap3_dpll_setup(struct device_node *node) .modes = (1 << DPLL_LOW_POWER_BYPASS) | (1 << DPLL_LOCKED), }; - of_ti_dpll_setup(node, &omap3_dpll_ck_ops, &dd); + if ((of_machine_is_compatible("ti,omap3630") || + of_machine_is_compatible("ti,omap36xx")) && + !strcmp(node->name, "dpll5_ck")) + of_ti_dpll_setup(node, &omap3_dpll5_ck_ops, &dd); + else + of_ti_dpll_setup(node, &omap3_dpll_ck_ops, &dd); } CLK_OF_DECLARE(ti_omap3_dpll_clock, "ti,omap3-dpll-clock", of_ti_omap3_dpll_setup); diff --git a/drivers/clk/ti/dpll3xxx.c b/drivers/clk/ti/dpll3xxx.c index f4dec00fb684..0e9119fae760 100644 --- a/drivers/clk/ti/dpll3xxx.c +++ b/drivers/clk/ti/dpll3xxx.c @@ -815,3 +815,70 @@ int omap3_dpll4_set_rate_and_parent(struct clk_hw *hw, unsigned long rate, return omap3_noncore_dpll_set_rate_and_parent(hw, rate, parent_rate, index); } + +/* Apply DM3730 errata sprz319 advisory 2.1. */ +static bool omap3_dpll5_apply_errata(struct clk_hw *hw, + unsigned long parent_rate) +{ + struct omap3_dpll5_settings { + unsigned int rate, m, n; + }; + + static const struct omap3_dpll5_settings precomputed[] = { + /* + * From DM3730 errata advisory 2.1, table 35 and 36. + * The N value is increased by 1 compared to the tables as the + * errata lists register values while last_rounded_field is the + * real divider value. + */ + { 12000000, 80, 0 + 1 }, + { 13000000, 443, 5 + 1 }, + { 19200000, 50, 0 + 1 }, + { 26000000, 443, 11 + 1 }, + { 38400000, 25, 0 + 1 } + }; + + const struct omap3_dpll5_settings *d; + struct clk_hw_omap *clk = to_clk_hw_omap(hw); + struct dpll_data *dd; + unsigned int i; + + for (i = 0; i < ARRAY_SIZE(precomputed); ++i) { + if (parent_rate == precomputed[i].rate) + break; + } + + if (i == ARRAY_SIZE(precomputed)) + return false; + + d = &precomputed[i]; + + /* Update the M, N and rounded rate values and program the DPLL. */ + dd = clk->dpll_data; + dd->last_rounded_m = d->m; + dd->last_rounded_n = d->n; + dd->last_rounded_rate = div_u64((u64)parent_rate * d->m, d->n); + omap3_noncore_dpll_program(clk, 0); + + return true; +} + +/** + * omap3_dpll5_set_rate - set rate for omap3 dpll5 + * @hw: clock to change + * @rate: target rate for clock + * @parent_rate: rate of the parent clock + * + * Set rate for the DPLL5 clock. Apply the sprz319 advisory 2.1 on OMAP36xx if + * the DPLL is used for USB host (detected through the requested rate). + */ +int omap3_dpll5_set_rate(struct clk_hw *hw, unsigned long rate, + unsigned long parent_rate) +{ + if (rate == OMAP3_DPLL5_FREQ_FOR_USBHOST * 8) { + if (omap3_dpll5_apply_errata(hw, parent_rate)) + return 0; + } + + return omap3_noncore_dpll_set_rate(hw, rate, parent_rate); +} From 25ee81e5462015d2ba3792d2ae0ea902a98ae8a4 Mon Sep 17 00:00:00 2001 From: Liu Bo Date: Wed, 3 Aug 2016 12:33:01 -0700 Subject: [PATCH 099/199] Btrfs: fix memory leak in reading btree blocks commit 2571e739677f1e4c0c63f5ed49adcc0857923625 upstream. So we can read a btree block via readahead or intentional read, and we can end up with a memory leak when something happens as follows, 1) readahead starts to read block A but does not wait for read completion, 2) btree_readpage_end_io_hook finds that block A is corrupted, and it needs to clear all block A's pages' uptodate bit. 3) meanwhile an intentional read kicks in and checks block A's pages' uptodate to decide which page needs to be read. 4) when some pages have the uptodate bit during 3)'s check so 3) doesn't count them for eb->io_pages, but they are later cleared by 2) so we has to readpage on the page, we get the wrong eb->io_pages which results in a memory leak of this block. This fixes the problem by firstly getting all pages's locking and then checking pages' uptodate bit. t1(readahead) t2(readahead endio) t3(the following read) read_extent_buffer_pages end_bio_extent_readpage for pg in eb: for page 0,1,2 in eb: if pg is uptodate: btree_readpage_end_io_hook(pg) num_reads++ if uptodate: eb->io_pages = num_reads SetPageUptodate(pg) _______________ for pg in eb: for page 3 in eb: read_extent_buffer_pages if pg is NOT uptodate: btree_readpage_end_io_hook(pg) for pg in eb: __extent_read_full_page(pg) sanity check reports something wrong if pg is uptodate: clear_extent_buffer_uptodate(eb) num_reads++ for pg in eb: eb->io_pages = num_reads ClearPageUptodate(page) _______________ for pg in eb: if pg is NOT uptodate: __extent_read_full_page(pg) So t3's eb->io_pages is not consistent with the number of pages it's reading, and during endio(), atomic_dec_and_test(&eb->io_pages) will get a negative number so that we're not able to free the eb. Signed-off-by: Liu Bo Reviewed-by: David Sterba Signed-off-by: David Sterba Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/extent_io.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 257bbdcb5df6..e767f347f2b1 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -5294,11 +5294,20 @@ int read_extent_buffer_pages(struct extent_io_tree *tree, lock_page(page); } locked_pages++; + } + /* + * We need to firstly lock all pages to make sure that + * the uptodate bit of our pages won't be affected by + * clear_extent_buffer_uptodate(). + */ + for (i = start_i; i < num_pages; i++) { + page = eb->pages[i]; if (!PageUptodate(page)) { num_reads++; all_uptodate = 0; } } + if (all_uptodate) { if (start_i == 0) set_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags); From 78a587c608f9ef0b0f7bd0bb5f4165a61a782862 Mon Sep 17 00:00:00 2001 From: Liu Bo Date: Thu, 25 Aug 2016 18:08:27 -0700 Subject: [PATCH 100/199] Btrfs: bail out if block group has different mixed flag commit 49303381f19ab16a371a061b67e783d3f570d56e upstream. Currently we allow inconsistence about mixed flag (BTRFS_BLOCK_GROUP_METADATA | BTRFS_BLOCK_GROUP_DATA). We'd get ENOSPC if block group has mixed flag and btrfs doesn't. If that happens, we have one space_info with mixed flag and another space_info only with BTRFS_BLOCK_GROUP_METADATA, and global_block_rsv.space_info points to the latter one, but all bytes from block_group contributes to the mixed space_info, thus all the allocation will fail with ENOSPC. This adds a check for the above case. Reported-by: Vegard Nossum Signed-off-by: Liu Bo [ updated message ] Reviewed-by: David Sterba Signed-off-by: David Sterba Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/extent-tree.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 47cdc6f3390b..431364ab9c0e 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -9686,6 +9686,11 @@ int btrfs_read_block_groups(struct btrfs_root *root) struct extent_buffer *leaf; int need_clear = 0; u64 cache_gen; + u64 feature; + int mixed; + + feature = btrfs_super_incompat_flags(info->super_copy); + mixed = !!(feature & BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS); root = info->extent_root; key.objectid = 0; @@ -9739,6 +9744,15 @@ int btrfs_read_block_groups(struct btrfs_root *root) btrfs_item_ptr_offset(leaf, path->slots[0]), sizeof(cache->item)); cache->flags = btrfs_block_group_flags(&cache->item); + if (!mixed && + ((cache->flags & BTRFS_BLOCK_GROUP_METADATA) && + (cache->flags & BTRFS_BLOCK_GROUP_DATA))) { + btrfs_err(info, +"bg %llu is a mixed block group but filesystem hasn't enabled mixed block groups", + cache->key.objectid); + ret = -EINVAL; + goto error; + } key.objectid = found_key.objectid + found_key.offset; btrfs_release_path(path); From 8ca6845a58cda0cda9359eb313af35c2f88df073 Mon Sep 17 00:00:00 2001 From: Liu Bo Date: Wed, 14 Sep 2016 08:51:46 -0700 Subject: [PATCH 101/199] Btrfs: return gracefully from balance if fs tree is corrupted commit 3561b9db70928f207be4570b48fc19898eeaef54 upstream. When relocating tree blocks, we firstly get block information from back references in the extent tree, we then search fs tree to try to find all parents of a block. However, if fs tree is corrupted, eg. if there're some missing items, we could come across these WARN_ONs and BUG_ONs. This makes us print some error messages and return gracefully from balance. Signed-off-by: Liu Bo Reviewed-by: Josef Bacik Signed-off-by: David Sterba Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/relocation.c | 23 +++++++++++++++++------ 1 file changed, 17 insertions(+), 6 deletions(-) diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c index b4ca5454ef1a..937d015422e0 100644 --- a/fs/btrfs/relocation.c +++ b/fs/btrfs/relocation.c @@ -921,9 +921,16 @@ again: path2->slots[level]--; eb = path2->nodes[level]; - WARN_ON(btrfs_node_blockptr(eb, path2->slots[level]) != - cur->bytenr); - + if (btrfs_node_blockptr(eb, path2->slots[level]) != + cur->bytenr) { + btrfs_err(root->fs_info, + "couldn't find block (%llu) (level %d) in tree (%llu) with key (%llu %u %llu)", + cur->bytenr, level - 1, root->objectid, + node_key->objectid, node_key->type, + node_key->offset); + err = -ENOENT; + goto out; + } lower = cur; need_check = true; for (; level < BTRFS_MAX_LEVEL; level++) { @@ -2676,11 +2683,15 @@ static int do_relocation(struct btrfs_trans_handle *trans, if (!upper->eb) { ret = btrfs_search_slot(trans, root, key, path, 0, 1); - if (ret < 0) { - err = ret; + if (ret) { + if (ret < 0) + err = ret; + else + err = -ENOENT; + + btrfs_release_path(path); break; } - BUG_ON(ret > 0); if (!upper->eb) { upper->eb = path->nodes[upper->level]; From 5a9b659edef60fb665786525528086b5df5e0e19 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Fri, 2 Sep 2016 15:25:43 -0400 Subject: [PATCH 102/199] Btrfs: don't leak reloc root nodes on error commit 6bdf131fac2336adb1a628f992ba32384f653a55 upstream. We don't track the reloc roots in any sort of normal way, so the only way the root/commit_root nodes get free'd is if the relocation finishes successfully and the reloc root is deleted. Fix this by free'ing them in free_reloc_roots. Thanks, Signed-off-by: Josef Bacik Signed-off-by: David Sterba Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/relocation.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c index 937d015422e0..8ca9aa92972d 100644 --- a/fs/btrfs/relocation.c +++ b/fs/btrfs/relocation.c @@ -2350,6 +2350,10 @@ void free_reloc_roots(struct list_head *list) while (!list_empty(list)) { reloc_root = list_entry(list->next, struct btrfs_root, root_list); + free_extent_buffer(reloc_root->node); + free_extent_buffer(reloc_root->commit_root); + reloc_root->node = NULL; + reloc_root->commit_root = NULL; __del_reloc_root(reloc_root); } } From 107800061996dc872a9fa72fb806ac9dadf2cb6b Mon Sep 17 00:00:00 2001 From: Liu Bo Date: Tue, 13 Sep 2016 19:02:27 -0700 Subject: [PATCH 103/199] Btrfs: fix memory leak in do_walk_down commit a958eab0ed7fdc1b977bc25d3af6efedaa945488 upstream. The extent buffer 'next' needs to be free'd conditionally. Signed-off-by: Liu Bo Reviewed-by: David Sterba Signed-off-by: David Sterba Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/extent-tree.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 431364ab9c0e..099645ab986d 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -8488,6 +8488,7 @@ static noinline int do_walk_down(struct btrfs_trans_handle *trans, &wc->flags[level - 1]); if (ret < 0) { btrfs_tree_unlock(next); + free_extent_buffer(next); return ret; } From 323ffc03ddb2007fbfda0e3c3b749edfc7053e67 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Fri, 23 Sep 2016 13:23:28 +0200 Subject: [PATCH 104/199] Btrfs: don't BUG() during drop snapshot commit 4867268c57ff709a7b6b86ae6f6537d846d1443a upstream. Really there's lots of things that can go wrong here, kill all the BUG_ON()'s and replace the logic ones with ASSERT()'s and return EIO instead. Signed-off-by: Josef Bacik [ switched to btrfs_err, errors go to common label ] Reviewed-by: Liu Bo Signed-off-by: David Sterba Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/extent-tree.c | 38 +++++++++++++++++++++++++++----------- 1 file changed, 27 insertions(+), 11 deletions(-) diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 099645ab986d..2af08c3de775 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -8486,15 +8486,13 @@ static noinline int do_walk_down(struct btrfs_trans_handle *trans, ret = btrfs_lookup_extent_info(trans, root, bytenr, level - 1, 1, &wc->refs[level - 1], &wc->flags[level - 1]); - if (ret < 0) { - btrfs_tree_unlock(next); - free_extent_buffer(next); - return ret; - } + if (ret < 0) + goto out_unlock; if (unlikely(wc->refs[level - 1] == 0)) { btrfs_err(root->fs_info, "Missing references."); - BUG(); + ret = -EIO; + goto out_unlock; } *lookup_info = 0; @@ -8546,7 +8544,12 @@ static noinline int do_walk_down(struct btrfs_trans_handle *trans, } level--; - BUG_ON(level != btrfs_header_level(next)); + ASSERT(level == btrfs_header_level(next)); + if (level != btrfs_header_level(next)) { + btrfs_err(root->fs_info, "mismatched level"); + ret = -EIO; + goto out_unlock; + } path->nodes[level] = next; path->slots[level] = 0; path->locks[level] = BTRFS_WRITE_LOCK_BLOCKING; @@ -8561,8 +8564,15 @@ skip: if (wc->flags[level] & BTRFS_BLOCK_FLAG_FULL_BACKREF) { parent = path->nodes[level]->start; } else { - BUG_ON(root->root_key.objectid != + ASSERT(root->root_key.objectid == btrfs_header_owner(path->nodes[level])); + if (root->root_key.objectid != + btrfs_header_owner(path->nodes[level])) { + btrfs_err(root->fs_info, + "mismatched block owner"); + ret = -EIO; + goto out_unlock; + } parent = 0; } @@ -8579,12 +8589,18 @@ skip: } ret = btrfs_free_extent(trans, root, bytenr, blocksize, parent, root->root_key.objectid, level - 1, 0); - BUG_ON(ret); /* -ENOMEM */ + if (ret) + goto out_unlock; } + + *lookup_info = 1; + ret = 1; + +out_unlock: btrfs_tree_unlock(next); free_extent_buffer(next); - *lookup_info = 1; - return 1; + + return ret; } /* From 85cfbd9db2c6e0d213e5c28b04c8ca1820000533 Mon Sep 17 00:00:00 2001 From: Wang Xiaoguang Date: Thu, 13 Oct 2016 09:23:39 +0800 Subject: [PATCH 105/199] btrfs: make file clone aware of fatal signals commit 69ae5e4459e43e56f03d0987e865fbac2b05af2a upstream. Indeed this just make the behavior similar to xfs when process has fatal signals pending, and it'll make fstests/generic/298 happy. Signed-off-by: Wang Xiaoguang Reviewed-by: David Sterba Signed-off-by: David Sterba Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/ioctl.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index a7e18dbadf74..317b99acdf4b 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -3825,6 +3825,11 @@ process_slot: } btrfs_release_path(path); key.offset = next_key_min_offset; + + if (fatal_signal_pending(current)) { + ret = -EINTR; + goto out; + } } ret = 0; From d80411dea6a43adc8fd92c9c39e367f44aba1be9 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Mon, 12 Dec 2016 08:21:51 -0700 Subject: [PATCH 106/199] block_dev: don't test bdev->bd_contains when it is not stable commit bcc7f5b4bee8e327689a4d994022765855c807ff upstream. bdev->bd_contains is not stable before calling __blkdev_get(). When __blkdev_get() is called on a parition with ->bd_openers == 0 it sets bdev->bd_contains = bdev; which is not correct for a partition. After a call to __blkdev_get() succeeds, ->bd_openers will be > 0 and then ->bd_contains is stable. When FMODE_EXCL is used, blkdev_get() calls bd_start_claiming() -> bd_prepare_to_claim() -> bd_may_claim() This call happens before __blkdev_get() is called, so ->bd_contains is not stable. So bd_may_claim() cannot safely use ->bd_contains. It currently tries to use it, and this can lead to a BUG_ON(). This happens when a whole device is already open with a bd_holder (in use by dm in my particular example) and two threads race to open a partition of that device for the first time, one opening with O_EXCL and one without. The thread that doesn't use O_EXCL gets through blkdev_get() to __blkdev_get(), gains the ->bd_mutex, and sets bdev->bd_contains = bdev; Immediately thereafter the other thread, using FMODE_EXCL, calls bd_start_claiming() from blkdev_get(). This should fail because the whole device has a holder, but because bdev->bd_contains == bdev bd_may_claim() incorrectly reports success. This thread continues and blocks on bd_mutex. The first thread then sets bdev->bd_contains correctly and drops the mutex. The thread using FMODE_EXCL then continues and when it calls bd_may_claim() again in: BUG_ON(!bd_may_claim(bdev, whole, holder)); The BUG_ON fires. Fix this by removing the dependency on ->bd_contains in bd_may_claim(). As bd_may_claim() has direct access to the whole device, it can simply test if the target bdev is the whole device. Fixes: 6b4517a7913a ("block: implement bd_claiming and claiming block") Signed-off-by: NeilBrown Signed-off-by: Jens Axboe Signed-off-by: Greg Kroah-Hartman --- fs/block_dev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/block_dev.c b/fs/block_dev.c index 44d4a1e9244e..f10dbac851a1 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -759,7 +759,7 @@ static bool bd_may_claim(struct block_device *bdev, struct block_device *whole, return true; /* already a holder */ else if (bdev->bd_holder != NULL) return false; /* held by someone else */ - else if (bdev->bd_contains == bdev) + else if (whole == bdev) return true; /* is a whole device which isn't held */ else if (whole->bd_holder == bd_may_claim) From 03eed7afbc09e061f66b448daf7863174c3dc3f3 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Thu, 13 Oct 2016 21:23:16 -0500 Subject: [PATCH 107/199] mm: Add a user_ns owner to mm_struct and fix ptrace permission checks commit bfedb589252c01fa505ac9f6f2a3d5d68d707ef4 upstream. During exec dumpable is cleared if the file that is being executed is not readable by the user executing the file. A bug in ptrace_may_access allows reading the file if the executable happens to enter into a subordinate user namespace (aka clone(CLONE_NEWUSER), unshare(CLONE_NEWUSER), or setns(fd, CLONE_NEWUSER). This problem is fixed with only necessary userspace breakage by adding a user namespace owner to mm_struct, captured at the time of exec, so it is clear in which user namespace CAP_SYS_PTRACE must be present in to be able to safely give read permission to the executable. The function ptrace_may_access is modified to verify that the ptracer has CAP_SYS_ADMIN in task->mm->user_ns instead of task->cred->user_ns. This ensures that if the task changes it's cred into a subordinate user namespace it does not become ptraceable. The function ptrace_attach is modified to only set PT_PTRACE_CAP when CAP_SYS_PTRACE is held over task->mm->user_ns. The intent of PT_PTRACE_CAP is to be a flag to note that whatever permission changes the task might go through the tracer has sufficient permissions for it not to be an issue. task->cred->user_ns is always the same as or descendent of mm->user_ns. Which guarantees that having CAP_SYS_PTRACE over mm->user_ns is the worst case for the tasks credentials. To prevent regressions mm->dumpable and mm->user_ns are not considered when a task has no mm. As simply failing ptrace_may_attach causes regressions in privileged applications attempting to read things such as /proc//stat Acked-by: Kees Cook Tested-by: Cyrill Gorcunov Fixes: 8409cca70561 ("userns: allow ptrace from non-init user namespaces") Signed-off-by: "Eric W. Biederman" Signed-off-by: Greg Kroah-Hartman --- include/linux/mm_types.h | 1 + kernel/fork.c | 9 ++++++--- kernel/ptrace.c | 26 +++++++++++--------------- mm/init-mm.c | 2 ++ 4 files changed, 20 insertions(+), 18 deletions(-) diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index f8d1492a114f..2ccccbfcd532 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -469,6 +469,7 @@ struct mm_struct { */ struct task_struct __rcu *owner; #endif + struct user_namespace *user_ns; /* store ref to file /proc//exe symlink points to */ struct file __rcu *exe_file; diff --git a/kernel/fork.c b/kernel/fork.c index 7161ebe67cbb..2e55b53399de 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -585,7 +585,8 @@ static void mm_init_owner(struct mm_struct *mm, struct task_struct *p) #endif } -static struct mm_struct *mm_init(struct mm_struct *mm, struct task_struct *p) +static struct mm_struct *mm_init(struct mm_struct *mm, struct task_struct *p, + struct user_namespace *user_ns) { mm->mmap = NULL; mm->mm_rb = RB_ROOT; @@ -625,6 +626,7 @@ static struct mm_struct *mm_init(struct mm_struct *mm, struct task_struct *p) if (init_new_context(p, mm)) goto fail_nocontext; + mm->user_ns = get_user_ns(user_ns); return mm; fail_nocontext: @@ -670,7 +672,7 @@ struct mm_struct *mm_alloc(void) return NULL; memset(mm, 0, sizeof(*mm)); - return mm_init(mm, current); + return mm_init(mm, current, current_user_ns()); } /* @@ -685,6 +687,7 @@ void __mmdrop(struct mm_struct *mm) destroy_context(mm); mmu_notifier_mm_destroy(mm); check_mm(mm); + put_user_ns(mm->user_ns); free_mm(mm); } EXPORT_SYMBOL_GPL(__mmdrop); @@ -942,7 +945,7 @@ static struct mm_struct *dup_mm(struct task_struct *tsk) memcpy(mm, oldmm, sizeof(*mm)); - if (!mm_init(mm, tsk)) + if (!mm_init(mm, tsk, mm->user_ns)) goto fail_nomem; err = dup_mmap(mm, oldmm); diff --git a/kernel/ptrace.c b/kernel/ptrace.c index 3189e51db7e8..21b60e0a3056 100644 --- a/kernel/ptrace.c +++ b/kernel/ptrace.c @@ -219,7 +219,7 @@ static int ptrace_has_cap(struct user_namespace *ns, unsigned int mode) static int __ptrace_may_access(struct task_struct *task, unsigned int mode) { const struct cred *cred = current_cred(), *tcred; - int dumpable = 0; + struct mm_struct *mm; kuid_t caller_uid; kgid_t caller_gid; @@ -270,16 +270,11 @@ static int __ptrace_may_access(struct task_struct *task, unsigned int mode) return -EPERM; ok: rcu_read_unlock(); - smp_rmb(); - if (task->mm) - dumpable = get_dumpable(task->mm); - rcu_read_lock(); - if (dumpable != SUID_DUMP_USER && - !ptrace_has_cap(__task_cred(task)->user_ns, mode)) { - rcu_read_unlock(); - return -EPERM; - } - rcu_read_unlock(); + mm = task->mm; + if (mm && + ((get_dumpable(mm) != SUID_DUMP_USER) && + !ptrace_has_cap(mm->user_ns, mode))) + return -EPERM; return security_ptrace_access_check(task, mode); } @@ -330,6 +325,11 @@ static int ptrace_attach(struct task_struct *task, long request, task_lock(task); retval = __ptrace_may_access(task, PTRACE_MODE_ATTACH_REALCREDS); + if (!retval) { + struct mm_struct *mm = task->mm; + if (mm && ns_capable(mm->user_ns, CAP_SYS_PTRACE)) + flags |= PT_PTRACE_CAP; + } task_unlock(task); if (retval) goto unlock_creds; @@ -343,10 +343,6 @@ static int ptrace_attach(struct task_struct *task, long request, if (seize) flags |= PT_SEIZED; - rcu_read_lock(); - if (ns_capable(__task_cred(task)->user_ns, CAP_SYS_PTRACE)) - flags |= PT_PTRACE_CAP; - rcu_read_unlock(); task->ptrace = flags; __ptrace_link(task, current); diff --git a/mm/init-mm.c b/mm/init-mm.c index a56a851908d2..975e49f00f34 100644 --- a/mm/init-mm.c +++ b/mm/init-mm.c @@ -6,6 +6,7 @@ #include #include +#include #include #include @@ -21,5 +22,6 @@ struct mm_struct init_mm = { .mmap_sem = __RWSEM_INITIALIZER(init_mm.mmap_sem), .page_table_lock = __SPIN_LOCK_UNLOCKED(init_mm.page_table_lock), .mmlist = LIST_HEAD_INIT(init_mm.mmlist), + .user_ns = &init_user_ns, INIT_MM_CONTEXT(init_mm) }; From 1c1f15f8ebfbd5042883a1c9ae4b18a6299c9c5f Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Mon, 14 Nov 2016 18:48:07 -0600 Subject: [PATCH 108/199] ptrace: Capture the ptracer's creds not PT_PTRACE_CAP commit 64b875f7ac8a5d60a4e191479299e931ee949b67 upstream. When the flag PT_PTRACE_CAP was added the PTRACE_TRACEME path was overlooked. This can result in incorrect behavior when an application like strace traces an exec of a setuid executable. Further PT_PTRACE_CAP does not have enough information for making good security decisions as it does not report which user namespace the capability is in. This has already allowed one mistake through insufficient granulariy. I found this issue when I was testing another corner case of exec and discovered that I could not get strace to set PT_PTRACE_CAP even when running strace as root with a full set of caps. This change fixes the above issue with strace allowing stracing as root a setuid executable without disabling setuid. More fundamentaly this change allows what is allowable at all times, by using the correct information in it's decision. Fixes: 4214e42f96d4 ("v2.4.9.11 -> v2.4.9.12") Signed-off-by: "Eric W. Biederman" Signed-off-by: Greg Kroah-Hartman --- fs/exec.c | 2 +- include/linux/capability.h | 1 + include/linux/ptrace.h | 1 - include/linux/sched.h | 1 + kernel/capability.c | 20 ++++++++++++++++++++ kernel/ptrace.c | 12 +++++++----- 6 files changed, 30 insertions(+), 7 deletions(-) diff --git a/fs/exec.c b/fs/exec.c index b06623a9347f..4040d7b173c0 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -1254,7 +1254,7 @@ static void check_unsafe_exec(struct linux_binprm *bprm) unsigned n_fs; if (p->ptrace) { - if (p->ptrace & PT_PTRACE_CAP) + if (ptracer_capable(p, current_user_ns())) bprm->unsafe |= LSM_UNSAFE_PTRACE_CAP; else bprm->unsafe |= LSM_UNSAFE_PTRACE; diff --git a/include/linux/capability.h b/include/linux/capability.h index 5f8249d378a2..7d38697c7e63 100644 --- a/include/linux/capability.h +++ b/include/linux/capability.h @@ -249,6 +249,7 @@ static inline bool ns_capable_noaudit(struct user_namespace *ns, int cap) #endif /* CONFIG_MULTIUSER */ extern bool capable_wrt_inode_uidgid(const struct inode *inode, int cap); extern bool file_ns_capable(const struct file *file, struct user_namespace *ns, int cap); +extern bool ptracer_capable(struct task_struct *tsk, struct user_namespace *ns); /* audit system wants to get cap info from files as well */ extern int get_vfs_caps_from_disk(const struct dentry *dentry, struct cpu_vfs_cap_data *cpu_caps); diff --git a/include/linux/ptrace.h b/include/linux/ptrace.h index 504c98a278d4..e13bfdf7f314 100644 --- a/include/linux/ptrace.h +++ b/include/linux/ptrace.h @@ -19,7 +19,6 @@ #define PT_SEIZED 0x00010000 /* SEIZE used, enable new behavior */ #define PT_PTRACED 0x00000001 #define PT_DTRACE 0x00000002 /* delayed trace (used on m68k, i386) */ -#define PT_PTRACE_CAP 0x00000004 /* ptracer can follow suid-exec */ #define PT_OPT_FLAG_SHIFT 3 /* PT_TRACE_* event enable flags */ diff --git a/include/linux/sched.h b/include/linux/sched.h index 1c0193baea2a..ce0f61dcd887 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1540,6 +1540,7 @@ struct task_struct { struct list_head cpu_timers[3]; /* process credentials */ + const struct cred __rcu *ptracer_cred; /* Tracer's credentials at attach */ const struct cred __rcu *real_cred; /* objective and real subjective task * credentials (COW) */ const struct cred __rcu *cred; /* effective (overridable) subjective task diff --git a/kernel/capability.c b/kernel/capability.c index 00411c82dac5..dfa0e4528b0b 100644 --- a/kernel/capability.c +++ b/kernel/capability.c @@ -473,3 +473,23 @@ bool capable_wrt_inode_uidgid(const struct inode *inode, int cap) kgid_has_mapping(ns, inode->i_gid); } EXPORT_SYMBOL(capable_wrt_inode_uidgid); + +/** + * ptracer_capable - Determine if the ptracer holds CAP_SYS_PTRACE in the namespace + * @tsk: The task that may be ptraced + * @ns: The user namespace to search for CAP_SYS_PTRACE in + * + * Return true if the task that is ptracing the current task had CAP_SYS_PTRACE + * in the specified user namespace. + */ +bool ptracer_capable(struct task_struct *tsk, struct user_namespace *ns) +{ + int ret = 0; /* An absent tracer adds no restrictions */ + const struct cred *cred; + rcu_read_lock(); + cred = rcu_dereference(tsk->ptracer_cred); + if (cred) + ret = security_capable_noaudit(cred, ns, CAP_SYS_PTRACE); + rcu_read_unlock(); + return (ret == 0); +} diff --git a/kernel/ptrace.c b/kernel/ptrace.c index 21b60e0a3056..a46c40bfb5f6 100644 --- a/kernel/ptrace.c +++ b/kernel/ptrace.c @@ -39,6 +39,9 @@ void __ptrace_link(struct task_struct *child, struct task_struct *new_parent) BUG_ON(!list_empty(&child->ptrace_entry)); list_add(&child->ptrace_entry, &new_parent->ptraced); child->parent = new_parent; + rcu_read_lock(); + child->ptracer_cred = get_cred(__task_cred(new_parent)); + rcu_read_unlock(); } /** @@ -71,11 +74,15 @@ void __ptrace_link(struct task_struct *child, struct task_struct *new_parent) */ void __ptrace_unlink(struct task_struct *child) { + const struct cred *old_cred; BUG_ON(!child->ptrace); child->ptrace = 0; child->parent = child->real_parent; list_del_init(&child->ptrace_entry); + old_cred = child->ptracer_cred; + child->ptracer_cred = NULL; + put_cred(old_cred); spin_lock(&child->sighand->siglock); @@ -325,11 +332,6 @@ static int ptrace_attach(struct task_struct *task, long request, task_lock(task); retval = __ptrace_may_access(task, PTRACE_MODE_ATTACH_REALCREDS); - if (!retval) { - struct mm_struct *mm = task->mm; - if (mm && ns_capable(mm->user_ns, CAP_SYS_PTRACE)) - flags |= PT_PTRACE_CAP; - } task_unlock(task); if (retval) goto unlock_creds; From 4b6050922fb8feae8240e8b257d55fda52148e2a Mon Sep 17 00:00:00 2001 From: Alex Porosanu Date: Wed, 9 Nov 2016 10:46:11 +0200 Subject: [PATCH 109/199] crypto: caam - fix AEAD givenc descriptors MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit d128af17876d79b87edf048303f98b35f6a53dbc upstream. The AEAD givenc descriptor relies on moving the IV through the output FIFO and then back to the CTX2 for authentication. The SEQ FIFO STORE could be scheduled before the data can be read from OFIFO, especially since the SEQ FIFO LOAD needs to wait for the SEQ FIFO LOAD SKIP to finish first. The SKIP takes more time when the input is SG than when it's a contiguous buffer. If the SEQ FIFO LOAD is not scheduled before the STORE, the DECO will hang waiting for data to be available in the OFIFO so it can be transferred to C2. In order to overcome this, first force transfer of IV to C2 by starting the "cryptlen" transfer first and then starting to store data from OFIFO to the output buffer. Fixes: 1acebad3d8db8 ("crypto: caam - faster aead implementation") Signed-off-by: Alex Porosanu Signed-off-by: Horia Geantă Signed-off-by: Herbert Xu Signed-off-by: Greg Kroah-Hartman --- drivers/crypto/caam/caamalg.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/crypto/caam/caamalg.c b/drivers/crypto/caam/caamalg.c index 2cde3796cb82..f3307fc38e79 100644 --- a/drivers/crypto/caam/caamalg.c +++ b/drivers/crypto/caam/caamalg.c @@ -702,7 +702,9 @@ copy_iv: /* Will read cryptlen */ append_math_add(desc, VARSEQINLEN, SEQINLEN, REG0, CAAM_CMD_SZ); - aead_append_src_dst(desc, FIFOLD_TYPE_MSG1OUT2); + append_seq_fifo_load(desc, 0, FIFOLD_CLASS_BOTH | KEY_VLF | + FIFOLD_TYPE_MSG1OUT2 | FIFOLD_TYPE_LASTBOTH); + append_seq_fifo_store(desc, 0, FIFOST_TYPE_MESSAGE_DATA | KEY_VLF); /* Write ICV */ append_seq_store(desc, ctx->authsize, LDST_CLASS_2_CCB | From 26492d8a22b013df435eadd91eeba2c8701206f6 Mon Sep 17 00:00:00 2001 From: Chandan Rajendra Date: Mon, 14 Nov 2016 21:04:37 -0500 Subject: [PATCH 110/199] ext4: fix mballoc breakage with 64k block size commit 69e43e8cc971a79dd1ee5d4343d8e63f82725123 upstream. 'border' variable is set to a value of 2 times the block size of the underlying filesystem. With 64k block size, the resulting value won't fit into a 16-bit variable. Hence this commit changes the data type of 'border' to 'unsigned int'. Fixes: c9de560ded61f Signed-off-by: Chandan Rajendra Signed-off-by: Theodore Ts'o Reviewed-by: Andreas Dilger Signed-off-by: Greg Kroah-Hartman --- fs/ext4/mballoc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index 3c7f0c44cfb3..748dec05ec7b 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -669,7 +669,7 @@ static void ext4_mb_mark_free_simple(struct super_block *sb, ext4_grpblk_t min; ext4_grpblk_t max; ext4_grpblk_t chunk; - unsigned short border; + unsigned int border; BUG_ON(len > EXT4_CLUSTERS_PER_GROUP(sb)); From 7b74c351de1c135faf6f80226dedf3aba697464a Mon Sep 17 00:00:00 2001 From: Chandan Rajendra Date: Mon, 14 Nov 2016 21:26:26 -0500 Subject: [PATCH 111/199] ext4: fix stack memory corruption with 64k block size commit 30a9d7afe70ed6bd9191d3000e2ef1a34fb58493 upstream. The number of 'counters' elements needed in 'struct sg' is super_block->s_blocksize_bits + 2. Presently we have 16 'counters' elements in the array. This is insufficient for block sizes >= 32k. In such cases the memcpy operation performed in ext4_mb_seq_groups_show() would cause stack memory corruption. Fixes: c9de560ded61f Signed-off-by: Chandan Rajendra Signed-off-by: Theodore Ts'o Reviewed-by: Jan Kara Signed-off-by: Greg Kroah-Hartman --- fs/ext4/mballoc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index 748dec05ec7b..b7a3957a9dca 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -2287,7 +2287,7 @@ static int ext4_mb_seq_groups_show(struct seq_file *seq, void *v) struct ext4_group_info *grinfo; struct sg { struct ext4_group_info info; - ext4_grpblk_t counters[16]; + ext4_grpblk_t counters[EXT4_MAX_BLOCK_LOG_SIZE + 2]; } sg; group--; From 3865880138108e50cc7ca3cc5fde1fd41dfd6000 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Fri, 18 Nov 2016 13:28:30 -0500 Subject: [PATCH 112/199] ext4: use more strict checks for inodes_per_block on mount commit cd6bb35bf7f6d7d922509bf50265383a0ceabe96 upstream. Centralize the checks for inodes_per_block and be more strict to make sure the inodes_per_block_group can't end up being zero. Signed-off-by: Theodore Ts'o Reviewed-by: Andreas Dilger Signed-off-by: Greg Kroah-Hartman --- fs/ext4/super.c | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 127155b82e6e..c59f864bd0c8 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -3496,12 +3496,16 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) sbi->s_blocks_per_group = le32_to_cpu(es->s_blocks_per_group); sbi->s_inodes_per_group = le32_to_cpu(es->s_inodes_per_group); - if (EXT4_INODE_SIZE(sb) == 0 || EXT4_INODES_PER_GROUP(sb) == 0) - goto cantfind_ext4; sbi->s_inodes_per_block = blocksize / EXT4_INODE_SIZE(sb); if (sbi->s_inodes_per_block == 0) goto cantfind_ext4; + if (sbi->s_inodes_per_group < sbi->s_inodes_per_block || + sbi->s_inodes_per_group > blocksize * 8) { + ext4_msg(sb, KERN_ERR, "invalid inodes per group: %lu\n", + sbi->s_blocks_per_group); + goto failed_mount; + } sbi->s_itb_per_group = sbi->s_inodes_per_group / sbi->s_inodes_per_block; sbi->s_desc_per_block = blocksize / EXT4_DESC_SIZE(sb); @@ -3584,13 +3588,6 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) } sbi->s_cluster_ratio = clustersize / blocksize; - if (sbi->s_inodes_per_group > blocksize * 8) { - ext4_msg(sb, KERN_ERR, - "#inodes per group too big: %lu", - sbi->s_inodes_per_group); - goto failed_mount; - } - /* Do we have standard group size of clustersize * 8 blocks ? */ if (sbi->s_blocks_per_group == clustersize << 3) set_opt2(sb, STD_GROUP_SIZE); From 3995881b0f722cde317c06a098a11d20a9805ee9 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Fri, 18 Nov 2016 13:24:26 -0500 Subject: [PATCH 113/199] ext4: fix in-superblock mount options processing commit 5aee0f8a3f42c94c5012f1673420aee96315925a upstream. Fix a large number of problems with how we handle mount options in the superblock. For one, if the string in the superblock is long enough that it is not null terminated, we could run off the end of the string and try to interpret superblocks fields as characters. It's unlikely this will cause a security problem, but it could result in an invalid parse. Also, parse_options is destructive to the string, so in some cases if there is a comma-separated string, it would be modified in the superblock. (Fortunately it only happens on file systems with a 1k block size.) Signed-off-by: Theodore Ts'o Signed-off-by: Greg Kroah-Hartman --- fs/ext4/super.c | 38 +++++++++++++++++++++++--------------- 1 file changed, 23 insertions(+), 15 deletions(-) diff --git a/fs/ext4/super.c b/fs/ext4/super.c index c59f864bd0c8..5a5b8c8d4b0d 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -3130,7 +3130,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) char *orig_data = kstrdup(data, GFP_KERNEL); struct buffer_head *bh; struct ext4_super_block *es = NULL; - struct ext4_sb_info *sbi; + struct ext4_sb_info *sbi = kzalloc(sizeof(*sbi), GFP_KERNEL); ext4_fsblk_t block; ext4_fsblk_t sb_block = get_sb_block(&data); ext4_fsblk_t logical_sb_block; @@ -3149,16 +3149,14 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) unsigned int journal_ioprio = DEFAULT_JOURNAL_IOPRIO; ext4_group_t first_not_zeroed; - sbi = kzalloc(sizeof(*sbi), GFP_KERNEL); - if (!sbi) - goto out_free_orig; + if ((data && !orig_data) || !sbi) + goto out_free_base; sbi->s_blockgroup_lock = kzalloc(sizeof(struct blockgroup_lock), GFP_KERNEL); - if (!sbi->s_blockgroup_lock) { - kfree(sbi); - goto out_free_orig; - } + if (!sbi->s_blockgroup_lock) + goto out_free_base; + sb->s_fs_info = sbi; sbi->s_sb = sb; sbi->s_inode_readahead_blks = EXT4_DEF_INODE_READAHEAD_BLKS; @@ -3304,11 +3302,19 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) */ sbi->s_li_wait_mult = EXT4_DEF_LI_WAIT_MULT; - if (!parse_options((char *) sbi->s_es->s_mount_opts, sb, - &journal_devnum, &journal_ioprio, 0)) { - ext4_msg(sb, KERN_WARNING, - "failed to parse options in superblock: %s", - sbi->s_es->s_mount_opts); + if (sbi->s_es->s_mount_opts[0]) { + char *s_mount_opts = kstrndup(sbi->s_es->s_mount_opts, + sizeof(sbi->s_es->s_mount_opts), + GFP_KERNEL); + if (!s_mount_opts) + goto failed_mount; + if (!parse_options(s_mount_opts, sb, &journal_devnum, + &journal_ioprio, 0)) { + ext4_msg(sb, KERN_WARNING, + "failed to parse options in superblock: %s", + s_mount_opts); + } + kfree(s_mount_opts); } sbi->s_def_mount_opt = sbi->s_mount_opt; if (!parse_options((char *) data, sb, &journal_devnum, @@ -3991,7 +3997,9 @@ no_journal: if (___ratelimit(&ext4_mount_msg_ratelimit, "EXT4-fs mount")) ext4_msg(sb, KERN_INFO, "mounted filesystem with%s. " - "Opts: %s%s%s", descr, sbi->s_es->s_mount_opts, + "Opts: %.*s%s%s", descr, + (int) sizeof(sbi->s_es->s_mount_opts), + sbi->s_es->s_mount_opts, *sbi->s_es->s_mount_opts ? "; " : "", orig_data); if (es->s_error_count) @@ -4061,8 +4069,8 @@ failed_mount: out_fail: sb->s_fs_info = NULL; kfree(sbi->s_blockgroup_lock); +out_free_base: kfree(sbi); -out_free_orig: kfree(orig_data); return err ? err : ret; } From f655b3575c4746679553a46559a97fb4afb29ce1 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Fri, 18 Nov 2016 13:37:47 -0500 Subject: [PATCH 114/199] ext4: add sanity checking to count_overhead() commit c48ae41bafe31e9a66d8be2ced4e42a6b57fa814 upstream. The commit "ext4: sanity check the block and cluster size at mount time" should prevent any problems, but in case the superblock is modified while the file system is mounted, add an extra safety check to make sure we won't overrun the allocated buffer. Signed-off-by: Theodore Ts'o Signed-off-by: Greg Kroah-Hartman --- fs/ext4/super.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 5a5b8c8d4b0d..a052771fbfc1 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -3037,10 +3037,15 @@ static int count_overhead(struct super_block *sb, ext4_group_t grp, ext4_set_bit(s++, buf); count++; } - for (j = ext4_bg_num_gdb(sb, grp); j > 0; j--) { - ext4_set_bit(EXT4_B2C(sbi, s++), buf); - count++; + j = ext4_bg_num_gdb(sb, grp); + if (s + j > EXT4_BLOCKS_PER_GROUP(sb)) { + ext4_error(sb, "Invalid number of block group " + "descriptor blocks: %d", j); + j = EXT4_BLOCKS_PER_GROUP(sb) - s; } + count += j; + for (; j > 0; j--) + ext4_set_bit(EXT4_B2C(sbi, s++), buf); } if (!count) return 0; From 519a30148e23a5a4efc520effa8fb22da5557e6a Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Sat, 10 Dec 2016 09:55:01 -0500 Subject: [PATCH 115/199] ext4: reject inodes with negative size commit 7e6e1ef48fc02f3ac5d0edecbb0c6087cd758d58 upstream. Don't load an inode with a negative size; this causes integer overflow problems in the VFS. [ Added EXT4_ERROR_INODE() to mark file system as corrupted. -TYT] Fixes: a48380f769df (ext4: rename i_dir_acl to i_size_high) Signed-off-by: Darrick J. Wong Signed-off-by: Theodore Ts'o Signed-off-by: Greg Kroah-Hartman --- fs/ext4/inode.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index c71d2941a45b..10690e5ba2eb 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -4175,6 +4175,7 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino) struct inode *inode; journal_t *journal = EXT4_SB(sb)->s_journal; long ret; + loff_t size; int block; uid_t i_uid; gid_t i_gid; @@ -4266,6 +4267,11 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino) ei->i_file_acl |= ((__u64)le16_to_cpu(raw_inode->i_file_acl_high)) << 32; inode->i_size = ext4_isize(raw_inode); + if ((size = i_size_read(inode)) < 0) { + EXT4_ERROR_INODE(inode, "bad i_size value: %lld", size); + ret = -EFSCORRUPTED; + goto bad_inode; + } ei->i_disksize = inode->i_size; #ifdef CONFIG_QUOTA ei->i_reserved_quota = 0; From 36af7cd560b93228cdc691aab613da28ad94e18b Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Sat, 10 Dec 2016 09:56:01 -0500 Subject: [PATCH 116/199] ext4: return -ENOMEM instead of success commit 578620f451f836389424833f1454eeeb2ffc9e9f upstream. We should set the error code if kzalloc() fails. Fixes: 67cf5b09a46f ("ext4: add the basic function for inline data support") Signed-off-by: Dan Carpenter Signed-off-by: Theodore Ts'o Signed-off-by: Greg Kroah-Hartman --- fs/ext4/inline.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/ext4/inline.c b/fs/ext4/inline.c index d884989cc83d..8968a93e2150 100644 --- a/fs/ext4/inline.c +++ b/fs/ext4/inline.c @@ -336,8 +336,10 @@ static int ext4_update_inline_data(handle_t *handle, struct inode *inode, len -= EXT4_MIN_INLINE_DATA_SIZE; value = kzalloc(len, GFP_NOFS); - if (!value) + if (!value) { + error = -ENOMEM; goto out; + } error = ext4_xattr_ibody_get(inode, i.name_index, i.name, value, len); From 3460edfc70c21998f13662dceee9a26bf697d2c3 Mon Sep 17 00:00:00 2001 From: Sergey Karamov Date: Sat, 10 Dec 2016 17:54:58 -0500 Subject: [PATCH 117/199] ext4: do not perform data journaling when data is encrypted commit 73b92a2a5e97d17cc4d5c4fe9d724d3273fb6fd2 upstream. Currently data journalling is incompatible with encryption: enabling both at the same time has never been supported by design, and would result in unpredictable behavior. However, users are not precluded from turning on both features simultaneously. This change programmatically replaces data journaling for encrypted regular files with ordered data journaling mode. Background: Journaling encrypted data has not been supported because it operates on buffer heads of the page in the page cache. Namely, when the commit happens, which could be up to five seconds after caching, the commit thread uses the buffer heads attached to the page to copy the contents of the page to the journal. With encryption, it would have been required to keep the bounce buffer with ciphertext for up to the aforementioned five seconds, since the page cache can only hold plaintext and could not be used for journaling. Alternatively, it would be required to setup the journal to initiate a callback at the commit time to perform deferred encryption - in this case, not only would the data have to be written twice, but it would also have to be encrypted twice. This level of complexity was not justified for a mode that in practice is very rarely used because of the overhead from the data journalling. Solution: If data=journaled has been set as a mount option for a filesystem, or if journaling is enabled on a regular file, do not perform journaling if the file is also encrypted, instead fall back to the data=ordered mode for the file. Rationale: The intent is to allow seamless and proper filesystem operation when journaling and encryption have both been enabled, and have these two conflicting features gracefully resolved by the filesystem. Fixes: 4461471107b7 Signed-off-by: Sergey Karamov Signed-off-by: Theodore Ts'o Signed-off-by: Greg Kroah-Hartman --- fs/ext4/ext4_jbd2.h | 14 ++++++++------ fs/ext4/super.c | 5 +++++ 2 files changed, 13 insertions(+), 6 deletions(-) diff --git a/fs/ext4/ext4_jbd2.h b/fs/ext4/ext4_jbd2.h index 5f5846211095..f817ed58f5ad 100644 --- a/fs/ext4/ext4_jbd2.h +++ b/fs/ext4/ext4_jbd2.h @@ -395,17 +395,19 @@ static inline int ext4_inode_journal_mode(struct inode *inode) return EXT4_INODE_WRITEBACK_DATA_MODE; /* writeback */ /* We do not support data journalling with delayed allocation */ if (!S_ISREG(inode->i_mode) || - test_opt(inode->i_sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA) - return EXT4_INODE_JOURNAL_DATA_MODE; /* journal data */ - if (ext4_test_inode_flag(inode, EXT4_INODE_JOURNAL_DATA) && - !test_opt(inode->i_sb, DELALLOC)) + test_opt(inode->i_sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA || + (ext4_test_inode_flag(inode, EXT4_INODE_JOURNAL_DATA) && + !test_opt(inode->i_sb, DELALLOC))) { + /* We do not support data journalling for encrypted data */ + if (S_ISREG(inode->i_mode) && ext4_encrypted_inode(inode)) + return EXT4_INODE_ORDERED_DATA_MODE; /* ordered */ return EXT4_INODE_JOURNAL_DATA_MODE; /* journal data */ + } if (test_opt(inode->i_sb, DATA_FLAGS) == EXT4_MOUNT_ORDERED_DATA) return EXT4_INODE_ORDERED_DATA_MODE; /* ordered */ if (test_opt(inode->i_sb, DATA_FLAGS) == EXT4_MOUNT_WRITEBACK_DATA) return EXT4_INODE_WRITEBACK_DATA_MODE; /* writeback */ - else - BUG(); + BUG(); } static inline int ext4_should_journal_data(struct inode *inode) diff --git a/fs/ext4/super.c b/fs/ext4/super.c index a052771fbfc1..68640e6f95c5 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -3345,6 +3345,11 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) "both data=journal and dax"); goto failed_mount; } + if (ext4_has_feature_encrypt(sb)) { + ext4_msg(sb, KERN_WARNING, + "encrypted files will use data=ordered " + "instead of data journaling mode"); + } if (test_opt(sb, DELALLOC)) clear_opt(sb, DELALLOC); } else { From 011ded975e347d6ef14f7d08018f3d4750483160 Mon Sep 17 00:00:00 2001 From: Nicolai Stange Date: Sun, 20 Nov 2016 19:57:23 +0100 Subject: [PATCH 118/199] f2fs: set ->owner for debugfs status file's file_operations commit 05e6ea2685c964db1e675a24a4f4e2adc22d2388 upstream. The struct file_operations instance serving the f2fs/status debugfs file lacks an initialization of its ->owner. This means that although that file might have been opened, the f2fs module can still get removed. Any further operation on that opened file, releasing included, will cause accesses to unmapped memory. Indeed, Mike Marshall reported the following: BUG: unable to handle kernel paging request at ffffffffa0307430 IP: [] full_proxy_release+0x24/0x90 <...> Call Trace: [] __fput+0xdf/0x1d0 [] ____fput+0xe/0x10 [] task_work_run+0x8e/0xc0 [] do_exit+0x2ae/0xae0 [] ? __audit_syscall_entry+0xae/0x100 [] ? syscall_trace_enter+0x1ca/0x310 [] do_group_exit+0x44/0xc0 [] SyS_exit_group+0x14/0x20 [] do_syscall_64+0x61/0x150 [] entry_SYSCALL64_slow_path+0x25/0x25 <...> ---[ end trace f22ae883fa3ea6b8 ]--- Fixing recursive fault but reboot is needed! Fix this by initializing the f2fs/status file_operations' ->owner with THIS_MODULE. This will allow debugfs to grab a reference to the f2fs module upon any open on that file, thus preventing it from getting removed. Fixes: 902829aa0b72 ("f2fs: move proc files to debugfs") Reported-by: Mike Marshall Reported-by: Martin Brandenburg Signed-off-by: Nicolai Stange Signed-off-by: Jaegeuk Kim Signed-off-by: Greg Kroah-Hartman --- fs/f2fs/debug.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c index 478e5d54154f..24d6a51b48d1 100644 --- a/fs/f2fs/debug.c +++ b/fs/f2fs/debug.c @@ -352,6 +352,7 @@ static int stat_open(struct inode *inode, struct file *file) } static const struct file_operations stat_fops = { + .owner = THIS_MODULE, .open = stat_open, .read = seq_read, .llseek = seq_lseek, From ffef1630187bd6ed8521a90d8dfcd27bd8c8cf36 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Mon, 14 Nov 2016 14:56:17 -0800 Subject: [PATCH 119/199] loop: return proper error from loop_queue_rq() commit b4a567e8114327518c09f5632339a5954ab975a3 upstream. ->queue_rq() should return one of the BLK_MQ_RQ_QUEUE_* constants, not an errno. Fixes: f4aa4c7bbac6 ("block: loop: convert to per-device workqueue") Signed-off-by: Omar Sandoval Signed-off-by: Jens Axboe Signed-off-by: Greg Kroah-Hartman --- drivers/block/loop.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/block/loop.c b/drivers/block/loop.c index 80cf8add46ff..ab0b2dd3f629 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -1657,7 +1657,7 @@ static int loop_queue_rq(struct blk_mq_hw_ctx *hctx, blk_mq_start_request(bd->rq); if (lo->lo_state != Lo_bound) - return -EIO; + return BLK_MQ_RQ_QUEUE_ERROR; if (lo->use_dio && !(cmd->rq->cmd_flags & (REQ_FLUSH | REQ_DISCARD))) From 14d8e5cae0390301ca582b44a28196dc84b10056 Mon Sep 17 00:00:00 2001 From: Shaohua Li Date: Mon, 12 Dec 2016 16:41:50 -0800 Subject: [PATCH 120/199] mm/vmscan.c: set correct defer count for shrinker commit 5f33a0803bbd781de916f5c7448cbbbbc763d911 upstream. Our system uses significantly more slab memory with memcg enabled with the latest kernel. With 3.10 kernel, slab uses 2G memory, while with 4.6 kernel, 6G memory is used. The shrinker has problem. Let's see we have two memcg for one shrinker. In do_shrink_slab: 1. Check cg1. nr_deferred = 0, assume total_scan = 700. batch size is 1024, then no memory is freed. nr_deferred = 700 2. Check cg2. nr_deferred = 700. Assume freeable = 20, then total_scan = 10 or 40. Let's assume it's 10. No memory is freed. nr_deferred = 10. The deferred share of cg1 is lost in this case. kswapd will free no memory even run above steps again and again. The fix makes sure one memcg's deferred share isn't lost. Link: http://lkml.kernel.org/r/2414be961b5d25892060315fbb56bb19d81d0c07.1476227351.git.shli@fb.com Signed-off-by: Shaohua Li Cc: Johannes Weiner Cc: Michal Hocko Cc: Vladimir Davydov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- mm/vmscan.c | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/mm/vmscan.c b/mm/vmscan.c index de1c59d8daa3..bfc5050cbd01 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -277,6 +277,7 @@ static unsigned long do_shrink_slab(struct shrink_control *shrinkctl, int nid = shrinkctl->nid; long batch_size = shrinker->batch ? shrinker->batch : SHRINK_BATCH; + long scanned = 0, next_deferred; freeable = shrinker->count_objects(shrinker, shrinkctl); if (freeable == 0) @@ -298,7 +299,9 @@ static unsigned long do_shrink_slab(struct shrink_control *shrinkctl, pr_err("shrink_slab: %pF negative objects to delete nr=%ld\n", shrinker->scan_objects, total_scan); total_scan = freeable; - } + next_deferred = nr; + } else + next_deferred = total_scan; /* * We need to avoid excessive windup on filesystem shrinkers @@ -355,17 +358,22 @@ static unsigned long do_shrink_slab(struct shrink_control *shrinkctl, count_vm_events(SLABS_SCANNED, nr_to_scan); total_scan -= nr_to_scan; + scanned += nr_to_scan; cond_resched(); } + if (next_deferred >= scanned) + next_deferred -= scanned; + else + next_deferred = 0; /* * move the unused scan count back into the shrinker in a * manner that handles concurrent updates. If we exhausted the * scan, there is no need to do an update. */ - if (total_scan > 0) - new_nr = atomic_long_add_return(total_scan, + if (next_deferred > 0) + new_nr = atomic_long_add_return(next_deferred, &shrinker->nr_deferred[nid]); else new_nr = atomic_long_read(&shrinker->nr_deferred[nid]); From 0812936b11eac115d58f72e5ce7a97a2b17c16e4 Mon Sep 17 00:00:00 2001 From: Aleksa Sarai Date: Wed, 21 Dec 2016 16:26:24 +1100 Subject: [PATCH 121/199] fs: exec: apply CLOEXEC before changing dumpable task flags commit 613cc2b6f272c1a8ad33aefa21cad77af23139f7 upstream. If you have a process that has set itself to be non-dumpable, and it then undergoes exec(2), any CLOEXEC file descriptors it has open are "exposed" during a race window between the dumpable flags of the process being reset for exec(2) and CLOEXEC being applied to the file descriptors. This can be exploited by a process by attempting to access /proc//fd/... during this window, without requiring CAP_SYS_PTRACE. The race in question is after set_dumpable has been (for get_link, though the trace is basically the same for readlink): [vfs] -> proc_pid_link_inode_operations.get_link -> proc_pid_get_link -> proc_fd_access_allowed -> ptrace_may_access(task, PTRACE_MODE_READ_FSCREDS); Which will return 0, during the race window and CLOEXEC file descriptors will still be open during this window because do_close_on_exec has not been called yet. As a result, the ordering of these calls should be reversed to avoid this race window. This is of particular concern to container runtimes, where joining a PID namespace with file descriptors referring to the host filesystem can result in security issues (since PRCTL_SET_DUMPABLE doesn't protect against access of CLOEXEC file descriptors -- file descriptors which may reference filesystem objects the container shouldn't have access to). Cc: dev@opencontainers.org Cc: # v3.2+ Reported-by: Michael Crosby Signed-off-by: Aleksa Sarai Signed-off-by: Al Viro Signed-off-by: Greg Kroah-Hartman --- fs/exec.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/fs/exec.c b/fs/exec.c index 4040d7b173c0..3479fdd4a6e7 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -19,7 +19,7 @@ * current->executable is only used by the procfs. This allows a dispatch * table to check for several different types of binary formats. We keep * trying until we recognize the file or we run out of supported binary - * formats. + * formats. */ #include @@ -1114,6 +1114,13 @@ int flush_old_exec(struct linux_binprm * bprm) flush_thread(); current->personality &= ~bprm->per_clear; + /* + * We have to apply CLOEXEC before we change whether the process is + * dumpable (in setup_new_exec) to avoid a race with a process in userspace + * trying to access the should-be-closed file descriptors of a process + * undergoing exec(2). + */ + do_close_on_exec(current->files); return 0; out: @@ -1163,7 +1170,6 @@ void setup_new_exec(struct linux_binprm * bprm) group */ current->self_exec_id++; flush_signal_handlers(current, 0); - do_close_on_exec(current->files); } EXPORT_SYMBOL(setup_new_exec); From b35f34f66943fa1b8feadb037d054a141723f7fe Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Wed, 16 Nov 2016 22:06:51 -0600 Subject: [PATCH 122/199] exec: Ensure mm->user_ns contains the execed files commit f84df2a6f268de584a201e8911384a2d244876e3 upstream. When the user namespace support was merged the need to prevent ptrace from revealing the contents of an unreadable executable was overlooked. Correct this oversight by ensuring that the executed file or files are in mm->user_ns, by adjusting mm->user_ns. Use the new function privileged_wrt_inode_uidgid to see if the executable is a member of the user namespace, and as such if having CAP_SYS_PTRACE in the user namespace should allow tracing the executable. If not update mm->user_ns to the parent user namespace until an appropriate parent is found. Reported-by: Jann Horn Fixes: 9e4a36ece652 ("userns: Fail exec for suid and sgid binaries with ids outside our user namespace.") Signed-off-by: "Eric W. Biederman" Signed-off-by: Greg Kroah-Hartman --- fs/exec.c | 20 ++++++++++++++++++-- include/linux/capability.h | 1 + kernel/capability.c | 16 ++++++++++++++-- 3 files changed, 33 insertions(+), 4 deletions(-) diff --git a/fs/exec.c b/fs/exec.c index 3479fdd4a6e7..3a6de10d3891 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -56,6 +56,7 @@ #include #include #include +#include #include #include @@ -1130,8 +1131,22 @@ EXPORT_SYMBOL(flush_old_exec); void would_dump(struct linux_binprm *bprm, struct file *file) { - if (inode_permission(file_inode(file), MAY_READ) < 0) + struct inode *inode = file_inode(file); + if (inode_permission(inode, MAY_READ) < 0) { + struct user_namespace *old, *user_ns; bprm->interp_flags |= BINPRM_FLAGS_ENFORCE_NONDUMP; + + /* Ensure mm->user_ns contains the executable */ + user_ns = old = bprm->mm->user_ns; + while ((user_ns != &init_user_ns) && + !privileged_wrt_inode_uidgid(user_ns, inode)) + user_ns = user_ns->parent; + + if (old != user_ns) { + bprm->mm->user_ns = get_user_ns(user_ns); + put_user_ns(old); + } + } } EXPORT_SYMBOL(would_dump); @@ -1161,7 +1176,6 @@ void setup_new_exec(struct linux_binprm * bprm) !gid_eq(bprm->cred->gid, current_egid())) { current->pdeath_signal = 0; } else { - would_dump(bprm, bprm->file); if (bprm->interp_flags & BINPRM_FLAGS_ENFORCE_NONDUMP) set_dumpable(current->mm, suid_dumpable); } @@ -1593,6 +1607,8 @@ static int do_execveat_common(int fd, struct filename *filename, if (retval < 0) goto out; + would_dump(bprm, bprm->file); + retval = exec_binprm(bprm); if (retval < 0) goto out; diff --git a/include/linux/capability.h b/include/linux/capability.h index 7d38697c7e63..2654f75a4c46 100644 --- a/include/linux/capability.h +++ b/include/linux/capability.h @@ -247,6 +247,7 @@ static inline bool ns_capable_noaudit(struct user_namespace *ns, int cap) return true; } #endif /* CONFIG_MULTIUSER */ +extern bool privileged_wrt_inode_uidgid(struct user_namespace *ns, const struct inode *inode); extern bool capable_wrt_inode_uidgid(const struct inode *inode, int cap); extern bool file_ns_capable(const struct file *file, struct user_namespace *ns, int cap); extern bool ptracer_capable(struct task_struct *tsk, struct user_namespace *ns); diff --git a/kernel/capability.c b/kernel/capability.c index dfa0e4528b0b..4984e1f552eb 100644 --- a/kernel/capability.c +++ b/kernel/capability.c @@ -456,6 +456,19 @@ bool file_ns_capable(const struct file *file, struct user_namespace *ns, } EXPORT_SYMBOL(file_ns_capable); +/** + * privileged_wrt_inode_uidgid - Do capabilities in the namespace work over the inode? + * @ns: The user namespace in question + * @inode: The inode in question + * + * Return true if the inode uid and gid are within the namespace. + */ +bool privileged_wrt_inode_uidgid(struct user_namespace *ns, const struct inode *inode) +{ + return kuid_has_mapping(ns, inode->i_uid) && + kgid_has_mapping(ns, inode->i_gid); +} + /** * capable_wrt_inode_uidgid - Check nsown_capable and uid and gid mapped * @inode: The inode in question @@ -469,8 +482,7 @@ bool capable_wrt_inode_uidgid(const struct inode *inode, int cap) { struct user_namespace *ns = current_user_ns(); - return ns_capable(ns, cap) && kuid_has_mapping(ns, inode->i_uid) && - kgid_has_mapping(ns, inode->i_gid); + return ns_capable(ns, cap) && privileged_wrt_inode_uidgid(ns, inode); } EXPORT_SYMBOL(capable_wrt_inode_uidgid); From c53af76d5de1af844a71e673ae4a02f1786c1b9c Mon Sep 17 00:00:00 2001 From: Felipe Balbi Date: Wed, 28 Sep 2016 12:33:31 +0300 Subject: [PATCH 123/199] usb: gadget: composite: always set ep->mult to a sensible value commit eaa496ffaaf19591fe471a36cef366146eeb9153 upstream. ep->mult is supposed to be set to Isochronous and Interrupt Endapoint's multiplier value. This value is computed from different places depending on the link speed. If we're dealing with HighSpeed, then it's part of bits [12:11] of wMaxPacketSize. This case wasn't taken into consideration before. While at that, also make sure the ep->mult defaults to one so drivers can use it unconditionally and assume they'll never multiply ep->maxpacket to zero. Cc: Signed-off-by: Felipe Balbi Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/composite.c | 9 +++++++-- drivers/usb/gadget/function/uvc_video.c | 2 +- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/drivers/usb/gadget/composite.c b/drivers/usb/gadget/composite.c index e9bc1d392783..739b5e2d8adb 100644 --- a/drivers/usb/gadget/composite.c +++ b/drivers/usb/gadget/composite.c @@ -148,7 +148,12 @@ ep_found: _ep->desc = chosen_desc; _ep->comp_desc = NULL; _ep->maxburst = 0; - _ep->mult = 0; + _ep->mult = 1; + + if (g->speed == USB_SPEED_HIGH && (usb_endpoint_xfer_isoc(_ep->desc) || + usb_endpoint_xfer_int(_ep->desc))) + _ep->mult = usb_endpoint_maxp(_ep->desc) & 0x7ff; + if (!want_comp_desc) return 0; @@ -165,7 +170,7 @@ ep_found: switch (usb_endpoint_type(_ep->desc)) { case USB_ENDPOINT_XFER_ISOC: /* mult: bits 1:0 of bmAttributes */ - _ep->mult = comp_desc->bmAttributes & 0x3; + _ep->mult = (comp_desc->bmAttributes & 0x3) + 1; case USB_ENDPOINT_XFER_BULK: case USB_ENDPOINT_XFER_INT: _ep->maxburst = comp_desc->bMaxBurst + 1; diff --git a/drivers/usb/gadget/function/uvc_video.c b/drivers/usb/gadget/function/uvc_video.c index 3d0d5d94a62f..0f01c04d7cbd 100644 --- a/drivers/usb/gadget/function/uvc_video.c +++ b/drivers/usb/gadget/function/uvc_video.c @@ -243,7 +243,7 @@ uvc_video_alloc_requests(struct uvc_video *video) req_size = video->ep->maxpacket * max_t(unsigned int, video->ep->maxburst, 1) - * (video->ep->mult + 1); + * (video->ep->mult); for (i = 0; i < UVC_NUM_REQUESTS; ++i) { video->req_buffer[i] = kmalloc(req_size, GFP_KERNEL); From f0898dc2852b2e12afabff150bbf2390efec0395 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Fri, 28 Oct 2016 17:18:48 -0700 Subject: [PATCH 124/199] blk-mq: Do not invoke .queue_rq() for a stopped queue commit bc27c01b5c46d3bfec42c96537c7a3fae0bb2cc4 upstream. The meaning of the BLK_MQ_S_STOPPED flag is "do not call .queue_rq()". Hence modify blk_mq_make_request() such that requests are queued instead of issued if a queue has been stopped. Reported-by: Ming Lei Signed-off-by: Bart Van Assche Reviewed-by: Christoph Hellwig Reviewed-by: Ming Lei Reviewed-by: Hannes Reinecke Reviewed-by: Johannes Thumshirn Reviewed-by: Sagi Grimberg Signed-off-by: Jens Axboe Signed-off-by: Greg Kroah-Hartman --- block/blk-mq.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/block/blk-mq.c b/block/blk-mq.c index c3e461ec40e4..9f99a01b00e8 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -1313,9 +1313,9 @@ static blk_qc_t blk_mq_make_request(struct request_queue *q, struct bio *bio) blk_mq_put_ctx(data.ctx); if (!old_rq) goto done; - if (!blk_mq_direct_issue_request(old_rq, &cookie)) - goto done; - blk_mq_insert_request(old_rq, false, true, true); + if (test_bit(BLK_MQ_S_STOPPED, &data.hctx->state) || + blk_mq_direct_issue_request(old_rq, &cookie) != 0) + blk_mq_insert_request(old_rq, false, true, true); goto done; } From 9188611f81e3046cad7703303dbcf6f26c8e4d4d Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Mon, 8 Aug 2016 14:09:27 +0000 Subject: [PATCH 125/199] dm flakey: return -EINVAL on interval bounds error in flakey_ctr() commit bff7e067ee518f9ed7e1cbc63e4c9e01670d0b71 upstream. Fix to return error code -EINVAL instead of 0, as is done elsewhere in this function. Fixes: e80d1c805a3b ("dm: do not override error code returned from dm_get_device()") Signed-off-by: Wei Yongjun Signed-off-by: Mike Snitzer Signed-off-by: Greg Kroah-Hartman --- drivers/md/dm-flakey.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/md/dm-flakey.c b/drivers/md/dm-flakey.c index 8e9e928dafba..78f403b45ab3 100644 --- a/drivers/md/dm-flakey.c +++ b/drivers/md/dm-flakey.c @@ -200,11 +200,13 @@ static int flakey_ctr(struct dm_target *ti, unsigned int argc, char **argv) if (!(fc->up_interval + fc->down_interval)) { ti->error = "Total (up + down) interval is zero"; + r = -EINVAL; goto bad; } if (fc->up_interval + fc->down_interval < fc->up_interval) { ti->error = "Interval overflow"; + r = -EINVAL; goto bad; } From 85290a163b0661c2d7576c18ee49b32bf10ec32f Mon Sep 17 00:00:00 2001 From: Ondrej Kozina Date: Wed, 2 Nov 2016 15:02:08 +0100 Subject: [PATCH 126/199] dm crypt: mark key as invalid until properly loaded commit 265e9098bac02bc5e36cda21fdbad34cb5b2f48d upstream. In crypt_set_key(), if a failure occurs while replacing the old key (e.g. tfm->setkey() fails) the key must not have DM_CRYPT_KEY_VALID flag set. Otherwise, the crypto layer would have an invalid key that still has DM_CRYPT_KEY_VALID flag set. Signed-off-by: Ondrej Kozina Reviewed-by: Mikulas Patocka Signed-off-by: Mike Snitzer Signed-off-by: Greg Kroah-Hartman --- drivers/md/dm-crypt.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c index 5cac11d7a876..de628883ee3d 100644 --- a/drivers/md/dm-crypt.c +++ b/drivers/md/dm-crypt.c @@ -1500,12 +1500,15 @@ static int crypt_set_key(struct crypt_config *cc, char *key) if (!cc->key_size && strcmp(key, "-")) goto out; + /* clear the flag since following operations may invalidate previously valid key */ + clear_bit(DM_CRYPT_KEY_VALID, &cc->flags); + if (cc->key_size && crypt_decode_key(cc->key, key, cc->key_size) < 0) goto out; - set_bit(DM_CRYPT_KEY_VALID, &cc->flags); - r = crypt_setkey_allcpus(cc); + if (!r) + set_bit(DM_CRYPT_KEY_VALID, &cc->flags); out: /* Hex key string not needed after here, so wipe it. */ From 701ec6e5cea7ed6508999a25934af807767fe545 Mon Sep 17 00:00:00 2001 From: Benjamin Marzinski Date: Wed, 30 Nov 2016 17:56:14 -0600 Subject: [PATCH 127/199] dm space map metadata: fix 'struct sm_metadata' leak on failed create commit 314c25c56c1ee5026cf99c570bdfe01847927acb upstream. In dm_sm_metadata_create() we temporarily change the dm_space_map operations from 'ops' (whose .destroy function deallocates the sm_metadata) to 'bootstrap_ops' (whose .destroy function doesn't). If dm_sm_metadata_create() fails in sm_ll_new_metadata() or sm_ll_extend(), it exits back to dm_tm_create_internal(), which calls dm_sm_destroy() with the intention of freeing the sm_metadata, but it doesn't (because the dm_space_map operations is still set to 'bootstrap_ops'). Fix this by setting the dm_space_map operations back to 'ops' if dm_sm_metadata_create() fails when it is set to 'bootstrap_ops'. Signed-off-by: Benjamin Marzinski Acked-by: Joe Thornber Signed-off-by: Mike Snitzer Signed-off-by: Greg Kroah-Hartman --- .../md/persistent-data/dm-space-map-metadata.c | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/drivers/md/persistent-data/dm-space-map-metadata.c b/drivers/md/persistent-data/dm-space-map-metadata.c index 7e44005595c1..20557e2c60c6 100644 --- a/drivers/md/persistent-data/dm-space-map-metadata.c +++ b/drivers/md/persistent-data/dm-space-map-metadata.c @@ -775,16 +775,14 @@ int dm_sm_metadata_create(struct dm_space_map *sm, memcpy(&smm->sm, &bootstrap_ops, sizeof(smm->sm)); r = sm_ll_new_metadata(&smm->ll, tm); - if (r) - return r; - - if (nr_blocks > DM_SM_METADATA_MAX_BLOCKS) - nr_blocks = DM_SM_METADATA_MAX_BLOCKS; - r = sm_ll_extend(&smm->ll, nr_blocks); - if (r) - return r; - + if (!r) { + if (nr_blocks > DM_SM_METADATA_MAX_BLOCKS) + nr_blocks = DM_SM_METADATA_MAX_BLOCKS; + r = sm_ll_extend(&smm->ll, nr_blocks); + } memcpy(&smm->sm, &ops, sizeof(smm->sm)); + if (r) + return r; /* * Now we need to update the newly created data structures with the From 6dcb01e7833369acd7ff3b54f49a0a99f4fdf6c2 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Fri, 25 Nov 2016 16:54:06 +0100 Subject: [PATCH 128/199] ASoC: intel: Fix crash at suspend/resume without card registration commit 2fc995a87f2efcd803438f07bfecd35cc3d90d32 upstream. When ASoC Intel SST Medfield driver is probed but without codec / card assigned, it causes an Oops and freezes the kernel at suspend/resume, PM: Suspending system (freeze) Suspending console(s) (use no_console_suspend to debug) BUG: unable to handle kernel NULL pointer dereference at 0000000000000018 IP: [] sst_soc_prepare+0x19/0xa0 [snd_soc_sst_mfld_platform] Oops: 0000 [#1] PREEMPT SMP CPU: 0 PID: 1552 Comm: systemd-sleep Tainted: G W 4.9.0-rc6-1.g5f5c2ad-default #1 Call Trace: [] dpm_prepare+0x209/0x460 [] dpm_suspend_start+0x11/0x60 [] suspend_devices_and_enter+0xb2/0x710 [] pm_suspend+0x30e/0x390 [] state_store+0x8a/0x90 [] kobj_attr_store+0xf/0x20 [] sysfs_kf_write+0x37/0x40 [] kernfs_fop_write+0x11c/0x1b0 [] __vfs_write+0x28/0x140 [] ? apparmor_file_permission+0x18/0x20 [] ? security_file_permission+0x3b/0xc0 [] vfs_write+0xb5/0x1a0 [] SyS_write+0x46/0xa0 [] entry_SYSCALL_64_fastpath+0x1e/0xad Add proper NULL checks in the PM code of mdfld driver. Signed-off-by: Takashi Iwai Acked-by: Vinod Koul Signed-off-by: Mark Brown Signed-off-by: Greg Kroah-Hartman --- sound/soc/intel/atom/sst-mfld-platform-pcm.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/sound/soc/intel/atom/sst-mfld-platform-pcm.c b/sound/soc/intel/atom/sst-mfld-platform-pcm.c index 0487cfaac538..2b96b11fbe71 100644 --- a/sound/soc/intel/atom/sst-mfld-platform-pcm.c +++ b/sound/soc/intel/atom/sst-mfld-platform-pcm.c @@ -762,6 +762,9 @@ static int sst_soc_prepare(struct device *dev) struct sst_data *drv = dev_get_drvdata(dev); int i; + if (!drv->soc_card) + return 0; + /* suspend all pcms first */ snd_soc_suspend(drv->soc_card->dev); snd_soc_poweroff(drv->soc_card->dev); @@ -784,6 +787,9 @@ static void sst_soc_complete(struct device *dev) struct sst_data *drv = dev_get_drvdata(dev); int i; + if (!drv->soc_card) + return; + /* restart SSPs */ for (i = 0; i < drv->soc_card->num_rtd; i++) { struct snd_soc_dai *dai = drv->soc_card->rtd[i].cpu_dai; From f0b715409cb9cf7e21e690f9b163047739761962 Mon Sep 17 00:00:00 2001 From: Pavel Shilovsky Date: Fri, 4 Nov 2016 11:50:31 -0700 Subject: [PATCH 129/199] CIFS: Fix a possible memory corruption during reconnect commit 53e0e11efe9289535b060a51d4cf37c25e0d0f2b upstream. We can not unlock/lock cifs_tcp_ses_lock while walking through ses and tcon lists because it can corrupt list iterator pointers and a tcon structure can be released if we don't hold an extra reference. Fix it by moving a reconnect process to a separate delayed work and acquiring a reference to every tcon that needs to be reconnected. Also do not send an echo request on newly established connections. Signed-off-by: Pavel Shilovsky Signed-off-by: Greg Kroah-Hartman --- fs/cifs/cifsglob.h | 3 ++ fs/cifs/cifsproto.h | 3 ++ fs/cifs/connect.c | 34 +++++++++++++++----- fs/cifs/smb2pdu.c | 75 ++++++++++++++++++++++++++++++--------------- fs/cifs/smb2proto.h | 1 + 5 files changed, 85 insertions(+), 31 deletions(-) diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h index c669a1471395..b76883606e4b 100644 --- a/fs/cifs/cifsglob.h +++ b/fs/cifs/cifsglob.h @@ -627,6 +627,8 @@ struct TCP_Server_Info { #ifdef CONFIG_CIFS_SMB2 unsigned int max_read; unsigned int max_write; + struct delayed_work reconnect; /* reconnect workqueue job */ + struct mutex reconnect_mutex; /* prevent simultaneous reconnects */ #endif /* CONFIG_CIFS_SMB2 */ }; @@ -826,6 +828,7 @@ cap_unix(struct cifs_ses *ses) struct cifs_tcon { struct list_head tcon_list; int tc_count; + struct list_head rlist; /* reconnect list */ struct list_head openFileList; spinlock_t open_file_lock; /* protects list above */ struct cifs_ses *ses; /* pointer to session associated with */ diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h index c63fd1dde25b..54590fd33df1 100644 --- a/fs/cifs/cifsproto.h +++ b/fs/cifs/cifsproto.h @@ -205,6 +205,9 @@ extern void cifs_add_pending_open_locked(struct cifs_fid *fid, struct tcon_link *tlink, struct cifs_pending_open *open); extern void cifs_del_pending_open(struct cifs_pending_open *open); +extern void cifs_put_tcp_session(struct TCP_Server_Info *server, + int from_reconnect); +extern void cifs_put_tcon(struct cifs_tcon *tcon); #if IS_ENABLED(CONFIG_CIFS_DFS_UPCALL) extern void cifs_dfs_release_automount_timer(void); diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index 812a8cb07c63..5d59f25521ce 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -52,6 +52,9 @@ #include "nterr.h" #include "rfc1002pdu.h" #include "fscache.h" +#ifdef CONFIG_CIFS_SMB2 +#include "smb2proto.h" +#endif #define CIFS_PORT 445 #define RFC1001_PORT 139 @@ -2113,8 +2116,8 @@ cifs_find_tcp_session(struct smb_vol *vol) return NULL; } -static void -cifs_put_tcp_session(struct TCP_Server_Info *server) +void +cifs_put_tcp_session(struct TCP_Server_Info *server, int from_reconnect) { struct task_struct *task; @@ -2131,6 +2134,19 @@ cifs_put_tcp_session(struct TCP_Server_Info *server) cancel_delayed_work_sync(&server->echo); +#ifdef CONFIG_CIFS_SMB2 + if (from_reconnect) + /* + * Avoid deadlock here: reconnect work calls + * cifs_put_tcp_session() at its end. Need to be sure + * that reconnect work does nothing with server pointer after + * that step. + */ + cancel_delayed_work(&server->reconnect); + else + cancel_delayed_work_sync(&server->reconnect); +#endif + spin_lock(&GlobalMid_Lock); server->tcpStatus = CifsExiting; spin_unlock(&GlobalMid_Lock); @@ -2195,6 +2211,10 @@ cifs_get_tcp_session(struct smb_vol *volume_info) INIT_LIST_HEAD(&tcp_ses->tcp_ses_list); INIT_LIST_HEAD(&tcp_ses->smb_ses_list); INIT_DELAYED_WORK(&tcp_ses->echo, cifs_echo_request); +#ifdef CONFIG_CIFS_SMB2 + INIT_DELAYED_WORK(&tcp_ses->reconnect, smb2_reconnect_server); + mutex_init(&tcp_ses->reconnect_mutex); +#endif memcpy(&tcp_ses->srcaddr, &volume_info->srcaddr, sizeof(tcp_ses->srcaddr)); memcpy(&tcp_ses->dstaddr, &volume_info->dstaddr, @@ -2347,7 +2367,7 @@ cifs_put_smb_ses(struct cifs_ses *ses) spin_unlock(&cifs_tcp_ses_lock); sesInfoFree(ses); - cifs_put_tcp_session(server); + cifs_put_tcp_session(server, 0); } #ifdef CONFIG_KEYS @@ -2521,7 +2541,7 @@ cifs_get_smb_ses(struct TCP_Server_Info *server, struct smb_vol *volume_info) mutex_unlock(&ses->session_mutex); /* existing SMB ses has a server reference already */ - cifs_put_tcp_session(server); + cifs_put_tcp_session(server, 0); free_xid(xid); return ses; } @@ -2611,7 +2631,7 @@ cifs_find_tcon(struct cifs_ses *ses, const char *unc) return NULL; } -static void +void cifs_put_tcon(struct cifs_tcon *tcon) { unsigned int xid; @@ -3767,7 +3787,7 @@ mount_fail_check: else if (ses) cifs_put_smb_ses(ses); else - cifs_put_tcp_session(server); + cifs_put_tcp_session(server, 0); bdi_destroy(&cifs_sb->bdi); } @@ -4078,7 +4098,7 @@ cifs_construct_tcon(struct cifs_sb_info *cifs_sb, kuid_t fsuid) ses = cifs_get_smb_ses(master_tcon->ses->server, vol_info); if (IS_ERR(ses)) { tcon = (struct cifs_tcon *)ses; - cifs_put_tcp_session(master_tcon->ses->server); + cifs_put_tcp_session(master_tcon->ses->server, 0); goto out; } diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c index 0dbbdf5e4aee..dbfe7310b41b 100644 --- a/fs/cifs/smb2pdu.c +++ b/fs/cifs/smb2pdu.c @@ -1822,6 +1822,54 @@ smb2_echo_callback(struct mid_q_entry *mid) add_credits(server, credits_received, CIFS_ECHO_OP); } +void smb2_reconnect_server(struct work_struct *work) +{ + struct TCP_Server_Info *server = container_of(work, + struct TCP_Server_Info, reconnect.work); + struct cifs_ses *ses; + struct cifs_tcon *tcon, *tcon2; + struct list_head tmp_list; + int tcon_exist = false; + + /* Prevent simultaneous reconnects that can corrupt tcon->rlist list */ + mutex_lock(&server->reconnect_mutex); + + INIT_LIST_HEAD(&tmp_list); + cifs_dbg(FYI, "Need negotiate, reconnecting tcons\n"); + + spin_lock(&cifs_tcp_ses_lock); + list_for_each_entry(ses, &server->smb_ses_list, smb_ses_list) { + list_for_each_entry(tcon, &ses->tcon_list, tcon_list) { + if (tcon->need_reconnect) { + tcon->tc_count++; + list_add_tail(&tcon->rlist, &tmp_list); + tcon_exist = true; + } + } + } + /* + * Get the reference to server struct to be sure that the last call of + * cifs_put_tcon() in the loop below won't release the server pointer. + */ + if (tcon_exist) + server->srv_count++; + + spin_unlock(&cifs_tcp_ses_lock); + + list_for_each_entry_safe(tcon, tcon2, &tmp_list, rlist) { + smb2_reconnect(SMB2_ECHO, tcon); + list_del_init(&tcon->rlist); + cifs_put_tcon(tcon); + } + + cifs_dbg(FYI, "Reconnecting tcons finished\n"); + mutex_unlock(&server->reconnect_mutex); + + /* now we can safely release srv struct */ + if (tcon_exist) + cifs_put_tcp_session(server, 1); +} + int SMB2_echo(struct TCP_Server_Info *server) { @@ -1834,32 +1882,11 @@ SMB2_echo(struct TCP_Server_Info *server) cifs_dbg(FYI, "In echo request\n"); if (server->tcpStatus == CifsNeedNegotiate) { - struct list_head *tmp, *tmp2; - struct cifs_ses *ses; - struct cifs_tcon *tcon; - - cifs_dbg(FYI, "Need negotiate, reconnecting tcons\n"); - spin_lock(&cifs_tcp_ses_lock); - list_for_each(tmp, &server->smb_ses_list) { - ses = list_entry(tmp, struct cifs_ses, smb_ses_list); - list_for_each(tmp2, &ses->tcon_list) { - tcon = list_entry(tmp2, struct cifs_tcon, - tcon_list); - /* add check for persistent handle reconnect */ - if (tcon && tcon->need_reconnect) { - spin_unlock(&cifs_tcp_ses_lock); - rc = smb2_reconnect(SMB2_ECHO, tcon); - spin_lock(&cifs_tcp_ses_lock); - } - } - } - spin_unlock(&cifs_tcp_ses_lock); + /* No need to send echo on newly established connections */ + queue_delayed_work(cifsiod_wq, &server->reconnect, 0); + return rc; } - /* if no session, renegotiate failed above */ - if (server->tcpStatus == CifsNeedNegotiate) - return -EIO; - rc = small_smb2_init(SMB2_ECHO, NULL, (void **)&req); if (rc) return rc; diff --git a/fs/cifs/smb2proto.h b/fs/cifs/smb2proto.h index 9bc59f9c12fb..0a406ae78129 100644 --- a/fs/cifs/smb2proto.h +++ b/fs/cifs/smb2proto.h @@ -95,6 +95,7 @@ extern int smb2_open_file(const unsigned int xid, extern int smb2_unlock_range(struct cifsFileInfo *cfile, struct file_lock *flock, const unsigned int xid); extern int smb2_push_mandatory_locks(struct cifsFileInfo *cfile); +extern void smb2_reconnect_server(struct work_struct *work); /* * SMB2 Worker functions - most of protocol specific implementation details From a9c69e1528976adae719dd5e58a571e8ad90efba Mon Sep 17 00:00:00 2001 From: Pavel Shilovsky Date: Tue, 29 Nov 2016 11:30:58 -0800 Subject: [PATCH 130/199] CIFS: Fix missing nls unload in smb2_reconnect() commit 4772c79599564bd08ee6682715a7d3516f67433f upstream. Acked-by: Sachin Prabhu Signed-off-by: Pavel Shilovsky Signed-off-by: Greg Kroah-Hartman --- fs/cifs/smb2pdu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c index dbfe7310b41b..2fa754c5fd62 100644 --- a/fs/cifs/smb2pdu.c +++ b/fs/cifs/smb2pdu.c @@ -278,7 +278,7 @@ out: case SMB2_CHANGE_NOTIFY: case SMB2_QUERY_INFO: case SMB2_SET_INFO: - return -EAGAIN; + rc = -EAGAIN; } unload_nls(nls_codepage); return rc; From 597f9c03b2d517545518c0e4aa23fce43bbd870c Mon Sep 17 00:00:00 2001 From: Pavel Shilovsky Date: Tue, 29 Nov 2016 16:14:43 -0800 Subject: [PATCH 131/199] CIFS: Fix a possible memory corruption in push locks commit e3d240e9d505fc67f8f8735836df97a794bbd946 upstream. If maxBuf is not 0 but less than a size of SMB2 lock structure we can end up with a memory corruption. Signed-off-by: Pavel Shilovsky Signed-off-by: Greg Kroah-Hartman --- fs/cifs/smb2file.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/cifs/smb2file.c b/fs/cifs/smb2file.c index f9e766f464be..b2aff0c6f22c 100644 --- a/fs/cifs/smb2file.c +++ b/fs/cifs/smb2file.c @@ -260,7 +260,7 @@ smb2_push_mandatory_locks(struct cifsFileInfo *cfile) * and check it for zero before using. */ max_buf = tlink_tcon(cfile->tlink)->ses->server->maxBuf; - if (!max_buf) { + if (max_buf < sizeof(struct smb2_lock_element)) { free_xid(xid); return -EINVAL; } From f2b8b3455b22405237110d929f107318a535a480 Mon Sep 17 00:00:00 2001 From: Konstantin Khlebnikov Date: Wed, 14 Dec 2016 15:04:04 -0800 Subject: [PATCH 132/199] kernel/watchdog: use nmi registers snapshot in hardlockup handler commit 4d1f0fb096aedea7bb5489af93498a82e467c480 upstream. NMI handler doesn't call set_irq_regs(), it's set only by normal IRQ. Thus get_irq_regs() returns NULL or stale registers snapshot with IP/SP pointing to the code interrupted by IRQ which was interrupted by NMI. NULL isn't a problem: in this case watchdog calls dump_stack() and prints full stack trace including NMI. But if we're stuck in IRQ handler then NMI watchlog will print stack trace without IRQ part at all. This patch uses registers snapshot passed into NMI handler as arguments: these registers point exactly to the instruction interrupted by NMI. Fixes: 55537871ef66 ("kernel/watchdog.c: perform all-CPU backtrace in case of hard lockup") Link: http://lkml.kernel.org/r/146771764784.86724.6006627197118544150.stgit@buzz Signed-off-by: Konstantin Khlebnikov Cc: Jiri Kosina Cc: Ulrich Obergfell Cc: Aaron Tomlin Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- kernel/watchdog.c | 1 - 1 file changed, 1 deletion(-) diff --git a/kernel/watchdog.c b/kernel/watchdog.c index 198137b1cadc..c1e0b5f429b6 100644 --- a/kernel/watchdog.c +++ b/kernel/watchdog.c @@ -328,7 +328,6 @@ static void watchdog_overflow_callback(struct perf_event *event, */ if (is_hardlockup()) { int this_cpu = smp_processor_id(); - struct pt_regs *regs = get_irq_regs(); /* only print hardlockups once */ if (__this_cpu_read(hard_watchdog_warn) == true) From f93777c915446c772761b7d8f6eb7ef49eca4e63 Mon Sep 17 00:00:00 2001 From: Douglas Anderson Date: Wed, 14 Dec 2016 15:05:49 -0800 Subject: [PATCH 133/199] kernel/debug/debug_core.c: more properly delay for secondary CPUs commit 2d13bb6494c807bcf3f78af0e96c0b8615a94385 upstream. We've got a delay loop waiting for secondary CPUs. That loop uses loops_per_jiffy. However, loops_per_jiffy doesn't actually mean how many tight loops make up a jiffy on all architectures. It is quite common to see things like this in the boot log: Calibrating delay loop (skipped), value calculated using timer frequency.. 48.00 BogoMIPS (lpj=24000) In my case I was seeing lots of cases where other CPUs timed out entering the debugger only to print their stack crawls shortly after the kdb> prompt was written. Elsewhere in kgdb we already use udelay(), so that should be safe enough to use to implement our timeout. We'll delay 1 ms for 1000 times, which should give us a full second of delay (just like the old code wanted) but allow us to notice that we're done every 1 ms. [akpm@linux-foundation.org: simplifications, per Daniel] Link: http://lkml.kernel.org/r/1477091361-2039-1-git-send-email-dianders@chromium.org Signed-off-by: Douglas Anderson Reviewed-by: Daniel Thompson Cc: Jason Wessel Cc: Brian Norris Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- kernel/debug/debug_core.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kernel/debug/debug_core.c b/kernel/debug/debug_core.c index 0874e2edd275..79517e5549f1 100644 --- a/kernel/debug/debug_core.c +++ b/kernel/debug/debug_core.c @@ -598,11 +598,11 @@ return_normal: /* * Wait for the other CPUs to be notified and be waiting for us: */ - time_left = loops_per_jiffy * HZ; + time_left = MSEC_PER_SEC; while (kgdb_do_roundup && --time_left && (atomic_read(&masters_in_kgdb) + atomic_read(&slaves_in_kgdb)) != online_cpus) - cpu_relax(); + udelay(1000); if (!time_left) pr_crit("Timed out waiting for secondary CPUs.\n"); From 00a0de085d6011fc4f5a94a428da529083c95380 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Wed, 26 Oct 2016 16:28:45 -0600 Subject: [PATCH 134/199] tpm xen: Remove bogus tpm_chip_unregister commit 1f0f30e404b3d8f4597a2d9b77fba55452f8fd0e upstream. tpm_chip_unregister can only be called after tpm_chip_register. devm manages the allocation so no unwind is needed here. Fixes: afb5abc262e96 ("tpm: two-phase chip management functions") Reviewed-by: Jarkko Sakkinen Signed-off-by: Jarkko Sakkinen Signed-off-by: Greg Kroah-Hartman --- drivers/char/tpm/xen-tpmfront.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/char/tpm/xen-tpmfront.c b/drivers/char/tpm/xen-tpmfront.c index 3111f2778079..849f2e29c243 100644 --- a/drivers/char/tpm/xen-tpmfront.c +++ b/drivers/char/tpm/xen-tpmfront.c @@ -305,7 +305,6 @@ static int tpmfront_probe(struct xenbus_device *dev, rv = setup_ring(dev, priv); if (rv) { chip = dev_get_drvdata(&dev->dev); - tpm_chip_unregister(chip); ring_free(priv); return rv; } From 9397e79c4b2dc3ef0666b9a832941e87e701eeac Mon Sep 17 00:00:00 2001 From: Boris Ostrovsky Date: Mon, 21 Nov 2016 09:56:06 -0500 Subject: [PATCH 135/199] xen/gntdev: Use VM_MIXEDMAP instead of VM_IO to avoid NUMA balancing commit 30faaafdfa0c754c91bac60f216c9f34a2bfdf7e upstream. Commit 9c17d96500f7 ("xen/gntdev: Grant maps should not be subject to NUMA balancing") set VM_IO flag to prevent grant maps from being subjected to NUMA balancing. It was discovered recently that this flag causes get_user_pages() to always fail with -EFAULT. check_vma_flags __get_user_pages __get_user_pages_locked __get_user_pages_unlocked get_user_pages_fast iov_iter_get_pages dio_refill_pages do_direct_IO do_blockdev_direct_IO do_blockdev_direct_IO ext4_direct_IO_read generic_file_read_iter aio_run_iocb (which can happen if guest's vdisk has direct-io-safe option). To avoid this let's use VM_MIXEDMAP flag instead --- it prevents NUMA balancing just as VM_IO does and has no effect on check_vma_flags(). Reported-by: Olaf Hering Suggested-by: Hugh Dickins Signed-off-by: Boris Ostrovsky Acked-by: Hugh Dickins Tested-by: Olaf Hering Signed-off-by: Juergen Gross Signed-off-by: Greg Kroah-Hartman --- drivers/xen/gntdev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/xen/gntdev.c b/drivers/xen/gntdev.c index 1be5dd048622..308600adf6e0 100644 --- a/drivers/xen/gntdev.c +++ b/drivers/xen/gntdev.c @@ -804,7 +804,7 @@ static int gntdev_mmap(struct file *flip, struct vm_area_struct *vma) vma->vm_ops = &gntdev_vmops; - vma->vm_flags |= VM_DONTEXPAND | VM_DONTDUMP | VM_IO; + vma->vm_flags |= VM_DONTEXPAND | VM_DONTDUMP | VM_MIXEDMAP; if (use_ptemod) vma->vm_flags |= VM_DONTCOPY; From fdb17ddd0a499eaf08261dcedb0aee496297526a Mon Sep 17 00:00:00 2001 From: Julien Grall Date: Wed, 7 Dec 2016 12:24:40 +0000 Subject: [PATCH 136/199] arm/xen: Use alloc_percpu rather than __alloc_percpu commit 24d5373dda7c00a438d26016bce140299fae675e upstream. The function xen_guest_init is using __alloc_percpu with an alignment which are not power of two. However, the percpu allocator never supported alignments which are not power of two and has always behaved incorectly in thise case. Commit 3ca45a4 "percpu: ensure requested alignment is power of two" introduced a check which trigger a warning [1] when booting linux-next on Xen. But in reality this bug was always present. This can be fixed by replacing the call to __alloc_percpu with alloc_percpu. The latter will use an alignment which are a power of two. [1] [ 0.023921] illegal size (48) or align (48) for percpu allocation [ 0.024167] ------------[ cut here ]------------ [ 0.024344] WARNING: CPU: 0 PID: 1 at linux/mm/percpu.c:892 pcpu_alloc+0x88/0x6c0 [ 0.024584] Modules linked in: [ 0.024708] [ 0.024804] CPU: 0 PID: 1 Comm: swapper/0 Not tainted 4.9.0-rc7-next-20161128 #473 [ 0.025012] Hardware name: Foundation-v8A (DT) [ 0.025162] task: ffff80003d870000 task.stack: ffff80003d844000 [ 0.025351] PC is at pcpu_alloc+0x88/0x6c0 [ 0.025490] LR is at pcpu_alloc+0x88/0x6c0 [ 0.025624] pc : [] lr : [] pstate: 60000045 [ 0.025830] sp : ffff80003d847cd0 [ 0.025946] x29: ffff80003d847cd0 x28: 0000000000000000 [ 0.026147] x27: 0000000000000000 x26: 0000000000000000 [ 0.026348] x25: 0000000000000000 x24: 0000000000000000 [ 0.026549] x23: 0000000000000000 x22: 00000000024000c0 [ 0.026752] x21: ffff000008e97000 x20: 0000000000000000 [ 0.026953] x19: 0000000000000030 x18: 0000000000000010 [ 0.027155] x17: 0000000000000a3f x16: 00000000deadbeef [ 0.027357] x15: 0000000000000006 x14: ffff000088f79c3f [ 0.027573] x13: ffff000008f79c4d x12: 0000000000000041 [ 0.027782] x11: 0000000000000006 x10: 0000000000000042 [ 0.027995] x9 : ffff80003d847a40 x8 : 6f697461636f6c6c [ 0.028208] x7 : 6120757063726570 x6 : ffff000008f79c84 [ 0.028419] x5 : 0000000000000005 x4 : 0000000000000000 [ 0.028628] x3 : 0000000000000000 x2 : 000000000000017f [ 0.028840] x1 : ffff80003d870000 x0 : 0000000000000035 [ 0.029056] [ 0.029152] ---[ end trace 0000000000000000 ]--- [ 0.029297] Call trace: [ 0.029403] Exception stack(0xffff80003d847b00 to 0xffff80003d847c30) [ 0.029621] 7b00: 0000000000000030 0001000000000000 ffff80003d847cd0 ffff00000818e678 [ 0.029901] 7b20: 0000000000000002 0000000000000004 ffff000008f7c060 0000000000000035 [ 0.030153] 7b40: ffff000008f79000 ffff000008c4cd88 ffff80003d847bf0 ffff000008101778 [ 0.030402] 7b60: 0000000000000030 0000000000000000 ffff000008e97000 00000000024000c0 [ 0.030647] 7b80: 0000000000000000 0000000000000000 0000000000000000 0000000000000000 [ 0.030895] 7ba0: 0000000000000035 ffff80003d870000 000000000000017f 0000000000000000 [ 0.031144] 7bc0: 0000000000000000 0000000000000005 ffff000008f79c84 6120757063726570 [ 0.031394] 7be0: 6f697461636f6c6c ffff80003d847a40 0000000000000042 0000000000000006 [ 0.031643] 7c00: 0000000000000041 ffff000008f79c4d ffff000088f79c3f 0000000000000006 [ 0.031877] 7c20: 00000000deadbeef 0000000000000a3f [ 0.032051] [] pcpu_alloc+0x88/0x6c0 [ 0.032229] [] __alloc_percpu+0x18/0x20 [ 0.032409] [] xen_guest_init+0x174/0x2f4 [ 0.032591] [] do_one_initcall+0x38/0x130 [ 0.032783] [] kernel_init_freeable+0xe0/0x248 [ 0.032995] [] kernel_init+0x10/0x100 [ 0.033172] [] ret_from_fork+0x10/0x50 Reported-by: Wei Chen Link: https://lkml.org/lkml/2016/11/28/669 Signed-off-by: Julien Grall Signed-off-by: Stefano Stabellini Reviewed-by: Stefano Stabellini Signed-off-by: Greg Kroah-Hartman --- arch/arm/xen/enlighten.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/arch/arm/xen/enlighten.c b/arch/arm/xen/enlighten.c index fc7ea529f462..52c8c1f642fe 100644 --- a/arch/arm/xen/enlighten.c +++ b/arch/arm/xen/enlighten.c @@ -239,8 +239,7 @@ static int __init xen_guest_init(void) * for secondary CPUs as they are brought up. * For uniformity we use VCPUOP_register_vcpu_info even on cpu0. */ - xen_vcpu_info = __alloc_percpu(sizeof(struct vcpu_info), - sizeof(struct vcpu_info)); + xen_vcpu_info = alloc_percpu(struct vcpu_info); if (xen_vcpu_info == NULL) return -ENOMEM; From f85a337b4066e56b0af63b4984bacae1b17dd12a Mon Sep 17 00:00:00 2001 From: Eric Sandeen Date: Mon, 5 Dec 2016 12:31:06 +1100 Subject: [PATCH 137/199] xfs: set AGI buffer type in xlog_recover_clear_agi_bucket commit 6b10b23ca94451fae153a5cc8d62fd721bec2019 upstream. xlog_recover_clear_agi_bucket didn't set the type to XFS_BLFT_AGI_BUF, so we got a warning during log replay (or an ASSERT on a debug build). XFS (md0): Unknown buffer type 0! XFS (md0): _xfs_buf_ioapply: no ops on block 0xaea8802/0x1 Fix this, as was done in f19b872b for 2 other locations with the same problem. Signed-off-by: Eric Sandeen Reviewed-by: Brian Foster Reviewed-by: Christoph Hellwig Signed-off-by: Dave Chinner Signed-off-by: Greg Kroah-Hartman --- fs/xfs/xfs_log_recover.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c index 5991cdcb9040..8cab78eeb0c2 100644 --- a/fs/xfs/xfs_log_recover.c +++ b/fs/xfs/xfs_log_recover.c @@ -3980,6 +3980,7 @@ xlog_recover_clear_agi_bucket( agi->agi_unlinked[bucket] = cpu_to_be32(NULLAGINO); offset = offsetof(xfs_agi_t, agi_unlinked) + (sizeof(xfs_agino_t) * bucket); + xfs_trans_buf_set_type(tp, agibp, XFS_BLFT_AGI_BUF); xfs_trans_log_buf(tp, agibp, offset, (offset + sizeof(xfs_agino_t) - 1)); From 37de955c11b59050346e530143c20b10b4846527 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Sun, 10 Jul 2016 19:27:36 +0800 Subject: [PATCH 138/199] driver core: fix race between creating/querying glue dir and its cleanup commit cebf8fd16900fdfd58c0028617944f808f97fe50 upstream. The global mutex of 'gdp_mutex' is used to serialize creating/querying glue dir and its cleanup. Turns out it isn't a perfect way because part(kobj_kset_leave()) of the actual cleanup action() is done inside the release handler of the glue dir kobject. That means gdp_mutex has to be held before releasing the last reference count of the glue dir kobject. This patch moves glue dir's cleanup after kobject_del() in device_del() for avoiding the race. Cc: Yijing Wang Reported-by: Chandra Sekhar Lingutla Signed-off-by: Ming Lei Cc: Jiri Slaby Signed-off-by: Greg Kroah-Hartman --- drivers/base/core.c | 39 +++++++++++++++++++++++++++++---------- 1 file changed, 29 insertions(+), 10 deletions(-) diff --git a/drivers/base/core.c b/drivers/base/core.c index b7d56c5ea3c6..f18856f5954b 100644 --- a/drivers/base/core.c +++ b/drivers/base/core.c @@ -836,11 +836,29 @@ static struct kobject *get_device_parent(struct device *dev, return NULL; } +static inline bool live_in_glue_dir(struct kobject *kobj, + struct device *dev) +{ + if (!kobj || !dev->class || + kobj->kset != &dev->class->p->glue_dirs) + return false; + return true; +} + +static inline struct kobject *get_glue_dir(struct device *dev) +{ + return dev->kobj.parent; +} + +/* + * make sure cleaning up dir as the last step, we need to make + * sure .release handler of kobject is run with holding the + * global lock + */ static void cleanup_glue_dir(struct device *dev, struct kobject *glue_dir) { /* see if we live in a "glue" directory */ - if (!glue_dir || !dev->class || - glue_dir->kset != &dev->class->p->glue_dirs) + if (!live_in_glue_dir(glue_dir, dev)) return; mutex_lock(&gdp_mutex); @@ -848,11 +866,6 @@ static void cleanup_glue_dir(struct device *dev, struct kobject *glue_dir) mutex_unlock(&gdp_mutex); } -static void cleanup_device_parent(struct device *dev) -{ - cleanup_glue_dir(dev, dev->kobj.parent); -} - static int device_add_class_symlinks(struct device *dev) { struct device_node *of_node = dev_of_node(dev); @@ -1028,6 +1041,7 @@ int device_add(struct device *dev) struct kobject *kobj; struct class_interface *class_intf; int error = -EINVAL; + struct kobject *glue_dir = NULL; dev = get_device(dev); if (!dev) @@ -1072,8 +1086,10 @@ int device_add(struct device *dev) /* first, register with generic layer. */ /* we require the name to be set before, and pass NULL */ error = kobject_add(&dev->kobj, dev->kobj.parent, NULL); - if (error) + if (error) { + glue_dir = get_glue_dir(dev); goto Error; + } /* notify platform of device entry */ if (platform_notify) @@ -1154,9 +1170,10 @@ done: device_remove_file(dev, &dev_attr_uevent); attrError: kobject_uevent(&dev->kobj, KOBJ_REMOVE); + glue_dir = get_glue_dir(dev); kobject_del(&dev->kobj); Error: - cleanup_device_parent(dev); + cleanup_glue_dir(dev, glue_dir); put_device(parent); name_error: kfree(dev->p); @@ -1232,6 +1249,7 @@ EXPORT_SYMBOL_GPL(put_device); void device_del(struct device *dev) { struct device *parent = dev->parent; + struct kobject *glue_dir = NULL; struct class_interface *class_intf; /* Notify clients of device removal. This call must come @@ -1276,8 +1294,9 @@ void device_del(struct device *dev) blocking_notifier_call_chain(&dev->bus->p->bus_notifier, BUS_NOTIFY_REMOVED_DEVICE, dev); kobject_uevent(&dev->kobj, KOBJ_REMOVE); - cleanup_device_parent(dev); + glue_dir = get_glue_dir(dev); kobject_del(&dev->kobj); + cleanup_glue_dir(dev, glue_dir); put_device(parent); } EXPORT_SYMBOL_GPL(device_del); From 7a2b4ee54b3f23668fa366a4b0e04d351f809bb0 Mon Sep 17 00:00:00 2001 From: WANG Cong Date: Tue, 5 Jul 2016 22:12:36 -0700 Subject: [PATCH 139/199] ppp: defer netns reference release for ppp channel commit 205e1e255c479f3fd77446415706463b282f94e4 upstream. Matt reported that we have a NULL pointer dereference in ppp_pernet() from ppp_connect_channel(), i.e. pch->chan_net is NULL. This is due to that a parallel ppp_unregister_channel() could happen while we are in ppp_connect_channel(), during which pch->chan_net set to NULL. Since we need a reference to net per channel, it makes sense to sync the refcnt with the life time of the channel, therefore we should release this reference when we destroy it. Fixes: 1f461dcdd296 ("ppp: take reference on channels netns") Reported-by: Matt Bennett Cc: Paul Mackerras Cc: linux-ppp@vger.kernel.org Cc: Guillaume Nault Cc: Cyrill Gorcunov Signed-off-by: Cong Wang Reviewed-by: Cyrill Gorcunov Signed-off-by: David S. Miller Cc: bmajal222 Signed-off-by: Greg Kroah-Hartman --- drivers/net/ppp/ppp_generic.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/net/ppp/ppp_generic.c b/drivers/net/ppp/ppp_generic.c index 174e06ec7c2f..e5bb870b5461 100644 --- a/drivers/net/ppp/ppp_generic.c +++ b/drivers/net/ppp/ppp_generic.c @@ -2390,8 +2390,6 @@ ppp_unregister_channel(struct ppp_channel *chan) spin_lock_bh(&pn->all_channels_lock); list_del(&pch->list); spin_unlock_bh(&pn->all_channels_lock); - put_net(pch->chan_net); - pch->chan_net = NULL; pch->file.dead = 1; wake_up_interruptible(&pch->file.rwait); @@ -2984,6 +2982,9 @@ ppp_disconnect_channel(struct channel *pch) */ static void ppp_destroy_channel(struct channel *pch) { + put_net(pch->chan_net); + pch->chan_net = NULL; + atomic_dec(&channel_count); if (!pch->file.dead) { From a3edc7b2e537e36bb26c94fa9efcc249ef3a5862 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Fri, 6 Jan 2017 11:16:31 +0100 Subject: [PATCH 140/199] Linux 4.4.40 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 88d26a632bef..5b5937780408 100644 --- a/Makefile +++ b/Makefile @@ -1,6 +1,6 @@ VERSION = 4 PATCHLEVEL = 4 -SUBLEVEL = 39 +SUBLEVEL = 40 EXTRAVERSION = NAME = Blurry Fish Butt From f5d90f434c9a622a23719347677da010bc1e6d12 Mon Sep 17 00:00:00 2001 From: Larry Finger Date: Sat, 5 Nov 2016 14:08:57 -0500 Subject: [PATCH 141/199] ssb: Fix error routine when fallback SPROM fails commit 8052d7245b6089992343c80b38b14dbbd8354651 upstream. When there is a CRC error in the SPROM read from the device, the code attempts to handle a fallback SPROM. When this also fails, the driver returns zero rather than an error code. Signed-off-by: Larry Finger Signed-off-by: Kalle Valo Signed-off-by: Greg Kroah-Hartman --- drivers/ssb/pci.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/ssb/pci.c b/drivers/ssb/pci.c index 0f28c08fcb3c..77b551da5728 100644 --- a/drivers/ssb/pci.c +++ b/drivers/ssb/pci.c @@ -909,6 +909,7 @@ static int ssb_pci_sprom_get(struct ssb_bus *bus, if (err) { ssb_warn("WARNING: Using fallback SPROM failed (err %d)\n", err); + goto out_free; } else { ssb_dbg("Using SPROM revision %d provided by platform\n", sprom->revision); From 05f4183ab5a450de5fd0033c8cfd41223cbd942d Mon Sep 17 00:00:00 2001 From: Larry Finger Date: Sat, 26 Nov 2016 14:43:35 -0600 Subject: [PATCH 142/199] rtlwifi: Fix enter/exit power_save commit ba9f93f82abafe2552eac942ebb11c2df4f8dd7f upstream. In commit a5ffbe0a1993 ("rtlwifi: Fix scheduling while atomic bug") and commit a269913c52ad ("rtlwifi: Rework rtl_lps_leave() and rtl_lps_enter() to use work queue"), an error was introduced in the power-save routines due to the fact that leaving PS was delayed by the use of a work queue. This problem is fixed by detecting if the enter or leave routines are in interrupt mode. If so, the workqueue is used to place the request. If in normal mode, the enter or leave routines are called directly. Fixes: a269913c52ad ("rtlwifi: Rework rtl_lps_leave() and rtl_lps_enter() to use work queue") Reported-by: Ping-Ke Shih Signed-off-by: Larry Finger Signed-off-by: Kalle Valo Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/realtek/rtlwifi/base.c | 8 ++--- drivers/net/wireless/realtek/rtlwifi/core.c | 9 ++---- drivers/net/wireless/realtek/rtlwifi/pci.c | 14 +++----- drivers/net/wireless/realtek/rtlwifi/ps.c | 36 ++++++++++++++++----- 4 files changed, 40 insertions(+), 27 deletions(-) diff --git a/drivers/net/wireless/realtek/rtlwifi/base.c b/drivers/net/wireless/realtek/rtlwifi/base.c index 7a40d8dffa36..aab752328c26 100644 --- a/drivers/net/wireless/realtek/rtlwifi/base.c +++ b/drivers/net/wireless/realtek/rtlwifi/base.c @@ -1303,12 +1303,13 @@ EXPORT_SYMBOL_GPL(rtl_action_proc); static void setup_arp_tx(struct rtl_priv *rtlpriv, struct rtl_ps_ctl *ppsc) { + struct ieee80211_hw *hw = rtlpriv->hw; + rtlpriv->ra.is_special_data = true; if (rtlpriv->cfg->ops->get_btc_status()) rtlpriv->btcoexist.btc_ops->btc_special_packet_notify( rtlpriv, 1); - rtlpriv->enter_ps = false; - schedule_work(&rtlpriv->works.lps_change_work); + rtl_lps_leave(hw); ppsc->last_delaylps_stamp_jiffies = jiffies; } @@ -1381,8 +1382,7 @@ u8 rtl_is_special_data(struct ieee80211_hw *hw, struct sk_buff *skb, u8 is_tx, if (is_tx) { rtlpriv->ra.is_special_data = true; - rtlpriv->enter_ps = false; - schedule_work(&rtlpriv->works.lps_change_work); + rtl_lps_leave(hw); ppsc->last_delaylps_stamp_jiffies = jiffies; } diff --git a/drivers/net/wireless/realtek/rtlwifi/core.c b/drivers/net/wireless/realtek/rtlwifi/core.c index c925a4dff599..e36d8c456275 100644 --- a/drivers/net/wireless/realtek/rtlwifi/core.c +++ b/drivers/net/wireless/realtek/rtlwifi/core.c @@ -1153,10 +1153,8 @@ static void rtl_op_bss_info_changed(struct ieee80211_hw *hw, } else { mstatus = RT_MEDIA_DISCONNECT; - if (mac->link_state == MAC80211_LINKED) { - rtlpriv->enter_ps = false; - schedule_work(&rtlpriv->works.lps_change_work); - } + if (mac->link_state == MAC80211_LINKED) + rtl_lps_leave(hw); if (ppsc->p2p_ps_info.p2p_ps_mode > P2P_PS_NONE) rtl_p2p_ps_cmd(hw, P2P_PS_DISABLE); mac->link_state = MAC80211_NOLINK; @@ -1432,8 +1430,7 @@ static void rtl_op_sw_scan_start(struct ieee80211_hw *hw, } if (mac->link_state == MAC80211_LINKED) { - rtlpriv->enter_ps = false; - schedule_work(&rtlpriv->works.lps_change_work); + rtl_lps_leave(hw); mac->link_state = MAC80211_LINKED_SCANNING; } else { rtl_ips_nic_on(hw); diff --git a/drivers/net/wireless/realtek/rtlwifi/pci.c b/drivers/net/wireless/realtek/rtlwifi/pci.c index 5b4048041147..a52230377e2c 100644 --- a/drivers/net/wireless/realtek/rtlwifi/pci.c +++ b/drivers/net/wireless/realtek/rtlwifi/pci.c @@ -664,11 +664,9 @@ tx_status_ok: } if (((rtlpriv->link_info.num_rx_inperiod + - rtlpriv->link_info.num_tx_inperiod) > 8) || - (rtlpriv->link_info.num_rx_inperiod > 2)) { - rtlpriv->enter_ps = false; - schedule_work(&rtlpriv->works.lps_change_work); - } + rtlpriv->link_info.num_tx_inperiod) > 8) || + (rtlpriv->link_info.num_rx_inperiod > 2)) + rtl_lps_leave(hw); } static int _rtl_pci_init_one_rxdesc(struct ieee80211_hw *hw, @@ -919,10 +917,8 @@ new_trx_end: } if (((rtlpriv->link_info.num_rx_inperiod + rtlpriv->link_info.num_tx_inperiod) > 8) || - (rtlpriv->link_info.num_rx_inperiod > 2)) { - rtlpriv->enter_ps = false; - schedule_work(&rtlpriv->works.lps_change_work); - } + (rtlpriv->link_info.num_rx_inperiod > 2)) + rtl_lps_leave(hw); skb = new_skb; no_new: if (rtlpriv->use_new_trx_flow) { diff --git a/drivers/net/wireless/realtek/rtlwifi/ps.c b/drivers/net/wireless/realtek/rtlwifi/ps.c index b69321d45f04..626ff300352b 100644 --- a/drivers/net/wireless/realtek/rtlwifi/ps.c +++ b/drivers/net/wireless/realtek/rtlwifi/ps.c @@ -414,8 +414,8 @@ void rtl_lps_set_psmode(struct ieee80211_hw *hw, u8 rt_psmode) } } -/*Enter the leisure power save mode.*/ -void rtl_lps_enter(struct ieee80211_hw *hw) +/* Interrupt safe routine to enter the leisure power save mode.*/ +static void rtl_lps_enter_core(struct ieee80211_hw *hw) { struct rtl_mac *mac = rtl_mac(rtl_priv(hw)); struct rtl_ps_ctl *ppsc = rtl_psc(rtl_priv(hw)); @@ -455,10 +455,9 @@ void rtl_lps_enter(struct ieee80211_hw *hw) spin_unlock_irqrestore(&rtlpriv->locks.lps_lock, flag); } -EXPORT_SYMBOL(rtl_lps_enter); -/*Leave the leisure power save mode.*/ -void rtl_lps_leave(struct ieee80211_hw *hw) +/* Interrupt safe routine to leave the leisure power save mode.*/ +static void rtl_lps_leave_core(struct ieee80211_hw *hw) { struct rtl_priv *rtlpriv = rtl_priv(hw); struct rtl_ps_ctl *ppsc = rtl_psc(rtl_priv(hw)); @@ -488,7 +487,6 @@ void rtl_lps_leave(struct ieee80211_hw *hw) } spin_unlock_irqrestore(&rtlpriv->locks.lps_lock, flag); } -EXPORT_SYMBOL(rtl_lps_leave); /* For sw LPS*/ void rtl_swlps_beacon(struct ieee80211_hw *hw, void *data, unsigned int len) @@ -681,12 +679,34 @@ void rtl_lps_change_work_callback(struct work_struct *work) struct rtl_priv *rtlpriv = rtl_priv(hw); if (rtlpriv->enter_ps) - rtl_lps_enter(hw); + rtl_lps_enter_core(hw); else - rtl_lps_leave(hw); + rtl_lps_leave_core(hw); } EXPORT_SYMBOL_GPL(rtl_lps_change_work_callback); +void rtl_lps_enter(struct ieee80211_hw *hw) +{ + struct rtl_priv *rtlpriv = rtl_priv(hw); + + if (!in_interrupt()) + return rtl_lps_enter_core(hw); + rtlpriv->enter_ps = true; + schedule_work(&rtlpriv->works.lps_change_work); +} +EXPORT_SYMBOL_GPL(rtl_lps_enter); + +void rtl_lps_leave(struct ieee80211_hw *hw) +{ + struct rtl_priv *rtlpriv = rtl_priv(hw); + + if (!in_interrupt()) + return rtl_lps_leave_core(hw); + rtlpriv->enter_ps = false; + schedule_work(&rtlpriv->works.lps_change_work); +} +EXPORT_SYMBOL_GPL(rtl_lps_leave); + void rtl_swlps_wq_callback(void *data) { struct rtl_works *rtlworks = container_of_dwork_rtl(data, From b63929e8e130a97303dfacc28f558262f0605708 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 8 Dec 2016 17:22:09 +0100 Subject: [PATCH 143/199] cfg80211/mac80211: fix BSS leaks when abandoning assoc attempts commit e6f462df9acd2a3295e5d34eb29e2823220cf129 upstream. When mac80211 abandons an association attempt, it may free all the data structures, but inform cfg80211 and userspace about it only by sending the deauth frame it received, in which case cfg80211 has no link to the BSS struct that was used and will not cfg80211_unhold_bss() it. Fix this by providing a way to inform cfg80211 of this with the BSS entry passed, so that it can clean up properly, and use this ability in the appropriate places in mac80211. This isn't ideal: some code is more or less duplicated and tracing is missing. However, it's a fairly small change and it's thus easier to backport - cleanups can come later. Signed-off-by: Johannes Berg Signed-off-by: Greg Kroah-Hartman --- include/net/cfg80211.h | 11 +++++++++++ net/mac80211/mlme.c | 21 ++++++++++++--------- net/wireless/core.h | 1 + net/wireless/mlme.c | 12 ++++++++++++ net/wireless/sme.c | 14 ++++++++++++++ 5 files changed, 50 insertions(+), 9 deletions(-) diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 2c7bdb81d30c..b5f3693fe5b6 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -4257,6 +4257,17 @@ void cfg80211_rx_assoc_resp(struct net_device *dev, */ void cfg80211_assoc_timeout(struct net_device *dev, struct cfg80211_bss *bss); +/** + * cfg80211_abandon_assoc - notify cfg80211 of abandoned association attempt + * @dev: network device + * @bss: The BSS entry with which association was abandoned. + * + * Call this whenever - for reasons reported through other API, like deauth RX, + * an association attempt was abandoned. + * This function may sleep. The caller must hold the corresponding wdev's mutex. + */ +void cfg80211_abandon_assoc(struct net_device *dev, struct cfg80211_bss *bss); + /** * cfg80211_tx_mlme_mgmt - notification of transmitted deauth/disassoc frame * @dev: network device diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 83097c3832d1..23095d5e0199 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -2517,7 +2517,7 @@ static void ieee80211_destroy_auth_data(struct ieee80211_sub_if_data *sdata, } static void ieee80211_destroy_assoc_data(struct ieee80211_sub_if_data *sdata, - bool assoc) + bool assoc, bool abandon) { struct ieee80211_mgd_assoc_data *assoc_data = sdata->u.mgd.assoc_data; @@ -2539,6 +2539,9 @@ static void ieee80211_destroy_assoc_data(struct ieee80211_sub_if_data *sdata, mutex_lock(&sdata->local->mtx); ieee80211_vif_release_channel(sdata); mutex_unlock(&sdata->local->mtx); + + if (abandon) + cfg80211_abandon_assoc(sdata->dev, assoc_data->bss); } kfree(assoc_data); @@ -2768,7 +2771,7 @@ static void ieee80211_rx_mgmt_deauth(struct ieee80211_sub_if_data *sdata, bssid, reason_code, ieee80211_get_reason_code_string(reason_code)); - ieee80211_destroy_assoc_data(sdata, false); + ieee80211_destroy_assoc_data(sdata, false, true); cfg80211_rx_mlme_mgmt(sdata->dev, (u8 *)mgmt, len); return; @@ -3173,14 +3176,14 @@ static void ieee80211_rx_mgmt_assoc_resp(struct ieee80211_sub_if_data *sdata, if (status_code != WLAN_STATUS_SUCCESS) { sdata_info(sdata, "%pM denied association (code=%d)\n", mgmt->sa, status_code); - ieee80211_destroy_assoc_data(sdata, false); + ieee80211_destroy_assoc_data(sdata, false, false); event.u.mlme.status = MLME_DENIED; event.u.mlme.reason = status_code; drv_event_callback(sdata->local, sdata, &event); } else { if (!ieee80211_assoc_success(sdata, bss, mgmt, len)) { /* oops -- internal error -- send timeout for now */ - ieee80211_destroy_assoc_data(sdata, false); + ieee80211_destroy_assoc_data(sdata, false, false); cfg80211_assoc_timeout(sdata->dev, bss); return; } @@ -3193,7 +3196,7 @@ static void ieee80211_rx_mgmt_assoc_resp(struct ieee80211_sub_if_data *sdata, * recalc after assoc_data is NULL but before associated * is set can cause the interface to go idle */ - ieee80211_destroy_assoc_data(sdata, true); + ieee80211_destroy_assoc_data(sdata, true, false); /* get uapsd queues configuration */ uapsd_queues = 0; @@ -3888,7 +3891,7 @@ void ieee80211_sta_work(struct ieee80211_sub_if_data *sdata) .u.mlme.status = MLME_TIMEOUT, }; - ieee80211_destroy_assoc_data(sdata, false); + ieee80211_destroy_assoc_data(sdata, false, false); cfg80211_assoc_timeout(sdata->dev, bss); drv_event_callback(sdata->local, sdata, &event); } @@ -4029,7 +4032,7 @@ void ieee80211_mgd_quiesce(struct ieee80211_sub_if_data *sdata) WLAN_REASON_DEAUTH_LEAVING, false, frame_buf); if (ifmgd->assoc_data) - ieee80211_destroy_assoc_data(sdata, false); + ieee80211_destroy_assoc_data(sdata, false, true); if (ifmgd->auth_data) ieee80211_destroy_auth_data(sdata, false); cfg80211_tx_mlme_mgmt(sdata->dev, frame_buf, @@ -4905,7 +4908,7 @@ int ieee80211_mgd_deauth(struct ieee80211_sub_if_data *sdata, IEEE80211_STYPE_DEAUTH, req->reason_code, tx, frame_buf); - ieee80211_destroy_assoc_data(sdata, false); + ieee80211_destroy_assoc_data(sdata, false, true); ieee80211_report_disconnect(sdata, frame_buf, sizeof(frame_buf), true, req->reason_code); @@ -4980,7 +4983,7 @@ void ieee80211_mgd_stop(struct ieee80211_sub_if_data *sdata) sdata_lock(sdata); if (ifmgd->assoc_data) { struct cfg80211_bss *bss = ifmgd->assoc_data->bss; - ieee80211_destroy_assoc_data(sdata, false); + ieee80211_destroy_assoc_data(sdata, false, false); cfg80211_assoc_timeout(sdata->dev, bss); } if (ifmgd->auth_data) diff --git a/net/wireless/core.h b/net/wireless/core.h index 47a967fed8ff..47ea169aa0a3 100644 --- a/net/wireless/core.h +++ b/net/wireless/core.h @@ -398,6 +398,7 @@ void cfg80211_sme_disassoc(struct wireless_dev *wdev); void cfg80211_sme_deauth(struct wireless_dev *wdev); void cfg80211_sme_auth_timeout(struct wireless_dev *wdev); void cfg80211_sme_assoc_timeout(struct wireless_dev *wdev); +void cfg80211_sme_abandon_assoc(struct wireless_dev *wdev); /* internal helpers */ bool cfg80211_supported_cipher_suite(struct wiphy *wiphy, u32 cipher); diff --git a/net/wireless/mlme.c b/net/wireless/mlme.c index fb44fa3bf4ef..c0e02f72e931 100644 --- a/net/wireless/mlme.c +++ b/net/wireless/mlme.c @@ -149,6 +149,18 @@ void cfg80211_assoc_timeout(struct net_device *dev, struct cfg80211_bss *bss) } EXPORT_SYMBOL(cfg80211_assoc_timeout); +void cfg80211_abandon_assoc(struct net_device *dev, struct cfg80211_bss *bss) +{ + struct wireless_dev *wdev = dev->ieee80211_ptr; + struct wiphy *wiphy = wdev->wiphy; + + cfg80211_sme_abandon_assoc(wdev); + + cfg80211_unhold_bss(bss_from_pub(bss)); + cfg80211_put_bss(wiphy, bss); +} +EXPORT_SYMBOL(cfg80211_abandon_assoc); + void cfg80211_tx_mlme_mgmt(struct net_device *dev, const u8 *buf, size_t len) { struct wireless_dev *wdev = dev->ieee80211_ptr; diff --git a/net/wireless/sme.c b/net/wireless/sme.c index 8020b5b094d4..18b4a652cf41 100644 --- a/net/wireless/sme.c +++ b/net/wireless/sme.c @@ -39,6 +39,7 @@ struct cfg80211_conn { CFG80211_CONN_ASSOCIATING, CFG80211_CONN_ASSOC_FAILED, CFG80211_CONN_DEAUTH, + CFG80211_CONN_ABANDON, CFG80211_CONN_CONNECTED, } state; u8 bssid[ETH_ALEN], prev_bssid[ETH_ALEN]; @@ -204,6 +205,8 @@ static int cfg80211_conn_do_work(struct wireless_dev *wdev) cfg80211_mlme_deauth(rdev, wdev->netdev, params->bssid, NULL, 0, WLAN_REASON_DEAUTH_LEAVING, false); + /* fall through */ + case CFG80211_CONN_ABANDON: /* free directly, disconnected event already sent */ cfg80211_sme_free(wdev); return 0; @@ -423,6 +426,17 @@ void cfg80211_sme_assoc_timeout(struct wireless_dev *wdev) schedule_work(&rdev->conn_work); } +void cfg80211_sme_abandon_assoc(struct wireless_dev *wdev) +{ + struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy); + + if (!wdev->conn) + return; + + wdev->conn->state = CFG80211_CONN_ABANDON; + schedule_work(&rdev->conn_work); +} + static int cfg80211_sme_get_conn_ies(struct wireless_dev *wdev, const u8 *ies, size_t ies_len, const u8 **out_ies, size_t *out_ies_len) From dae7cb41483423e2a64b128d73718b8ae5e96d2c Mon Sep 17 00:00:00 2001 From: "Vittorio Gambaletta (VittGam)" Date: Wed, 9 Nov 2016 03:40:56 +0200 Subject: [PATCH 144/199] ath9k: Really fix LED polarity for some Mini PCI AR9220 MB92 cards. commit 79e57dd113d307a6c74773b8aaecf5442068988a upstream. The active_high LED of my Wistron DNMA-92 is still being recognized as active_low on 4.7.6 mainline. When I was preparing my former commit 0f9edcdd88a9 ("ath9k: Fix LED polarity for some Mini PCI AR9220 MB92 cards.") to fix that I must have somehow messed up with testing, because I tested the final version of that patch before sending it, and it was apparently working; but now it is not working on 4.7.6 mainline. I initially added the PCI_DEVICE_SUB section for 0x0029/0x2096 above the PCI_VDEVICE section for 0x0029; but then I moved the former below the latter after seeing how 0x002A sections were sorted in the file. This turned out to be wrong: if a generic PCI_VDEVICE entry (that has both subvendor and subdevice IDs set to PCI_ANY_ID) is put before a more specific one (PCI_DEVICE_SUB), then the generic PCI_VDEVICE entry will match first and will be used. With this patch, 0x0029/0x2096 has finally got active_high LED on 4.7.6. While I'm at it, let's fix 0x002A too by also moving its generic definition below its specific ones. Fixes: 0f9edcdd88a9 ("ath9k: Fix LED polarity for some Mini PCI AR9220 MB92 cards.") Signed-off-by: Vittorio Gambaletta [kvalo@qca.qualcomm.com: improve the commit log based on email discussions] Signed-off-by: Kalle Valo Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/ath/ath9k/pci.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/ath/ath9k/pci.c b/drivers/net/wireless/ath/ath9k/pci.c index 7cdaf40c3057..ea7b8c25955f 100644 --- a/drivers/net/wireless/ath/ath9k/pci.c +++ b/drivers/net/wireless/ath/ath9k/pci.c @@ -27,7 +27,6 @@ static const struct pci_device_id ath_pci_id_table[] = { { PCI_VDEVICE(ATHEROS, 0x0023) }, /* PCI */ { PCI_VDEVICE(ATHEROS, 0x0024) }, /* PCI-E */ { PCI_VDEVICE(ATHEROS, 0x0027) }, /* PCI */ - { PCI_VDEVICE(ATHEROS, 0x0029) }, /* PCI */ #ifdef CONFIG_ATH9K_PCOEM /* Mini PCI AR9220 MB92 cards: Compex WLM200NX, Wistron DNMA-92 */ @@ -38,7 +37,7 @@ static const struct pci_device_id ath_pci_id_table[] = { .driver_data = ATH9K_PCI_LED_ACT_HI }, #endif - { PCI_VDEVICE(ATHEROS, 0x002A) }, /* PCI-E */ + { PCI_VDEVICE(ATHEROS, 0x0029) }, /* PCI */ #ifdef CONFIG_ATH9K_PCOEM { PCI_DEVICE_SUB(PCI_VENDOR_ID_ATHEROS, @@ -86,7 +85,11 @@ static const struct pci_device_id ath_pci_id_table[] = { 0x10CF, /* Fujitsu */ 0x1536), .driver_data = ATH9K_PCI_D3_L1_WAR }, +#endif + { PCI_VDEVICE(ATHEROS, 0x002A) }, /* PCI-E */ + +#ifdef CONFIG_ATH9K_PCOEM /* AR9285 card for Asus */ { PCI_DEVICE_SUB(PCI_VENDOR_ID_ATHEROS, 0x002B, From 842ec27cd3f2d9d1f4a8c5900f2e41be864e9d49 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Fri, 2 Dec 2016 15:14:20 +0200 Subject: [PATCH 145/199] mmc: sdhci: Fix recovery from tuning timeout commit 61e53bd0047d58caee0c7170613045bf96de4458 upstream. Clearing the tuning bits should reset the tuning circuit. However there is more to do. Reset the command and data lines for good measure, and then for eMMC ensure the card is not still trying to process a tuning command by sending a stop command. Note the JEDEC eMMC specification says the stop command (CMD12) can be used to stop a tuning command (CMD21) whereas the SD specification is silent on the subject with respect to the SD tuning command (CMD19). Considering that CMD12 is not a valid SDIO command, the stop command is sent only when the tuning command is CMD21 i.e. for eMMC. That addresses cases seen so far which have been on eMMC. Note that this replaces the commit fe5fb2e3b58f ("mmc: sdhci: Reset cmd and data circuits after tuning failure") which is being reverted for v4.9+. Signed-off-by: Adrian Hunter Tested-by: Dan O'Donovan Signed-off-by: Ulf Hansson Signed-off-by: Greg Kroah-Hartman --- drivers/mmc/host/sdhci.c | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/drivers/mmc/host/sdhci.c b/drivers/mmc/host/sdhci.c index 64a428984afe..7277dfd7338f 100644 --- a/drivers/mmc/host/sdhci.c +++ b/drivers/mmc/host/sdhci.c @@ -2040,7 +2040,27 @@ static int sdhci_execute_tuning(struct mmc_host *mmc, u32 opcode) ctrl &= ~SDHCI_CTRL_EXEC_TUNING; sdhci_writew(host, ctrl, SDHCI_HOST_CONTROL2); + sdhci_do_reset(host, SDHCI_RESET_CMD); + sdhci_do_reset(host, SDHCI_RESET_DATA); + err = -EIO; + + if (cmd.opcode != MMC_SEND_TUNING_BLOCK_HS200) + goto out; + + sdhci_writel(host, host->ier, SDHCI_INT_ENABLE); + sdhci_writel(host, host->ier, SDHCI_SIGNAL_ENABLE); + + spin_unlock_irqrestore(&host->lock, flags); + + memset(&cmd, 0, sizeof(cmd)); + cmd.opcode = MMC_STOP_TRANSMISSION; + cmd.flags = MMC_RSP_SPI_R1B | MMC_RSP_R1B | MMC_CMD_AC; + cmd.busy_timeout = 50; + mmc_wait_for_cmd(mmc, &cmd, 0); + + spin_lock_irqsave(&host->lock, flags); + goto out; } From 96ea1b9ea0996a563b3dd983f636424e92c11143 Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Sat, 12 Nov 2016 15:22:38 +0100 Subject: [PATCH 146/199] regulator: stw481x-vmmc: fix ages old enable error commit 295070e9aa015abb9b92cccfbb1e43954e938133 upstream. The regulator has never been properly enabled, it has been dormant all the time. It's strange that MMC was working at all, but it likely worked by the signals going through the levelshifter and reaching the card anyways. Fixes: 3615a34ea1a6 ("regulator: add STw481x VMMC driver") Signed-off-by: Linus Walleij Signed-off-by: Mark Brown Signed-off-by: Greg Kroah-Hartman --- drivers/regulator/stw481x-vmmc.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/regulator/stw481x-vmmc.c b/drivers/regulator/stw481x-vmmc.c index 7d2ae3e9e942..342f5da79975 100644 --- a/drivers/regulator/stw481x-vmmc.c +++ b/drivers/regulator/stw481x-vmmc.c @@ -47,7 +47,8 @@ static struct regulator_desc vmmc_regulator = { .volt_table = stw481x_vmmc_voltages, .enable_time = 200, /* FIXME: look this up */ .enable_reg = STW_CONF1, - .enable_mask = STW_CONF1_PDN_VMMC, + .enable_mask = STW_CONF1_PDN_VMMC | STW_CONF1_MMC_LS_STATUS, + .enable_val = STW_CONF1_PDN_VMMC, .vsel_reg = STW_CONF1, .vsel_mask = STW_CONF1_VMMC_MASK, }; From e01b04be3eb00456d65278cfa56cfd8103872bfb Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 8 Dec 2016 20:49:32 +0000 Subject: [PATCH 147/199] timekeeping_Force_unsigned_clocksource_to_nanoseconds_conversion commit 9c1645727b8fa90d07256fdfcc45bf831242a3ab upstream. The clocksource delta to nanoseconds conversion is using signed math, but the delta is unsigned. This makes the conversion space smaller than necessary and in case of a multiplication overflow the conversion can become negative. The conversion is done with scaled math: s64 nsec_delta = ((s64)clkdelta * clk->mult) >> clk->shift; Shifting a signed integer right obvioulsy preserves the sign, which has interesting consequences: - Time jumps backwards - __iter_div_u64_rem() which is used in one of the calling code pathes will take forever to piecewise calculate the seconds/nanoseconds part. This has been reported by several people with different scenarios: David observed that when stopping a VM with a debugger: "It was essentially the stopped by debugger case. I forget exactly why, but the guest was being explicitly stopped from outside, it wasn't just scheduling lag. I think it was something in the vicinity of 10 minutes stopped." When lifting the stop the machine went dead. The stopped by debugger case is not really interesting, but nevertheless it would be a good thing not to die completely. But this was also observed on a live system by Liav: "When the OS is too overloaded, delta will get a high enough value for the msb of the sum delta * tkr->mult + tkr->xtime_nsec to be set, and so after the shift the nsec variable will gain a value similar to 0xffffffffff000000." Unfortunately this has been reintroduced recently with commit 6bd58f09e1d8 ("time: Add cycles to nanoseconds translation"). It had been fixed a year ago already in commit 35a4933a8959 ("time: Avoid signed overflow in timekeeping_get_ns()"). Though it's not surprising that the issue has been reintroduced because the function itself and the whole call chain uses s64 for the result and the propagation of it. The change in this recent commit is subtle: s64 nsec; - nsec = (d * m + n) >> s: + nsec = d * m + n; + nsec >>= s; d being type of cycle_t adds another level of obfuscation. This wouldn't have happened if the previous change to unsigned computation would have made the 'nsec' variable u64 right away and a follow up patch had cleaned up the whole call chain. There have been patches submitted which basically did a revert of the above patch leaving everything else unchanged as signed. Back to square one. This spawned a admittedly pointless discussion about potential users which rely on the unsigned behaviour until someone pointed out that it had been fixed before. The changelogs of said patches added further confusion as they made finally false claims about the consequences for eventual users which expect signed results. Despite delta being cycle_t, aka. u64, it's very well possible to hand in a signed negative value and the signed computation will happily return the correct result. But nobody actually sat down and analyzed the code which was added as user after the propably unintended signed conversion. Though in sensitive code like this it's better to analyze it proper and make sure that nothing relies on this than hunting the subtle wreckage half a year later. After analyzing all call chains it stands that no caller can hand in a negative value (which actually would work due to the s64 cast) and rely on the signed math to do the right thing. Change the conversion function to unsigned math. The conversion of all call chains is done in a follow up patch. This solves the starvation issue, which was caused by the negative result, but it does not solve the underlying problem. It merily procrastinates it. When the timekeeper update is deferred long enough that the unsigned multiplication overflows, then time going backwards is observable again. It does neither solve the issue of clocksources with a small counter width which will wrap around possibly several times and cause random time stamps to be generated. But those are usually not found on systems used for virtualization, so this is likely a non issue. I took the liberty to claim authorship for this simply because analyzing all callsites and writing the changelog took substantially more time than just making the simple s/s64/u64/ change and ignore the rest. Fixes: 6bd58f09e1d8 ("time: Add cycles to nanoseconds translation") Reported-by: David Gibson Reported-by: Liav Rehana Signed-off-by: Thomas Gleixner Reviewed-by: David Gibson Acked-by: Peter Zijlstra (Intel) Cc: Parit Bhargava Cc: Laurent Vivier Cc: "Christopher S. Hall" Cc: Chris Metcalf Cc: Richard Cochran Cc: John Stultz Link: http://lkml.kernel.org/r/20161208204228.688545601@linutronix.de Signed-off-by: Thomas Gleixner Signed-off-by: Greg Kroah-Hartman --- kernel/time/timekeeping.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index 445601c580d6..738012d68117 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -298,10 +298,10 @@ u32 (*arch_gettimeoffset)(void) = default_arch_gettimeoffset; static inline u32 arch_gettimeoffset(void) { return 0; } #endif -static inline s64 timekeeping_delta_to_ns(struct tk_read_base *tkr, +static inline u64 timekeeping_delta_to_ns(struct tk_read_base *tkr, cycle_t delta) { - s64 nsec; + u64 nsec; nsec = delta * tkr->mult + tkr->xtime_nsec; nsec >>= tkr->shift; From 9d9541d44a26dea009247e001ebd7775e2feeb37 Mon Sep 17 00:00:00 2001 From: Boris Brezillon Date: Thu, 1 Dec 2016 20:27:21 +0100 Subject: [PATCH 148/199] clk: bcm2835: Avoid overwriting the div info when disabling a pll_div clk commit 68af4fa8f39b542a6cde7ac19518d88e9b3099dc upstream. bcm2835_pll_divider_off() is resetting the divider field in the A2W reg to zero when disabling the clock. Make sure we preserve this value by reading the previous a2w_reg value first and ORing the result with A2W_PLL_CHANNEL_DISABLE. Signed-off-by: Boris Brezillon Fixes: 41691b8862e2 ("clk: bcm2835: Add support for programming the audio domain clocks") Reviewed-by: Eric Anholt Signed-off-by: Stephen Boyd Signed-off-by: Greg Kroah-Hartman --- drivers/clk/bcm/clk-bcm2835.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/clk/bcm/clk-bcm2835.c b/drivers/clk/bcm/clk-bcm2835.c index 6029313aa995..35ab89fe9d7b 100644 --- a/drivers/clk/bcm/clk-bcm2835.c +++ b/drivers/clk/bcm/clk-bcm2835.c @@ -1082,7 +1082,9 @@ static void bcm2835_pll_divider_off(struct clk_hw *hw) cprman_write(cprman, data->cm_reg, (cprman_read(cprman, data->cm_reg) & ~data->load_mask) | data->hold_mask); - cprman_write(cprman, data->a2w_reg, A2W_PLL_CHANNEL_DISABLE); + cprman_write(cprman, data->a2w_reg, + cprman_read(cprman, data->a2w_reg) | + A2W_PLL_CHANNEL_DISABLE); spin_unlock(&cprman->regs_lock); } From e692edec93c10b480a7cbd3c76de90da93b82075 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Tue, 22 Nov 2016 19:22:44 +0200 Subject: [PATCH 149/199] thermal: hwmon: Properly report critical temperature in sysfs commit f37fabb8643eaf8e3b613333a72f683770c85eca upstream. In the critical sysfs entry the thermal hwmon was returning wrong temperature to the user-space. It was reporting the temperature of the first trip point instead of the temperature of critical trip point. For example: /sys/class/hwmon/hwmon0/temp1_crit:50000 /sys/class/thermal/thermal_zone0/trip_point_0_temp:50000 /sys/class/thermal/thermal_zone0/trip_point_0_type:active /sys/class/thermal/thermal_zone0/trip_point_3_temp:120000 /sys/class/thermal/thermal_zone0/trip_point_3_type:critical Since commit e68b16abd91d ("thermal: add hwmon sysfs I/F") the driver have been registering a sysfs entry if get_crit_temp() callback was provided. However when accessed, it was calling get_trip_temp() instead of the get_crit_temp(). Fixes: e68b16abd91d ("thermal: add hwmon sysfs I/F") Signed-off-by: Krzysztof Kozlowski Signed-off-by: Zhang Rui Signed-off-by: Greg Kroah-Hartman --- drivers/thermal/thermal_hwmon.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/thermal/thermal_hwmon.c b/drivers/thermal/thermal_hwmon.c index 06fd2ed9ef9d..705b0cafedbb 100644 --- a/drivers/thermal/thermal_hwmon.c +++ b/drivers/thermal/thermal_hwmon.c @@ -98,7 +98,7 @@ temp_crit_show(struct device *dev, struct device_attribute *attr, char *buf) int temperature; int ret; - ret = tz->ops->get_trip_temp(tz, 0, &temperature); + ret = tz->ops->get_crit_temp(tz, &temperature); if (ret) return ret; From 5283a7bedbcbdfa654c8a4d7fb314d734a56e544 Mon Sep 17 00:00:00 2001 From: Ian Abbott Date: Mon, 14 Nov 2016 20:16:21 +0000 Subject: [PATCH 150/199] staging: comedi: ni_mio_common: fix M Series ni_ai_insn_read() data mask commit 655c4d442d1213b617926cc6d54e2a9a793fb46b upstream. For NI M Series cards, the Comedi `insn_read` handler for the AI subdevice is broken due to ANDing the value read from the AI FIFO data register with an incorrect mask. The incorrect mask clears all but the most significant bit of the sample data. It should preserve all the sample data bits. Correct it. Fixes: 817144ae7fda ("staging: comedi: ni_mio_common: remove unnecessary use of 'board->adbits'") Signed-off-by: Ian Abbott Signed-off-by: Greg Kroah-Hartman --- drivers/staging/comedi/drivers/ni_mio_common.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/staging/comedi/drivers/ni_mio_common.c b/drivers/staging/comedi/drivers/ni_mio_common.c index 35ab4a9ef95d..a575772efeb1 100644 --- a/drivers/staging/comedi/drivers/ni_mio_common.c +++ b/drivers/staging/comedi/drivers/ni_mio_common.c @@ -1929,7 +1929,7 @@ static int ni_ai_insn_read(struct comedi_device *dev, unsigned int *data) { struct ni_private *devpriv = dev->private; - unsigned int mask = (s->maxdata + 1) >> 1; + unsigned int mask = s->maxdata; int i, n; unsigned signbits; unsigned int d; From 01b6089b5622bbcb2c77ce655a218fb7b8ccd77d Mon Sep 17 00:00:00 2001 From: Ian Abbott Date: Mon, 14 Nov 2016 20:16:22 +0000 Subject: [PATCH 151/199] staging: comedi: ni_mio_common: fix E series ni_ai_insn_read() data commit 857a661020a2de3a0304edf33ad656abee100891 upstream. Commit 0557344e2149 ("staging: comedi: ni_mio_common: fix local var for 32-bit read") changed the type of local variable `d` from `unsigned short` to `unsigned int` to fix a bug introduced in commit 9c340ac934db ("staging: comedi: ni_stc.h: add read/write callbacks to struct ni_private") when reading AI data for NI PCI-6110 and PCI-6111 cards. Unfortunately, other parts of the function rely on the variable being `unsigned short` when an offset value in local variable `signbits` is added to `d` before writing the value to the `data` array: d += signbits; data[n] = d; The `signbits` variable will be non-zero in bipolar mode, and is used to convert the hardware's 2's complement, 16-bit numbers to Comedi's straight binary sample format (with 0 representing the most negative voltage). This breaks because `d` is now 32 bits wide instead of 16 bits wide, so after the addition of `signbits`, `data[n]` ends up being set to values above 65536 for negative voltages. This affects all supported "E series" cards except PCI-6143 (and PXI-6143). Fix it by ANDing the value written to the `data[n]` with the mask 0xffff. Fixes: 0557344e2149 ("staging: comedi: ni_mio_common: fix local var for 32-bit read") Signed-off-by: Ian Abbott Signed-off-by: Greg Kroah-Hartman --- drivers/staging/comedi/drivers/ni_mio_common.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/staging/comedi/drivers/ni_mio_common.c b/drivers/staging/comedi/drivers/ni_mio_common.c index a575772efeb1..c975f6e8be49 100644 --- a/drivers/staging/comedi/drivers/ni_mio_common.c +++ b/drivers/staging/comedi/drivers/ni_mio_common.c @@ -1972,7 +1972,7 @@ static int ni_ai_insn_read(struct comedi_device *dev, return -ETIME; } d += signbits; - data[n] = d; + data[n] = d & 0xffff; } } else if (devpriv->is_6143) { for (n = 0; n < insn->n; n++) { @@ -2017,8 +2017,8 @@ static int ni_ai_insn_read(struct comedi_device *dev, data[n] = dl; } else { d = ni_readw(dev, NI_E_AI_FIFO_DATA_REG); - d += signbits; /* subtle: needs to be short addition */ - data[n] = d; + d += signbits; + data[n] = d & 0xffff; } } } From 2c2375e40d28b3d0fd961448fd12963f66b4b452 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Tue, 29 Nov 2016 15:32:15 +0100 Subject: [PATCH 152/199] ACPI / video: Add force_native quirk for Dell XPS 17 L702X commit 350fa038c31b056fc509624efb66348ac2c1e3d0 upstream. The Dell XPS 17 L702X has a non-working acpi_video0 backlight interface and an intel_backlight interface which works fine. Add a force_native quirk for it so that the non-working acpi_video0 interface does not get registered. Note that there also is an issue with the brightnesskeys on this laptop, they do not generate key-press events in anyway. That is not solved by this patch. Link: https://bugzilla.redhat.com/show_bug.cgi?id=1123661 Signed-off-by: Hans de Goede Signed-off-by: Rafael J. Wysocki Signed-off-by: Greg Kroah-Hartman --- drivers/acpi/video_detect.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/acpi/video_detect.c b/drivers/acpi/video_detect.c index 80e55cb0827b..5e5e1ee7e467 100644 --- a/drivers/acpi/video_detect.c +++ b/drivers/acpi/video_detect.c @@ -271,6 +271,15 @@ static const struct dmi_system_id video_detect_dmi_table[] = { DMI_MATCH(DMI_PRODUCT_NAME, "MacBookPro12,1"), }, }, + { + /* https://bugzilla.redhat.com/show_bug.cgi?id=1123661 */ + .callback = video_detect_force_native, + .ident = "Dell XPS 17 L702X", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), + DMI_MATCH(DMI_PRODUCT_NAME, "Dell System XPS L702X"), + }, + }, { }, }; From 69e236e70ead32e8cc2b84ce075dbe1319ca166b Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Tue, 29 Nov 2016 15:32:16 +0100 Subject: [PATCH 153/199] ACPI / video: Add force_native quirk for HP Pavilion dv6 commit 6276e53fa8c06a3a5cf7b95b77b079966de9ad66 upstream. The HP Pavilion dv6 has a non-working acpi_video0 backlight interface and an intel_backlight interface which works fine. Add a force_native quirk for it so that the non-working acpi_video0 interface does not get registered. Note that there are quite a few HP Pavilion dv6 variants, some woth ATI and some with NVIDIA hybrid gfx, both seem to need this quirk to have working backlight control. There are also some versions with only Intel integrated gfx, these may not need this quirk, but it should not hurt there. Link: https://bugzilla.redhat.com/show_bug.cgi?id=1204476 Link: https://bugs.launchpad.net/ubuntu/+source/linux-lts-trusty/+bug/1416940 Signed-off-by: Hans de Goede Signed-off-by: Rafael J. Wysocki Signed-off-by: Greg Kroah-Hartman --- drivers/acpi/video_detect.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/drivers/acpi/video_detect.c b/drivers/acpi/video_detect.c index 5e5e1ee7e467..b48ecbfc4498 100644 --- a/drivers/acpi/video_detect.c +++ b/drivers/acpi/video_detect.c @@ -280,6 +280,17 @@ static const struct dmi_system_id video_detect_dmi_table[] = { DMI_MATCH(DMI_PRODUCT_NAME, "Dell System XPS L702X"), }, }, + { + /* https://bugzilla.redhat.com/show_bug.cgi?id=1204476 */ + /* https://bugs.launchpad.net/ubuntu/+source/linux-lts-trusty/+bug/1416940 */ + .callback = video_detect_force_native, + .ident = "HP Pavilion dv6", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Hewlett-Packard"), + DMI_MATCH(DMI_PRODUCT_NAME, "HP Pavilion dv6 Notebook PC"), + }, + }, + { }, }; From a163451c80b028d97b5224c74517dbf67ac45f2c Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Wed, 19 Oct 2016 12:15:52 +1000 Subject: [PATCH 154/199] drm/nouveau/kms: lvds panel strap moved again on maxwell commit 768e847759d551c96e129e194588dbfb11a1d576 upstream. Signed-off-by: Ben Skeggs Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/nouveau/nouveau_bios.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/nouveau/nouveau_bios.c b/drivers/gpu/drm/nouveau/nouveau_bios.c index 4dca65a63b92..af224fafa21f 100644 --- a/drivers/gpu/drm/nouveau/nouveau_bios.c +++ b/drivers/gpu/drm/nouveau/nouveau_bios.c @@ -333,6 +333,9 @@ get_fp_strap(struct drm_device *dev, struct nvbios *bios) if (bios->major_version < 5 && bios->data[0x48] & 0x4) return NVReadVgaCrtc5758(dev, 0, 0xf) & 0xf; + if (drm->device.info.family >= NV_DEVICE_INFO_V0_MAXWELL) + return nvif_rd32(device, 0x001800) & 0x0000000f; + else if (drm->device.info.family >= NV_DEVICE_INFO_V0_TESLA) return (nvif_rd32(device, NV_PEXTDEV_BOOT_0) >> 24) & 0xf; else From 3a2990e67b056d2f9fe8053569cc9752b000e8fa Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Fri, 14 Oct 2016 13:16:36 +1000 Subject: [PATCH 155/199] drm/nouveau/bios: require checksum to match for fast acpi shadow method commit 5dc7f4aa9d84ea94b54a9bfcef095f0289f1ebda upstream. Signed-off-by: Ben Skeggs Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/nouveau/nvkm/subdev/bios/priv.h | 1 + drivers/gpu/drm/nouveau/nvkm/subdev/bios/shadow.c | 7 +++++-- drivers/gpu/drm/nouveau/nvkm/subdev/bios/shadowacpi.c | 1 + 3 files changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/bios/priv.h b/drivers/gpu/drm/nouveau/nvkm/subdev/bios/priv.h index 212800ecdce9..7d1d3c6b4b72 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/bios/priv.h +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/bios/priv.h @@ -12,6 +12,7 @@ struct nvbios_source { bool rw; bool ignore_checksum; bool no_pcir; + bool require_checksum; }; int nvbios_extend(struct nvkm_bios *, u32 length); diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/bios/shadow.c b/drivers/gpu/drm/nouveau/nvkm/subdev/bios/shadow.c index b2557e87afdd..7deb81b6dbac 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/bios/shadow.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/bios/shadow.c @@ -86,9 +86,12 @@ shadow_image(struct nvkm_bios *bios, int idx, u32 offset, struct shadow *mthd) nvbios_checksum(&bios->data[image.base], image.size)) { nvkm_debug(subdev, "%08x: checksum failed\n", image.base); - if (mthd->func->rw) + if (!mthd->func->require_checksum) { + if (mthd->func->rw) + score += 1; score += 1; - score += 1; + } else + return 0; } else { score += 3; } diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/bios/shadowacpi.c b/drivers/gpu/drm/nouveau/nvkm/subdev/bios/shadowacpi.c index 8fecb5ff22a0..06572f8ce914 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/bios/shadowacpi.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/bios/shadowacpi.c @@ -99,6 +99,7 @@ nvbios_acpi_fast = { .init = acpi_init, .read = acpi_read_fast, .rw = false, + .require_checksum = true, }; const struct nvbios_source From d32d4b3d7c9f6164170c0277740fd45bcd364bf6 Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Tue, 13 Dec 2016 09:29:55 +1000 Subject: [PATCH 156/199] drm/nouveau/ltc: protect clearing of comptags with mutex commit f4e65efc88b64c1dbca275d42a188edccedb56c6 upstream. Signed-off-by: Ben Skeggs Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/nouveau/nvkm/subdev/ltc/base.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/ltc/base.c b/drivers/gpu/drm/nouveau/nvkm/subdev/ltc/base.c index 85b1464c0194..587c52f08d3f 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/ltc/base.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/ltc/base.c @@ -47,8 +47,10 @@ nvkm_ltc_tags_clear(struct nvkm_ltc *ltc, u32 first, u32 count) BUG_ON((first > limit) || (limit >= ltc->num_tags)); + mutex_lock(<c->subdev.mutex); ltc->func->cbc_clear(ltc, first, limit); ltc->func->cbc_wait(ltc); + mutex_unlock(<c->subdev.mutex); } int From adea4a7b0f2b5534712cf69b699dfc071447269f Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Sat, 5 Nov 2016 14:33:14 +1000 Subject: [PATCH 157/199] drm/nouveau/fifo/gf100-: protect channel preempt with subdev mutex commit b27add13f500469127afdf011dbcc9c649e16e54 upstream. This avoids an issue that occurs when we're attempting to preempt multiple channels simultaneously. HW seems to ignore preempt requests while it's still processing a previous one, which, well, makes sense. Fixes random "fifo: SCHED_ERROR 0d []" + GPCCS page faults during parallel piglit runs on (at least) GM107. Signed-off-by: Ben Skeggs Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/nouveau/nvkm/engine/fifo/gpfifogf100.c | 9 ++++++--- drivers/gpu/drm/nouveau/nvkm/engine/fifo/gpfifogk104.c | 8 +++++--- 2 files changed, 11 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gpfifogf100.c b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gpfifogf100.c index e7cbc139c1d4..89976ff4b305 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gpfifogf100.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gpfifogf100.c @@ -59,6 +59,7 @@ gf100_fifo_gpfifo_engine_fini(struct nvkm_fifo_chan *base, struct nvkm_gpuobj *inst = chan->base.inst; int ret = 0; + mutex_lock(&subdev->mutex); nvkm_wr32(device, 0x002634, chan->base.chid); if (nvkm_msec(device, 2000, if (nvkm_rd32(device, 0x002634) == chan->base.chid) @@ -66,10 +67,12 @@ gf100_fifo_gpfifo_engine_fini(struct nvkm_fifo_chan *base, ) < 0) { nvkm_error(subdev, "channel %d [%s] kick timeout\n", chan->base.chid, chan->base.object.client->name); - ret = -EBUSY; - if (suspend) - return ret; + ret = -ETIMEDOUT; } + mutex_unlock(&subdev->mutex); + + if (ret && suspend) + return ret; if (offset) { nvkm_kmap(inst); diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gpfifogk104.c b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gpfifogk104.c index 0b817540a9e4..aa1692e5669f 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gpfifogk104.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gpfifogk104.c @@ -39,7 +39,9 @@ gk104_fifo_gpfifo_kick(struct gk104_fifo_chan *chan) struct nvkm_subdev *subdev = &fifo->base.engine.subdev; struct nvkm_device *device = subdev->device; struct nvkm_client *client = chan->base.object.client; + int ret = 0; + mutex_lock(&subdev->mutex); nvkm_wr32(device, 0x002634, chan->base.chid); if (nvkm_msec(device, 2000, if (!(nvkm_rd32(device, 0x002634) & 0x00100000)) @@ -47,10 +49,10 @@ gk104_fifo_gpfifo_kick(struct gk104_fifo_chan *chan) ) < 0) { nvkm_error(subdev, "channel %d [%s] kick timeout\n", chan->base.chid, client->name); - return -EBUSY; + ret = -ETIMEDOUT; } - - return 0; + mutex_unlock(&subdev->mutex); + return ret; } static u32 From cb8d63d885e978f72bb4b3204cde2ac04d4ac42c Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Thu, 3 Nov 2016 16:37:33 +1000 Subject: [PATCH 158/199] drm/nouveau/i2c/gk110b,gm10x: use the correct implementation commit 5b3800a6b763874e4a23702fb9628d3bd3315ce9 upstream. DPAUX registers moved on Kepler, these chipsets were still using the Fermi implementation for some reason. This fixes detection of hotplug/sink IRQs on DP connectors. Signed-off-by: Ben Skeggs Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/nouveau/nvkm/engine/device/base.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c b/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c index bbc9824af6e0..ece9f4102c0e 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c @@ -1833,7 +1833,7 @@ nvf1_chipset = { .fb = gk104_fb_new, .fuse = gf100_fuse_new, .gpio = gk104_gpio_new, - .i2c = gf119_i2c_new, + .i2c = gk104_i2c_new, .ibus = gk104_ibus_new, .imem = nv50_instmem_new, .ltc = gk104_ltc_new, @@ -1941,7 +1941,7 @@ nv117_chipset = { .fb = gm107_fb_new, .fuse = gm107_fuse_new, .gpio = gk104_gpio_new, - .i2c = gf119_i2c_new, + .i2c = gk104_i2c_new, .ibus = gk104_ibus_new, .imem = nv50_instmem_new, .ltc = gm107_ltc_new, From deac395e0ba87af72167b22f66892cde85968b8d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michel=20D=C3=A4nzer?= Date: Thu, 27 Oct 2016 13:03:23 +0900 Subject: [PATCH 159/199] drm/radeon: Also call cursor_move_locked when the cursor size changes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit dcab0fa64e300afa18f39cd98d05e0950f652adf upstream. The cursor size also affects the register programming. Signed-off-by: Michel Dänzer Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/radeon/radeon_cursor.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/radeon/radeon_cursor.c b/drivers/gpu/drm/radeon/radeon_cursor.c index afaf346bd50e..5dc5bfb46c1e 100644 --- a/drivers/gpu/drm/radeon/radeon_cursor.c +++ b/drivers/gpu/drm/radeon/radeon_cursor.c @@ -297,12 +297,11 @@ int radeon_crtc_cursor_set2(struct drm_crtc *crtc, return ret; } - radeon_crtc->cursor_width = width; - radeon_crtc->cursor_height = height; - radeon_lock_cursor(crtc, true); - if (hot_x != radeon_crtc->cursor_hot_x || + if (width != radeon_crtc->cursor_width || + height != radeon_crtc->cursor_height || + hot_x != radeon_crtc->cursor_hot_x || hot_y != radeon_crtc->cursor_hot_y) { int x, y; @@ -311,6 +310,8 @@ int radeon_crtc_cursor_set2(struct drm_crtc *crtc, radeon_cursor_move_locked(crtc, x, y); + radeon_crtc->cursor_width = width; + radeon_crtc->cursor_height = height; radeon_crtc->cursor_hot_x = hot_x; radeon_crtc->cursor_hot_y = hot_y; } From 62a272498dbe3ac3945bdd5bbe0428188b71ffa3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michel=20D=C3=A4nzer?= Date: Thu, 27 Oct 2016 14:54:31 +0900 Subject: [PATCH 160/199] drm/radeon: Hide the HW cursor while it's out of bounds MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 6b16cf7785a4200b1bddf4f70c9dda2efc49e278 upstream. Fixes hangs in that case under some circumstances. v2: * Only use non-0 x/yorigin if the cursor is (partially) outside of the top/left edge of the total surface with AVIVO/DCE Bugzilla: https://bugzilla.suse.com/show_bug.cgi?id=1000433 Signed-off-by: Michel Dänzer Reviewed-by: Alex Deucher (v1) Signed-off-by: Alex Deucher Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/radeon/radeon_cursor.c | 60 ++++++++++++++++++-------- drivers/gpu/drm/radeon/radeon_mode.h | 1 + 2 files changed, 43 insertions(+), 18 deletions(-) diff --git a/drivers/gpu/drm/radeon/radeon_cursor.c b/drivers/gpu/drm/radeon/radeon_cursor.c index 5dc5bfb46c1e..b5b9cb911111 100644 --- a/drivers/gpu/drm/radeon/radeon_cursor.c +++ b/drivers/gpu/drm/radeon/radeon_cursor.c @@ -90,6 +90,9 @@ static void radeon_show_cursor(struct drm_crtc *crtc) struct radeon_crtc *radeon_crtc = to_radeon_crtc(crtc); struct radeon_device *rdev = crtc->dev->dev_private; + if (radeon_crtc->cursor_out_of_bounds) + return; + if (ASIC_IS_DCE4(rdev)) { WREG32(EVERGREEN_CUR_SURFACE_ADDRESS_HIGH + radeon_crtc->crtc_offset, upper_32_bits(radeon_crtc->cursor_addr)); @@ -148,16 +151,17 @@ static int radeon_cursor_move_locked(struct drm_crtc *crtc, int x, int y) x += crtc->x; y += crtc->y; } - DRM_DEBUG("x %d y %d c->x %d c->y %d\n", x, y, crtc->x, crtc->y); - if (x < 0) { + if (x < 0) xorigin = min(-x, radeon_crtc->max_cursor_width - 1); - x = 0; - } - if (y < 0) { + if (y < 0) yorigin = min(-y, radeon_crtc->max_cursor_height - 1); - y = 0; + + if (!ASIC_IS_AVIVO(rdev)) { + x += crtc->x; + y += crtc->y; } + DRM_DEBUG("x %d y %d c->x %d c->y %d\n", x, y, crtc->x, crtc->y); /* fixed on DCE6 and newer */ if (ASIC_IS_AVIVO(rdev) && !ASIC_IS_DCE6(rdev)) { @@ -180,27 +184,31 @@ static int radeon_cursor_move_locked(struct drm_crtc *crtc, int x, int y) if (i > 1) { int cursor_end, frame_end; - cursor_end = x - xorigin + w; + cursor_end = x + w; frame_end = crtc->x + crtc->mode.crtc_hdisplay; if (cursor_end >= frame_end) { w = w - (cursor_end - frame_end); if (!(frame_end & 0x7f)) w--; - } else { - if (!(cursor_end & 0x7f)) - w--; + } else if (cursor_end <= 0) { + goto out_of_bounds; + } else if (!(cursor_end & 0x7f)) { + w--; } if (w <= 0) { - w = 1; - cursor_end = x - xorigin + w; - if (!(cursor_end & 0x7f)) { - x--; - WARN_ON_ONCE(x < 0); - } + goto out_of_bounds; } } } + if (x <= (crtc->x - w) || y <= (crtc->y - radeon_crtc->cursor_height) || + x >= (crtc->x + crtc->mode.crtc_hdisplay) || + y >= (crtc->y + crtc->mode.crtc_vdisplay)) + goto out_of_bounds; + + x += xorigin; + y += yorigin; + if (ASIC_IS_DCE4(rdev)) { WREG32(EVERGREEN_CUR_POSITION + radeon_crtc->crtc_offset, (x << 16) | y); WREG32(EVERGREEN_CUR_HOT_SPOT + radeon_crtc->crtc_offset, (xorigin << 16) | yorigin); @@ -212,6 +220,9 @@ static int radeon_cursor_move_locked(struct drm_crtc *crtc, int x, int y) WREG32(AVIVO_D1CUR_SIZE + radeon_crtc->crtc_offset, ((w - 1) << 16) | (radeon_crtc->cursor_height - 1)); } else { + x -= crtc->x; + y -= crtc->y; + if (crtc->mode.flags & DRM_MODE_FLAG_DBLSCAN) y *= 2; @@ -232,6 +243,19 @@ static int radeon_cursor_move_locked(struct drm_crtc *crtc, int x, int y) radeon_crtc->cursor_x = x; radeon_crtc->cursor_y = y; + if (radeon_crtc->cursor_out_of_bounds) { + radeon_crtc->cursor_out_of_bounds = false; + if (radeon_crtc->cursor_bo) + radeon_show_cursor(crtc); + } + + return 0; + + out_of_bounds: + if (!radeon_crtc->cursor_out_of_bounds) { + radeon_hide_cursor(crtc); + radeon_crtc->cursor_out_of_bounds = true; + } return 0; } @@ -308,12 +332,12 @@ int radeon_crtc_cursor_set2(struct drm_crtc *crtc, x = radeon_crtc->cursor_x + radeon_crtc->cursor_hot_x - hot_x; y = radeon_crtc->cursor_y + radeon_crtc->cursor_hot_y - hot_y; - radeon_cursor_move_locked(crtc, x, y); - radeon_crtc->cursor_width = width; radeon_crtc->cursor_height = height; radeon_crtc->cursor_hot_x = hot_x; radeon_crtc->cursor_hot_y = hot_y; + + radeon_cursor_move_locked(crtc, x, y); } radeon_show_cursor(crtc); diff --git a/drivers/gpu/drm/radeon/radeon_mode.h b/drivers/gpu/drm/radeon/radeon_mode.h index 7a0666ac4e23..d8f8be608c19 100644 --- a/drivers/gpu/drm/radeon/radeon_mode.h +++ b/drivers/gpu/drm/radeon/radeon_mode.h @@ -330,6 +330,7 @@ struct radeon_crtc { u16 lut_r[256], lut_g[256], lut_b[256]; bool enabled; bool can_tile; + bool cursor_out_of_bounds; uint32_t crtc_offset; struct drm_gem_object *cursor_bo; uint64_t cursor_addr; From 0e0b70f581fc8aeaf7ca7907daceb536a56c21f2 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Fri, 2 Dec 2016 00:21:48 -0500 Subject: [PATCH 161/199] drm/radeon: add additional pci revision to dpm workaround commit 8729675c00a8d13cb2094d617d70a4a4da7d83c5 upstream. New variant. Signed-off-by: Alex Deucher Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/radeon/si_dpm.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/radeon/si_dpm.c b/drivers/gpu/drm/radeon/si_dpm.c index 10191b935937..eb2a2a49974f 100644 --- a/drivers/gpu/drm/radeon/si_dpm.c +++ b/drivers/gpu/drm/radeon/si_dpm.c @@ -3026,6 +3026,7 @@ static void si_apply_state_adjust_rules(struct radeon_device *rdev, (rdev->pdev->revision == 0x80) || (rdev->pdev->revision == 0x81) || (rdev->pdev->revision == 0x83) || + (rdev->pdev->revision == 0x87) || (rdev->pdev->device == 0x6604) || (rdev->pdev->device == 0x6605)) { max_sclk = 75000; From 970dc8cdec9c4c8c91590071123275df2e80d1a2 Mon Sep 17 00:00:00 2001 From: Patrik Jakobsson Date: Tue, 1 Nov 2016 15:43:15 +0100 Subject: [PATCH 162/199] drm/gma500: Add compat ioctl commit 0a97c81a9717431e6c57ea845b59c3c345edce67 upstream. Hook up drm_compat_ioctl to support 32-bit userspace on 64-bit kernels. It turns out that N2600 and N2800 comes with 64-bit enabled. We previously assumed there where no such systems out there. Signed-off-by: Patrik Jakobsson Signed-off-by: Sean Paul Link: http://patchwork.freedesktop.org/patch/msgid/20161101144315.2955-1-patrik.r.jakobsson@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/gma500/psb_drv.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/gma500/psb_drv.c b/drivers/gpu/drm/gma500/psb_drv.c index 92e7e5795398..db98ab5cde3d 100644 --- a/drivers/gpu/drm/gma500/psb_drv.c +++ b/drivers/gpu/drm/gma500/psb_drv.c @@ -484,6 +484,9 @@ static const struct file_operations psb_gem_fops = { .open = drm_open, .release = drm_release, .unlocked_ioctl = psb_unlocked_ioctl, +#ifdef CONFIG_COMPAT + .compat_ioctl = drm_compat_ioctl, +#endif .mmap = drm_gem_mmap, .poll = drm_poll, .read = drm_read, From 1f9c91a37542b24374c160c0139b8943183124bb Mon Sep 17 00:00:00 2001 From: Russell Currey Date: Thu, 15 Dec 2016 16:12:41 +1100 Subject: [PATCH 163/199] drivers/gpu/drm/ast: Fix infinite loop if read fails commit 298360af3dab45659810fdc51aba0c9f4097e4f6 upstream. ast_get_dram_info() configures a window in order to access BMC memory. A BMC register can be configured to disallow this, and if so, causes an infinite loop in the ast driver which renders the system unusable. Fix this by erroring out if an error is detected. On powerpc systems with EEH, this leads to the device being fenced and the system continuing to operate. Signed-off-by: Russell Currey Reviewed-by: Joel Stanley Signed-off-by: Daniel Vetter Link: http://patchwork.freedesktop.org/patch/msgid/20161215051241.20815-1-ruscur@russell.cc Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/ast/ast_main.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/ast/ast_main.c b/drivers/gpu/drm/ast/ast_main.c index e0b4586a26fd..9b8f0b975ca6 100644 --- a/drivers/gpu/drm/ast/ast_main.c +++ b/drivers/gpu/drm/ast/ast_main.c @@ -223,7 +223,8 @@ static int ast_get_dram_info(struct drm_device *dev) ast_write32(ast, 0x10000, 0xfc600309); do { - ; + if (pci_channel_offline(dev->pdev)) + return -EIO; } while (ast_read32(ast, 0x10000) != 0x01); data = ast_read32(ast, 0x10004); @@ -429,7 +430,9 @@ int ast_driver_load(struct drm_device *dev, unsigned long flags) ast_detect_chip(dev, &need_post); if (ast->chip != AST1180) { - ast_get_dram_info(dev); + ret = ast_get_dram_info(dev); + if (ret) + goto out_free; ast->vram_size = ast_get_vram_info(dev); DRM_INFO("dram %d %d %d %08x\n", ast->mclk, ast->dram_type, ast->dram_bus_width, ast->vram_size); } From abb78811e242b5692fb4d8a53014b27831bb9173 Mon Sep 17 00:00:00 2001 From: Alexander Usyskin Date: Thu, 24 Nov 2016 13:34:02 +0200 Subject: [PATCH 164/199] mei: request async autosuspend at the end of enumeration commit d5f8e166c25750adc147b0adf64a62a91653438a upstream. pm_runtime_autosuspend can take synchronous or asynchronous paths, Because we are calling pm_runtime_mark_last_busy just before this most of the cases it takes the asynchronous way. However, when the FW or driver resets during already running runtime suspend, the call will result in calling to the driver's rpm callback and results in a deadlock on device_lock. The simplest fix is to replace pm_runtime_autosuspend with asynchronous pm_request_autosuspend. Signed-off-by: Alexander Usyskin Signed-off-by: Tomas Winkler Signed-off-by: Greg Kroah-Hartman --- drivers/misc/mei/client.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/misc/mei/client.c b/drivers/misc/mei/client.c index 958af84884b5..2ff39fbc70d1 100644 --- a/drivers/misc/mei/client.c +++ b/drivers/misc/mei/client.c @@ -698,7 +698,7 @@ void mei_host_client_init(struct work_struct *work) pm_runtime_mark_last_busy(dev->dev); dev_dbg(dev->dev, "rpm: autosuspend\n"); - pm_runtime_autosuspend(dev->dev); + pm_request_autosuspend(dev->dev); } /** From f4f02a856a92e1a2990a8d0f55d08a78a29c3cc0 Mon Sep 17 00:00:00 2001 From: Rabin Vincent Date: Thu, 1 Dec 2016 09:18:28 +0100 Subject: [PATCH 165/199] block: protect iterate_bdevs() against concurrent close commit af309226db916e2c6e08d3eba3fa5c34225200c4 upstream. If a block device is closed while iterate_bdevs() is handling it, the following NULL pointer dereference occurs because bdev->b_disk is NULL in bdev_get_queue(), which is called from blk_get_backing_dev_info() (in turn called by the mapping_cap_writeback_dirty() call in __filemap_fdatawrite_range()): BUG: unable to handle kernel NULL pointer dereference at 0000000000000508 IP: [] blk_get_backing_dev_info+0x10/0x20 PGD 9e62067 PUD 9ee8067 PMD 0 Oops: 0000 [#1] PREEMPT SMP DEBUG_PAGEALLOC Modules linked in: CPU: 1 PID: 2422 Comm: sync Not tainted 4.5.0-rc7+ #400 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996) task: ffff880009f4d700 ti: ffff880009f5c000 task.ti: ffff880009f5c000 RIP: 0010:[] [] blk_get_backing_dev_info+0x10/0x20 RSP: 0018:ffff880009f5fe68 EFLAGS: 00010246 RAX: 0000000000000000 RBX: ffff88000ec17a38 RCX: ffffffff81a4e940 RDX: 7fffffffffffffff RSI: 0000000000000000 RDI: ffff88000ec176c0 RBP: ffff880009f5fe68 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000001 R11: 0000000000000000 R12: ffff88000ec17860 R13: ffffffff811b25c0 R14: ffff88000ec178e0 R15: ffff88000ec17a38 FS: 00007faee505d700(0000) GS:ffff88000fb00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 000000008005003b CR2: 0000000000000508 CR3: 0000000009e8a000 CR4: 00000000000006e0 Stack: ffff880009f5feb8 ffffffff8112e7f5 0000000000000000 7fffffffffffffff 0000000000000000 0000000000000000 7fffffffffffffff 0000000000000001 ffff88000ec178e0 ffff88000ec17860 ffff880009f5fec8 ffffffff8112e81f Call Trace: [] __filemap_fdatawrite_range+0x85/0x90 [] filemap_fdatawrite+0x1f/0x30 [] fdatawrite_one_bdev+0x16/0x20 [] iterate_bdevs+0xf2/0x130 [] sys_sync+0x63/0x90 [] entry_SYSCALL_64_fastpath+0x12/0x76 Code: 0f 1f 44 00 00 48 8b 87 f0 00 00 00 55 48 89 e5 <48> 8b 80 08 05 00 00 5d RIP [] blk_get_backing_dev_info+0x10/0x20 RSP CR2: 0000000000000508 ---[ end trace 2487336ceb3de62d ]--- The crash is easily reproducible by running the following command, if an msleep(100) is inserted before the call to func() in iterate_devs(): while :; do head -c1 /dev/nullb0; done > /dev/null & while :; do sync; done Fix it by holding the bd_mutex across the func() call and only calling func() if the bdev is opened. Fixes: 5c0d6b60a0ba ("vfs: Create function for iterating over block devices") Reported-and-tested-by: Wei Fang Signed-off-by: Rabin Vincent Signed-off-by: Jan Kara Reviewed-by: Christoph Hellwig Signed-off-by: Jens Axboe Signed-off-by: Greg Kroah-Hartman --- fs/block_dev.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/fs/block_dev.c b/fs/block_dev.c index f10dbac851a1..198aea66fe71 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -1806,6 +1806,7 @@ void iterate_bdevs(void (*func)(struct block_device *, void *), void *arg) spin_lock(&blockdev_superblock->s_inode_list_lock); list_for_each_entry(inode, &blockdev_superblock->s_inodes, i_sb_list) { struct address_space *mapping = inode->i_mapping; + struct block_device *bdev; spin_lock(&inode->i_lock); if (inode->i_state & (I_FREEING|I_WILL_FREE|I_NEW) || @@ -1826,8 +1827,12 @@ void iterate_bdevs(void (*func)(struct block_device *, void *), void *arg) */ iput(old_inode); old_inode = inode; + bdev = I_BDEV(inode); - func(I_BDEV(inode), arg); + mutex_lock(&bdev->bd_mutex); + if (bdev->bd_openers) + func(bdev, arg); + mutex_unlock(&bdev->bd_mutex); spin_lock(&blockdev_superblock->s_inode_list_lock); } From edf1169bbbaaebba9e0d08cf97004b6c411e0041 Mon Sep 17 00:00:00 2001 From: "Maciej S. Szmigiero" Date: Wed, 16 Nov 2016 00:55:57 +0100 Subject: [PATCH 166/199] vt: fix Scroll Lock LED trigger name commit 31b5929d533f5183972cf57a7844b456ed996f3c upstream. There is a disagreement between drivers/tty/vt/keyboard.c and drivers/input/input-leds.c with regard to what is a Scroll Lock LED trigger name: input calls it "kbd-scrolllock", but vt calls it "kbd-scrollock" (two l's). This prevents Scroll Lock LED trigger from binding to this LED by default. Since it is a scroLL Lock LED, this interface was introduced only about a year ago and in an Internet search people seem to reference this trigger only to set it to this LED let's simply rename it to "kbd-scrolllock". Also, it looks like this was supposed to be changed before this code was merged: https://lkml.org/lkml/2015/6/9/697 but it was done only on the input side. Signed-off-by: Maciej S. Szmigiero Acked-by: Samuel Thibault Signed-off-by: Greg Kroah-Hartman --- drivers/tty/vt/keyboard.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/tty/vt/keyboard.c b/drivers/tty/vt/keyboard.c index 41987a55a538..988c564b61a8 100644 --- a/drivers/tty/vt/keyboard.c +++ b/drivers/tty/vt/keyboard.c @@ -982,7 +982,7 @@ static void kbd_led_trigger_activate(struct led_classdev *cdev) KBD_LED_TRIGGER((_led_bit) + 8, _name) static struct kbd_led_trigger kbd_led_triggers[] = { - KBD_LED_TRIGGER(VC_SCROLLOCK, "kbd-scrollock"), + KBD_LED_TRIGGER(VC_SCROLLOCK, "kbd-scrolllock"), KBD_LED_TRIGGER(VC_NUMLOCK, "kbd-numlock"), KBD_LED_TRIGGER(VC_CAPSLOCK, "kbd-capslock"), KBD_LED_TRIGGER(VC_KANALOCK, "kbd-kanalock"), From 49ea06561154e7f9610fb212d55cc1eadc66f726 Mon Sep 17 00:00:00 2001 From: Kashyap Desai Date: Fri, 21 Oct 2016 06:33:29 -0700 Subject: [PATCH 167/199] scsi: megaraid_sas: For SRIOV enabled firmware, ensure VF driver waits for 30secs before reset commit 18e1c7f68a5814442abad849abe6eacbf02ffd7c upstream. For SRIOV enabled firmware, if there is a OCR(online controller reset) possibility driver set the convert flag to 1, which is not happening if there are outstanding commands even after 180 seconds. As driver does not set convert flag to 1 and still making the OCR to run, VF(Virtual function) driver is directly writing on to the register instead of waiting for 30 seconds. Setting convert flag to 1 will cause VF driver will wait for 30 secs before going for reset. Signed-off-by: Kiran Kumar Kasturi Signed-off-by: Sumit Saxena Reviewed-by: Hannes Reinecke Reviewed-by: Tomas Henzl Signed-off-by: Martin K. Petersen Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/megaraid/megaraid_sas_fusion.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/scsi/megaraid/megaraid_sas_fusion.c b/drivers/scsi/megaraid/megaraid_sas_fusion.c index 021b994fdae8..d3c4187c1724 100644 --- a/drivers/scsi/megaraid/megaraid_sas_fusion.c +++ b/drivers/scsi/megaraid/megaraid_sas_fusion.c @@ -2648,6 +2648,7 @@ int megasas_wait_for_outstanding_fusion(struct megasas_instance *instance, dev_err(&instance->pdev->dev, "pending commands remain after waiting, " "will reset adapter scsi%d.\n", instance->host->host_no); + *convert = 1; retval = 1; } out: From bccd78746f8838db63ddd7fea65e4f76ec0434d5 Mon Sep 17 00:00:00 2001 From: Kashyap Desai Date: Fri, 21 Oct 2016 06:33:35 -0700 Subject: [PATCH 168/199] scsi: megaraid_sas: Do not set MPI2_TYPE_CUDA for JBOD FP path for FW which does not support JBOD sequence map commit d5573584429254a14708cf8375c47092b5edaf2c upstream. Signed-off-by: Sumit Saxena Reviewed-by: Hannes Reinecke Reviewed-by: Tomas Henzl Signed-off-by: Martin K. Petersen Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/megaraid/megaraid_sas_fusion.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/scsi/megaraid/megaraid_sas_fusion.c b/drivers/scsi/megaraid/megaraid_sas_fusion.c index d3c4187c1724..96007633ad39 100644 --- a/drivers/scsi/megaraid/megaraid_sas_fusion.c +++ b/drivers/scsi/megaraid/megaraid_sas_fusion.c @@ -1856,6 +1856,8 @@ megasas_build_syspd_fusion(struct megasas_instance *instance, io_request->DevHandle = pd_sync->seq[pd_index].devHandle; pRAID_Context->regLockFlags |= (MR_RL_FLAGS_SEQ_NUM_ENABLE|MR_RL_FLAGS_GRANT_DESTINATION_CUDA); + pRAID_Context->Type = MPI2_TYPE_CUDA; + pRAID_Context->nseg = 0x1; } else if (fusion->fast_path_io) { pRAID_Context->VirtualDiskTgtId = cpu_to_le16(device_id); pRAID_Context->configSeqNum = 0; @@ -1891,12 +1893,10 @@ megasas_build_syspd_fusion(struct megasas_instance *instance, pRAID_Context->timeoutValue = cpu_to_le16((os_timeout_value > timeout_limit) ? timeout_limit : os_timeout_value); - if (fusion->adapter_type == INVADER_SERIES) { - pRAID_Context->Type = MPI2_TYPE_CUDA; - pRAID_Context->nseg = 0x1; + if (fusion->adapter_type == INVADER_SERIES) io_request->IoFlags |= cpu_to_le16(MPI25_SAS_DEVICE0_FLAGS_ENABLED_FAST_PATH); - } + cmd->request_desc->SCSIIO.RequestFlags = (MPI2_REQ_DESCRIPT_FLAGS_HIGH_PRIORITY << MEGASAS_REQ_DESCRIPT_FLAGS_TYPE_SHIFT); From 1cc0f9488b913a798a53eb7e4a73211c4d64a24c Mon Sep 17 00:00:00 2001 From: Benjamin Block Date: Fri, 9 Dec 2016 17:16:31 +0100 Subject: [PATCH 169/199] scsi: zfcp: fix use-after-"free" in FC ingress path after TMF commit dac37e15b7d511e026a9313c8c46794c144103cd upstream. When SCSI EH invokes zFCP's callbacks for eh_device_reset_handler() and eh_target_reset_handler(), it expects us to relent the ownership over the given scsi_cmnd and all other scsi_cmnds within the same scope - LUN or target - when returning with SUCCESS from the callback ('release' them). SCSI EH can then reuse those commands. We did not follow this rule to release commands upon SUCCESS; and if later a reply arrived for one of those supposed to be released commands, we would still make use of the scsi_cmnd in our ingress tasklet. This will at least result in undefined behavior or a kernel panic because of a wrong kernel pointer dereference. To fix this, we NULLify all pointers to scsi_cmnds (struct zfcp_fsf_req *)->data in the matching scope if a TMF was successful. This is done under the locks (struct zfcp_adapter *)->abort_lock and (struct zfcp_reqlist *)->lock to prevent the requests from being removed from the request-hashtable, and the ingress tasklet from making use of the scsi_cmnd-pointer in zfcp_fsf_fcp_cmnd_handler(). For cases where a reply arrives during SCSI EH, but before we get a chance to NULLify the pointer - but before we return from the callback -, we assume that the code is protected from races via the CAS operation in blk_complete_request() that is called in scsi_done(). The following stacktrace shows an example for a crash resulting from the previous behavior: Unable to handle kernel pointer dereference at virtual kernel address fffffee17a672000 Oops: 0038 [#1] SMP CPU: 2 PID: 0 Comm: swapper/2 Not tainted task: 00000003f7ff5be0 ti: 00000003f3d38000 task.ti: 00000003f3d38000 Krnl PSW : 0404d00180000000 00000000001156b0 (smp_vcpu_scheduled+0x18/0x40) R:0 T:1 IO:0 EX:0 Key:0 M:1 W:0 P:0 AS:3 CC:1 PM:0 EA:3 Krnl GPRS: 000000200000007e 0000000000000000 fffffee17a671fd8 0000000300000015 ffffffff80000000 00000000005dfde8 07000003f7f80e00 000000004fa4e800 000000036ce8d8f8 000000036ce8d9c0 00000003ece8fe00 ffffffff969c9e93 00000003fffffffd 000000036ce8da10 00000000003bf134 00000003f3b07918 Krnl Code: 00000000001156a2: a7190000 lghi %r1,0 00000000001156a6: a7380015 lhi %r3,21 #00000000001156aa: e32050000008 ag %r2,0(%r5) >00000000001156b0: 482022b0 lh %r2,688(%r2) 00000000001156b4: ae123000 sigp %r1,%r2,0(%r3) 00000000001156b8: b2220020 ipm %r2 00000000001156bc: 8820001c srl %r2,28 00000000001156c0: c02700000001 xilf %r2,1 Call Trace: ([<0000000000000000>] 0x0) [<000003ff807bdb8e>] zfcp_fsf_fcp_cmnd_handler+0x3de/0x490 [zfcp] [<000003ff807be30a>] zfcp_fsf_req_complete+0x252/0x800 [zfcp] [<000003ff807c0a48>] zfcp_fsf_reqid_check+0xe8/0x190 [zfcp] [<000003ff807c194e>] zfcp_qdio_int_resp+0x66/0x188 [zfcp] [<000003ff80440c64>] qdio_kick_handler+0xdc/0x310 [qdio] [<000003ff804463d0>] __tiqdio_inbound_processing+0xf8/0xcd8 [qdio] [<0000000000141fd4>] tasklet_action+0x9c/0x170 [<0000000000141550>] __do_softirq+0xe8/0x258 [<000000000010ce0a>] do_softirq+0xba/0xc0 [<000000000014187c>] irq_exit+0xc4/0xe8 [<000000000046b526>] do_IRQ+0x146/0x1d8 [<00000000005d6a3c>] io_return+0x0/0x8 [<00000000005d6422>] vtime_stop_cpu+0x4a/0xa0 ([<0000000000000000>] 0x0) [<0000000000103d8a>] arch_cpu_idle+0xa2/0xb0 [<0000000000197f94>] cpu_startup_entry+0x13c/0x1f8 [<0000000000114782>] smp_start_secondary+0xda/0xe8 [<00000000005d6efe>] restart_int_handler+0x56/0x6c [<0000000000000000>] 0x0 Last Breaking-Event-Address: [<00000000003bf12e>] arch_spin_lock_wait+0x56/0xb0 Suggested-by: Steffen Maier Signed-off-by: Benjamin Block Fixes: ea127f9754 ("[PATCH] s390 (7/7): zfcp host adapter.") (tglx/history.git) Signed-off-by: Steffen Maier Signed-off-by: Martin K. Petersen Signed-off-by: Greg Kroah-Hartman --- drivers/s390/scsi/zfcp_dbf.h | 11 ++++++ drivers/s390/scsi/zfcp_reqlist.h | 30 ++++++++++++++++- drivers/s390/scsi/zfcp_scsi.c | 57 ++++++++++++++++++++++++++++++-- 3 files changed, 95 insertions(+), 3 deletions(-) diff --git a/drivers/s390/scsi/zfcp_dbf.h b/drivers/s390/scsi/zfcp_dbf.h index 36d07584271d..2d06b5d2c05b 100644 --- a/drivers/s390/scsi/zfcp_dbf.h +++ b/drivers/s390/scsi/zfcp_dbf.h @@ -388,4 +388,15 @@ void zfcp_dbf_scsi_devreset(char *tag, struct scsi_cmnd *scmnd, u8 flag) _zfcp_dbf_scsi(tmp_tag, 1, scmnd, NULL); } +/** + * zfcp_dbf_scsi_nullcmnd() - trace NULLify of SCSI command in dev/tgt-reset. + * @scmnd: SCSI command that was NULLified. + * @fsf_req: request that owned @scmnd. + */ +static inline void zfcp_dbf_scsi_nullcmnd(struct scsi_cmnd *scmnd, + struct zfcp_fsf_req *fsf_req) +{ + _zfcp_dbf_scsi("scfc__1", 3, scmnd, fsf_req); +} + #endif /* ZFCP_DBF_H */ diff --git a/drivers/s390/scsi/zfcp_reqlist.h b/drivers/s390/scsi/zfcp_reqlist.h index 7c2c6194dfca..703fce59befe 100644 --- a/drivers/s390/scsi/zfcp_reqlist.h +++ b/drivers/s390/scsi/zfcp_reqlist.h @@ -4,7 +4,7 @@ * Data structure and helper functions for tracking pending FSF * requests. * - * Copyright IBM Corp. 2009 + * Copyright IBM Corp. 2009, 2016 */ #ifndef ZFCP_REQLIST_H @@ -180,4 +180,32 @@ static inline void zfcp_reqlist_move(struct zfcp_reqlist *rl, spin_unlock_irqrestore(&rl->lock, flags); } +/** + * zfcp_reqlist_apply_for_all() - apply a function to every request. + * @rl: the requestlist that contains the target requests. + * @f: the function to apply to each request; the first parameter of the + * function will be the target-request; the second parameter is the same + * pointer as given with the argument @data. + * @data: freely chosen argument; passed through to @f as second parameter. + * + * Uses :c:macro:`list_for_each_entry` to iterate over the lists in the hash- + * table (not a 'safe' variant, so don't modify the list). + * + * Holds @rl->lock over the entire request-iteration. + */ +static inline void +zfcp_reqlist_apply_for_all(struct zfcp_reqlist *rl, + void (*f)(struct zfcp_fsf_req *, void *), void *data) +{ + struct zfcp_fsf_req *req; + unsigned long flags; + unsigned int i; + + spin_lock_irqsave(&rl->lock, flags); + for (i = 0; i < ZFCP_REQ_LIST_BUCKETS; i++) + list_for_each_entry(req, &rl->buckets[i], list) + f(req, data); + spin_unlock_irqrestore(&rl->lock, flags); +} + #endif /* ZFCP_REQLIST_H */ diff --git a/drivers/s390/scsi/zfcp_scsi.c b/drivers/s390/scsi/zfcp_scsi.c index 9069f98a1817..11cd18c134c1 100644 --- a/drivers/s390/scsi/zfcp_scsi.c +++ b/drivers/s390/scsi/zfcp_scsi.c @@ -3,7 +3,7 @@ * * Interface to Linux SCSI midlayer. * - * Copyright IBM Corp. 2002, 2015 + * Copyright IBM Corp. 2002, 2016 */ #define KMSG_COMPONENT "zfcp" @@ -209,6 +209,57 @@ static int zfcp_scsi_eh_abort_handler(struct scsi_cmnd *scpnt) return retval; } +struct zfcp_scsi_req_filter { + u8 tmf_scope; + u32 lun_handle; + u32 port_handle; +}; + +static void zfcp_scsi_forget_cmnd(struct zfcp_fsf_req *old_req, void *data) +{ + struct zfcp_scsi_req_filter *filter = + (struct zfcp_scsi_req_filter *)data; + + /* already aborted - prevent side-effects - or not a SCSI command */ + if (old_req->data == NULL || old_req->fsf_command != FSF_QTCB_FCP_CMND) + return; + + /* (tmf_scope == FCP_TMF_TGT_RESET || tmf_scope == FCP_TMF_LUN_RESET) */ + if (old_req->qtcb->header.port_handle != filter->port_handle) + return; + + if (filter->tmf_scope == FCP_TMF_LUN_RESET && + old_req->qtcb->header.lun_handle != filter->lun_handle) + return; + + zfcp_dbf_scsi_nullcmnd((struct scsi_cmnd *)old_req->data, old_req); + old_req->data = NULL; +} + +static void zfcp_scsi_forget_cmnds(struct zfcp_scsi_dev *zsdev, u8 tm_flags) +{ + struct zfcp_adapter *adapter = zsdev->port->adapter; + struct zfcp_scsi_req_filter filter = { + .tmf_scope = FCP_TMF_TGT_RESET, + .port_handle = zsdev->port->handle, + }; + unsigned long flags; + + if (tm_flags == FCP_TMF_LUN_RESET) { + filter.tmf_scope = FCP_TMF_LUN_RESET; + filter.lun_handle = zsdev->lun_handle; + } + + /* + * abort_lock secures against other processings - in the abort-function + * and normal cmnd-handler - of (struct zfcp_fsf_req *)->data + */ + write_lock_irqsave(&adapter->abort_lock, flags); + zfcp_reqlist_apply_for_all(adapter->req_list, zfcp_scsi_forget_cmnd, + &filter); + write_unlock_irqrestore(&adapter->abort_lock, flags); +} + static int zfcp_task_mgmt_function(struct scsi_cmnd *scpnt, u8 tm_flags) { struct zfcp_scsi_dev *zfcp_sdev = sdev_to_zfcp(scpnt->device); @@ -241,8 +292,10 @@ static int zfcp_task_mgmt_function(struct scsi_cmnd *scpnt, u8 tm_flags) if (fsf_req->status & ZFCP_STATUS_FSFREQ_TMFUNCFAILED) { zfcp_dbf_scsi_devreset("fail", scpnt, tm_flags); retval = FAILED; - } else + } else { zfcp_dbf_scsi_devreset("okay", scpnt, tm_flags); + zfcp_scsi_forget_cmnds(zfcp_sdev, tm_flags); + } zfcp_fsf_req_free(fsf_req); return retval; From 3b3739dfa69cf5ada8e5a999990e0fb51f00e6b8 Mon Sep 17 00:00:00 2001 From: Steffen Maier Date: Fri, 9 Dec 2016 17:16:32 +0100 Subject: [PATCH 170/199] scsi: zfcp: do not trace pure benign residual HBA responses at default level commit 56d23ed7adf3974f10e91b643bd230e9c65b5f79 upstream. Since quite a while, Linux issues enough SCSI commands per scsi_device which successfully return with FCP_RESID_UNDER, FSF_FCP_RSP_AVAILABLE, and SAM_STAT_GOOD. This floods the HBA trace area and we cannot see other and important HBA trace records long enough. Therefore, do not trace HBA response errors for pure benign residual under counts at the default trace level. This excludes benign residual under count combined with other validity bits set in FCP_RSP_IU, such as FCP_SNS_LEN_VAL. For all those other cases, we still do want to see both the HBA record and the corresponding SCSI record by default. Signed-off-by: Steffen Maier Fixes: a54ca0f62f95 ("[SCSI] zfcp: Redesign of the debug tracing for HBA records.") Reviewed-by: Benjamin Block Signed-off-by: Martin K. Petersen Signed-off-by: Greg Kroah-Hartman --- drivers/s390/scsi/zfcp_dbf.h | 30 ++++++++++++++++++++++++++++-- drivers/s390/scsi/zfcp_fsf.h | 3 ++- 2 files changed, 30 insertions(+), 3 deletions(-) diff --git a/drivers/s390/scsi/zfcp_dbf.h b/drivers/s390/scsi/zfcp_dbf.h index 2d06b5d2c05b..db186d44cfaf 100644 --- a/drivers/s390/scsi/zfcp_dbf.h +++ b/drivers/s390/scsi/zfcp_dbf.h @@ -2,7 +2,7 @@ * zfcp device driver * debug feature declarations * - * Copyright IBM Corp. 2008, 2015 + * Copyright IBM Corp. 2008, 2016 */ #ifndef ZFCP_DBF_H @@ -283,6 +283,30 @@ struct zfcp_dbf { struct zfcp_dbf_scsi scsi_buf; }; +/** + * zfcp_dbf_hba_fsf_resp_suppress - true if we should not trace by default + * @req: request that has been completed + * + * Returns true if FCP response with only benign residual under count. + */ +static inline +bool zfcp_dbf_hba_fsf_resp_suppress(struct zfcp_fsf_req *req) +{ + struct fsf_qtcb *qtcb = req->qtcb; + u32 fsf_stat = qtcb->header.fsf_status; + struct fcp_resp *fcp_rsp; + u8 rsp_flags, fr_status; + + if (qtcb->prefix.qtcb_type != FSF_IO_COMMAND) + return false; /* not an FCP response */ + fcp_rsp = (struct fcp_resp *)&qtcb->bottom.io.fcp_rsp; + rsp_flags = fcp_rsp->fr_flags; + fr_status = fcp_rsp->fr_status; + return (fsf_stat == FSF_FCP_RSP_AVAILABLE) && + (rsp_flags == FCP_RESID_UNDER) && + (fr_status == SAM_STAT_GOOD); +} + static inline void zfcp_dbf_hba_fsf_resp(char *tag, int level, struct zfcp_fsf_req *req) { @@ -304,7 +328,9 @@ void zfcp_dbf_hba_fsf_response(struct zfcp_fsf_req *req) zfcp_dbf_hba_fsf_resp("fs_perr", 1, req); } else if (qtcb->header.fsf_status != FSF_GOOD) { - zfcp_dbf_hba_fsf_resp("fs_ferr", 1, req); + zfcp_dbf_hba_fsf_resp("fs_ferr", + zfcp_dbf_hba_fsf_resp_suppress(req) + ? 5 : 1, req); } else if ((req->fsf_command == FSF_QTCB_OPEN_PORT_WITH_DID) || (req->fsf_command == FSF_QTCB_OPEN_LUN)) { diff --git a/drivers/s390/scsi/zfcp_fsf.h b/drivers/s390/scsi/zfcp_fsf.h index be1c04b334c5..ea3c76ac0de1 100644 --- a/drivers/s390/scsi/zfcp_fsf.h +++ b/drivers/s390/scsi/zfcp_fsf.h @@ -3,7 +3,7 @@ * * Interface to the FSF support functions. * - * Copyright IBM Corp. 2002, 2015 + * Copyright IBM Corp. 2002, 2016 */ #ifndef FSF_H @@ -78,6 +78,7 @@ #define FSF_APP_TAG_CHECK_FAILURE 0x00000082 #define FSF_REF_TAG_CHECK_FAILURE 0x00000083 #define FSF_ADAPTER_STATUS_AVAILABLE 0x000000AD +#define FSF_FCP_RSP_AVAILABLE 0x000000AF #define FSF_UNKNOWN_COMMAND 0x000000E2 #define FSF_UNKNOWN_OP_SUBTYPE 0x000000E3 #define FSF_INVALID_COMMAND_OPTION 0x000000E5 From 565ae61d8995916da836b98f8c2f12a4192525fa Mon Sep 17 00:00:00 2001 From: Steffen Maier Date: Fri, 9 Dec 2016 17:16:33 +0100 Subject: [PATCH 171/199] scsi: zfcp: fix rport unblock race with LUN recovery commit 6f2ce1c6af37191640ee3ff6e8fc39ea10352f4c upstream. It is unavoidable that zfcp_scsi_queuecommand() has to finish requests with DID_IMM_RETRY (like fc_remote_port_chkready()) during the time window when zfcp detected an unavailable rport but fc_remote_port_delete(), which is asynchronous via zfcp_scsi_schedule_rport_block(), has not yet blocked the rport. However, for the case when the rport becomes available again, we should prevent unblocking the rport too early. In contrast to other FCP LLDDs, zfcp has to open each LUN with the FCP channel hardware before it can send I/O to a LUN. So if a port already has LUNs attached and we unblock the rport just after port recovery, recoveries of LUNs behind this port can still be pending which in turn force zfcp_scsi_queuecommand() to unnecessarily finish requests with DID_IMM_RETRY. This also opens a time window with unblocked rport (until the followup LUN reopen recovery has finished). If a scsi_cmnd timeout occurs during this time window fc_timed_out() cannot work as desired and such command would indeed time out and trigger scsi_eh. This prevents a clean and timely path failover. This should not happen if the path issue can be recovered on FC transport layer such as path issues involving RSCNs. Fix this by only calling zfcp_scsi_schedule_rport_register(), to asynchronously trigger fc_remote_port_add(), after all LUN recoveries as children of the rport have finished and no new recoveries of equal or higher order were triggered meanwhile. Finished intentionally includes any recovery result no matter if successful or failed (still unblock rport so other successful LUNs work). For simplicity, we check after each finished LUN recovery if there is another LUN recovery pending on the same port and then do nothing. We handle the special case of a successful recovery of a port without LUN children the same way without changing this case's semantics. For debugging we introduce 2 new trace records written if the rport unblock attempt was aborted due to still unfinished or freshly triggered recovery. The records are only written above the default trace level. Benjamin noticed the important special case of new recovery that can be triggered between having given up the erp_lock and before calling zfcp_erp_action_cleanup() within zfcp_erp_strategy(). We must avoid the following sequence: ERP thread rport_work other context ------------------------- -------------- -------------------------------- port is unblocked, rport still blocked, due to pending/running ERP action, so ((port->status & ...UNBLOCK) != 0) and (port->rport == NULL) unlock ERP zfcp_erp_action_cleanup() case ZFCP_ERP_ACTION_REOPEN_LUN: zfcp_erp_try_rport_unblock() ((status & ...UNBLOCK) != 0) [OLD!] zfcp_erp_port_reopen() lock ERP zfcp_erp_port_block() port->status clear ...UNBLOCK unlock ERP zfcp_scsi_schedule_rport_block() port->rport_task = RPORT_DEL queue_work(rport_work) zfcp_scsi_rport_work() (port->rport_task != RPORT_ADD) port->rport_task = RPORT_NONE zfcp_scsi_rport_block() if (!port->rport) return zfcp_scsi_schedule_rport_register() port->rport_task = RPORT_ADD queue_work(rport_work) zfcp_scsi_rport_work() (port->rport_task == RPORT_ADD) port->rport_task = RPORT_NONE zfcp_scsi_rport_register() (port->rport == NULL) rport = fc_remote_port_add() port->rport = rport; Now the rport was erroneously unblocked while the zfcp_port is blocked. This is another situation we want to avoid due to scsi_eh potential. This state would at least remain until the new recovery from the other context finished successfully, or potentially forever if it failed. In order to close this race, we take the erp_lock inside zfcp_erp_try_rport_unblock() when checking the status of zfcp_port or LUN. With that, the possible corresponding rport state sequences would be: (unblock[ERP thread],block[other context]) if the ERP thread gets erp_lock first and still sees ((port->status & ...UNBLOCK) != 0), (block[other context],NOP[ERP thread]) if the ERP thread gets erp_lock after the other context has already cleard ...UNBLOCK from port->status. Since checking fields of struct erp_action is unsafe because they could have been overwritten (re-used for new recovery) meanwhile, we only check status of zfcp_port and LUN since these are only changed under erp_lock elsewhere. Regarding the check of the proper status flags (port or port_forced are similar to the shown adapter recovery): [zfcp_erp_adapter_shutdown()] zfcp_erp_adapter_reopen() zfcp_erp_adapter_block() * clear UNBLOCK ---------------------------------------+ zfcp_scsi_schedule_rports_block() | write_lock_irqsave(&adapter->erp_lock, flags);-------+ | zfcp_erp_action_enqueue() | | zfcp_erp_setup_act() | | * set ERP_INUSE -----------------------------------|--|--+ write_unlock_irqrestore(&adapter->erp_lock, flags);--+ | | .context-switch. | | zfcp_erp_thread() | | zfcp_erp_strategy() | | write_lock_irqsave(&adapter->erp_lock, flags);------+ | | ... | | | zfcp_erp_strategy_check_target() | | | zfcp_erp_strategy_check_adapter() | | | zfcp_erp_adapter_unblock() | | | * set UNBLOCK -----------------------------------|--+ | zfcp_erp_action_dequeue() | | * clear ERP_INUSE ---------------------------------|-----+ ... | write_unlock_irqrestore(&adapter->erp_lock, flags);-+ Hence, we should check for both UNBLOCK and ERP_INUSE because they are interleaved. Also we need to explicitly check ERP_FAILED for the link down case which currently does not clear the UNBLOCK flag in zfcp_fsf_link_down_info_eval(). Signed-off-by: Steffen Maier Fixes: 8830271c4819 ("[SCSI] zfcp: Dont fail SCSI commands when transitioning to blocked fc_rport") Fixes: a2fa0aede07c ("[SCSI] zfcp: Block FC transport rports early on errors") Fixes: 5f852be9e11d ("[SCSI] zfcp: Fix deadlock between zfcp ERP and SCSI") Fixes: 338151e06608 ("[SCSI] zfcp: make use of fc_remote_port_delete when target port is unavailable") Fixes: 3859f6a248cb ("[PATCH] zfcp: add rports to enable scsi_add_device to work again") Reviewed-by: Benjamin Block Signed-off-by: Martin K. Petersen Signed-off-by: Greg Kroah-Hartman --- drivers/s390/scsi/zfcp_dbf.c | 17 ++++++++-- drivers/s390/scsi/zfcp_erp.c | 61 +++++++++++++++++++++++++++++++++-- drivers/s390/scsi/zfcp_ext.h | 4 ++- drivers/s390/scsi/zfcp_scsi.c | 4 +-- 4 files changed, 77 insertions(+), 9 deletions(-) diff --git a/drivers/s390/scsi/zfcp_dbf.c b/drivers/s390/scsi/zfcp_dbf.c index 581001989937..d5bf36ec8a75 100644 --- a/drivers/s390/scsi/zfcp_dbf.c +++ b/drivers/s390/scsi/zfcp_dbf.c @@ -289,11 +289,12 @@ void zfcp_dbf_rec_trig(char *tag, struct zfcp_adapter *adapter, /** - * zfcp_dbf_rec_run - trace event related to running recovery + * zfcp_dbf_rec_run_lvl - trace event related to running recovery + * @level: trace level to be used for event * @tag: identifier for event * @erp: erp_action running */ -void zfcp_dbf_rec_run(char *tag, struct zfcp_erp_action *erp) +void zfcp_dbf_rec_run_lvl(int level, char *tag, struct zfcp_erp_action *erp) { struct zfcp_dbf *dbf = erp->adapter->dbf; struct zfcp_dbf_rec *rec = &dbf->rec_buf; @@ -319,10 +320,20 @@ void zfcp_dbf_rec_run(char *tag, struct zfcp_erp_action *erp) else rec->u.run.rec_count = atomic_read(&erp->adapter->erp_counter); - debug_event(dbf->rec, 1, rec, sizeof(*rec)); + debug_event(dbf->rec, level, rec, sizeof(*rec)); spin_unlock_irqrestore(&dbf->rec_lock, flags); } +/** + * zfcp_dbf_rec_run - trace event related to running recovery + * @tag: identifier for event + * @erp: erp_action running + */ +void zfcp_dbf_rec_run(char *tag, struct zfcp_erp_action *erp) +{ + zfcp_dbf_rec_run_lvl(1, tag, erp); +} + /** * zfcp_dbf_rec_run_wka - trace wka port event with info like running recovery * @tag: identifier for event diff --git a/drivers/s390/scsi/zfcp_erp.c b/drivers/s390/scsi/zfcp_erp.c index a59d678125bd..7ccfce559034 100644 --- a/drivers/s390/scsi/zfcp_erp.c +++ b/drivers/s390/scsi/zfcp_erp.c @@ -3,7 +3,7 @@ * * Error Recovery Procedures (ERP). * - * Copyright IBM Corp. 2002, 2015 + * Copyright IBM Corp. 2002, 2016 */ #define KMSG_COMPONENT "zfcp" @@ -1204,6 +1204,62 @@ static void zfcp_erp_action_dequeue(struct zfcp_erp_action *erp_action) } } +/** + * zfcp_erp_try_rport_unblock - unblock rport if no more/new recovery + * @port: zfcp_port whose fc_rport we should try to unblock + */ +static void zfcp_erp_try_rport_unblock(struct zfcp_port *port) +{ + unsigned long flags; + struct zfcp_adapter *adapter = port->adapter; + int port_status; + struct Scsi_Host *shost = adapter->scsi_host; + struct scsi_device *sdev; + + write_lock_irqsave(&adapter->erp_lock, flags); + port_status = atomic_read(&port->status); + if ((port_status & ZFCP_STATUS_COMMON_UNBLOCKED) == 0 || + (port_status & (ZFCP_STATUS_COMMON_ERP_INUSE | + ZFCP_STATUS_COMMON_ERP_FAILED)) != 0) { + /* new ERP of severity >= port triggered elsewhere meanwhile or + * local link down (adapter erp_failed but not clear unblock) + */ + zfcp_dbf_rec_run_lvl(4, "ertru_p", &port->erp_action); + write_unlock_irqrestore(&adapter->erp_lock, flags); + return; + } + spin_lock(shost->host_lock); + __shost_for_each_device(sdev, shost) { + struct zfcp_scsi_dev *zsdev = sdev_to_zfcp(sdev); + int lun_status; + + if (zsdev->port != port) + continue; + /* LUN under port of interest */ + lun_status = atomic_read(&zsdev->status); + if ((lun_status & ZFCP_STATUS_COMMON_ERP_FAILED) != 0) + continue; /* unblock rport despite failed LUNs */ + /* LUN recovery not given up yet [maybe follow-up pending] */ + if ((lun_status & ZFCP_STATUS_COMMON_UNBLOCKED) == 0 || + (lun_status & ZFCP_STATUS_COMMON_ERP_INUSE) != 0) { + /* LUN blocked: + * not yet unblocked [LUN recovery pending] + * or meanwhile blocked [new LUN recovery triggered] + */ + zfcp_dbf_rec_run_lvl(4, "ertru_l", &zsdev->erp_action); + spin_unlock(shost->host_lock); + write_unlock_irqrestore(&adapter->erp_lock, flags); + return; + } + } + /* now port has no child or all children have completed recovery, + * and no ERP of severity >= port was meanwhile triggered elsewhere + */ + zfcp_scsi_schedule_rport_register(port); + spin_unlock(shost->host_lock); + write_unlock_irqrestore(&adapter->erp_lock, flags); +} + static void zfcp_erp_action_cleanup(struct zfcp_erp_action *act, int result) { struct zfcp_adapter *adapter = act->adapter; @@ -1214,6 +1270,7 @@ static void zfcp_erp_action_cleanup(struct zfcp_erp_action *act, int result) case ZFCP_ERP_ACTION_REOPEN_LUN: if (!(act->status & ZFCP_STATUS_ERP_NO_REF)) scsi_device_put(sdev); + zfcp_erp_try_rport_unblock(port); break; case ZFCP_ERP_ACTION_REOPEN_PORT: @@ -1224,7 +1281,7 @@ static void zfcp_erp_action_cleanup(struct zfcp_erp_action *act, int result) */ if (act->step != ZFCP_ERP_STEP_UNINITIALIZED) if (result == ZFCP_ERP_SUCCEEDED) - zfcp_scsi_schedule_rport_register(port); + zfcp_erp_try_rport_unblock(port); /* fall through */ case ZFCP_ERP_ACTION_REOPEN_PORT_FORCED: put_device(&port->dev); diff --git a/drivers/s390/scsi/zfcp_ext.h b/drivers/s390/scsi/zfcp_ext.h index c8fed9fa1cca..21c8c689b02b 100644 --- a/drivers/s390/scsi/zfcp_ext.h +++ b/drivers/s390/scsi/zfcp_ext.h @@ -3,7 +3,7 @@ * * External function declarations. * - * Copyright IBM Corp. 2002, 2015 + * Copyright IBM Corp. 2002, 2016 */ #ifndef ZFCP_EXT_H @@ -35,6 +35,8 @@ extern void zfcp_dbf_adapter_unregister(struct zfcp_adapter *); extern void zfcp_dbf_rec_trig(char *, struct zfcp_adapter *, struct zfcp_port *, struct scsi_device *, u8, u8); extern void zfcp_dbf_rec_run(char *, struct zfcp_erp_action *); +extern void zfcp_dbf_rec_run_lvl(int level, char *tag, + struct zfcp_erp_action *erp); extern void zfcp_dbf_rec_run_wka(char *, struct zfcp_fc_wka_port *, u64); extern void zfcp_dbf_hba_fsf_uss(char *, struct zfcp_fsf_req *); extern void zfcp_dbf_hba_fsf_res(char *, int, struct zfcp_fsf_req *); diff --git a/drivers/s390/scsi/zfcp_scsi.c b/drivers/s390/scsi/zfcp_scsi.c index 11cd18c134c1..07ffdbb5107f 100644 --- a/drivers/s390/scsi/zfcp_scsi.c +++ b/drivers/s390/scsi/zfcp_scsi.c @@ -88,9 +88,7 @@ int zfcp_scsi_queuecommand(struct Scsi_Host *shost, struct scsi_cmnd *scpnt) } if (unlikely(!(status & ZFCP_STATUS_COMMON_UNBLOCKED))) { - /* This could be either - * open LUN pending: this is temporary, will result in - * open LUN or ERP_FAILED, so retry command + /* This could be * call to rport_delete pending: mimic retry from * fc_remote_port_chkready until rport is BLOCKED */ From dbb67e1d585d81a720fa75cee767e49dc4a50164 Mon Sep 17 00:00:00 2001 From: Wei Fang Date: Tue, 13 Dec 2016 09:25:21 +0800 Subject: [PATCH 172/199] scsi: avoid a permanent stop of the scsi device's request queue commit d2a145252c52792bc59e4767b486b26c430af4bb upstream. A race between scanning and fc_remote_port_delete() may result in a permanent stop if the device gets blocked before scsi_sysfs_add_sdev() and unblocked after. The reason is that blocking a device sets both the SDEV_BLOCKED state and the QUEUE_FLAG_STOPPED. However, scsi_sysfs_add_sdev() unconditionally sets SDEV_RUNNING which causes the device to be ignored by scsi_target_unblock() and thus never have its QUEUE_FLAG_STOPPED cleared leading to a device which is apparently running but has a stopped queue. We actually have two places where SDEV_RUNNING is set: once in scsi_add_lun() which respects the blocked flag and once in scsi_sysfs_add_sdev() which doesn't. Since the second set is entirely spurious, simply remove it to fix the problem. Reported-by: Zengxi Chen Signed-off-by: Wei Fang Reviewed-by: Ewan D. Milne Signed-off-by: Martin K. Petersen Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/scsi_sysfs.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/drivers/scsi/scsi_sysfs.c b/drivers/scsi/scsi_sysfs.c index 7232d43e2207..4477e999ec70 100644 --- a/drivers/scsi/scsi_sysfs.c +++ b/drivers/scsi/scsi_sysfs.c @@ -1031,10 +1031,6 @@ int scsi_sysfs_add_sdev(struct scsi_device *sdev) struct request_queue *rq = sdev->request_queue; struct scsi_target *starget = sdev->sdev_target; - error = scsi_device_set_state(sdev, SDEV_RUNNING); - if (error) - return error; - error = scsi_target_add(starget); if (error) return error; From 429a533a3d9ac849b4c9c3ceed14aa4282301748 Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Mon, 19 Dec 2016 11:38:38 -0800 Subject: [PATCH 173/199] ARC: mm: arc700: Don't assume 2 colours for aliasing VIPT dcache commit 08fe007968b2b45e831daf74899f79a54d73f773 upstream. An ARC700 customer reported linux boot crashes when upgrading to bigger L1 dcache (64K from 32K). Turns out they had an aliasing VIPT config and current code only assumed 2 colours, while theirs had 4. So default to 4 colours and complain if there are fewer. Ideally this needs to be a Kconfig option, but heck that's too much of hassle for a single user. Signed-off-by: Vineet Gupta Signed-off-by: Greg Kroah-Hartman --- arch/arc/include/asm/cacheflush.h | 6 ++++-- arch/arc/mm/cache.c | 11 ++++++++--- 2 files changed, 12 insertions(+), 5 deletions(-) diff --git a/arch/arc/include/asm/cacheflush.h b/arch/arc/include/asm/cacheflush.h index fbe3587c4f36..56aeb5efe604 100644 --- a/arch/arc/include/asm/cacheflush.h +++ b/arch/arc/include/asm/cacheflush.h @@ -85,6 +85,10 @@ void flush_anon_page(struct vm_area_struct *vma, */ #define PG_dc_clean PG_arch_1 +#define CACHE_COLORS_NUM 4 +#define CACHE_COLORS_MSK (CACHE_COLORS_NUM - 1) +#define CACHE_COLOR(addr) (((unsigned long)(addr) >> (PAGE_SHIFT)) & CACHE_COLORS_MSK) + /* * Simple wrapper over config option * Bootup code ensures that hardware matches kernel configuration @@ -94,8 +98,6 @@ static inline int cache_is_vipt_aliasing(void) return IS_ENABLED(CONFIG_ARC_CACHE_VIPT_ALIASING); } -#define CACHE_COLOR(addr) (((unsigned long)(addr) >> (PAGE_SHIFT)) & 1) - /* * checks if two addresses (after page aligning) index into same cache set */ diff --git a/arch/arc/mm/cache.c b/arch/arc/mm/cache.c index aaf1e2d1d900..d81b6d7e11e7 100644 --- a/arch/arc/mm/cache.c +++ b/arch/arc/mm/cache.c @@ -960,11 +960,16 @@ void arc_cache_init(void) /* check for D-Cache aliasing on ARCompact: ARCv2 has PIPT */ if (is_isa_arcompact()) { int handled = IS_ENABLED(CONFIG_ARC_CACHE_VIPT_ALIASING); + int num_colors = dc->sz_k/dc->assoc/TO_KB(PAGE_SIZE); - if (dc->alias && !handled) - panic("Enable CONFIG_ARC_CACHE_VIPT_ALIASING\n"); - else if (!dc->alias && handled) + if (dc->alias) { + if (!handled) + panic("Enable CONFIG_ARC_CACHE_VIPT_ALIASING\n"); + if (CACHE_COLORS_NUM != num_colors) + panic("CACHE_COLORS_NUM not optimized for config\n"); + } else if (!dc->alias && handled) { panic("Disable CONFIG_ARC_CACHE_VIPT_ALIASING\n"); + } } } From 656c9abd574bc4be3890cd2d104a422dd93ca3aa Mon Sep 17 00:00:00 2001 From: Yves-Alexis Perez Date: Fri, 11 Nov 2016 11:28:40 -0800 Subject: [PATCH 174/199] firmware: fix usermode helper fallback loading commit 2e700f8d85975f516ccaad821278c1fe66b2cc98 upstream. When you use the firmware usermode helper fallback with a timeout value set to a value greater than INT_MAX (2147483647) a cast overflow issue causes the timeout value to go negative and breaks all usermode helper loading. This regression was introduced through commit 68ff2a00dbf5 ("firmware_loader: handle timeout via wait_for_completion_interruptible_timeout()") on kernel v4.0. The firmware_class drivers relies on the firmware usermode helper fallback as a mechanism to look for firmware if the direct filesystem search failed only if: a) You've enabled CONFIG_FW_LOADER_USER_HELPER_FALLBACK (not many distros): Then all of these callers will rely on the fallback mechanism in case the firmware is not found through an initial direct filesystem lookup: o request_firmware() o request_firmware_into_buf() o request_firmware_nowait() b) If you've only enabled CONFIG_FW_LOADER_USER_HELPER (most distros): Then only callers using request_firmware_nowait() with the second argument set to false, this explicitly is requesting the UMH firmware fallback to be relied on in case the first filesystem lookup fails. Using Coccinelle SmPL grammar we have identified only two drivers explicitly requesting the UMH firmware fallback mechanism: - drivers/firmware/dell_rbu.c - drivers/leds/leds-lp55xx-common.c Since most distributions only enable CONFIG_FW_LOADER_USER_HELPER the biggest impact of this regression are users of the dell_rbu and leds-lp55xx-common device driver which required the UMH to find their respective needed firmwares. The default timeout for the UMH is set to 60 seconds always, as of commit 68ff2a00dbf5 ("firmware_loader: handle timeout via wait_for_completion_interruptible_timeout()") the timeout was bumped to MAX_JIFFY_OFFSET ((LONG_MAX >> 1)-1). Additionally the MAX_JIFFY_OFFSET value was also used if the timeout was configured by a user to 0. The following works: echo 2147483647 > /sys/class/firmware/timeout But both of the following set the timeout to MAX_JIFFY_OFFSET even if we display 0 back to userspace: echo 2147483648 > /sys/class/firmware/timeout cat /sys/class/firmware/timeout 0 echo 0> /sys/class/firmware/timeout cat /sys/class/firmware/timeout 0 A max value of INT_MAX (2147483647) seconds is therefore implicit due to the another cast with simple_strtol(). This fixes the secondary cast (the first one is simple_strtol() but its an issue only by forcing an implicit limit) by re-using the timeout variable and only setting retval in appropriate cases. Lastly worth noting systemd had ripped out the UMH firmware fallback mechanism from udev since udev 2014 via commit be2ea723b1d023b3d ("udev: remove userspace firmware loading support"), so as of systemd v217. Signed-off-by: Yves-Alexis Perez Fixes: 68ff2a00dbf5 "firmware_loader: handle timeout via wait_for_completion_interruptible_timeout()" Cc: Luis R. Rodriguez Cc: Ming Lei Cc: Bjorn Andersson Cc: Greg Kroah-Hartman Acked-by: Luis R. Rodriguez Reviewed-by: Bjorn Andersson [mcgrof@kernel.org: gave commit log a whole lot of love] Signed-off-by: Luis R. Rodriguez Signed-off-by: Greg Kroah-Hartman Signed-off-by: Greg Kroah-Hartman --- drivers/base/firmware_class.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/base/firmware_class.c b/drivers/base/firmware_class.c index 8524450e75bd..ccfd268148a8 100644 --- a/drivers/base/firmware_class.c +++ b/drivers/base/firmware_class.c @@ -942,13 +942,14 @@ static int _request_firmware_load(struct firmware_priv *fw_priv, timeout = MAX_JIFFY_OFFSET; } - retval = wait_for_completion_interruptible_timeout(&buf->completion, + timeout = wait_for_completion_interruptible_timeout(&buf->completion, timeout); - if (retval == -ERESTARTSYS || !retval) { + if (timeout == -ERESTARTSYS || !timeout) { + retval = timeout; mutex_lock(&fw_lock); fw_load_abort(fw_priv); mutex_unlock(&fw_lock); - } else if (retval > 0) { + } else if (timeout > 0) { retval = 0; } From b988320dab539678a19466834012f75d7883cfc7 Mon Sep 17 00:00:00 2001 From: Gerald Schaefer Date: Mon, 21 Nov 2016 12:13:58 +0100 Subject: [PATCH 175/199] s390/vmlogrdr: fix IUCV buffer allocation commit 5457e03de918f7a3e294eb9d26a608ab8a579976 upstream. The buffer for iucv_message_receive() needs to be below 2 GB. In __iucv_message_receive(), the buffer address is casted to an u32, which would result in either memory corruption or an addressing exception when using addresses >= 2 GB. Fix this by using GFP_DMA for the buffer allocation. Signed-off-by: Gerald Schaefer Signed-off-by: Martin Schwidefsky Signed-off-by: Greg Kroah-Hartman --- drivers/s390/char/vmlogrdr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/s390/char/vmlogrdr.c b/drivers/s390/char/vmlogrdr.c index 799c1524c779..4b8de3e70cf2 100644 --- a/drivers/s390/char/vmlogrdr.c +++ b/drivers/s390/char/vmlogrdr.c @@ -872,7 +872,7 @@ static int __init vmlogrdr_init(void) goto cleanup; for (i=0; i < MAXMINOR; ++i ) { - sys_ser[i].buffer = (char *) get_zeroed_page(GFP_KERNEL); + sys_ser[i].buffer = (char *) get_zeroed_page(GFP_KERNEL | GFP_DMA); if (!sys_ser[i].buffer) { rc = -ENOMEM; break; From d78006d2345f87889918a8a7aa3764628ff84263 Mon Sep 17 00:00:00 2001 From: Josh Cartwright Date: Thu, 13 Oct 2016 10:44:33 -0500 Subject: [PATCH 176/199] sc16is7xx: Drop bogus use of IRQF_ONESHOT commit 04da73803c05dc1150ccc31cbf93e8cd56679c09 upstream. The use of IRQF_ONESHOT when registering an interrupt handler with request_irq() is non-sensical. Not only that, it also prevents the handler from being threaded when it otherwise should be w/ IRQ_FORCED_THREADING is enabled. This causes the following deadlock observed by Sean Nyekjaer on -rt: Internal error: Oops - BUG: 0 [#1] PREEMPT SMP ARM [..] rt_spin_lock_slowlock from queue_kthread_work queue_kthread_work from sc16is7xx_irq sc16is7xx_irq [sc16is7xx] from handle_irq_event_percpu handle_irq_event_percpu from handle_irq_event handle_irq_event from handle_level_irq handle_level_irq from generic_handle_irq generic_handle_irq from mxc_gpio_irq_handler mxc_gpio_irq_handler from mx3_gpio_irq_handler mx3_gpio_irq_handler from generic_handle_irq generic_handle_irq from __handle_domain_irq __handle_domain_irq from gic_handle_irq gic_handle_irq from __irq_svc __irq_svc from rt_spin_unlock rt_spin_unlock from kthread_worker_fn kthread_worker_fn from kthread kthread from ret_from_fork Fixes: 9e6f4ca3e567 ("sc16is7xx: use kthread_worker for tx_work and irq") Reported-by: Sean Nyekjaer Signed-off-by: Josh Cartwright Cc: linux-rt-users@vger.kernel.org Cc: Jakub Kicinski Cc: linux-serial@vger.kernel.org Cc: Sebastian Andrzej Siewior Signed-off-by: Julia Cartwright Acked-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/sc16is7xx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/tty/serial/sc16is7xx.c b/drivers/tty/serial/sc16is7xx.c index edb5305b9d4d..7d5ee8a13ac6 100644 --- a/drivers/tty/serial/sc16is7xx.c +++ b/drivers/tty/serial/sc16is7xx.c @@ -1230,7 +1230,7 @@ static int sc16is7xx_probe(struct device *dev, /* Setup interrupt */ ret = devm_request_irq(dev, irq, sc16is7xx_irq, - IRQF_ONESHOT | flags, dev_name(dev), s); + flags, dev_name(dev), s); if (!ret) return 0; From 1c8841c9b7d27da058c62da21c189f4bad4edac2 Mon Sep 17 00:00:00 2001 From: Konstantin Khlebnikov Date: Sun, 27 Nov 2016 19:32:32 +0300 Subject: [PATCH 177/199] md/raid5: limit request size according to implementation limits commit e8d7c33232e5fdfa761c3416539bc5b4acd12db5 upstream. Current implementation employ 16bit counter of active stripes in lower bits of bio->bi_phys_segments. If request is big enough to overflow this counter bio will be completed and freed too early. Fortunately this not happens in default configuration because several other limits prevent that: stripe_cache_size * nr_disks effectively limits count of active stripes. And small max_sectors_kb at lower disks prevent that during normal read/write operations. Overflow easily happens in discard if it's enabled by module parameter "devices_handle_discard_safely" and stripe_cache_size is set big enough. This patch limits requests size with 256Mb - 8Kb to prevent overflows. Signed-off-by: Konstantin Khlebnikov Cc: Shaohua Li Cc: Neil Brown Signed-off-by: Shaohua Li Signed-off-by: Greg Kroah-Hartman --- drivers/md/raid5.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 10ce885445f6..7af976934441 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -6980,6 +6980,15 @@ static int run(struct mddev *mddev) stripe = (stripe | (stripe-1)) + 1; mddev->queue->limits.discard_alignment = stripe; mddev->queue->limits.discard_granularity = stripe; + + /* + * We use 16-bit counter of active stripes in bi_phys_segments + * (minus one for over-loaded initialization) + */ + blk_queue_max_hw_sectors(mddev->queue, 0xfffe * STRIPE_SECTORS); + blk_queue_max_discard_sectors(mddev->queue, + 0xfffe * STRIPE_SECTORS); + /* * unaligned part of discard request will be ignored, so can't * guarantee discard_zeroes_data From ddf5718adfb873ecf41b38e46197498bcc8e2262 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Mon, 7 Nov 2016 15:09:58 +1100 Subject: [PATCH 178/199] KVM: PPC: Book3S HV: Save/restore XER in checkpointed register state commit 0d808df06a44200f52262b6eb72bcb6042f5a7c5 upstream. When switching from/to a guest that has a transaction in progress, we need to save/restore the checkpointed register state. Although XER is part of the CPU state that gets checkpointed, the code that does this saving and restoring doesn't save/restore XER. This fixes it by saving and restoring the XER. To allow userspace to read/write the checkpointed XER value, we also add a new ONE_REG specifier. The visible effect of this bug is that the guest may see its XER value being corrupted when it uses transactions. Fixes: e4e38121507a ("KVM: PPC: Book3S HV: Add transactional memory support") Fixes: 0a8eccefcb34 ("KVM: PPC: Book3S HV: Add missing code for transaction reclaim on guest exit") Signed-off-by: Paul Mackerras Reviewed-by: Thomas Huth Signed-off-by: Paul Mackerras Signed-off-by: Greg Kroah-Hartman --- Documentation/virtual/kvm/api.txt | 1 + arch/powerpc/include/asm/kvm_host.h | 1 + arch/powerpc/include/uapi/asm/kvm.h | 1 + arch/powerpc/kernel/asm-offsets.c | 1 + arch/powerpc/kvm/book3s_hv.c | 6 ++++++ arch/powerpc/kvm/book3s_hv_rmhandlers.S | 4 ++++ 6 files changed, 14 insertions(+) diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt index 092ee9fbaf2b..df8ab4fc240a 100644 --- a/Documentation/virtual/kvm/api.txt +++ b/Documentation/virtual/kvm/api.txt @@ -1991,6 +1991,7 @@ registers, find a list below: PPC | KVM_REG_PPC_TM_VSCR | 32 PPC | KVM_REG_PPC_TM_DSCR | 64 PPC | KVM_REG_PPC_TM_TAR | 64 + PPC | KVM_REG_PPC_TM_XER | 64 | | MIPS | KVM_REG_MIPS_R0 | 64 ... diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index cfa758c6b4f6..a92d95aee42d 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h @@ -545,6 +545,7 @@ struct kvm_vcpu_arch { u64 tfiar; u32 cr_tm; + u64 xer_tm; u64 lr_tm; u64 ctr_tm; u64 amr_tm; diff --git a/arch/powerpc/include/uapi/asm/kvm.h b/arch/powerpc/include/uapi/asm/kvm.h index ab4d4732c492..720b71a636c8 100644 --- a/arch/powerpc/include/uapi/asm/kvm.h +++ b/arch/powerpc/include/uapi/asm/kvm.h @@ -587,6 +587,7 @@ struct kvm_get_htab_header { #define KVM_REG_PPC_TM_VSCR (KVM_REG_PPC_TM | KVM_REG_SIZE_U32 | 0x67) #define KVM_REG_PPC_TM_DSCR (KVM_REG_PPC_TM | KVM_REG_SIZE_U64 | 0x68) #define KVM_REG_PPC_TM_TAR (KVM_REG_PPC_TM | KVM_REG_SIZE_U64 | 0x69) +#define KVM_REG_PPC_TM_XER (KVM_REG_PPC_TM | KVM_REG_SIZE_U64 | 0x6a) /* PPC64 eXternal Interrupt Controller Specification */ #define KVM_DEV_XICS_GRP_SOURCES 1 /* 64-bit source attributes */ diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index 221d584d089f..40da69163d51 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -584,6 +584,7 @@ int main(void) DEFINE(VCPU_VRS_TM, offsetof(struct kvm_vcpu, arch.vr_tm.vr)); DEFINE(VCPU_VRSAVE_TM, offsetof(struct kvm_vcpu, arch.vrsave_tm)); DEFINE(VCPU_CR_TM, offsetof(struct kvm_vcpu, arch.cr_tm)); + DEFINE(VCPU_XER_TM, offsetof(struct kvm_vcpu, arch.xer_tm)); DEFINE(VCPU_LR_TM, offsetof(struct kvm_vcpu, arch.lr_tm)); DEFINE(VCPU_CTR_TM, offsetof(struct kvm_vcpu, arch.ctr_tm)); DEFINE(VCPU_AMR_TM, offsetof(struct kvm_vcpu, arch.amr_tm)); diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index a7352b59e6f9..3c3a367b6e59 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -1186,6 +1186,9 @@ static int kvmppc_get_one_reg_hv(struct kvm_vcpu *vcpu, u64 id, case KVM_REG_PPC_TM_CR: *val = get_reg_val(id, vcpu->arch.cr_tm); break; + case KVM_REG_PPC_TM_XER: + *val = get_reg_val(id, vcpu->arch.xer_tm); + break; case KVM_REG_PPC_TM_LR: *val = get_reg_val(id, vcpu->arch.lr_tm); break; @@ -1393,6 +1396,9 @@ static int kvmppc_set_one_reg_hv(struct kvm_vcpu *vcpu, u64 id, case KVM_REG_PPC_TM_CR: vcpu->arch.cr_tm = set_reg_val(id, *val); break; + case KVM_REG_PPC_TM_XER: + vcpu->arch.xer_tm = set_reg_val(id, *val); + break; case KVM_REG_PPC_TM_LR: vcpu->arch.lr_tm = set_reg_val(id, *val); break; diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S index 974f73df00bb..1a743f87b37d 100644 --- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S +++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S @@ -2514,11 +2514,13 @@ kvmppc_save_tm: mfctr r7 mfspr r8, SPRN_AMR mfspr r10, SPRN_TAR + mfxer r11 std r5, VCPU_LR_TM(r9) stw r6, VCPU_CR_TM(r9) std r7, VCPU_CTR_TM(r9) std r8, VCPU_AMR_TM(r9) std r10, VCPU_TAR_TM(r9) + std r11, VCPU_XER_TM(r9) /* Restore r12 as trap number. */ lwz r12, VCPU_TRAP(r9) @@ -2611,11 +2613,13 @@ kvmppc_restore_tm: ld r7, VCPU_CTR_TM(r4) ld r8, VCPU_AMR_TM(r4) ld r9, VCPU_TAR_TM(r4) + ld r10, VCPU_XER_TM(r4) mtlr r5 mtcr r6 mtctr r7 mtspr SPRN_AMR, r8 mtspr SPRN_TAR, r9 + mtxer r10 /* * Load up PPR and DSCR values but don't put them in the actual SPRs From 129e4323df377297408f2da99a0e48e01a7c60fd Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Wed, 16 Nov 2016 16:43:28 +1100 Subject: [PATCH 179/199] KVM: PPC: Book3S HV: Don't lose hardware R/C bit updates in H_PROTECT commit f064a0de1579fabded8990bed93971e30deb9ecb upstream. The hashed page table MMU in POWER processors can update the R (reference) and C (change) bits in a HPTE at any time until the HPTE has been invalidated and the TLB invalidation sequence has completed. In kvmppc_h_protect, which implements the H_PROTECT hypercall, we read the HPTE, modify the second doubleword, invalidate the HPTE in memory, do the TLB invalidation sequence, and then write the modified value of the second doubleword back to memory. In doing so we could overwrite an R/C bit update done by hardware between when we read the HPTE and when the TLB invalidation completed. To fix this we re-read the second doubleword after the TLB invalidation and OR in the (possibly) new values of R and C. We can use an OR since hardware only ever sets R and C, never clears them. This race was found by code inspection. In principle this bug could cause occasional guest memory corruption under host memory pressure. Fixes: a8606e20e41a ("KVM: PPC: Handle some PAPR hcalls in the kernel", 2011-06-29) Signed-off-by: Paul Mackerras Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/kvm/book3s_hv_rm_mmu.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/powerpc/kvm/book3s_hv_rm_mmu.c b/arch/powerpc/kvm/book3s_hv_rm_mmu.c index 91700518bbf3..d509ff5c87b0 100644 --- a/arch/powerpc/kvm/book3s_hv_rm_mmu.c +++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c @@ -653,6 +653,8 @@ long kvmppc_h_protect(struct kvm_vcpu *vcpu, unsigned long flags, HPTE_V_ABSENT); do_tlbies(kvm, &rb, 1, global_invalidates(kvm, flags), true); + /* Don't lose R/C bit updates done by hardware */ + r |= be64_to_cpu(hpte[1]) & (HPTE_R_R | HPTE_R_C); hpte[1] = cpu_to_be64(r); } } From 19aa9c14989e7a3fe04b444f146a05ff857b058e Mon Sep 17 00:00:00 2001 From: Jim Mattson Date: Mon, 12 Dec 2016 11:01:37 -0800 Subject: [PATCH 180/199] kvm: nVMX: Allow L1 to intercept software exceptions (#BP and #OF) commit ef85b67385436ddc1998f45f1d6a210f935b3388 upstream. When L2 exits to L0 due to "exception or NMI", software exceptions (#BP and #OF) for which L1 has requested an intercept should be handled by L1 rather than L0. Previously, only hardware exceptions were forwarded to L1. Signed-off-by: Jim Mattson Signed-off-by: Paolo Bonzini Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/vmx.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 268df707b5ce..bb620df05d0d 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -1247,10 +1247,10 @@ static inline bool nested_cpu_has_posted_intr(struct vmcs12 *vmcs12) return vmcs12->pin_based_vm_exec_control & PIN_BASED_POSTED_INTR; } -static inline bool is_exception(u32 intr_info) +static inline bool is_nmi(u32 intr_info) { return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VALID_MASK)) - == (INTR_TYPE_HARD_EXCEPTION | INTR_INFO_VALID_MASK); + == (INTR_TYPE_NMI_INTR | INTR_INFO_VALID_MASK); } static void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason, @@ -5234,7 +5234,7 @@ static int handle_exception(struct kvm_vcpu *vcpu) if (is_machine_check(intr_info)) return handle_machine_check(vcpu); - if ((intr_info & INTR_INFO_INTR_TYPE_MASK) == INTR_TYPE_NMI_INTR) + if (is_nmi(intr_info)) return 1; /* already handled by vmx_vcpu_run() */ if (is_no_device(intr_info)) { @@ -7722,7 +7722,7 @@ static bool nested_vmx_exit_handled(struct kvm_vcpu *vcpu) switch (exit_reason) { case EXIT_REASON_EXCEPTION_NMI: - if (!is_exception(intr_info)) + if (is_nmi(intr_info)) return false; else if (is_page_fault(intr_info)) return enable_ept; @@ -8329,8 +8329,7 @@ static void vmx_complete_atomic_exit(struct vcpu_vmx *vmx) kvm_machine_check(); /* We need to handle NMIs before interrupts are enabled */ - if ((exit_intr_info & INTR_INFO_INTR_TYPE_MASK) == INTR_TYPE_NMI_INTR && - (exit_intr_info & INTR_INFO_VALID_MASK)) { + if (is_nmi(exit_intr_info)) { kvm_before_handle_nmi(&vmx->vcpu); asm("int $2"); kvm_after_handle_nmi(&vmx->vcpu); From 2ef502e860f12b9e305151eef3bbb9ea4009a60d Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Thu, 8 Dec 2016 12:48:26 -0500 Subject: [PATCH 181/199] ftrace/x86_32: Set ftrace_stub to weak to prevent gcc from using short jumps to it commit 847fa1a6d3d00f3bdf68ef5fa4a786f644a0dd67 upstream. With new binutils, gcc may get smart with its optimization and change a jmp from a 5 byte jump to a 2 byte one even though it was jumping to a global function. But that global function existed within a 2 byte radius, and gcc was able to optimize it. Unfortunately, that jump was also being modified when function graph tracing begins. Since ftrace expected that jump to be 5 bytes, but it was only two, it overwrote code after the jump, causing a crash. This was fixed for x86_64 with commit 8329e818f149, with the same subject as this commit, but nothing was done for x86_32. Fixes: d61f82d06672 ("ftrace: use dynamic patching for updating mcount calls") Reported-by: Colin Ian King Tested-by: Colin Ian King Signed-off-by: Steven Rostedt Signed-off-by: Greg Kroah-Hartman --- arch/x86/entry/entry_32.S | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S index f3b6d54e0042..ae678ad128a9 100644 --- a/arch/x86/entry/entry_32.S +++ b/arch/x86/entry/entry_32.S @@ -766,8 +766,8 @@ ftrace_graph_call: jmp ftrace_stub #endif -.globl ftrace_stub -ftrace_stub: +/* This is weak to keep gas from relaxing the jumps */ +WEAK(ftrace_stub) ret END(ftrace_caller) From 88e41441ccd45eb6d2f68b6260a62b8d8e6e0316 Mon Sep 17 00:00:00 2001 From: Marcos Paulo de Souza Date: Tue, 29 Nov 2016 23:23:06 -0200 Subject: [PATCH 182/199] platform/x86: asus-nb-wmi.c: Add X45U quirk commit e74e259939275a5dd4e0d02845c694f421e249ad upstream. Without this patch, the Asus X45U wireless card can't be turned on (hard-blocked), but after a suspend/resume it just starts working. Following this bug report[1], there are other cases like this one, but this Asus is the only model that I can test. [1] https://ubuntuforums.org/showthread.php?t=2181558 Signed-off-by: Marcos Paulo de Souza Signed-off-by: Andy Shevchenko Signed-off-by: Greg Kroah-Hartman --- drivers/platform/x86/asus-nb-wmi.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/platform/x86/asus-nb-wmi.c b/drivers/platform/x86/asus-nb-wmi.c index 131fee2b093e..a3661cc44f86 100644 --- a/drivers/platform/x86/asus-nb-wmi.c +++ b/drivers/platform/x86/asus-nb-wmi.c @@ -126,6 +126,15 @@ static const struct dmi_system_id asus_quirks[] = { }, .driver_data = &quirk_asus_wapf4, }, + { + .callback = dmi_matched, + .ident = "ASUSTeK COMPUTER INC. X45U", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."), + DMI_MATCH(DMI_PRODUCT_NAME, "X45U"), + }, + .driver_data = &quirk_asus_wapf4, + }, { .callback = dmi_matched, .ident = "ASUSTeK COMPUTER INC. X456UA", From e945df4c6bc2e3e188e02b199ce1766f557890e5 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Thu, 8 Dec 2016 20:54:49 -0500 Subject: [PATCH 183/199] fgraph: Handle a case where a tracer ignores set_graph_notrace commit 794de08a16cf1fc1bf785dc48f66d36218cf6d88 upstream. Both the wakeup and irqsoff tracers can use the function graph tracer when the display-graph option is set. The problem is that they ignore the notrace file, and record the entry of functions that would be ignored by the function_graph tracer. This causes the trace->depth to be recorded into the ring buffer. The set_graph_notrace uses a trick by adding a large negative number to the trace->depth when a graph function is to be ignored. On trace output, the graph function uses the depth to record a stack of functions. But since the depth is negative, it accesses the array with a negative number and causes an out of bounds access that can cause a kernel oops or corrupt data. Have the print functions handle cases where a tracer still records functions even when they are in set_graph_notrace. Also add warnings if the depth is below zero before accessing the array. Note, the function graph logic will still prevent the return of these functions from being recorded, which means that they will be left hanging without a return. For example: # echo '*spin*' > set_graph_notrace # echo 1 > options/display-graph # echo wakeup > current_tracer # cat trace [...] _raw_spin_lock() { preempt_count_add() { do_raw_spin_lock() { update_rq_clock(); Where it should look like: _raw_spin_lock() { preempt_count_add(); do_raw_spin_lock(); } update_rq_clock(); Cc: Namhyung Kim Fixes: 29ad23b00474 ("ftrace: Add set_graph_notrace filter") Signed-off-by: Steven Rostedt Signed-off-by: Greg Kroah-Hartman --- kernel/trace/trace_functions_graph.c | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c index a663cbb84107..7fd6f5a26143 100644 --- a/kernel/trace/trace_functions_graph.c +++ b/kernel/trace/trace_functions_graph.c @@ -780,6 +780,10 @@ print_graph_entry_leaf(struct trace_iterator *iter, cpu_data = per_cpu_ptr(data->cpu_data, cpu); + /* If a graph tracer ignored set_graph_notrace */ + if (call->depth < -1) + call->depth += FTRACE_NOTRACE_DEPTH; + /* * Comments display at + 1 to depth. Since * this is a leaf function, keep the comments @@ -788,7 +792,8 @@ print_graph_entry_leaf(struct trace_iterator *iter, cpu_data->depth = call->depth - 1; /* No need to keep this function around for this depth */ - if (call->depth < FTRACE_RETFUNC_DEPTH) + if (call->depth < FTRACE_RETFUNC_DEPTH && + !WARN_ON_ONCE(call->depth < 0)) cpu_data->enter_funcs[call->depth] = 0; } @@ -818,11 +823,16 @@ print_graph_entry_nested(struct trace_iterator *iter, struct fgraph_cpu_data *cpu_data; int cpu = iter->cpu; + /* If a graph tracer ignored set_graph_notrace */ + if (call->depth < -1) + call->depth += FTRACE_NOTRACE_DEPTH; + cpu_data = per_cpu_ptr(data->cpu_data, cpu); cpu_data->depth = call->depth; /* Save this function pointer to see if the exit matches */ - if (call->depth < FTRACE_RETFUNC_DEPTH) + if (call->depth < FTRACE_RETFUNC_DEPTH && + !WARN_ON_ONCE(call->depth < 0)) cpu_data->enter_funcs[call->depth] = call->func; } @@ -1052,7 +1062,8 @@ print_graph_return(struct ftrace_graph_ret *trace, struct trace_seq *s, */ cpu_data->depth = trace->depth - 1; - if (trace->depth < FTRACE_RETFUNC_DEPTH) { + if (trace->depth < FTRACE_RETFUNC_DEPTH && + !WARN_ON_ONCE(trace->depth < 0)) { if (cpu_data->enter_funcs[trace->depth] != trace->func) func_match = 0; cpu_data->enter_funcs[trace->depth] = 0; From 0de381ca35b5e997903c25297f0b0c7ecc74342e Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Mon, 21 Nov 2016 10:21:17 -0800 Subject: [PATCH 184/199] IB/mad: Fix an array index check commit 2fe2f378dd45847d2643638c07a7658822087836 upstream. The array ib_mad_mgmt_class_table.method_table has MAX_MGMT_CLASS (80) elements. Hence compare the array index with that value instead of with IB_MGMT_MAX_METHODS (128). This patch avoids that Coverity reports the following: Overrunning array class->method_table of 80 8-byte elements at element index 127 (byte offset 1016) using index convert_mgmt_class(mad_hdr->mgmt_class) (which evaluates to 127). Fixes: commit b7ab0b19a85f ("IB/mad: Verify mgmt class in received MADs") Signed-off-by: Bart Van Assche Cc: Sean Hefty Reviewed-by: Hal Rosenstock Signed-off-by: Doug Ledford Signed-off-by: Greg Kroah-Hartman --- drivers/infiniband/core/mad.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/core/mad.c b/drivers/infiniband/core/mad.c index 2281de122038..8d84c563ba75 100644 --- a/drivers/infiniband/core/mad.c +++ b/drivers/infiniband/core/mad.c @@ -1745,7 +1745,7 @@ find_mad_agent(struct ib_mad_port_private *port_priv, if (!class) goto out; if (convert_mgmt_class(mad_hdr->mgmt_class) >= - IB_MGMT_MAX_METHODS) + ARRAY_SIZE(class->method_table)) goto out; method = class->method_table[convert_mgmt_class( mad_hdr->mgmt_class)]; From 7b13692156167666ba9bc5e30e41fe0eecd86bd0 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Mon, 21 Nov 2016 10:21:41 -0800 Subject: [PATCH 185/199] IPoIB: Avoid reading an uninitialized member variable commit 11b642b84e8c43e8597de031678d15c08dd057bc upstream. This patch avoids that Coverity reports the following: Using uninitialized value port_attr.state when calling printk Fixes: commit 94232d9ce817 ("IPoIB: Start multicast join process only on active ports") Signed-off-by: Bart Van Assche Cc: Erez Shitrit Reviewed-by: Leon Romanovsky Signed-off-by: Doug Ledford Signed-off-by: Greg Kroah-Hartman --- drivers/infiniband/ulp/ipoib/ipoib_multicast.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c index 87799de90a1d..8ec99bdea76b 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c @@ -563,8 +563,11 @@ void ipoib_mcast_join_task(struct work_struct *work) if (!test_bit(IPOIB_FLAG_OPER_UP, &priv->flags)) return; - if (ib_query_port(priv->ca, priv->port, &port_attr) || - port_attr.state != IB_PORT_ACTIVE) { + if (ib_query_port(priv->ca, priv->port, &port_attr)) { + ipoib_dbg(priv, "ib_query_port() failed\n"); + return; + } + if (port_attr.state != IB_PORT_ACTIVE) { ipoib_dbg(priv, "port state is not ACTIVE (state = %d) suspending join task\n", port_attr.state); return; From b7f73ada92a396b7ca987695af3243944c757295 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Mon, 21 Nov 2016 10:22:17 -0800 Subject: [PATCH 186/199] IB/multicast: Check ib_find_pkey() return value commit d3a2418ee36a59bc02e9d454723f3175dcf4bfd9 upstream. This patch avoids that Coverity complains about not checking the ib_find_pkey() return value. Fixes: commit 547af76521b3 ("IB/multicast: Report errors on multicast groups if P_key changes") Signed-off-by: Bart Van Assche Cc: Sean Hefty Signed-off-by: Doug Ledford Signed-off-by: Greg Kroah-Hartman --- drivers/infiniband/core/multicast.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/core/multicast.c b/drivers/infiniband/core/multicast.c index 6aa648cb5381..2cd97977b988 100644 --- a/drivers/infiniband/core/multicast.c +++ b/drivers/infiniband/core/multicast.c @@ -517,8 +517,11 @@ static void join_handler(int status, struct ib_sa_mcmember_rec *rec, process_join_error(group, status); else { int mgids_changed, is_mgid0; - ib_find_pkey(group->port->dev->device, group->port->port_num, - be16_to_cpu(rec->pkey), &pkey_index); + + if (ib_find_pkey(group->port->dev->device, + group->port->port_num, be16_to_cpu(rec->pkey), + &pkey_index)) + pkey_index = MCAST_INVALID_PKEY_INDEX; spin_lock_irq(&group->port->lock); if (group->state == MCAST_BUSY && From 952a9f5af9f8565abd50cb8daee178176f2e3aeb Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Mon, 19 Dec 2016 18:00:05 +0100 Subject: [PATCH 187/199] IB/cma: Fix a race condition in iboe_addr_get_sgid() commit fba332b079029c2f4f7e84c1c1cd8e3867310c90 upstream. Code that dereferences the struct net_device ip_ptr member must be protected with an in_dev_get() / in_dev_put() pair. Hence insert calls to these functions. Fixes: commit 7b85627b9f02 ("IB/cma: IBoE (RoCE) IP-based GID addressing") Signed-off-by: Bart Van Assche Reviewed-by: Moni Shoua Cc: Or Gerlitz Cc: Roland Dreier Signed-off-by: Doug Ledford Signed-off-by: Greg Kroah-Hartman --- include/rdma/ib_addr.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/include/rdma/ib_addr.h b/include/rdma/ib_addr.h index 11528591d0d7..a78ff97eb249 100644 --- a/include/rdma/ib_addr.h +++ b/include/rdma/ib_addr.h @@ -197,10 +197,12 @@ static inline void iboe_addr_get_sgid(struct rdma_dev_addr *dev_addr, dev = dev_get_by_index(&init_net, dev_addr->bound_dev_if); if (dev) { - ip4 = (struct in_device *)dev->ip_ptr; - if (ip4 && ip4->ifa_list && ip4->ifa_list->ifa_address) + ip4 = in_dev_get(dev); + if (ip4 && ip4->ifa_list && ip4->ifa_list->ifa_address) { ipv6_addr_set_v4mapped(ip4->ifa_list->ifa_address, (struct in6_addr *)gid); + in_dev_put(ip4); + } dev_put(dev); } } From 44685f03dd5648407c5cf8d5a1a0eefd24036a27 Mon Sep 17 00:00:00 2001 From: Andrey Utkin Date: Sat, 22 Oct 2016 13:34:36 -0200 Subject: [PATCH 188/199] media: solo6x10: fix lockup by avoiding delayed register write commit 5fc4b067ec082c3127e0156f800769b7e0dce078 upstream. This fixes a lockup at device probing which happens on some solo6010 hardware samples. This is a regression introduced by commit e1ceb25a1569 ("[media] SOLO6x10: remove unneeded register locking and barriers") The observed lockup happens in solo_set_motion_threshold() called from solo_motion_config(). This extra "flushing" is not fundamentally needed for every write, but apparently the code in driver assumes such behaviour at last in some places. Actual fix was proposed by Hans Verkuil. Fixes: e1ceb25a1569 ("[media] SOLO6x10: remove unneeded register locking and barriers") Signed-off-by: Andrey Utkin Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Greg Kroah-Hartman --- drivers/media/pci/solo6x10/solo6x10.h | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/media/pci/solo6x10/solo6x10.h b/drivers/media/pci/solo6x10/solo6x10.h index 4ab6586c0467..f53e59e9c0ea 100644 --- a/drivers/media/pci/solo6x10/solo6x10.h +++ b/drivers/media/pci/solo6x10/solo6x10.h @@ -286,7 +286,10 @@ static inline u32 solo_reg_read(struct solo_dev *solo_dev, int reg) static inline void solo_reg_write(struct solo_dev *solo_dev, int reg, u32 data) { + u16 val; + writel(data, solo_dev->reg_base + reg); + pci_read_config_word(solo_dev->pdev, PCI_STATUS, &val); } static inline void solo_irq_on(struct solo_dev *dev, u32 mask) From 8db00756afbdfbe3392a0678971f783b8dbad868 Mon Sep 17 00:00:00 2001 From: Jingkui Wang Date: Mon, 12 Dec 2016 13:51:46 -0800 Subject: [PATCH 189/199] Input: drv260x - fix input device's parent assignment commit 5a8a6b89c15766446d845671d574a9243b6d8786 upstream. We were assigning I2C bus controller instead of client as parent device. Besides being logically wrong, it messed up with devm handling of input device. As a result we were leaving input device and event node behind after rmmod-ing the driver, which lead to a kernel oops if one were to access the event node later. Let's remove the assignment and rely on devm_input_allocate_device() to set it up properly for us. Signed-off-by: Jingkui Wang Fixes: 7132fe4f5687 ("Input: drv260x - add TI drv260x haptics driver") Signed-off-by: Dmitry Torokhov Signed-off-by: Greg Kroah-Hartman --- drivers/input/misc/drv260x.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/input/misc/drv260x.c b/drivers/input/misc/drv260x.c index 2adfd86c869a..930424e55439 100644 --- a/drivers/input/misc/drv260x.c +++ b/drivers/input/misc/drv260x.c @@ -592,7 +592,6 @@ static int drv260x_probe(struct i2c_client *client, } haptics->input_dev->name = "drv260x:haptics"; - haptics->input_dev->dev.parent = client->dev.parent; haptics->input_dev->close = drv260x_close; input_set_drvdata(haptics->input_dev, haptics); input_set_capability(haptics->input_dev, EV_FF, FF_RUMBLE); From edfe6a79f905a1252ac6783771b90b64572109a8 Mon Sep 17 00:00:00 2001 From: Alan Stern Date: Fri, 21 Oct 2016 16:45:38 -0400 Subject: [PATCH 190/199] PCI: Check for PME in targeted sleep state commit 6496ebd7edf446fccf8266a1a70ffcb64252593e upstream. One some systems, the firmware does not allow certain PCI devices to be put in deep D-states. This can cause problems for wakeup signalling, if the device does not support PME# in the deepest allowed suspend state. For example, Pierre reports that on his system, ACPI does not permit his xHCI host controller to go into D3 during runtime suspend -- but D3 is the only state in which the controller can generate PME# signals. As a result, the controller goes into runtime suspend but never wakes up, so it doesn't work properly. USB devices plugged into the controller are never detected. If the device relies on PME# for wakeup signals but is not capable of generating PME# in the target state, the PCI core should accurately report that it cannot do wakeup from runtime suspend. This patch modifies the pci_dev_run_wake() routine to add this check. Reported-by: Pierre de Villemereuil Tested-by: Pierre de Villemereuil Signed-off-by: Alan Stern Signed-off-by: Bjorn Helgaas Acked-by: Rafael J. Wysocki CC: Lukas Wunner Signed-off-by: Greg Kroah-Hartman --- drivers/pci/pci.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index 42d8617352ae..e311a9bf2c90 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -2043,6 +2043,10 @@ bool pci_dev_run_wake(struct pci_dev *dev) if (!dev->pme_support) return false; + /* PME-capable in principle, but not from the intended sleep state */ + if (!pci_pme_capable(dev, pci_target_state(dev))) + return false; + while (bus->parent) { struct pci_dev *bridge = bus->self; From b66e3126569e25e11bc3913e41f6f39445508338 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Fri, 2 Dec 2016 16:35:09 +0100 Subject: [PATCH 191/199] libceph: verify authorize reply on connect commit 5c056fdc5b474329037f2aa18401bd73033e0ce0 upstream. After sending an authorizer (ceph_x_authorize_a + ceph_x_authorize_b), the client gets back a ceph_x_authorize_reply, which it is supposed to verify to ensure the authenticity and protect against replay attacks. The code for doing this is there (ceph_x_verify_authorizer_reply(), ceph_auth_verify_authorizer_reply() + plumbing), but it is never invoked by the the messenger. AFAICT this goes back to 2009, when ceph authentication protocols support was added to the kernel client in 4e7a5dcd1bba ("ceph: negotiate authentication protocol; implement AUTH_NONE protocol"). The second param of ceph_connection_operations::verify_authorizer_reply is unused all the way down. Pass 0 to facilitate backporting, and kill it in the next commit. Signed-off-by: Ilya Dryomov Reviewed-by: Sage Weil Signed-off-by: Greg Kroah-Hartman --- net/ceph/messenger.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c index 63ae5dd24fc5..b8d927c56494 100644 --- a/net/ceph/messenger.c +++ b/net/ceph/messenger.c @@ -2042,6 +2042,19 @@ static int process_connect(struct ceph_connection *con) dout("process_connect on %p tag %d\n", con, (int)con->in_tag); + if (con->auth_reply_buf) { + /* + * Any connection that defines ->get_authorizer() + * should also define ->verify_authorizer_reply(). + * See get_connect_authorizer(). + */ + ret = con->ops->verify_authorizer_reply(con, 0); + if (ret < 0) { + con->error_msg = "bad authorize reply"; + return ret; + } + } + switch (con->in_reply.tag) { case CEPH_MSGR_TAG_FEATURES: pr_err("%s%lld %s feature set mismatch," From 8a2bcaae1bc7a33d51d668d0661e0636a4aa7bc8 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Mon, 5 Sep 2016 21:42:32 -0400 Subject: [PATCH 192/199] nfs_write_end(): fix handling of short copies commit c0cf3ef5e0f47e385920450b245d22bead93e7ad upstream. What matters when deciding if we should make a page uptodate is not how much we _wanted_ to copy, but how much we actually have copied. As it is, on architectures that do not zero tail on short copy we can leave uninitialized data in page marked uptodate. Signed-off-by: Al Viro Signed-off-by: Greg Kroah-Hartman --- fs/nfs/file.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/nfs/file.c b/fs/nfs/file.c index 93e236429c5d..dc875cd0e11d 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c @@ -407,7 +407,7 @@ static int nfs_write_end(struct file *file, struct address_space *mapping, */ if (!PageUptodate(page)) { unsigned pglen = nfs_page_length(page); - unsigned end = offset + len; + unsigned end = offset + copied; if (pglen == 0) { zero_user_segments(page, 0, offset, From cadaba838f1bfe74ea4ab21bb675f56a32e4efca Mon Sep 17 00:00:00 2001 From: Geoff Levand Date: Tue, 29 Nov 2016 10:47:32 -0800 Subject: [PATCH 193/199] powerpc/ps3: Fix system hang with GCC 5 builds commit 6dff5b67054e17c91bd630bcdda17cfca5aa4215 upstream. GCC 5 generates different code for this bootwrapper null check that causes the PS3 to hang very early in its bootup. This check is of limited value, so just get rid of it. Signed-off-by: Geoff Levand Signed-off-by: Michael Ellerman Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/boot/ps3-head.S | 5 ----- arch/powerpc/boot/ps3.c | 8 +------- 2 files changed, 1 insertion(+), 12 deletions(-) diff --git a/arch/powerpc/boot/ps3-head.S b/arch/powerpc/boot/ps3-head.S index b6fcbaf5027b..3dc44b05fb97 100644 --- a/arch/powerpc/boot/ps3-head.S +++ b/arch/powerpc/boot/ps3-head.S @@ -57,11 +57,6 @@ __system_reset_overlay: bctr 1: - /* Save the value at addr zero for a null pointer write check later. */ - - li r4, 0 - lwz r3, 0(r4) - /* Primary delays then goes to _zimage_start in wrapper. */ or 31, 31, 31 /* db16cyc */ diff --git a/arch/powerpc/boot/ps3.c b/arch/powerpc/boot/ps3.c index 4ec2d86d3c50..a05558a7e51a 100644 --- a/arch/powerpc/boot/ps3.c +++ b/arch/powerpc/boot/ps3.c @@ -119,13 +119,12 @@ void ps3_copy_vectors(void) flush_cache((void *)0x100, 512); } -void platform_init(unsigned long null_check) +void platform_init(void) { const u32 heapsize = 0x1000000 - (u32)_end; /* 16MiB */ void *chosen; unsigned long ft_addr; u64 rm_size; - unsigned long val; console_ops.write = ps3_console_write; platform_ops.exit = ps3_exit; @@ -153,11 +152,6 @@ void platform_init(unsigned long null_check) printf(" flat tree at 0x%lx\n\r", ft_addr); - val = *(unsigned long *)0; - - if (val != null_check) - printf("null check failed: %lx != %lx\n\r", val, null_check); - ((kernel_entry_t)0)(ft_addr, 0, NULL); ps3_exit(); From e5de1c724c060bb8a963aaed8e09e7d1a5f3ad1c Mon Sep 17 00:00:00 2001 From: Segher Boessenkool Date: Thu, 6 Oct 2016 13:42:19 +0000 Subject: [PATCH 194/199] powerpc: Convert cmp to cmpd in idle enter sequence commit 80f23935cadb1c654e81951f5a8b7ceae0acc1b4 upstream. PowerPC's "cmp" instruction has four operands. Normally people write "cmpw" or "cmpd" for the second cmp operand 0 or 1. But, frequently people forget, and write "cmp" with just three operands. With older binutils this is silently accepted as if this was "cmpw", while often "cmpd" is wanted. With newer binutils GAS will complain about this for 64-bit code. For 32-bit code it still silently assumes "cmpw" is what is meant. In this instance the code comes directly from ISA v2.07, including the cmp, but cmpd is correct. Backport to stable so that new toolchains can build old kernels. Fixes: 948cf67c4726 ("powerpc: Add NAP mode support on Power7 in HV mode") Reviewed-by: Vaidyanathan Srinivasan Signed-off-by: Segher Boessenkool Signed-off-by: Michael Ellerman Signed-off-by: Joel Stanley Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/kernel/idle_power7.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/kernel/idle_power7.S b/arch/powerpc/kernel/idle_power7.S index 112ccf497562..73f638789a38 100644 --- a/arch/powerpc/kernel/idle_power7.S +++ b/arch/powerpc/kernel/idle_power7.S @@ -44,7 +44,7 @@ std r0,0(r1); \ ptesync; \ ld r0,0(r1); \ -1: cmp cr0,r0,r0; \ +1: cmpd cr0,r0,r0; \ bne 1b; \ IDLE_INST; \ b . From e321f384d8a58e546469cc8d3ca04257ef474d4c Mon Sep 17 00:00:00 2001 From: Andy Grover Date: Mon, 21 Nov 2016 16:35:30 -0800 Subject: [PATCH 195/199] target/user: Fix use-after-free of tcmu_cmds if they are expired commit d0905ca757bc40bd1ebc261a448a521b064777d7 upstream. Don't free the cmd in tcmu_check_expired_cmd, it's still referenced by an entry in our cmd_id->cmd idr. If userspace ever resumes processing, tcmu_handle_completions() will use the now-invalid cmd pointer. Instead, don't free cmd. It will be freed by tcmu_handle_completion() if userspace ever recovers, or tcmu_free_device if not. Reported-by: Bryant G Ly Tested-by: Bryant G Ly Signed-off-by: Andy Grover Signed-off-by: Bart Van Assche Signed-off-by: Greg Kroah-Hartman --- drivers/target/target_core_user.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/target/target_core_user.c b/drivers/target/target_core_user.c index 5e6d6cb348fc..a7d30e894cab 100644 --- a/drivers/target/target_core_user.c +++ b/drivers/target/target_core_user.c @@ -645,8 +645,6 @@ static int tcmu_check_expired_cmd(int id, void *p, void *data) target_complete_cmd(cmd->se_cmd, SAM_STAT_CHECK_CONDITION); cmd->se_cmd = NULL; - kmem_cache_free(tcmu_cmd_cache, cmd); - return 0; } From 7fb5a936457dff3616644d1725f28002048819f7 Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Thu, 24 Nov 2016 22:10:23 +0000 Subject: [PATCH 196/199] kconfig/nconf: Fix hang when editing symbol with a long prompt commit 79e51b5c2deea542b3bb8c66e0d502230b017dde upstream. Currently it is impossible to edit the value of a config symbol with a prompt longer than (terminal width - 2) characters. dialog_inputbox() calculates a negative x-offset for the input window and newwin() fails as this is invalid. It also doesn't check for this failure, so it busy-loops calling wgetch(NULL) which immediately returns -1. The additions in the offset calculations also don't match the intended size of the window. Limit the window size and calculate the offset similarly to show_scroll_win(). Fixes: 692d97c380c6 ("kconfig: new configuration interface (nconfig)") Signed-off-by: Ben Hutchings Signed-off-by: Greg Kroah-Hartman --- scripts/kconfig/nconf.gui.c | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/scripts/kconfig/nconf.gui.c b/scripts/kconfig/nconf.gui.c index 8275f0e55106..4b2f44c20caf 100644 --- a/scripts/kconfig/nconf.gui.c +++ b/scripts/kconfig/nconf.gui.c @@ -364,12 +364,14 @@ int dialog_inputbox(WINDOW *main_window, WINDOW *prompt_win; WINDOW *form_win; PANEL *panel; - int i, x, y; + int i, x, y, lines, columns, win_lines, win_cols; int res = -1; int cursor_position = strlen(init); int cursor_form_win; char *result = *resultp; + getmaxyx(stdscr, lines, columns); + if (strlen(init)+1 > *result_len) { *result_len = strlen(init)+1; *resultp = result = realloc(result, *result_len); @@ -386,14 +388,19 @@ int dialog_inputbox(WINDOW *main_window, if (title) prompt_width = max(prompt_width, strlen(title)); + win_lines = min(prompt_lines+6, lines-2); + win_cols = min(prompt_width+7, columns-2); + prompt_lines = max(win_lines-6, 0); + prompt_width = max(win_cols-7, 0); + /* place dialog in middle of screen */ - y = (getmaxy(stdscr)-(prompt_lines+4))/2; - x = (getmaxx(stdscr)-(prompt_width+4))/2; + y = (lines-win_lines)/2; + x = (columns-win_cols)/2; strncpy(result, init, *result_len); /* create the windows */ - win = newwin(prompt_lines+6, prompt_width+7, y, x); + win = newwin(win_lines, win_cols, y, x); prompt_win = derwin(win, prompt_lines+1, prompt_width, 2, 2); form_win = derwin(win, 1, prompt_width, prompt_lines+3, 2); keypad(form_win, TRUE); From d85727365859108cbcf832c2b3c38358ddc7638b Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 16 Dec 2016 13:42:06 -0500 Subject: [PATCH 197/199] sg_write()/bsg_write() is not fit to be called under KERNEL_DS commit 128394eff343fc6d2f32172f03e24829539c5835 upstream. Both damn things interpret userland pointers embedded into the payload; worse, they are actually traversing those. Leaving aside the bad API design, this is very much _not_ safe to call with KERNEL_DS. Bail out early if that happens. Signed-off-by: Al Viro Signed-off-by: Greg Kroah-Hartman --- block/bsg.c | 3 +++ drivers/scsi/sg.c | 3 +++ 2 files changed, 6 insertions(+) diff --git a/block/bsg.c b/block/bsg.c index d214e929ce18..b9a53615bdef 100644 --- a/block/bsg.c +++ b/block/bsg.c @@ -655,6 +655,9 @@ bsg_write(struct file *file, const char __user *buf, size_t count, loff_t *ppos) dprintk("%s: write %Zd bytes\n", bd->name, count); + if (unlikely(segment_eq(get_fs(), KERNEL_DS))) + return -EINVAL; + bsg_set_block(bd, file); bytes_written = 0; diff --git a/drivers/scsi/sg.c b/drivers/scsi/sg.c index ae7d9bdf409c..a1c29b0afb22 100644 --- a/drivers/scsi/sg.c +++ b/drivers/scsi/sg.c @@ -592,6 +592,9 @@ sg_write(struct file *filp, const char __user *buf, size_t count, loff_t * ppos) sg_io_hdr_t *hp; unsigned char cmnd[SG_MAX_CDB_SIZE]; + if (unlikely(segment_eq(get_fs(), KERNEL_DS))) + return -EINVAL; + if ((!(sfp = (Sg_fd *) filp->private_data)) || (!(sdp = sfp->parentdp))) return -ENXIO; SCSI_LOG_TIMEOUT(3, sg_printk(KERN_INFO, sdp, From 9f11a0ab6a9650de4b458b2a8759a32c2e4e89f0 Mon Sep 17 00:00:00 2001 From: Thomas Petazzoni Date: Wed, 21 Dec 2016 11:28:49 +0100 Subject: [PATCH 198/199] net: mvpp2: fix dma unmapping of TX buffers for fragments commit 8354491c9d5b06709384cea91d13019bf5e61449 upstream. Since commit 71ce391dfb784 ("net: mvpp2: enable proper per-CPU TX buffers unmapping"), we are not correctly DMA unmapping TX buffers for fragments. Indeed, the mvpp2_txq_inc_put() function only stores in the txq_cpu->tx_buffs[] array the physical address of the buffer to be DMA-unmapped when skb != NULL. In addition, when DMA-unmapping, we use skb_headlen(skb) to get the size to be unmapped. Both of this works fine for TX descriptors that are associated directly to a SKB, but not the ones that are used for fragments, with a NULL pointer as skb: - We have a NULL physical address when calling DMA unmap - skb_headlen(skb) crashes because skb is NULL This causes random crashes when fragments are used. To solve this problem, we need to: - Store the physical address of the buffer to be unmapped unconditionally, regardless of whether it is tied to a SKB or not. - Store the length of the buffer to be unmapped, which requires a new field. Instead of adding a third array to store the length of the buffer to be unmapped, and as suggested by David Miller, this commit refactors the tx_buffs[] and tx_skb[] arrays of 'struct mvpp2_txq_pcpu' into a separate structure 'mvpp2_txq_pcpu_buf', to which a 'size' field is added. Therefore, instead of having three arrays to allocate/free, we have a single one, which also improve data locality, reducing the impact on the CPU cache. Fixes: 71ce391dfb784 ("net: mvpp2: enable proper per-CPU TX buffers unmapping") Reported-by: Raphael G Cc: Raphael G Signed-off-by: Thomas Petazzoni Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/marvell/mvpp2.c | 59 ++++++++++++++-------------- 1 file changed, 30 insertions(+), 29 deletions(-) diff --git a/drivers/net/ethernet/marvell/mvpp2.c b/drivers/net/ethernet/marvell/mvpp2.c index a4beccf1fd46..25aba9886990 100644 --- a/drivers/net/ethernet/marvell/mvpp2.c +++ b/drivers/net/ethernet/marvell/mvpp2.c @@ -772,6 +772,17 @@ struct mvpp2_rx_desc { u32 reserved8; }; +struct mvpp2_txq_pcpu_buf { + /* Transmitted SKB */ + struct sk_buff *skb; + + /* Physical address of transmitted buffer */ + dma_addr_t phys; + + /* Size transmitted */ + size_t size; +}; + /* Per-CPU Tx queue control */ struct mvpp2_txq_pcpu { int cpu; @@ -787,11 +798,8 @@ struct mvpp2_txq_pcpu { /* Number of Tx DMA descriptors reserved for each CPU */ int reserved_num; - /* Array of transmitted skb */ - struct sk_buff **tx_skb; - - /* Array of transmitted buffers' physical addresses */ - dma_addr_t *tx_buffs; + /* Infos about transmitted buffers */ + struct mvpp2_txq_pcpu_buf *buffs; /* Index of last TX DMA descriptor that was inserted */ int txq_put_index; @@ -981,10 +989,11 @@ static void mvpp2_txq_inc_put(struct mvpp2_txq_pcpu *txq_pcpu, struct sk_buff *skb, struct mvpp2_tx_desc *tx_desc) { - txq_pcpu->tx_skb[txq_pcpu->txq_put_index] = skb; - if (skb) - txq_pcpu->tx_buffs[txq_pcpu->txq_put_index] = - tx_desc->buf_phys_addr; + struct mvpp2_txq_pcpu_buf *tx_buf = + txq_pcpu->buffs + txq_pcpu->txq_put_index; + tx_buf->skb = skb; + tx_buf->size = tx_desc->data_size; + tx_buf->phys = tx_desc->buf_phys_addr; txq_pcpu->txq_put_index++; if (txq_pcpu->txq_put_index == txq_pcpu->size) txq_pcpu->txq_put_index = 0; @@ -4403,17 +4412,16 @@ static void mvpp2_txq_bufs_free(struct mvpp2_port *port, int i; for (i = 0; i < num; i++) { - dma_addr_t buf_phys_addr = - txq_pcpu->tx_buffs[txq_pcpu->txq_get_index]; - struct sk_buff *skb = txq_pcpu->tx_skb[txq_pcpu->txq_get_index]; + struct mvpp2_txq_pcpu_buf *tx_buf = + txq_pcpu->buffs + txq_pcpu->txq_get_index; mvpp2_txq_inc_get(txq_pcpu); - dma_unmap_single(port->dev->dev.parent, buf_phys_addr, - skb_headlen(skb), DMA_TO_DEVICE); - if (!skb) + dma_unmap_single(port->dev->dev.parent, tx_buf->phys, + tx_buf->size, DMA_TO_DEVICE); + if (!tx_buf->skb) continue; - dev_kfree_skb_any(skb); + dev_kfree_skb_any(tx_buf->skb); } } @@ -4664,15 +4672,10 @@ static int mvpp2_txq_init(struct mvpp2_port *port, for_each_present_cpu(cpu) { txq_pcpu = per_cpu_ptr(txq->pcpu, cpu); txq_pcpu->size = txq->size; - txq_pcpu->tx_skb = kmalloc(txq_pcpu->size * - sizeof(*txq_pcpu->tx_skb), - GFP_KERNEL); - if (!txq_pcpu->tx_skb) - goto error; - - txq_pcpu->tx_buffs = kmalloc(txq_pcpu->size * - sizeof(dma_addr_t), GFP_KERNEL); - if (!txq_pcpu->tx_buffs) + txq_pcpu->buffs = kmalloc(txq_pcpu->size * + sizeof(struct mvpp2_txq_pcpu_buf), + GFP_KERNEL); + if (!txq_pcpu->buffs) goto error; txq_pcpu->count = 0; @@ -4686,8 +4689,7 @@ static int mvpp2_txq_init(struct mvpp2_port *port, error: for_each_present_cpu(cpu) { txq_pcpu = per_cpu_ptr(txq->pcpu, cpu); - kfree(txq_pcpu->tx_skb); - kfree(txq_pcpu->tx_buffs); + kfree(txq_pcpu->buffs); } dma_free_coherent(port->dev->dev.parent, @@ -4706,8 +4708,7 @@ static void mvpp2_txq_deinit(struct mvpp2_port *port, for_each_present_cpu(cpu) { txq_pcpu = per_cpu_ptr(txq->pcpu, cpu); - kfree(txq_pcpu->tx_skb); - kfree(txq_pcpu->tx_buffs); + kfree(txq_pcpu->buffs); } if (txq->descs) From cdd86b9722657feffdca5a12ecc34c30be64e618 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Mon, 9 Jan 2017 08:08:10 +0100 Subject: [PATCH 199/199] Linux 4.4.41 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 5b5937780408..855e71066174 100644 --- a/Makefile +++ b/Makefile @@ -1,6 +1,6 @@ VERSION = 4 PATCHLEVEL = 4 -SUBLEVEL = 40 +SUBLEVEL = 41 EXTRAVERSION = NAME = Blurry Fish Butt