From 779a5077d9cf5c53ad368a0843b814b10139dd1f Mon Sep 17 00:00:00 2001 From: "Lendacky, Thomas" Date: Thu, 17 Jan 2019 14:20:14 +0000 Subject: [PATCH 001/104] amd-xgbe: Fix mdio access for non-zero ports and clause 45 PHYs [ Upstream commit 5ab3121beeb76aa6090195b67d237115860dd9ec ] The XGBE hardware has support for performing MDIO operations using an MDIO command request. The driver mistakenly uses the mdio port address as the MDIO command request device address instead of the MDIO command request port address. Additionally, the driver does not properly check for and create a clause 45 MDIO command. Check the supplied MDIO register to determine if the request is a clause 45 operation (MII_ADDR_C45). For a clause 45 operation, extract the device address and register number from the supplied MDIO register and use them to set the MDIO command request device address and register number fields. For a clause 22 operation, the MDIO request device address is set to zero and the MDIO command request register number is set to the supplied MDIO register. In either case, the supplied MDIO port address is used as the MDIO command request port address. Fixes: 732f2ab7afb9 ("amd-xgbe: Add support for MDIO attached PHYs") Signed-off-by: Tom Lendacky Tested-by: Shyam Sundar S K Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/amd/xgbe/xgbe-common.h | 2 -- drivers/net/ethernet/amd/xgbe/xgbe-dev.c | 22 +++++++++++++++------ 2 files changed, 16 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-common.h b/drivers/net/ethernet/amd/xgbe/xgbe-common.h index d272dc6984ac..b40d4377cc71 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe-common.h +++ b/drivers/net/ethernet/amd/xgbe/xgbe-common.h @@ -431,8 +431,6 @@ #define MAC_MDIOSCAR_PA_WIDTH 5 #define MAC_MDIOSCAR_RA_INDEX 0 #define MAC_MDIOSCAR_RA_WIDTH 16 -#define MAC_MDIOSCAR_REG_INDEX 0 -#define MAC_MDIOSCAR_REG_WIDTH 21 #define MAC_MDIOSCCDR_BUSY_INDEX 22 #define MAC_MDIOSCCDR_BUSY_WIDTH 1 #define MAC_MDIOSCCDR_CMD_INDEX 16 diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-dev.c b/drivers/net/ethernet/amd/xgbe/xgbe-dev.c index 1e929a1e4ca7..4666084eda16 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe-dev.c +++ b/drivers/net/ethernet/amd/xgbe/xgbe-dev.c @@ -1284,6 +1284,20 @@ static void xgbe_write_mmd_regs(struct xgbe_prv_data *pdata, int prtad, } } +static unsigned int xgbe_create_mdio_sca(int port, int reg) +{ + unsigned int mdio_sca, da; + + da = (reg & MII_ADDR_C45) ? 
reg >> 16 : 0; + + mdio_sca = 0; + XGMAC_SET_BITS(mdio_sca, MAC_MDIOSCAR, RA, reg); + XGMAC_SET_BITS(mdio_sca, MAC_MDIOSCAR, PA, port); + XGMAC_SET_BITS(mdio_sca, MAC_MDIOSCAR, DA, da); + + return mdio_sca; +} + static int xgbe_write_ext_mii_regs(struct xgbe_prv_data *pdata, int addr, int reg, u16 val) { @@ -1291,9 +1305,7 @@ static int xgbe_write_ext_mii_regs(struct xgbe_prv_data *pdata, int addr, reinit_completion(&pdata->mdio_complete); - mdio_sca = 0; - XGMAC_SET_BITS(mdio_sca, MAC_MDIOSCAR, REG, reg); - XGMAC_SET_BITS(mdio_sca, MAC_MDIOSCAR, DA, addr); + mdio_sca = xgbe_create_mdio_sca(addr, reg); XGMAC_IOWRITE(pdata, MAC_MDIOSCAR, mdio_sca); mdio_sccd = 0; @@ -1317,9 +1329,7 @@ static int xgbe_read_ext_mii_regs(struct xgbe_prv_data *pdata, int addr, reinit_completion(&pdata->mdio_complete); - mdio_sca = 0; - XGMAC_SET_BITS(mdio_sca, MAC_MDIOSCAR, REG, reg); - XGMAC_SET_BITS(mdio_sca, MAC_MDIOSCAR, DA, addr); + mdio_sca = xgbe_create_mdio_sca(addr, reg); XGMAC_IOWRITE(pdata, MAC_MDIOSCAR, mdio_sca); mdio_sccd = 0; From e287968a3837458cd489bdfad7095bb5a10a61b6 Mon Sep 17 00:00:00 2001 From: Yunjian Wang Date: Thu, 17 Jan 2019 09:46:41 +0800 Subject: [PATCH 002/104] net: bridge: Fix ethernet header pointer before check skb forwardable [ Upstream commit 28c1382fa28f2e2d9d0d6f25ae879b5af2ecbd03 ] The skb header should be set to ethernet header before using is_skb_forwardable. Because the ethernet header length has been considered in is_skb_forwardable(including dev->hard_header_len length). To reproduce the issue: 1, add 2 ports on linux bridge br using following commands: $ brctl addbr br $ brctl addif br eth0 $ brctl addif br eth1 2, the MTU of eth0 and eth1 is 1500 3, send a packet(Data 1480, UDP 8, IP 20, Ethernet 14, VLAN 4) from eth0 to eth1 So the expect result is packet larger than 1500 cannot pass through eth0 and eth1. But currently, the packet passes through success, it means eth1's MTU limit doesn't take effect. Fixes: f6367b4660dd ("bridge: use is_skb_forwardable in forward path") Cc: bridge@lists.linux-foundation.org Cc: Nkolay Aleksandrov Cc: Roopa Prabhu Cc: Stephen Hemminger Signed-off-by: Yunjian Wang Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/bridge/br_forward.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/net/bridge/br_forward.c b/net/bridge/br_forward.c index 2cb8da465b98..48ddc60b4fbd 100644 --- a/net/bridge/br_forward.c +++ b/net/bridge/br_forward.c @@ -36,10 +36,10 @@ static inline int should_deliver(const struct net_bridge_port *p, int br_dev_queue_push_xmit(struct net *net, struct sock *sk, struct sk_buff *skb) { + skb_push(skb, ETH_HLEN); if (!is_skb_forwardable(skb->dev, skb)) goto drop; - skb_push(skb, ETH_HLEN); br_drop_fake_rtable(skb); if (skb->ip_summed == CHECKSUM_PARTIAL && @@ -98,12 +98,11 @@ static void __br_forward(const struct net_bridge_port *to, net = dev_net(indev); } else { if (unlikely(netpoll_tx_running(to->br->dev))) { - if (!is_skb_forwardable(skb->dev, skb)) { + skb_push(skb, ETH_HLEN); + if (!is_skb_forwardable(skb->dev, skb)) kfree_skb(skb); - } else { - skb_push(skb, ETH_HLEN); + else br_netpoll_send_skb(to, skb); - } return; } br_hook = NF_BR_LOCAL_OUT; From 40f2f08030fa59f634ed8313f4c9578416f2a108 Mon Sep 17 00:00:00 2001 From: Ross Lagerwall Date: Thu, 17 Jan 2019 15:34:38 +0000 Subject: [PATCH 003/104] net: Fix usage of pskb_trim_rcsum [ Upstream commit 6c57f0458022298e4da1729c67bd33ce41c14e7a ] In certain cases, pskb_trim_rcsum() may change skb pointers. 
Reinitialize header pointers afterwards to avoid potential use-after-frees. Add a note in the documentation of pskb_trim_rcsum(). Found by KASAN. Signed-off-by: Ross Lagerwall Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ppp/pppoe.c | 1 + include/linux/skbuff.h | 1 + net/bridge/br_netfilter_ipv6.c | 1 + net/bridge/netfilter/nft_reject_bridge.c | 1 + net/ipv4/ip_input.c | 1 + 5 files changed, 5 insertions(+) diff --git a/drivers/net/ppp/pppoe.c b/drivers/net/ppp/pppoe.c index 62dc564b251d..f22639f0116a 100644 --- a/drivers/net/ppp/pppoe.c +++ b/drivers/net/ppp/pppoe.c @@ -445,6 +445,7 @@ static int pppoe_rcv(struct sk_buff *skb, struct net_device *dev, if (pskb_trim_rcsum(skb, len)) goto drop; + ph = pppoe_hdr(skb); pn = pppoe_pernet(dev_net(dev)); /* Note that get_item does a sock_hold(), so sk_pppox(po) diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 60a2e7646985..5d69e208e8d9 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -3178,6 +3178,7 @@ int pskb_trim_rcsum_slow(struct sk_buff *skb, unsigned int len); * * This is exactly the same as pskb_trim except that it ensures the * checksum of received packets are still valid after the operation. + * It can change skb pointers. */ static inline int pskb_trim_rcsum(struct sk_buff *skb, unsigned int len) diff --git a/net/bridge/br_netfilter_ipv6.c b/net/bridge/br_netfilter_ipv6.c index 96c072e71ea2..5811208863b7 100644 --- a/net/bridge/br_netfilter_ipv6.c +++ b/net/bridge/br_netfilter_ipv6.c @@ -131,6 +131,7 @@ int br_validate_ipv6(struct net *net, struct sk_buff *skb) IPSTATS_MIB_INDISCARDS); goto drop; } + hdr = ipv6_hdr(skb); } if (hdr->nexthdr == NEXTHDR_HOP && br_nf_check_hbh_len(skb)) goto drop; diff --git a/net/bridge/netfilter/nft_reject_bridge.c b/net/bridge/netfilter/nft_reject_bridge.c index 08cbed7d940e..419e8edf23ba 100644 --- a/net/bridge/netfilter/nft_reject_bridge.c +++ b/net/bridge/netfilter/nft_reject_bridge.c @@ -229,6 +229,7 @@ static bool reject6_br_csum_ok(struct sk_buff *skb, int hook) pskb_trim_rcsum(skb, ntohs(ip6h->payload_len) + sizeof(*ip6h))) return false; + ip6h = ipv6_hdr(skb); thoff = ipv6_skip_exthdr(skb, ((u8*)(ip6h+1) - skb->data), &proto, &fo); if (thoff < 0 || thoff >= skb->len || (fo & htons(~0x7)) != 0) return false; diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c index 27c863f6dd83..6f977b0fef54 100644 --- a/net/ipv4/ip_input.c +++ b/net/ipv4/ip_input.c @@ -489,6 +489,7 @@ static struct sk_buff *ip_rcv_core(struct sk_buff *skb, struct net *net) goto drop; } + iph = ip_hdr(skb); skb->transport_header = skb->network_header + iph->ihl*4; /* Remove any debris in the socket control block */ From 1a864e38b39e9f28ae1d635379e6d52b2d8245a4 Mon Sep 17 00:00:00 2001 From: Andrew Lunn Date: Thu, 10 Jan 2019 22:48:36 +0100 Subject: [PATCH 004/104] net: phy: marvell: Errata for mv88e6390 internal PHYs [ Upstream commit 8cbcdc1a51999ca81db2956608b917aacd28d837 ] The VOD can be out of spec, unless some magic value is poked into an undocumented register in an undocumented page. Fixes: e4cf8a38fc0d ("net: phy: Marvell: Add mv88e6390 internal PHY") Signed-off-by: Andrew Lunn Signed-off-by: David S. 
Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/phy/marvell.c | 35 ++++++++++++++++++++++++++++++++++- 1 file changed, 34 insertions(+), 1 deletion(-) diff --git a/drivers/net/phy/marvell.c b/drivers/net/phy/marvell.c index f7c69ca34056..b3aa0027c0ff 100644 --- a/drivers/net/phy/marvell.c +++ b/drivers/net/phy/marvell.c @@ -1063,6 +1063,39 @@ static int m88e1145_config_init(struct phy_device *phydev) return 0; } +/* The VOD can be out of specification on link up. Poke an + * undocumented register, in an undocumented page, with a magic value + * to fix this. + */ +static int m88e6390_errata(struct phy_device *phydev) +{ + int err; + + err = phy_write(phydev, MII_BMCR, + BMCR_ANENABLE | BMCR_SPEED1000 | BMCR_FULLDPLX); + if (err) + return err; + + usleep_range(300, 400); + + err = phy_write_paged(phydev, 0xf8, 0x08, 0x36); + if (err) + return err; + + return genphy_soft_reset(phydev); +} + +static int m88e6390_config_aneg(struct phy_device *phydev) +{ + int err; + + err = m88e6390_errata(phydev); + if (err) + return err; + + return m88e1510_config_aneg(phydev); +} + /** * fiber_lpa_to_ethtool_lpa_t * @lpa: value of the MII_LPA register for fiber link @@ -2313,7 +2346,7 @@ static struct phy_driver marvell_drivers[] = { .flags = PHY_HAS_INTERRUPT, .probe = m88e6390_probe, .config_init = &marvell_config_init, - .config_aneg = &m88e1510_config_aneg, + .config_aneg = &m88e6390_config_aneg, .read_status = &marvell_read_status, .ack_interrupt = &marvell_ack_interrupt, .config_intr = &marvell_config_intr, From 3e4cd0677715b4ba2aee3a76f25f144c0f90b3d2 Mon Sep 17 00:00:00 2001 From: Thomas Petazzoni Date: Wed, 16 Jan 2019 10:53:58 +0100 Subject: [PATCH 005/104] net: phy: mdio_bus: add missing device_del() in mdiobus_register() error handling [ Upstream commit e40e2a2e78664fa90ea4b9bdf4a84efce2fea9d9 ] The current code in __mdiobus_register() doesn't properly handle failures returned by the devm_gpiod_get_optional() call: it returns immediately, without unregistering the device that was added by the call to device_register() earlier in the function. 
This leaves a stale device, which then causes a NULL pointer dereference in the code that handles deferred probing: [ 1.489982] Unable to handle kernel NULL pointer dereference at virtual address 00000074 [ 1.498110] pgd = (ptrval) [ 1.500838] [00000074] *pgd=00000000 [ 1.504432] Internal error: Oops: 17 [#1] SMP ARM [ 1.509133] Modules linked in: [ 1.512192] CPU: 1 PID: 51 Comm: kworker/1:3 Not tainted 4.20.0-00039-g3b73a4cc8b3e-dirty #99 [ 1.520708] Hardware name: Xilinx Zynq Platform [ 1.525261] Workqueue: events deferred_probe_work_func [ 1.530403] PC is at klist_next+0x10/0xfc [ 1.534403] LR is at device_for_each_child+0x40/0x94 [ 1.539361] pc : [] lr : [] psr: 200e0013 [ 1.545628] sp : ceeefe68 ip : 00000001 fp : ffffe000 [ 1.550863] r10: 00000000 r9 : c0c66790 r8 : 00000000 [ 1.556079] r7 : c0457d44 r6 : 00000000 r5 : ceeefe8c r4 : cfa2ec78 [ 1.562604] r3 : 00000064 r2 : c0457d44 r1 : ceeefe8c r0 : 00000064 [ 1.569129] Flags: nzCv IRQs on FIQs on Mode SVC_32 ISA ARM Segment none [ 1.576263] Control: 18c5387d Table: 0ed7804a DAC: 00000051 [ 1.582013] Process kworker/1:3 (pid: 51, stack limit = 0x(ptrval)) [ 1.588280] Stack: (0xceeefe68 to 0xceef0000) [ 1.592630] fe60: cfa2ec78 c0c03c08 00000000 c0457d44 00000000 c0c66790 [ 1.600814] fe80: 00000000 c0455d90 ceeefeac 00000064 00000000 0d7a542e cee9d494 cfa2ec78 [ 1.608998] fea0: cfa2ec78 00000000 c0457d44 c0457d7c cee9d494 c0c03c08 00000000 c0455dac [ 1.617182] fec0: cf98ba44 cf926a00 cee9d494 0d7a542e 00000000 cf935a10 cf935a10 cf935a10 [ 1.625366] fee0: c0c4e9b8 c0457d7c c0c4e80c 00000001 cf935a10 c0457df4 cf935a10 c0c4e99c [ 1.633550] ff00: c0c4e99c c045a27c c0c4e9c4 ced63f80 cfde8a80 cfdebc00 00000000 c013893c [ 1.641734] ff20: cfde8a80 cfde8a80 c07bd354 ced63f80 ced63f94 cfde8a80 00000008 c0c02d00 [ 1.649936] ff40: cfde8a98 cfde8a80 ffffe000 c0139a30 ffffe000 c0c6624a c07bd354 00000000 [ 1.658120] ff60: ffffe000 cee9e780 ceebfe00 00000000 ceeee000 ced63f80 c0139788 cf8cdea4 [ 1.666304] ff80: cee9e79c c013e598 00000001 ceebfe00 c013e44c 00000000 00000000 00000000 [ 1.674488] ffa0: 00000000 00000000 00000000 c01010e8 00000000 00000000 00000000 00000000 [ 1.682671] ffc0: 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000 [ 1.690855] ffe0: 00000000 00000000 00000000 00000000 00000013 00000000 00000000 00000000 [ 1.699058] [] (klist_next) from [] (device_for_each_child+0x40/0x94) [ 1.707241] [] (device_for_each_child) from [] (device_reorder_to_tail+0x38/0x88) [ 1.716476] [] (device_reorder_to_tail) from [] (device_for_each_child+0x5c/0x94) [ 1.725692] [] (device_for_each_child) from [] (device_reorder_to_tail+0x38/0x88) [ 1.734927] [] (device_reorder_to_tail) from [] (device_pm_move_to_tail+0x28/0x40) [ 1.744235] [] (device_pm_move_to_tail) from [] (deferred_probe_work_func+0x58/0x8c) [ 1.753746] [] (deferred_probe_work_func) from [] (process_one_work+0x210/0x4fc) [ 1.762888] [] (process_one_work) from [] (worker_thread+0x2a8/0x5c0) [ 1.771072] [] (worker_thread) from [] (kthread+0x14c/0x154) [ 1.778482] [] (kthread) from [] (ret_from_fork+0x14/0x2c) [ 1.785689] Exception stack(0xceeeffb0 to 0xceeefff8) [ 1.790739] ffa0: 00000000 00000000 00000000 00000000 [ 1.798923] ffc0: 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000 [ 1.807107] ffe0: 00000000 00000000 00000000 00000000 00000013 00000000 [ 1.813724] Code: e92d47f0 e1a05000 e8900048 e1a00003 (e5937010) [ 1.819844] ---[ end trace 3c2c0c8b65399ec9 ]--- The actual error that we had from devm_gpiod_get_optional() was -EPROBE_DEFER, due to 
the GPIO being provided by a driver that is probed later than the Ethernet controller driver. To fix this, we simply add the missing device_del() invocation in the error path. Fixes: 69226896ad636 ("mdio_bus: Issue GPIO RESET to PHYs") Signed-off-by: Thomas Petazzoni Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/phy/mdio_bus.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/phy/mdio_bus.c b/drivers/net/phy/mdio_bus.c index 98f4b1f706df..15c5586d74ff 100644 --- a/drivers/net/phy/mdio_bus.c +++ b/drivers/net/phy/mdio_bus.c @@ -391,6 +391,7 @@ int __mdiobus_register(struct mii_bus *bus, struct module *owner) if (IS_ERR(gpiod)) { dev_err(&bus->dev, "mii_bus %s couldn't get reset GPIO\n", bus->id); + device_del(&bus->dev); return PTR_ERR(gpiod); } else if (gpiod) { bus->reset_gpiod = gpiod; From 02239e797ac75f89a05622a27d04695f08c1ca89 Mon Sep 17 00:00:00 2001 From: Davide Caratti Date: Thu, 10 Jan 2019 20:21:02 +0100 Subject: [PATCH 006/104] net/sched: act_tunnel_key: fix memory leak in case of action replace [ Upstream commit 9174c3df1cd181c14913138d50ccbe539bb08335 ] running the following TDC test cases: 7afc - Replace tunnel_key set action with all parameters 364d - Replace tunnel_key set action with all parameters and cookie it's possible to trigger kmemleak warnings like: unreferenced object 0xffff94797127ab40 (size 192): comm "tc", pid 3248, jiffies 4300565293 (age 1006.862s) hex dump (first 32 bytes): 00 00 00 00 00 00 00 00 c0 93 f9 8a ff ff ff ff ................ 41 84 ee 89 ff ff ff ff 00 00 00 00 00 00 00 00 A............... backtrace: [<000000001e85b61c>] tunnel_key_init+0x31d/0x820 [act_tunnel_key] [<000000007f3f6ee7>] tcf_action_init_1+0x384/0x4c0 [<00000000e89e3ded>] tcf_action_init+0x12b/0x1a0 [<00000000c1c8c0f8>] tcf_action_add+0x73/0x170 [<0000000095a9fc28>] tc_ctl_action+0x122/0x160 [<000000004bebeac5>] rtnetlink_rcv_msg+0x263/0x2d0 [<000000009fd862dd>] netlink_rcv_skb+0x4a/0x110 [<00000000b55199e7>] netlink_unicast+0x1a0/0x250 [<000000004996cd21>] netlink_sendmsg+0x2c1/0x3c0 [<000000004d6a94b4>] sock_sendmsg+0x36/0x40 [<000000005d9f0208>] ___sys_sendmsg+0x280/0x2f0 [<00000000dec19023>] __sys_sendmsg+0x5e/0xa0 [<000000004b82ac81>] do_syscall_64+0x5b/0x180 [<00000000a0f1209a>] entry_SYSCALL_64_after_hwframe+0x44/0xa9 [<000000002926b2ab>] 0xffffffffffffffff when the tunnel_key action is replaced, the kernel forgets to release the dst metadata: ensure they are released by tunnel_key_init(), the same way it's done in tunnel_key_release(). Fixes: d0f6dd8a914f4 ("net/sched: Introduce act_tunnel_key") Signed-off-by: Davide Caratti Acked-by: Cong Wang Signed-off-by: David S. 
Miller Signed-off-by: Greg Kroah-Hartman --- net/sched/act_tunnel_key.c | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/net/sched/act_tunnel_key.c b/net/sched/act_tunnel_key.c index 681f6f04e7da..0f6601fdf889 100644 --- a/net/sched/act_tunnel_key.c +++ b/net/sched/act_tunnel_key.c @@ -197,6 +197,15 @@ static const struct nla_policy tunnel_key_policy[TCA_TUNNEL_KEY_MAX + 1] = { [TCA_TUNNEL_KEY_ENC_TTL] = { .type = NLA_U8 }, }; +static void tunnel_key_release_params(struct tcf_tunnel_key_params *p) +{ + if (!p) + return; + if (p->tcft_action == TCA_TUNNEL_KEY_ACT_SET) + dst_release(&p->tcft_enc_metadata->dst); + kfree_rcu(p, rcu); +} + static int tunnel_key_init(struct net *net, struct nlattr *nla, struct nlattr *est, struct tc_action **a, int ovr, int bind, bool rtnl_held, @@ -360,8 +369,7 @@ static int tunnel_key_init(struct net *net, struct nlattr *nla, rcu_swap_protected(t->params, params_new, lockdep_is_held(&t->tcf_lock)); spin_unlock_bh(&t->tcf_lock); - if (params_new) - kfree_rcu(params_new, rcu); + tunnel_key_release_params(params_new); if (ret == ACT_P_CREATED) tcf_idr_insert(tn, *a); @@ -385,12 +393,7 @@ static void tunnel_key_release(struct tc_action *a) struct tcf_tunnel_key_params *params; params = rcu_dereference_protected(t->params, 1); - if (params) { - if (params->tcft_action == TCA_TUNNEL_KEY_ACT_SET) - dst_release(¶ms->tcft_enc_metadata->dst); - - kfree_rcu(params, rcu); - } + tunnel_key_release_params(params); } static int tunnel_key_geneve_opts_dump(struct sk_buff *skb, From 916c27c8cf883e4650c280223db520cd7a5f161b Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Fri, 11 Jan 2019 18:55:42 -0800 Subject: [PATCH 007/104] net_sched: refetch skb protocol for each filter [ Upstream commit cd0c4e70fc0ccfa705cdf55efb27519ce9337a26 ] Martin reported a set of filters don't work after changing from reclassify to continue. Looking into the code, it looks like skb protocol is not always fetched for each iteration of the filters. But, as demonstrated by Martin, TC actions could modify skb->protocol, for example act_vlan, this means we have to refetch skb protocol in each iteration, rather than using the one we fetch in the beginning of the loop. This bug is _not_ introduced by commit 3b3ae880266d ("net: sched: consolidate tc_classify{,_compat}"), technically, if act_vlan is the only action that modifies skb protocol, then it is commit c7e2b9689ef8 ("sched: introduce vlan action") which introduced this bug. Reported-by: Martin Olsson Cc: Jamal Hadi Salim Cc: Jiri Pirko Signed-off-by: Cong Wang Acked-by: Jamal Hadi Salim Signed-off-by: David S. 
Miller Signed-off-by: Greg Kroah-Hartman --- net/sched/cls_api.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index 70f144ac5e1d..2167c6ca55e3 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -960,7 +960,6 @@ static int tcf_block_cb_call(struct tcf_block *block, enum tc_setup_type type, int tcf_classify(struct sk_buff *skb, const struct tcf_proto *tp, struct tcf_result *res, bool compat_mode) { - __be16 protocol = tc_skb_protocol(skb); #ifdef CONFIG_NET_CLS_ACT const int max_reclassify_loop = 4; const struct tcf_proto *orig_tp = tp; @@ -970,6 +969,7 @@ int tcf_classify(struct sk_buff *skb, const struct tcf_proto *tp, reclassify: #endif for (; tp; tp = rcu_dereference_bh(tp->next)) { + __be16 protocol = tc_skb_protocol(skb); int err; if (tp->protocol != protocol && @@ -1002,7 +1002,6 @@ reset: } tp = first_tp; - protocol = tc_skb_protocol(skb); goto reclassify; #endif } From 3d997bf0074e0643842f0d554c732447a5f31e85 Mon Sep 17 00:00:00 2001 From: Ross Lagerwall Date: Mon, 14 Jan 2019 09:16:56 +0000 Subject: [PATCH 008/104] openvswitch: Avoid OOB read when parsing flow nlattrs [ Upstream commit 04a4af334b971814eedf4e4a413343ad3287d9a9 ] For nested and variable attributes, the expected length of an attribute is not known and marked by a negative number. This results in an OOB read when the expected length is later used to check if the attribute is all zeros. Fix this by using the actual length of the attribute rather than the expected length. Signed-off-by: Ross Lagerwall Acked-by: Pravin B Shelar Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/openvswitch/flow_netlink.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c index 865ecef68196..c7b6010b2c09 100644 --- a/net/openvswitch/flow_netlink.c +++ b/net/openvswitch/flow_netlink.c @@ -500,7 +500,7 @@ static int __parse_flow_nlattrs(const struct nlattr *attr, return -EINVAL; } - if (!nz || !is_all_zero(nla_data(nla), expected_len)) { + if (!nz || !is_all_zero(nla_data(nla), nla_len(nla))) { attrs |= 1 << type; a[type] = nla; } From 1688e75cae7dba65d5597b11dddf70d546e46d6c Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Wed, 16 Jan 2019 16:54:42 +0800 Subject: [PATCH 009/104] vhost: log dirty page correctly [ Upstream commit cc5e710759470bc7f3c61d11fd54586f15fdbdf4 ] Vhost dirty page logging API is designed to sync through GPA. But we try to log GIOVA when device IOTLB is enabled. This is wrong and may lead to missing data after migration. To solve this issue, when logging with device IOTLB enabled, we will: 1) reuse the device IOTLB translation result of GIOVA->HVA mapping to get HVA, for writable descriptor, get HVA through iovec. For used ring update, translate its GIOVA to HVA 2) traverse the GPA->HVA mapping to get the possible GPA and log through GPA. Pay attention this reverse mapping is not guaranteed to be unique, so we should log each possible GPA in this case. This fix the failure of scp to guest during migration. In -next, we will probably support passing GIOVA->GPA instead of GIOVA->HVA. Fixes: 6b1e6cc7855b ("vhost: new device IOTLB API") Reported-by: Jintack Lim Cc: Jintack Lim Signed-off-by: Jason Wang Acked-by: Michael S. Tsirkin Signed-off-by: David S. 
Miller Signed-off-by: Greg Kroah-Hartman --- drivers/vhost/net.c | 3 +- drivers/vhost/vhost.c | 97 ++++++++++++++++++++++++++++++++++++------- drivers/vhost/vhost.h | 3 +- 3 files changed, 87 insertions(+), 16 deletions(-) diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c index 4e656f89cb22..a86aa65ad66d 100644 --- a/drivers/vhost/net.c +++ b/drivers/vhost/net.c @@ -1024,7 +1024,8 @@ static void handle_rx(struct vhost_net *net) if (nvq->done_idx > VHOST_NET_BATCH) vhost_net_signal_used(nvq); if (unlikely(vq_log)) - vhost_log_write(vq, vq_log, log, vhost_len); + vhost_log_write(vq, vq_log, log, vhost_len, + vq->iov, in); total_len += vhost_len; if (unlikely(vhost_exceeds_weight(++recv_pkts, total_len))) { vhost_poll_queue(&vq->poll); diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c index 55e5aa662ad5..c66fc8308b5e 100644 --- a/drivers/vhost/vhost.c +++ b/drivers/vhost/vhost.c @@ -1733,13 +1733,87 @@ static int log_write(void __user *log_base, return r; } +static int log_write_hva(struct vhost_virtqueue *vq, u64 hva, u64 len) +{ + struct vhost_umem *umem = vq->umem; + struct vhost_umem_node *u; + u64 start, end, l, min; + int r; + bool hit = false; + + while (len) { + min = len; + /* More than one GPAs can be mapped into a single HVA. So + * iterate all possible umems here to be safe. + */ + list_for_each_entry(u, &umem->umem_list, link) { + if (u->userspace_addr > hva - 1 + len || + u->userspace_addr - 1 + u->size < hva) + continue; + start = max(u->userspace_addr, hva); + end = min(u->userspace_addr - 1 + u->size, + hva - 1 + len); + l = end - start + 1; + r = log_write(vq->log_base, + u->start + start - u->userspace_addr, + l); + if (r < 0) + return r; + hit = true; + min = min(l, min); + } + + if (!hit) + return -EFAULT; + + len -= min; + hva += min; + } + + return 0; +} + +static int log_used(struct vhost_virtqueue *vq, u64 used_offset, u64 len) +{ + struct iovec iov[64]; + int i, ret; + + if (!vq->iotlb) + return log_write(vq->log_base, vq->log_addr + used_offset, len); + + ret = translate_desc(vq, (uintptr_t)vq->used + used_offset, + len, iov, 64, VHOST_ACCESS_WO); + if (ret) + return ret; + + for (i = 0; i < ret; i++) { + ret = log_write_hva(vq, (uintptr_t)iov[i].iov_base, + iov[i].iov_len); + if (ret) + return ret; + } + + return 0; +} + int vhost_log_write(struct vhost_virtqueue *vq, struct vhost_log *log, - unsigned int log_num, u64 len) + unsigned int log_num, u64 len, struct iovec *iov, int count) { int i, r; /* Make sure data written is seen before log. */ smp_wmb(); + + if (vq->iotlb) { + for (i = 0; i < count; i++) { + r = log_write_hva(vq, (uintptr_t)iov[i].iov_base, + iov[i].iov_len); + if (r < 0) + return r; + } + return 0; + } + for (i = 0; i < log_num; ++i) { u64 l = min(log[i].len, len); r = log_write(vq->log_base, log[i].addr, l); @@ -1769,9 +1843,8 @@ static int vhost_update_used_flags(struct vhost_virtqueue *vq) smp_wmb(); /* Log used flag write. 
*/ used = &vq->used->flags; - log_write(vq->log_base, vq->log_addr + - (used - (void __user *)vq->used), - sizeof vq->used->flags); + log_used(vq, (used - (void __user *)vq->used), + sizeof vq->used->flags); if (vq->log_ctx) eventfd_signal(vq->log_ctx, 1); } @@ -1789,9 +1862,8 @@ static int vhost_update_avail_event(struct vhost_virtqueue *vq, u16 avail_event) smp_wmb(); /* Log avail event write */ used = vhost_avail_event(vq); - log_write(vq->log_base, vq->log_addr + - (used - (void __user *)vq->used), - sizeof *vhost_avail_event(vq)); + log_used(vq, (used - (void __user *)vq->used), + sizeof *vhost_avail_event(vq)); if (vq->log_ctx) eventfd_signal(vq->log_ctx, 1); } @@ -2191,10 +2263,8 @@ static int __vhost_add_used_n(struct vhost_virtqueue *vq, /* Make sure data is seen before log. */ smp_wmb(); /* Log used ring entry write. */ - log_write(vq->log_base, - vq->log_addr + - ((void __user *)used - (void __user *)vq->used), - count * sizeof *used); + log_used(vq, ((void __user *)used - (void __user *)vq->used), + count * sizeof *used); } old = vq->last_used_idx; new = (vq->last_used_idx += count); @@ -2236,9 +2306,8 @@ int vhost_add_used_n(struct vhost_virtqueue *vq, struct vring_used_elem *heads, /* Make sure used idx is seen before log. */ smp_wmb(); /* Log used index update. */ - log_write(vq->log_base, - vq->log_addr + offsetof(struct vring_used, idx), - sizeof vq->used->idx); + log_used(vq, offsetof(struct vring_used, idx), + sizeof vq->used->idx); if (vq->log_ctx) eventfd_signal(vq->log_ctx, 1); } diff --git a/drivers/vhost/vhost.h b/drivers/vhost/vhost.h index 466ef7542291..1b675dad5e05 100644 --- a/drivers/vhost/vhost.h +++ b/drivers/vhost/vhost.h @@ -205,7 +205,8 @@ bool vhost_vq_avail_empty(struct vhost_dev *, struct vhost_virtqueue *); bool vhost_enable_notify(struct vhost_dev *, struct vhost_virtqueue *); int vhost_log_write(struct vhost_virtqueue *vq, struct vhost_log *log, - unsigned int log_num, u64 len); + unsigned int log_num, u64 len, + struct iovec *iov, int count); int vq_iotlb_prefetch(struct vhost_virtqueue *vq); struct vhost_msg_node *vhost_new_msg(struct vhost_virtqueue *vq, int type); From bc4e2300e44ad206169d3370535d271329e79360 Mon Sep 17 00:00:00 2001 From: Nir Dotan Date: Fri, 18 Jan 2019 15:57:56 +0000 Subject: [PATCH 010/104] mlxsw: pci: Increase PCI SW reset timeout [ Upstream commit d2f372ba0914e5722ac28e15f2ed2db61bcf0e44 ] Spectrum-2 PHY layer introduces a calibration period which is a part of the Spectrum-2 firmware boot process. Hence increase the SW timeout waiting for the firmware to come out of boot. This does not increase system boot time in cases where the firmware PHY calibration process is done quickly. Fixes: c3ab435466d5 ("mlxsw: spectrum: Extend to support Spectrum-2 ASIC") Signed-off-by: Nir Dotan Acked-by: Jiri Pirko Signed-off-by: Ido Schimmel Signed-off-by: David S. 
Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/mellanox/mlxsw/pci_hw.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/pci_hw.h b/drivers/net/ethernet/mellanox/mlxsw/pci_hw.h index 83f452b7ccbb..092bd6b3a210 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/pci_hw.h +++ b/drivers/net/ethernet/mellanox/mlxsw/pci_hw.h @@ -27,7 +27,7 @@ #define MLXSW_PCI_SW_RESET 0xF0010 #define MLXSW_PCI_SW_RESET_RST_BIT BIT(0) -#define MLXSW_PCI_SW_RESET_TIMEOUT_MSECS 5000 +#define MLXSW_PCI_SW_RESET_TIMEOUT_MSECS 13000 #define MLXSW_PCI_SW_RESET_WAIT_MSECS 100 #define MLXSW_PCI_FW_READY 0xA1844 #define MLXSW_PCI_FW_READY_MASK 0xFFFF From adbf7e5809943d891baa0268cfede1c948a9217d Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Wed, 9 Jan 2019 09:57:39 +0000 Subject: [PATCH 011/104] net: ipv4: Fix memory leak in network namespace dismantle [ Upstream commit f97f4dd8b3bb9d0993d2491e0f22024c68109184 ] IPv4 routing tables are flushed in two cases: 1. In response to events in the netdev and inetaddr notification chains 2. When a network namespace is being dismantled In both cases only routes associated with a dead nexthop group are flushed. However, a nexthop group will only be marked as dead in case it is populated with actual nexthops using a nexthop device. This is not the case when the route in question is an error route (e.g., 'blackhole', 'unreachable'). Therefore, when a network namespace is being dismantled such routes are not flushed and leaked [1]. To reproduce: # ip netns add blue # ip -n blue route add unreachable 192.0.2.0/24 # ip netns del blue Fix this by not skipping error routes that are not marked with RTNH_F_DEAD when flushing the routing tables. To prevent the flushing of such routes in case #1, add a parameter to fib_table_flush() that indicates if the table is flushed as part of namespace dismantle or not. Note that this problem does not exist in IPv6 since error routes are associated with the loopback device. [1] unreferenced object 0xffff888066650338 (size 56): comm "ip", pid 1206, jiffies 4294786063 (age 26.235s) hex dump (first 32 bytes): 00 00 00 00 00 00 00 00 b0 1c 62 61 80 88 ff ff ..........ba.... e8 8b a1 64 80 88 ff ff 00 07 00 08 fe 00 00 00 ...d............ backtrace: [<00000000856ed27d>] inet_rtm_newroute+0x129/0x220 [<00000000fcdfc00a>] rtnetlink_rcv_msg+0x397/0xa20 [<00000000cb85801a>] netlink_rcv_skb+0x132/0x380 [<00000000ebc991d2>] netlink_unicast+0x4c0/0x690 [<0000000014f62875>] netlink_sendmsg+0x929/0xe10 [<00000000bac9d967>] sock_sendmsg+0xc8/0x110 [<00000000223e6485>] ___sys_sendmsg+0x77a/0x8f0 [<000000002e94f880>] __sys_sendmsg+0xf7/0x250 [<00000000ccb1fa72>] do_syscall_64+0x14d/0x610 [<00000000ffbe3dae>] entry_SYSCALL_64_after_hwframe+0x49/0xbe [<000000003a8b605b>] 0xffffffffffffffff unreferenced object 0xffff888061621c88 (size 48): comm "ip", pid 1206, jiffies 4294786063 (age 26.235s) hex dump (first 32 bytes): 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b kkkkkkkkkkkkkkkk 6b 6b 6b 6b 6b 6b 6b 6b d8 8e 26 5f 80 88 ff ff kkkkkkkk..&_.... 
backtrace: [<00000000733609e3>] fib_table_insert+0x978/0x1500 [<00000000856ed27d>] inet_rtm_newroute+0x129/0x220 [<00000000fcdfc00a>] rtnetlink_rcv_msg+0x397/0xa20 [<00000000cb85801a>] netlink_rcv_skb+0x132/0x380 [<00000000ebc991d2>] netlink_unicast+0x4c0/0x690 [<0000000014f62875>] netlink_sendmsg+0x929/0xe10 [<00000000bac9d967>] sock_sendmsg+0xc8/0x110 [<00000000223e6485>] ___sys_sendmsg+0x77a/0x8f0 [<000000002e94f880>] __sys_sendmsg+0xf7/0x250 [<00000000ccb1fa72>] do_syscall_64+0x14d/0x610 [<00000000ffbe3dae>] entry_SYSCALL_64_after_hwframe+0x49/0xbe [<000000003a8b605b>] 0xffffffffffffffff Fixes: 8cced9eff1d4 ("[NETNS]: Enable routing configuration in non-initial namespace.") Signed-off-by: Ido Schimmel Reviewed-by: David Ahern Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- include/net/ip_fib.h | 2 +- net/ipv4/fib_frontend.c | 4 ++-- net/ipv4/fib_trie.c | 15 ++++++++++++--- 3 files changed, 15 insertions(+), 6 deletions(-) diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h index c9b7b136939d..95eed32d8c6b 100644 --- a/include/net/ip_fib.h +++ b/include/net/ip_fib.h @@ -230,7 +230,7 @@ int fib_table_delete(struct net *, struct fib_table *, struct fib_config *, struct netlink_ext_ack *extack); int fib_table_dump(struct fib_table *table, struct sk_buff *skb, struct netlink_callback *cb); -int fib_table_flush(struct net *net, struct fib_table *table); +int fib_table_flush(struct net *net, struct fib_table *table, bool flush_all); struct fib_table *fib_trie_unmerge(struct fib_table *main_tb); void fib_table_flush_external(struct fib_table *table); void fib_free_table(struct fib_table *tb); diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index 0113993e9b2c..958e185a8e8d 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -203,7 +203,7 @@ static void fib_flush(struct net *net) struct fib_table *tb; hlist_for_each_entry_safe(tb, tmp, head, tb_hlist) - flushed += fib_table_flush(net, tb); + flushed += fib_table_flush(net, tb, false); } if (flushed) @@ -1357,7 +1357,7 @@ static void ip_fib_net_exit(struct net *net) hlist_for_each_entry_safe(tb, tmp, head, tb_hlist) { hlist_del(&tb->tb_hlist); - fib_table_flush(net, tb); + fib_table_flush(net, tb, true); fib_free_table(tb); } } diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index 5bc0c89e81e4..3955a6d7ea66 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -1856,7 +1856,7 @@ void fib_table_flush_external(struct fib_table *tb) } /* Caller must hold RTNL. 
*/ -int fib_table_flush(struct net *net, struct fib_table *tb) +int fib_table_flush(struct net *net, struct fib_table *tb, bool flush_all) { struct trie *t = (struct trie *)tb->tb_data; struct key_vector *pn = t->kv; @@ -1904,8 +1904,17 @@ int fib_table_flush(struct net *net, struct fib_table *tb) hlist_for_each_entry_safe(fa, tmp, &n->leaf, fa_list) { struct fib_info *fi = fa->fa_info; - if (!fi || !(fi->fib_flags & RTNH_F_DEAD) || - tb->tb_id != fa->tb_id) { + if (!fi || tb->tb_id != fa->tb_id || + (!(fi->fib_flags & RTNH_F_DEAD) && + !fib_props[fa->fa_type].error)) { + slen = fa->fa_slen; + continue; + } + + /* Do not flush error routes if network namespace is + * not being dismantled + */ + if (!flush_all && fib_props[fa->fa_type].error) { slen = fa->fa_slen; continue; } From c82f4684d330c4aac94fae52b902799c4f76d190 Mon Sep 17 00:00:00 2001 From: Nir Dotan Date: Fri, 18 Jan 2019 15:57:59 +0000 Subject: [PATCH 012/104] mlxsw: spectrum_fid: Update dummy FID index [ Upstream commit a11dcd6497915ba79d95ef4fe2541aaac27f6201 ] When using a tc flower action of egress mirred redirect, the driver adds an implicit FID setting action. This implicit action sets a dummy FID to the packet and is used as part of a design for trapping unmatched flows in OVS. While this implicit FID setting action is supposed to be a NOP when a redirect action is added, in Spectrum-2 the FID record is consulted as the dummy FID index is an 802.1D FID index and the packet is dropped instead of being redirected. Set the dummy FID index value to be within 802.1Q range. This satisfies both Spectrum-1 which ignores the FID and Spectrum-2 which identifies it as an 802.1Q FID and will then follow the redirect action. Fixes: c3ab435466d5 ("mlxsw: spectrum: Extend to support Spectrum-2 ASIC") Signed-off-by: Nir Dotan Signed-off-by: Ido Schimmel Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/mellanox/mlxsw/spectrum_fid.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_fid.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_fid.c index 715d24ff937e..562c4429eec7 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_fid.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_fid.c @@ -696,8 +696,8 @@ static const struct mlxsw_sp_fid_ops mlxsw_sp_fid_dummy_ops = { static const struct mlxsw_sp_fid_family mlxsw_sp_fid_dummy_family = { .type = MLXSW_SP_FID_TYPE_DUMMY, .fid_size = sizeof(struct mlxsw_sp_fid), - .start_index = MLXSW_SP_RFID_BASE - 1, - .end_index = MLXSW_SP_RFID_BASE - 1, + .start_index = VLAN_N_VID - 1, + .end_index = VLAN_N_VID - 1, .ops = &mlxsw_sp_fid_dummy_ops, }; From bdafc159ac8ca616c6c7cd4ecee1f264c4d9606e Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Fri, 18 Jan 2019 15:57:55 +0000 Subject: [PATCH 013/104] mlxsw: pci: Ring CQ's doorbell before RDQ's When a packet should be trapped to the CPU the device consumes a WQE (work queue element) from an RDQ (receive descriptor queue) and copies the packet to the address specified in the WQE. The device then tries to post a CQE (completion queue element) that contains various metadata (e.g., ingress port) about the packet to a CQ (completion queue). In case the device managed to consume a WQE, but did not manage to post the corresponding CQE, it will get stuck. This unlikely situation can be triggered due to the scheme the driver is currently using to process CQEs. 
The driver will consume up to 512 CQEs at a time and after processing each corresponding WQE it will ring the RDQ's doorbell, letting the device know that a new WQE was posted for it to consume. Only after processing all the CQEs (up to 512), the driver will ring the CQ's doorbell, letting the device know that new ones can be posted. Fix this by having the driver ring the CQ's doorbell for every processed CQE, but before ringing the RDQ's doorbell. This guarantees that whenever we post a new WQE, there is a corresponding CQE available. Copy the currently processed CQE to prevent the device from overwriting it with a new CQE after ringing the doorbell. Note that the driver still arms the CQ only after processing all the pending CQEs, so that interrupts for this CQ will only be delivered after the driver finished its processing. Before commit 8404f6f2e8ed ("mlxsw: pci: Allow to use CQEs of version 1 and version 2") the issue was virtually impossible to trigger since the number of CQEs was twice the number of WQEs and the number of CQEs processed at a time was equal to the number of available WQEs. Fixes: 8404f6f2e8ed ("mlxsw: pci: Allow to use CQEs of version 1 and version 2") Signed-off-by: Ido Schimmel Reported-by: Semion Lisyansky Tested-by: Semion Lisyansky Acked-by: Jiri Pirko Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/mellanox/mlxsw/pci.c | 12 +++++++----- drivers/net/ethernet/mellanox/mlxsw/pci_hw.h | 1 + 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/pci.c b/drivers/net/ethernet/mellanox/mlxsw/pci.c index 5890fdfd62c3..c7901a3f2a79 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/pci.c +++ b/drivers/net/ethernet/mellanox/mlxsw/pci.c @@ -604,29 +604,31 @@ static void mlxsw_pci_cq_tasklet(unsigned long data) u16 wqe_counter = mlxsw_pci_cqe_wqe_counter_get(cqe); u8 sendq = mlxsw_pci_cqe_sr_get(q->u.cq.v, cqe); u8 dqn = mlxsw_pci_cqe_dqn_get(q->u.cq.v, cqe); + char ncqe[MLXSW_PCI_CQE_SIZE_MAX]; + + memcpy(ncqe, cqe, q->elem_size); + mlxsw_pci_queue_doorbell_consumer_ring(mlxsw_pci, q); if (sendq) { struct mlxsw_pci_queue *sdq; sdq = mlxsw_pci_sdq_get(mlxsw_pci, dqn); mlxsw_pci_cqe_sdq_handle(mlxsw_pci, sdq, - wqe_counter, cqe); + wqe_counter, ncqe); q->u.cq.comp_sdq_count++; } else { struct mlxsw_pci_queue *rdq; rdq = mlxsw_pci_rdq_get(mlxsw_pci, dqn); mlxsw_pci_cqe_rdq_handle(mlxsw_pci, rdq, - wqe_counter, q->u.cq.v, cqe); + wqe_counter, q->u.cq.v, ncqe); q->u.cq.comp_rdq_count++; } if (++items == credits) break; } - if (items) { - mlxsw_pci_queue_doorbell_consumer_ring(mlxsw_pci, q); + if (items) mlxsw_pci_queue_doorbell_arm_consumer_ring(mlxsw_pci, q); - } } static u16 mlxsw_pci_cq_elem_count(const struct mlxsw_pci_queue *q) diff --git a/drivers/net/ethernet/mellanox/mlxsw/pci_hw.h b/drivers/net/ethernet/mellanox/mlxsw/pci_hw.h index 092bd6b3a210..72cdaa01d56d 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/pci_hw.h +++ b/drivers/net/ethernet/mellanox/mlxsw/pci_hw.h @@ -53,6 +53,7 @@ #define MLXSW_PCI_WQE_SIZE 32 /* 32 bytes per element */ #define MLXSW_PCI_CQE01_SIZE 16 /* 16 bytes per element */ #define MLXSW_PCI_CQE2_SIZE 32 /* 32 bytes per element */ +#define MLXSW_PCI_CQE_SIZE_MAX MLXSW_PCI_CQE2_SIZE #define MLXSW_PCI_EQE_SIZE 16 /* 16 bytes per element */ #define MLXSW_PCI_WQE_COUNT (MLXSW_PCI_AQ_SIZE / MLXSW_PCI_WQE_SIZE) #define MLXSW_PCI_CQE01_COUNT (MLXSW_PCI_AQ_SIZE / MLXSW_PCI_CQE01_SIZE) From 84bf74307c881222a3387497cff896f60ea6b44f Mon Sep 17 00:00:00 2001 From: Ivan Vecera 
Date: Wed, 16 Jan 2019 16:53:52 +0100 Subject: [PATCH 014/104] net/sched: cls_flower: allocate mask dynamically in fl_change() [ Upstream commit 2cddd20147826aef283115abb00012d4dafe3cdb ] Recent changes (especially 05cd271fd61a ("cls_flower: Support multiple masks per priority")) in the fl_flow_mask structure grow it and its current size e.g. on x86_64 with defconfig is 760 bytes and more than 1024 bytes with some debug options enabled. Prior the mentioned commit its size was 176 bytes (using defconfig on x86_64). With regard to this fact it's reasonable to allocate this structure dynamically in fl_change() to reduce its stack size. v2: - use kzalloc() instead of kcalloc() Fixes: 05cd271fd61a ("cls_flower: Support multiple masks per priority") Cc: Jiri Pirko Cc: Paul Blakey Acked-by: Jiri Pirko Signed-off-by: Ivan Vecera Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/sched/cls_flower.c | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c index 7fade7107f95..84893bc67531 100644 --- a/net/sched/cls_flower.c +++ b/net/sched/cls_flower.c @@ -1176,17 +1176,23 @@ static int fl_change(struct net *net, struct sk_buff *in_skb, struct cls_fl_head *head = rtnl_dereference(tp->root); struct cls_fl_filter *fold = *arg; struct cls_fl_filter *fnew; + struct fl_flow_mask *mask; struct nlattr **tb; - struct fl_flow_mask mask = {}; int err; if (!tca[TCA_OPTIONS]) return -EINVAL; - tb = kcalloc(TCA_FLOWER_MAX + 1, sizeof(struct nlattr *), GFP_KERNEL); - if (!tb) + mask = kzalloc(sizeof(struct fl_flow_mask), GFP_KERNEL); + if (!mask) return -ENOBUFS; + tb = kcalloc(TCA_FLOWER_MAX + 1, sizeof(struct nlattr *), GFP_KERNEL); + if (!tb) { + err = -ENOBUFS; + goto errout_mask_alloc; + } + err = nla_parse_nested(tb, TCA_FLOWER_MAX, tca[TCA_OPTIONS], fl_policy, NULL); if (err < 0) @@ -1229,12 +1235,12 @@ static int fl_change(struct net *net, struct sk_buff *in_skb, } } - err = fl_set_parms(net, tp, fnew, &mask, base, tb, tca[TCA_RATE], ovr, + err = fl_set_parms(net, tp, fnew, mask, base, tb, tca[TCA_RATE], ovr, tp->chain->tmplt_priv, extack); if (err) goto errout_idr; - err = fl_check_assign_mask(head, fnew, fold, &mask); + err = fl_check_assign_mask(head, fnew, fold, mask); if (err) goto errout_idr; @@ -1281,6 +1287,7 @@ static int fl_change(struct net *net, struct sk_buff *in_skb, } kfree(tb); + kfree(mask); return 0; errout_mask: @@ -1294,6 +1301,8 @@ errout: kfree(fnew); errout_tb: kfree(tb); +errout_mask_alloc: + kfree(mask); return err; } From e3fa624ee7aff48d6933d31b50c93b77e54ef583 Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Tue, 15 Jan 2019 11:40:02 -0500 Subject: [PATCH 015/104] udp: with udp_segment release on error path [ Upstream commit 0f149c9fec3cd720628ecde83bfc6f64c1e7dcb6 ] Failure __ip_append_data triggers udp_flush_pending_frames, but these tests happen later. The skb must be freed directly. Fixes: bec1f6f697362 ("udp: generate gso with UDP_SEGMENT") Reported-by: Eric Dumazet Signed-off-by: Willem de Bruijn Signed-off-by: David S. 
Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/udp.c | 16 ++++++++++++---- net/ipv6/udp.c | 16 ++++++++++++---- 2 files changed, 24 insertions(+), 8 deletions(-) diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index f8183fdce5b2..e45a5e19e509 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -785,15 +785,23 @@ static int udp_send_skb(struct sk_buff *skb, struct flowi4 *fl4, const int hlen = skb_network_header_len(skb) + sizeof(struct udphdr); - if (hlen + cork->gso_size > cork->fragsize) + if (hlen + cork->gso_size > cork->fragsize) { + kfree_skb(skb); return -EINVAL; - if (skb->len > cork->gso_size * UDP_MAX_SEGMENTS) + } + if (skb->len > cork->gso_size * UDP_MAX_SEGMENTS) { + kfree_skb(skb); return -EINVAL; - if (sk->sk_no_check_tx) + } + if (sk->sk_no_check_tx) { + kfree_skb(skb); return -EINVAL; + } if (skb->ip_summed != CHECKSUM_PARTIAL || is_udplite || - dst_xfrm(skb_dst(skb))) + dst_xfrm(skb_dst(skb))) { + kfree_skb(skb); return -EIO; + } skb_shinfo(skb)->gso_size = cork->gso_size; skb_shinfo(skb)->gso_type = SKB_GSO_UDP_L4; diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index b36694b6716e..76ba2f34ef6b 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -1056,15 +1056,23 @@ static int udp_v6_send_skb(struct sk_buff *skb, struct flowi6 *fl6, const int hlen = skb_network_header_len(skb) + sizeof(struct udphdr); - if (hlen + cork->gso_size > cork->fragsize) + if (hlen + cork->gso_size > cork->fragsize) { + kfree_skb(skb); return -EINVAL; - if (skb->len > cork->gso_size * UDP_MAX_SEGMENTS) + } + if (skb->len > cork->gso_size * UDP_MAX_SEGMENTS) { + kfree_skb(skb); return -EINVAL; - if (udp_sk(sk)->no_check6_tx) + } + if (udp_sk(sk)->no_check6_tx) { + kfree_skb(skb); return -EINVAL; + } if (skb->ip_summed != CHECKSUM_PARTIAL || is_udplite || - dst_xfrm(skb_dst(skb))) + dst_xfrm(skb_dst(skb))) { + kfree_skb(skb); return -EIO; + } skb_shinfo(skb)->gso_size = cork->gso_size; skb_shinfo(skb)->gso_type = SKB_GSO_UDP_L4; From 0449da6fc203562c56fd17f612230e2db05315b4 Mon Sep 17 00:00:00 2001 From: Olivier Matz Date: Wed, 9 Jan 2019 10:57:21 +0100 Subject: [PATCH 016/104] ip6_gre: fix tunnel list corruption for x-netns [ Upstream commit ab5098fa25b91cb6fe0a0676f17abb64f2bbf024 ] In changelink ops, the ip6gre_net pointer is retrieved from dev_net(dev), which is wrong in case of x-netns. Thus, the tunnel is not unlinked from its current list and is relinked into another net namespace. This corrupts the tunnel lists and can later trigger a kernel oops. Fix this by retrieving the netns from device private area. Fixes: c8632fc30bb0 ("net: ip6_gre: Split up ip6gre_changelink()") Cc: Petr Machata Signed-off-by: Olivier Matz Acked-by: Nicolas Dichtel Signed-off-by: David S. 
Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv6/ip6_gre.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index c270726b01b0..c0aad1f9ab15 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@ -2047,9 +2047,9 @@ static int ip6gre_changelink(struct net_device *dev, struct nlattr *tb[], struct nlattr *data[], struct netlink_ext_ack *extack) { - struct ip6gre_net *ign = net_generic(dev_net(dev), ip6gre_net_id); + struct ip6_tnl *t = netdev_priv(dev); + struct ip6gre_net *ign = net_generic(t->net, ip6gre_net_id); struct __ip6_tnl_parm p; - struct ip6_tnl *t; t = ip6gre_changelink_common(dev, tb, data, &p, extack); if (IS_ERR(t)) From 552cd931b483c8c0bd19fa67fb2453fa942edc32 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Mon, 14 Jan 2019 18:10:06 +0800 Subject: [PATCH 017/104] erspan: build the header with the right proto according to erspan_ver [ Upstream commit 20704bd1633dd5afb29a321d3a615c9c8e9c9d05 ] As said in draft-foschiano-erspan-03#section4: Different frame variants known as "ERSPAN Types" can be distinguished based on the GRE "Protocol Type" field value: Type I and II's value is 0x88BE while Type III's is 0x22EB [ETYPES]. So set it properly in erspan_xmit() according to erspan_ver. While at it, also remove the unused parameter 'proto' in erspan_fb_xmit(). Fixes: 94d7d8f29287 ("ip6_gre: add erspan v2 support") Reported-by: Jianlin Shi Signed-off-by: Xin Long Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/ip_gre.c | 23 ++++++++++++++--------- net/ipv6/ip6_gre.c | 6 ++++-- 2 files changed, 18 insertions(+), 11 deletions(-) diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index 5ef5df3a06f1..0bfad3e72509 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -570,8 +570,7 @@ err_free_skb: dev->stats.tx_dropped++; } -static void erspan_fb_xmit(struct sk_buff *skb, struct net_device *dev, - __be16 proto) +static void erspan_fb_xmit(struct sk_buff *skb, struct net_device *dev) { struct ip_tunnel *tunnel = netdev_priv(dev); struct ip_tunnel_info *tun_info; @@ -579,10 +578,10 @@ static void erspan_fb_xmit(struct sk_buff *skb, struct net_device *dev, struct erspan_metadata *md; struct rtable *rt = NULL; bool truncate = false; + __be16 df, proto; struct flowi4 fl; int tunnel_hlen; int version; - __be16 df; int nhoff; int thoff; @@ -627,18 +626,20 @@ static void erspan_fb_xmit(struct sk_buff *skb, struct net_device *dev, if (version == 1) { erspan_build_header(skb, ntohl(tunnel_id_to_key32(key->tun_id)), ntohl(md->u.index), truncate, true); + proto = htons(ETH_P_ERSPAN); } else if (version == 2) { erspan_build_header_v2(skb, ntohl(tunnel_id_to_key32(key->tun_id)), md->u.md2.dir, get_hwid(&md->u.md2), truncate, true); + proto = htons(ETH_P_ERSPAN2); } else { goto err_free_rt; } gre_build_header(skb, 8, TUNNEL_SEQ, - htons(ETH_P_ERSPAN), 0, htonl(tunnel->o_seqno++)); + proto, 0, htonl(tunnel->o_seqno++)); df = key->tun_flags & TUNNEL_DONT_FRAGMENT ? 
htons(IP_DF) : 0; @@ -722,12 +723,13 @@ static netdev_tx_t erspan_xmit(struct sk_buff *skb, { struct ip_tunnel *tunnel = netdev_priv(dev); bool truncate = false; + __be16 proto; if (!pskb_inet_may_pull(skb)) goto free_skb; if (tunnel->collect_md) { - erspan_fb_xmit(skb, dev, skb->protocol); + erspan_fb_xmit(skb, dev); return NETDEV_TX_OK; } @@ -743,19 +745,22 @@ static netdev_tx_t erspan_xmit(struct sk_buff *skb, } /* Push ERSPAN header */ - if (tunnel->erspan_ver == 1) + if (tunnel->erspan_ver == 1) { erspan_build_header(skb, ntohl(tunnel->parms.o_key), tunnel->index, truncate, true); - else if (tunnel->erspan_ver == 2) + proto = htons(ETH_P_ERSPAN); + } else if (tunnel->erspan_ver == 2) { erspan_build_header_v2(skb, ntohl(tunnel->parms.o_key), tunnel->dir, tunnel->hwid, truncate, true); - else + proto = htons(ETH_P_ERSPAN2); + } else { goto free_skb; + } tunnel->parms.o_flags &= ~TUNNEL_KEY; - __gre_xmit(skb, dev, &tunnel->parms.iph, htons(ETH_P_ERSPAN)); + __gre_xmit(skb, dev, &tunnel->parms.iph, proto); return NETDEV_TX_OK; free_skb: diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index c0aad1f9ab15..2805d78c9658 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@ -938,6 +938,7 @@ static netdev_tx_t ip6erspan_tunnel_xmit(struct sk_buff *skb, __u8 dsfield = false; struct flowi6 fl6; int err = -EINVAL; + __be16 proto; __u32 mtu; int nhoff; int thoff; @@ -1051,8 +1052,9 @@ static netdev_tx_t ip6erspan_tunnel_xmit(struct sk_buff *skb, } /* Push GRE header. */ - gre_build_header(skb, 8, TUNNEL_SEQ, - htons(ETH_P_ERSPAN), 0, htonl(t->o_seqno++)); + proto = (t->parms.erspan_ver == 1) ? htons(ETH_P_ERSPAN) + : htons(ETH_P_ERSPAN2); + gre_build_header(skb, 8, TUNNEL_SEQ, proto, 0, htonl(t->o_seqno++)); /* TooBig packet may have updated dst->dev's mtu */ if (!t->parms.collect_md && dst && dst_mtu(dst) > dst->dev->mtu) From c9fe9d194d4513d5902a4e79db0092ace010e572 Mon Sep 17 00:00:00 2001 From: Andrew Lunn Date: Fri, 11 Jan 2019 00:15:21 +0100 Subject: [PATCH 018/104] net: phy: marvell: Fix deadlock from wrong locking MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit e0a7328fad9979104f73e19bedca821ef3262ae1 ] m88e1318_set_wol() takes the lock as part of phy_select_page(). Don't take the lock again with phy_read(), use the unlocked __phy_read(). Fixes: 424ca4c55121 ("net: phy: marvell: fix paged access races") Reported-by: Åke Rehnman Signed-off-by: Andrew Lunn Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/phy/marvell.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/phy/marvell.c b/drivers/net/phy/marvell.c index b3aa0027c0ff..d71be15c8c69 100644 --- a/drivers/net/phy/marvell.c +++ b/drivers/net/phy/marvell.c @@ -1451,7 +1451,7 @@ static int m88e1318_set_wol(struct phy_device *phydev, * before enabling it if !phy_interrupt_is_valid() */ if (!phy_interrupt_is_valid(phydev)) - phy_read(phydev, MII_M1011_IEVENT); + __phy_read(phydev, MII_M1011_IEVENT); /* Enable the WOL interrupt */ err = __phy_modify(phydev, MII_88E1318S_PHY_CSIER, 0, From 6c4d069aec0fbbfd2e244b84c014539d1daad988 Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Thu, 10 Jan 2019 11:17:42 +0800 Subject: [PATCH 019/104] ip6_gre: update version related info when changing link [ Upstream commit 80b3671e9377916bf2b02e56113fa7377ce5705a ] We forgot to update ip6erspan version related info when changing link, which will cause setting new hwid failed. 
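
A minimal sketch of the failing sequence, assuming iproute2's ip6erspan keywords (the device name, addresses and values below are only illustrative examples, not taken from this patch):

  # ip link add dev ip6erspan1 type ip6erspan seq key 10 local fc00::1 remote fc00::2 erspan_ver 2 erspan_hwid 7
  # ip link set dev ip6erspan1 type ip6erspan erspan_hwid 17

The second command goes through the changelink path; before this fix the new hwid (and the other version related fields) was never copied into the tunnel parameters, so the update appeared to have no effect.
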
Reported-by: Jianlin Shi Fixes: 94d7d8f292870 ("ip6_gre: add erspan v2 support") Signed-off-by: Hangbin Liu Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv6/ip6_gre.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index 2805d78c9658..345e6839f031 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@ -1187,6 +1187,10 @@ static void ip6gre_tnl_copy_tnl_parm(struct ip6_tnl *t, t->parms.i_flags = p->i_flags; t->parms.o_flags = p->o_flags; t->parms.fwmark = p->fwmark; + t->parms.erspan_ver = p->erspan_ver; + t->parms.index = p->index; + t->parms.dir = p->dir; + t->parms.hwid = p->hwid; dst_cache_reset(&t->dst_cache); } From 2cade15d58caad78600b6d23b4f6ec7ea48ec6c6 Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Thu, 10 Jan 2019 14:40:33 -0500 Subject: [PATCH 020/104] tcp: allow MSG_ZEROCOPY transmission also in CLOSE_WAIT state [ Upstream commit 13d7f46386e060df31b727c9975e38306fa51e7a ] TCP transmission with MSG_ZEROCOPY fails if the peer closes its end of the connection and so transitions this socket to CLOSE_WAIT state. Transmission in close wait state is acceptable. Other similar tests in the stack (e.g., in FastOpen) accept both states. Relax this test, too. Link: https://www.mail-archive.com/netdev@vger.kernel.org/msg276886.html Link: https://www.mail-archive.com/netdev@vger.kernel.org/msg227390.html Fixes: f214f915e7db ("tcp: enable MSG_ZEROCOPY") Reported-by: Marek Majkowski Signed-off-by: Willem de Bruijn CC: Yuchung Cheng CC: Neal Cardwell CC: Soheil Hassas Yeganeh CC: Alexey Kodanev Acked-by: Soheil Hassas Yeganeh Reviewed-by: Eric Dumazet Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/tcp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index a32a0f4cc138..87fe44197aa1 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -1186,7 +1186,7 @@ int tcp_sendmsg_locked(struct sock *sk, struct msghdr *msg, size_t size) flags = msg->msg_flags; if (flags & MSG_ZEROCOPY && size && sock_flag(sk, SOCK_ZEROCOPY)) { - if (sk->sk_state != TCP_ESTABLISHED) { + if ((1 << sk->sk_state) & ~(TCPF_ESTABLISHED | TCPF_CLOSE_WAIT)) { err = -EINVAL; goto out_err; } From adfda26bdf47e8b2d2979a018528304dfaa5bdb4 Mon Sep 17 00:00:00 2001 From: Alexander Usyskin Date: Sun, 13 Jan 2019 14:24:47 +0200 Subject: [PATCH 021/104] mei: me: mark LBG devices as having dma support commit 173436ba800d01178a8b19e5de4a8cb02c0db760 upstream. The LBG server platform sports DMA support. 
Cc: #v5.0+ Signed-off-by: Alexander Usyskin Signed-off-by: Tomas Winkler Signed-off-by: Greg Kroah-Hartman --- drivers/misc/mei/pci-me.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/misc/mei/pci-me.c b/drivers/misc/mei/pci-me.c index ea4e152270a3..5c78cb77ba62 100644 --- a/drivers/misc/mei/pci-me.c +++ b/drivers/misc/mei/pci-me.c @@ -88,7 +88,7 @@ static const struct pci_device_id mei_me_pci_tbl[] = { {MEI_PCI_DEVICE(MEI_DEV_ID_SPT_2, MEI_ME_PCH8_CFG)}, {MEI_PCI_DEVICE(MEI_DEV_ID_SPT_H, MEI_ME_PCH8_SPS_CFG)}, {MEI_PCI_DEVICE(MEI_DEV_ID_SPT_H_2, MEI_ME_PCH8_SPS_CFG)}, - {MEI_PCI_DEVICE(MEI_DEV_ID_LBG, MEI_ME_PCH8_CFG)}, + {MEI_PCI_DEVICE(MEI_DEV_ID_LBG, MEI_ME_PCH12_CFG)}, {MEI_PCI_DEVICE(MEI_DEV_ID_BXT_M, MEI_ME_PCH8_CFG)}, {MEI_PCI_DEVICE(MEI_DEV_ID_APL_I, MEI_ME_PCH8_CFG)}, From f8982204cbea7bb392480c2d18192b3e5ba5acf6 Mon Sep 17 00:00:00 2001 From: Tomas Winkler Date: Sun, 13 Jan 2019 14:24:48 +0200 Subject: [PATCH 022/104] mei: me: add denverton innovation engine device IDs commit f7ee8ead151f9d0b8dac6ab6c3ff49bbe809c564 upstream. Add the Denverton innovation engine (IE) device ids. The IE is an ME-like device which provides HW security offloading. Cc: Signed-off-by: Tomas Winkler Signed-off-by: Alexander Usyskin Signed-off-by: Greg Kroah-Hartman --- drivers/misc/mei/hw-me-regs.h | 2 ++ drivers/misc/mei/pci-me.c | 2 ++ 2 files changed, 4 insertions(+) diff --git a/drivers/misc/mei/hw-me-regs.h b/drivers/misc/mei/hw-me-regs.h index e4b10b2d1a08..23739a60517f 100644 --- a/drivers/misc/mei/hw-me-regs.h +++ b/drivers/misc/mei/hw-me-regs.h @@ -127,6 +127,8 @@ #define MEI_DEV_ID_BXT_M 0x1A9A /* Broxton M */ #define MEI_DEV_ID_APL_I 0x5A9A /* Apollo Lake I */ +#define MEI_DEV_ID_DNV_IE 0x19E5 /* Denverton IE */ + #define MEI_DEV_ID_GLK 0x319A /* Gemini Lake */ #define MEI_DEV_ID_KBP 0xA2BA /* Kaby Point */ diff --git a/drivers/misc/mei/pci-me.c b/drivers/misc/mei/pci-me.c index 5c78cb77ba62..c8e21c894a5f 100644 --- a/drivers/misc/mei/pci-me.c +++ b/drivers/misc/mei/pci-me.c @@ -93,6 +93,8 @@ static const struct pci_device_id mei_me_pci_tbl[] = { {MEI_PCI_DEVICE(MEI_DEV_ID_BXT_M, MEI_ME_PCH8_CFG)}, {MEI_PCI_DEVICE(MEI_DEV_ID_APL_I, MEI_ME_PCH8_CFG)}, + {MEI_PCI_DEVICE(MEI_DEV_ID_DNV_IE, MEI_ME_PCH8_CFG)}, + {MEI_PCI_DEVICE(MEI_DEV_ID_GLK, MEI_ME_PCH8_CFG)}, {MEI_PCI_DEVICE(MEI_DEV_ID_KBP, MEI_ME_PCH8_CFG)}, From a70e5cd09361ff0e5ba8e1caa7167720667ea8bd Mon Sep 17 00:00:00 2001 From: Christian Lamparter Date: Fri, 11 Jan 2019 17:29:45 +0100 Subject: [PATCH 023/104] USB: leds: fix regression in usbport led trigger commit 91f7d2e89868fcac0e750a28230fdb1ad4512137 upstream. The patch "usb: simplify usbport trigger" together with "leds: triggers: add device attribute support" caused an regression for the usbport trigger. it will no longer enumerate any active usb hub ports under the "ports" directory in the sysfs class directory, if the usb host drivers are fully initialized before the usbport trigger was loaded. The reason is that the usbport driver tries to register the sysfs entries during the activate() callback. And this will fail with -2 / ENOENT because the patch "leds: triggers: add device attribute support" made it so that the sysfs "ports" group was only being added after the activate() callback succeeded. This version of the patch reverts parts of the "usb: simplify usbport trigger" patch and restores usbport trigger's functionality. 
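
A minimal sketch of the behaviour being restored, assuming the usbport trigger's documented sysfs layout (the LED and port names are only examples):

  # echo usbport > /sys/class/leds/green:usb/trigger
  # ls /sys/class/leds/green:usb/ports
  usb1-port1  usb1-port2  usb2-port1
  # echo 1 > /sys/class/leds/green:usb/ports/usb1-port1

With the regression, the ports directory appears but stays empty whenever the USB host drivers finished probing before the trigger was activated; creating the group from the activate() callback, as this patch does, makes the hub ports enumerate again.
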
Fixes: 6f7b0bad8839 ("usb: simplify usbport trigger") Signed-off-by: Christian Lamparter Cc: stable Acked-by: Jacek Anaszewski Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/ledtrig-usbport.c | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/drivers/usb/core/ledtrig-usbport.c b/drivers/usb/core/ledtrig-usbport.c index dc7f7fd71684..c12ac56606c3 100644 --- a/drivers/usb/core/ledtrig-usbport.c +++ b/drivers/usb/core/ledtrig-usbport.c @@ -119,11 +119,6 @@ static const struct attribute_group ports_group = { .attrs = ports_attrs, }; -static const struct attribute_group *ports_groups[] = { - &ports_group, - NULL -}; - /*************************************** * Adding & removing ports ***************************************/ @@ -307,6 +302,7 @@ static int usbport_trig_notify(struct notifier_block *nb, unsigned long action, static int usbport_trig_activate(struct led_classdev *led_cdev) { struct usbport_trig_data *usbport_data; + int err; usbport_data = kzalloc(sizeof(*usbport_data), GFP_KERNEL); if (!usbport_data) @@ -315,6 +311,9 @@ static int usbport_trig_activate(struct led_classdev *led_cdev) /* List of ports */ INIT_LIST_HEAD(&usbport_data->ports); + err = sysfs_create_group(&led_cdev->dev->kobj, &ports_group); + if (err) + goto err_free; usb_for_each_dev(usbport_data, usbport_trig_add_usb_dev_ports); usbport_trig_update_count(usbport_data); @@ -322,8 +321,11 @@ static int usbport_trig_activate(struct led_classdev *led_cdev) usbport_data->nb.notifier_call = usbport_trig_notify; led_set_trigger_data(led_cdev, usbport_data); usb_register_notify(&usbport_data->nb); - return 0; + +err_free: + kfree(usbport_data); + return err; } static void usbport_trig_deactivate(struct led_classdev *led_cdev) @@ -335,6 +337,8 @@ static void usbport_trig_deactivate(struct led_classdev *led_cdev) usbport_trig_remove_port(usbport_data, port); } + sysfs_remove_group(&led_cdev->dev->kobj, &ports_group); + usb_unregister_notify(&usbport_data->nb); kfree(usbport_data); @@ -344,7 +348,6 @@ static struct led_trigger usbport_led_trigger = { .name = "usbport", .activate = usbport_trig_activate, .deactivate = usbport_trig_deactivate, - .groups = ports_groups, }; static int __init usbport_trig_init(void) From 4d984aab54a6cc10cb7790fcd9d4a204fb67ecdd Mon Sep 17 00:00:00 2001 From: Max Schulze Date: Mon, 7 Jan 2019 08:31:49 +0100 Subject: [PATCH 024/104] USB: serial: simple: add Motorola Tetra TPG2200 device id commit b81c2c33eab79dfd3650293b2227ee5c6036585c upstream. Add new Motorola Tetra device id for Motorola Solutions TETRA PEI device T: Bus=02 Lev=01 Prnt=01 Port=01 Cnt=01 Dev#= 4 Spd=480 MxCh= 0 D: Ver= 2.00 Cls=00(>ifc ) Sub=00 Prot=00 MxPS=64 #Cfgs= 1 P: Vendor=0cad ProdID=9016 Rev=24.16 S: Manufacturer=Motorola Solutions, Inc. S: Product=TETRA PEI interface C: #Ifs= 2 Cfg#= 1 Atr=80 MxPwr=500mA I: If#= 0 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=00 Prot=00 Driver=usb_serial_simple I: If#= 1 Alt= 0 #EPs= 2 Cls=ff(vend.) 
Sub=00 Prot=00 Driver=usb_serial_simple Signed-off-by: Max Schulze Cc: stable Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/usb-serial-simple.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/usb/serial/usb-serial-simple.c b/drivers/usb/serial/usb-serial-simple.c index 4d0273508043..edbbb13d6de6 100644 --- a/drivers/usb/serial/usb-serial-simple.c +++ b/drivers/usb/serial/usb-serial-simple.c @@ -85,7 +85,8 @@ DEVICE(moto_modem, MOTO_IDS); /* Motorola Tetra driver */ #define MOTOROLA_TETRA_IDS() \ { USB_DEVICE(0x0cad, 0x9011) }, /* Motorola Solutions TETRA PEI */ \ - { USB_DEVICE(0x0cad, 0x9012) } /* MTP6550 */ + { USB_DEVICE(0x0cad, 0x9012) }, /* MTP6550 */ \ + { USB_DEVICE(0x0cad, 0x9016) } /* TPG2200 */ DEVICE(motorola_tetra, MOTOROLA_TETRA_IDS); /* Novatel Wireless GPS driver */ From 8e7320b9f55159f8e006eadfb3b767407d3a8fe5 Mon Sep 17 00:00:00 2001 From: Charles Yeh Date: Tue, 15 Jan 2019 23:13:56 +0800 Subject: [PATCH 025/104] USB: serial: pl2303: add new PID to support PL2303TB commit 4dcf9ddc9ad5ab649abafa98c5a4d54b1a33dabb upstream. Add new PID to support PL2303TB (TYPE_HX) Signed-off-by: Charles Yeh Cc: stable Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/pl2303.c | 1 + drivers/usb/serial/pl2303.h | 2 ++ 2 files changed, 3 insertions(+) diff --git a/drivers/usb/serial/pl2303.c b/drivers/usb/serial/pl2303.c index d5b38f096698..5a6df6e9ad57 100644 --- a/drivers/usb/serial/pl2303.c +++ b/drivers/usb/serial/pl2303.c @@ -46,6 +46,7 @@ static const struct usb_device_id id_table[] = { { USB_DEVICE(PL2303_VENDOR_ID, PL2303_PRODUCT_ID_HCR331) }, { USB_DEVICE(PL2303_VENDOR_ID, PL2303_PRODUCT_ID_MOTOROLA) }, { USB_DEVICE(PL2303_VENDOR_ID, PL2303_PRODUCT_ID_ZTEK) }, + { USB_DEVICE(PL2303_VENDOR_ID, PL2303_PRODUCT_ID_TB) }, { USB_DEVICE(IODATA_VENDOR_ID, IODATA_PRODUCT_ID) }, { USB_DEVICE(IODATA_VENDOR_ID, IODATA_PRODUCT_ID_RSAQ5) }, { USB_DEVICE(ATEN_VENDOR_ID, ATEN_PRODUCT_ID), diff --git a/drivers/usb/serial/pl2303.h b/drivers/usb/serial/pl2303.h index 4e2554d55362..559941ca884d 100644 --- a/drivers/usb/serial/pl2303.h +++ b/drivers/usb/serial/pl2303.h @@ -8,6 +8,7 @@ #define PL2303_VENDOR_ID 0x067b #define PL2303_PRODUCT_ID 0x2303 +#define PL2303_PRODUCT_ID_TB 0x2304 #define PL2303_PRODUCT_ID_RSAQ2 0x04bb #define PL2303_PRODUCT_ID_DCU11 0x1234 #define PL2303_PRODUCT_ID_PHAROS 0xaaa0 @@ -20,6 +21,7 @@ #define PL2303_PRODUCT_ID_MOTOROLA 0x0307 #define PL2303_PRODUCT_ID_ZTEK 0xe1f1 + #define ATEN_VENDOR_ID 0x0557 #define ATEN_VENDOR_ID2 0x0547 #define ATEN_PRODUCT_ID 0x2008 From 3e05ceedf1439eea0a1306e30935ef5ee65e5d4f Mon Sep 17 00:00:00 2001 From: "Yan, Zheng" Date: Thu, 10 Jan 2019 15:41:09 +0800 Subject: [PATCH 026/104] ceph: clear inode pointer when snap realm gets dropped by its inode commit d95e674c01cfb5461e8b9fdeebf6d878c9b80b2f upstream. snap realm and corresponding inode have pointers to each other. The two pointer should get clear at the same time. Otherwise, snap realm's pointer may reference freed inode. 
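A note on the pattern behind the snap realm fix above: when two objects hold pointers to each other, both directions of the link have to be severed inside the same critical section, otherwise the surviving object keeps a dangling back-pointer to freed memory. The minimal user-space sketch below shows that discipline; the structure and function names are invented for illustration and are not the ceph code.

#include <pthread.h>
#include <stdio.h>

struct inode_info;                      /* illustrative stand-ins, not ceph structures */
struct snap_realm {
    struct inode_info *inode;           /* realm -> inode back-pointer */
    pthread_mutex_t    inodes_lock;
};
struct inode_info {
    struct snap_realm *realm;           /* inode -> realm pointer */
};

/* Sever both directions of the link inside one critical section. */
static void drop_inode_realm(struct inode_info *ci, struct snap_realm *realm)
{
    pthread_mutex_lock(&realm->inodes_lock);
    ci->realm = NULL;
    if (realm->inode == ci)             /* clear the back-pointer at the same time */
        realm->inode = NULL;
    pthread_mutex_unlock(&realm->inodes_lock);
}

int main(void)
{
    static struct snap_realm realm = { .inodes_lock = PTHREAD_MUTEX_INITIALIZER };
    static struct inode_info ci;

    ci.realm = &realm;
    realm.inode = &ci;
    drop_inode_realm(&ci, &realm);
    printf("realm->inode=%p, inode->realm=%p\n", (void *)realm.inode, (void *)ci.realm);
    return 0;
}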
Cc: stable@vger.kernel.org # 4.17+ Signed-off-by: "Yan, Zheng" Reviewed-by: Luis Henriques Signed-off-by: Ilya Dryomov Signed-off-by: Greg Kroah-Hartman --- fs/ceph/caps.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index eadffaa39f4e..c7542e8dd096 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c @@ -1030,6 +1030,8 @@ static void drop_inode_snap_realm(struct ceph_inode_info *ci) list_del_init(&ci->i_snap_realm_item); ci->i_snap_realm_counter++; ci->i_snap_realm = NULL; + if (realm->ino == ci->i_vino.ino) + realm->inode = NULL; spin_unlock(&realm->inodes_with_caps_lock); ceph_put_snap_realm(ceph_sb_to_client(ci->vfs_inode.i_sb)->mdsc, realm); From d6847f539bd6ac2f8b75f0a6ae65c0955985d003 Mon Sep 17 00:00:00 2001 From: Kangjie Lu Date: Tue, 25 Dec 2018 20:29:48 -0600 Subject: [PATCH 027/104] ASoC: atom: fix a missing check of snd_pcm_lib_malloc_pages commit 44fabd8cdaaa3acb80ad2bb3b5c61ae2136af661 upstream. snd_pcm_lib_malloc_pages() may fail, so let's check its status and return its error code upstream. Signed-off-by: Kangjie Lu Acked-by: Pierre-Louis Bossart Signed-off-by: Mark Brown Cc: stable@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- sound/soc/intel/atom/sst-mfld-platform-pcm.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/sound/soc/intel/atom/sst-mfld-platform-pcm.c b/sound/soc/intel/atom/sst-mfld-platform-pcm.c index 6c36da560877..e662400873ec 100644 --- a/sound/soc/intel/atom/sst-mfld-platform-pcm.c +++ b/sound/soc/intel/atom/sst-mfld-platform-pcm.c @@ -399,7 +399,13 @@ static int sst_media_hw_params(struct snd_pcm_substream *substream, struct snd_pcm_hw_params *params, struct snd_soc_dai *dai) { - snd_pcm_lib_malloc_pages(substream, params_buffer_bytes(params)); + int ret; + + ret = + snd_pcm_lib_malloc_pages(substream, + params_buffer_bytes(params)); + if (ret) + return ret; memset(substream->runtime->dma_area, 0, params_buffer_bytes(params)); return 0; } From 4fedd516d5548aa3841b66b61d75ea4c40e7e08e Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Tue, 15 Jan 2019 11:57:23 -0600 Subject: [PATCH 028/104] ASoC: rt5514-spi: Fix potential NULL pointer dereference commit 060d0bf491874daece47053c4e1fb0489eb867d2 upstream. There is a potential NULL pointer dereference in case devm_kzalloc() fails and returns NULL. Fix this by adding a NULL check on rt5514_dsp. This issue was detected with the help of Coccinelle. Fixes: 6eebf35b0e4a ("ASoC: rt5514: add rt5514 SPI driver") Cc: stable@vger.kernel.org Signed-off-by: Gustavo A. R. Silva Signed-off-by: Mark Brown Signed-off-by: Greg Kroah-Hartman --- sound/soc/codecs/rt5514-spi.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sound/soc/codecs/rt5514-spi.c b/sound/soc/codecs/rt5514-spi.c index 6478d10c4f4a..cdb1f40009ab 100644 --- a/sound/soc/codecs/rt5514-spi.c +++ b/sound/soc/codecs/rt5514-spi.c @@ -278,6 +278,8 @@ static int rt5514_spi_pcm_probe(struct snd_soc_component *component) rt5514_dsp = devm_kzalloc(component->dev, sizeof(*rt5514_dsp), GFP_KERNEL); + if (!rt5514_dsp) + return -ENOMEM; rt5514_dsp->dev = &rt5514_spi->dev; mutex_init(&rt5514_dsp->dma_lock); From 0af64fda917df00a8d199f7635e20a06acd2a3b7 Mon Sep 17 00:00:00 2001 From: b-ak Date: Mon, 7 Jan 2019 22:30:22 +0530 Subject: [PATCH 029/104] ASoC: tlv320aic32x4: Kernel OOPS while entering DAPM standby mode commit 667e9334fa64da2273e36ce131b05ac9e47c5769 upstream. During the bootup of the kernel, the DAPM bias level is in the OFF state. 
As soon as the DAPM framework kicks in it pushes the codec into STANDBY state. The probe function doesn't prepare the clock, and STANDBY state does a clk_disable_unprepare() without checking the previous state. This leads to an OOPS. Not transitioning from an OFF state to the STANDBY state fixes the problem. Signed-off-by: b-ak Signed-off-by: Mark Brown Cc: stable@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- sound/soc/codecs/tlv320aic32x4.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/sound/soc/codecs/tlv320aic32x4.c b/sound/soc/codecs/tlv320aic32x4.c index e2b5a11b16d1..f03195d2ab2e 100644 --- a/sound/soc/codecs/tlv320aic32x4.c +++ b/sound/soc/codecs/tlv320aic32x4.c @@ -822,6 +822,10 @@ static int aic32x4_set_bias_level(struct snd_soc_component *component, case SND_SOC_BIAS_PREPARE: break; case SND_SOC_BIAS_STANDBY: + /* Initial cold start */ + if (snd_soc_component_get_bias_level(component) == SND_SOC_BIAS_OFF) + break; + /* Switch off BCLK_N Divider */ snd_soc_component_update_bits(component, AIC32X4_BCLKN, AIC32X4_BCLKEN, 0); From cf8ea8d536a4db6f04f8c2ca06ad0a94f45042d7 Mon Sep 17 00:00:00 2001 From: Dinh Nguyen Date: Mon, 17 Dec 2018 18:06:14 -0600 Subject: [PATCH 030/104] clk: socfpga: stratix10: fix rate calculation for pll clocks commit c0a636e4cc2eb39244d23c0417c117be4c96a7fe upstream. The main PLL calculation has a mistake. We should be using the multiplying the VCO frequency, not the parent clock frequency. Fixes: 07afb8db7340 ("clk: socfpga: stratix10: add clock driver for Stratix10 platform") Cc: linux-stable@vger.kernel.org Signed-off-by: Dinh Nguyen Signed-off-by: Stephen Boyd Signed-off-by: Greg Kroah-Hartman --- drivers/clk/socfpga/clk-pll-s10.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/clk/socfpga/clk-pll-s10.c b/drivers/clk/socfpga/clk-pll-s10.c index 2d5d8b43727e..c4d0b6f6abf2 100644 --- a/drivers/clk/socfpga/clk-pll-s10.c +++ b/drivers/clk/socfpga/clk-pll-s10.c @@ -43,7 +43,7 @@ static unsigned long clk_pll_recalc_rate(struct clk_hw *hwclk, /* Read mdiv and fdiv from the fdbck register */ reg = readl(socfpgaclk->hw.reg + 0x4); mdiv = (reg & SOCFPGA_PLL_MDIV_MASK) >> SOCFPGA_PLL_MDIV_SHIFT; - vco_freq = (unsigned long long)parent_rate * (mdiv + 6); + vco_freq = (unsigned long long)vco_freq * (mdiv + 6); return (unsigned long)vco_freq; } From afb4a7ca7818c419bea3d26fc7b03c60ec5a2426 Mon Sep 17 00:00:00 2001 From: Dinh Nguyen Date: Wed, 2 Jan 2019 08:59:31 -0600 Subject: [PATCH 031/104] clk: socfpga: stratix10: fix naming convention for the fixed-clocks commit b488517b28a47d16b228ce8dcf07f5cb8e5b3dc5 upstream. The fixed clocks in the DTS file have a hyphen, but the clock driver has the fixed clocks using underbar. Thus the clock driver cannot detect the other fixed clocks correctly. Change the fixed clock names to a hyphen. 
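The rename above matters because the common clock framework resolves parent clocks by exact name match, so a driver-side "cb_intosc_hs_div2_clk" can never match the "cb-intosc-hs-div2-clk" registered from the device tree. The tiny stand-alone lookup below makes the failure mode visible; the table is illustrative, not the driver's.

#include <stdio.h>
#include <string.h>

static const char * const registered[] = { "osc1", "cb-intosc-hs-div2-clk", "f2s-free-clk" };

static int clk_lookup(const char *name)
{
    for (size_t i = 0; i < sizeof(registered) / sizeof(registered[0]); i++)
        if (!strcmp(registered[i], name))
            return (int)i;
    return -1;  /* parent not found: the mux silently loses this input */
}

int main(void)
{
    printf("underscore name -> %d\n", clk_lookup("cb_intosc_hs_div2_clk")); /* -1 */
    printf("hyphen name     -> %d\n", clk_lookup("cb-intosc-hs-div2-clk")); /* 1  */
    return 0;
}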
Fixes: 07afb8db7340 ("clk: socfpga: stratix10: add clock driver for Stratix10 platform") Cc: linux-stable@vger.kernel.org Signed-off-by: Dinh Nguyen Signed-off-by: Stephen Boyd Signed-off-by: Greg Kroah-Hartman --- drivers/clk/socfpga/clk-s10.c | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/drivers/clk/socfpga/clk-s10.c b/drivers/clk/socfpga/clk-s10.c index 5b238fc314ac..8281dfbf38c2 100644 --- a/drivers/clk/socfpga/clk-s10.c +++ b/drivers/clk/socfpga/clk-s10.c @@ -12,17 +12,17 @@ #include "stratix10-clk.h" -static const char * const pll_mux[] = { "osc1", "cb_intosc_hs_div2_clk", - "f2s_free_clk",}; +static const char * const pll_mux[] = { "osc1", "cb-intosc-hs-div2-clk", + "f2s-free-clk",}; static const char * const cntr_mux[] = { "main_pll", "periph_pll", - "osc1", "cb_intosc_hs_div2_clk", - "f2s_free_clk"}; -static const char * const boot_mux[] = { "osc1", "cb_intosc_hs_div2_clk",}; + "osc1", "cb-intosc-hs-div2-clk", + "f2s-free-clk"}; +static const char * const boot_mux[] = { "osc1", "cb-intosc-hs-div2-clk",}; static const char * const noc_free_mux[] = {"main_noc_base_clk", "peri_noc_base_clk", - "osc1", "cb_intosc_hs_div2_clk", - "f2s_free_clk"}; + "osc1", "cb-intosc-hs-div2-clk", + "f2s-free-clk"}; static const char * const emaca_free_mux[] = {"peri_emaca_clk", "boot_clk"}; static const char * const emacb_free_mux[] = {"peri_emacb_clk", "boot_clk"}; @@ -33,14 +33,14 @@ static const char * const s2f_usr1_free_mux[] = {"peri_s2f_usr1_clk", "boot_clk" static const char * const psi_ref_free_mux[] = {"peri_psi_ref_clk", "boot_clk"}; static const char * const mpu_mux[] = { "mpu_free_clk", "boot_clk",}; -static const char * const s2f_usr0_mux[] = {"f2s_free_clk", "boot_clk"}; +static const char * const s2f_usr0_mux[] = {"f2s-free-clk", "boot_clk"}; static const char * const emac_mux[] = {"emaca_free_clk", "emacb_free_clk"}; static const char * const noc_mux[] = {"noc_free_clk", "boot_clk"}; static const char * const mpu_free_mux[] = {"main_mpu_base_clk", "peri_mpu_base_clk", - "osc1", "cb_intosc_hs_div2_clk", - "f2s_free_clk"}; + "osc1", "cb-intosc-hs-div2-clk", + "f2s-free-clk"}; /* clocks in AO (always on) controller */ static const struct stratix10_pll_clock s10_pll_clks[] = { From a719cbe07847346f6f42a6133e231603d752c69d Mon Sep 17 00:00:00 2001 From: Tetsuo Handa Date: Tue, 1 Jan 2019 18:54:26 +0900 Subject: [PATCH 032/104] inotify: Fix fd refcount leak in inotify_add_watch(). commit 125892edfe69915a227d8d125ff0e1cd713178f4 upstream. Commit 4d97f7d53da7dc83 ("inotify: Add flag IN_MASK_CREATE for inotify_add_watch()") forgot to call fdput() before bailing out. 
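The inotify fix above is the usual rule for fdget()/fdput(): once the reference has been taken, every early exit has to release it, typically by jumping to a common unwind label. The stand-alone sketch below shows that shape with a plain file descriptor instead of struct fd; the names and the flag check are invented for illustration.

#include <errno.h>
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

#define BAD_FLAGS_MASK 0x3   /* illustrative: pretend these two flag bits conflict */

static int do_watch(const char *path, unsigned int flags)
{
    int ret = 0;
    int fd = open(path, O_RDONLY);    /* "fdget": acquire the reference first */

    if (fd < 0)
        return -errno;

    /* Validation done after acquisition must unwind, not return directly. */
    if ((flags & BAD_FLAGS_MASK) == BAD_FLAGS_MASK) {
        ret = -EINVAL;
        goto put_and_out;
    }

    /* ... real work with fd would go here ... */

put_and_out:
    close(fd);                        /* "fdput": always balanced */
    return ret;
}

int main(void)
{
    printf("%d\n", do_watch("/dev/null", 0x3));
    return 0;
}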
Fixes: 4d97f7d53da7dc83 ("inotify: Add flag IN_MASK_CREATE for inotify_add_watch()") CC: stable@vger.kernel.org Signed-off-by: Tetsuo Handa Reviewed-by: Amir Goldstein Signed-off-by: Jan Kara Signed-off-by: Greg Kroah-Hartman --- fs/notify/inotify/inotify_user.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/fs/notify/inotify/inotify_user.c b/fs/notify/inotify/inotify_user.c index ac6978d3208c..780bba695453 100644 --- a/fs/notify/inotify/inotify_user.c +++ b/fs/notify/inotify/inotify_user.c @@ -724,8 +724,10 @@ SYSCALL_DEFINE3(inotify_add_watch, int, fd, const char __user *, pathname, return -EBADF; /* IN_MASK_ADD and IN_MASK_CREATE don't make sense together */ - if (unlikely((mask & IN_MASK_ADD) && (mask & IN_MASK_CREATE))) - return -EINVAL; + if (unlikely((mask & IN_MASK_ADD) && (mask & IN_MASK_CREATE))) { + ret = -EINVAL; + goto fput_and_out; + } /* verify that this is indeed an inotify instance */ if (unlikely(f.file->f_op != &inotify_fops)) { From 2173f5a1b0d7a8253c4b8a9e8cc71ccfe8257235 Mon Sep 17 00:00:00 2001 From: Kailang Yang Date: Fri, 11 Jan 2019 17:15:53 +0800 Subject: [PATCH 033/104] ALSA: hda/realtek - Fix typo for ALC225 model commit 82aa0d7e09840704d9a37434fef1770179d663fb upstream. Fix typo for model alc255-dell1 to alc225-dell1. Enable headset mode support for new WYSE NB platform. Fixes: a26d96c7802e ("ALSA: hda/realtek - Comprehensive model list for ALC259 & co") Signed-off-by: Kailang Yang Cc: Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/patch_realtek.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 8b9f2487969b..f39f34e12fb6 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -6842,7 +6842,7 @@ static const struct hda_model_fixup alc269_fixup_models[] = { {.id = ALC293_FIXUP_LENOVO_SPK_NOISE, .name = "lenovo-spk-noise"}, {.id = ALC233_FIXUP_LENOVO_LINE2_MIC_HOTKEY, .name = "lenovo-hotkey"}, {.id = ALC255_FIXUP_DELL_SPK_NOISE, .name = "dell-spk-noise"}, - {.id = ALC225_FIXUP_DELL1_MIC_NO_PRESENCE, .name = "alc255-dell1"}, + {.id = ALC225_FIXUP_DELL1_MIC_NO_PRESENCE, .name = "alc225-dell1"}, {.id = ALC295_FIXUP_DISABLE_DAC3, .name = "alc295-disable-dac3"}, {.id = ALC280_FIXUP_HP_HEADSET_MIC, .name = "alc280-hp-headset"}, {.id = ALC221_FIXUP_HP_FRONT_MIC, .name = "alc221-hp-mic"}, From cf662d989425695f98b5dc6b00c29dceb896bd94 Mon Sep 17 00:00:00 2001 From: Anthony Wong Date: Sat, 19 Jan 2019 12:22:31 +0800 Subject: [PATCH 034/104] ALSA: hda - Add mute LED support for HP ProBook 470 G5 commit 699390381a7bae2fab01a22f742a17235c44ed8a upstream. Support speaker and mic mute LEDs on HP ProBook 470 G5. 
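Quirk patches such as the two above are pure table edits; at runtime the codec's PCI subsystem vendor/device id is matched linearly against the table to pick a fixup. The stand-alone sketch below models that lookup loosely on SND_PCI_QUIRK; the structure layout and most ids here are illustrative.

#include <stdio.h>

struct pci_quirk { unsigned short vendor, device; const char *name; int fixup; };

enum { FIXUP_NONE, FIXUP_MUTE_LED_GPIO };

static const struct pci_quirk quirks[] = {
    { 0x103c, 0x836e, "HP ProBook 455 G5", FIXUP_MUTE_LED_GPIO },
    { 0x103c, 0x837f, "HP ProBook 470 G5", FIXUP_MUTE_LED_GPIO }, /* the new entry */
    { 0 },
};

static int lookup_fixup(unsigned short vendor, unsigned short device)
{
    for (const struct pci_quirk *q = quirks; q->vendor; q++)
        if (q->vendor == vendor && q->device == device) {
            printf("matched %s\n", q->name);
            return q->fixup;
        }
    return FIXUP_NONE;
}

int main(void)
{
    lookup_fixup(0x103c, 0x837f);
    return 0;
}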
BugLink: https://bugs.launchpad.net/bugs/1811254 Signed-off-by: Anthony Wong Cc: Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/patch_conexant.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/pci/hda/patch_conexant.c b/sound/pci/hda/patch_conexant.c index f9176e3b4d37..31a84a5a1338 100644 --- a/sound/pci/hda/patch_conexant.c +++ b/sound/pci/hda/patch_conexant.c @@ -931,6 +931,7 @@ static const struct snd_pci_quirk cxt5066_fixups[] = { SND_PCI_QUIRK(0x103c, 0x814f, "HP ZBook 15u G3", CXT_FIXUP_MUTE_LED_GPIO), SND_PCI_QUIRK(0x103c, 0x822e, "HP ProBook 440 G4", CXT_FIXUP_MUTE_LED_GPIO), SND_PCI_QUIRK(0x103c, 0x836e, "HP ProBook 455 G5", CXT_FIXUP_MUTE_LED_GPIO), + SND_PCI_QUIRK(0x103c, 0x837f, "HP ProBook 470 G5", CXT_FIXUP_MUTE_LED_GPIO), SND_PCI_QUIRK(0x103c, 0x8299, "HP 800 G3 SFF", CXT_FIXUP_HP_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x103c, 0x829a, "HP 800 G3 DM", CXT_FIXUP_HP_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x103c, 0x8455, "HP Z2 G4", CXT_FIXUP_HP_MIC_NO_PRESENCE), From 7bb78e62f7126b060bd1a5391e3871aa9035ee64 Mon Sep 17 00:00:00 2001 From: Eugeniy Paltsev Date: Mon, 14 Jan 2019 18:16:48 +0300 Subject: [PATCH 035/104] ARCv2: lib: memeset: fix doing prefetchw outside of buffer commit e6a72b7daeeb521753803550f0ed711152bb2555 upstream. ARCv2 optimized memset uses PREFETCHW instruction for prefetching the next cache line but doesn't ensure that the line is not past the end of the buffer. PRETECHW changes the line ownership and marks it dirty, which can cause issues in SMP config when next line was already owned by other core. Fix the issue by avoiding the PREFETCHW Some more details: The current code has 3 logical loops (ignroing the unaligned part) (a) Big loop for doing aligned 64 bytes per iteration with PREALLOC (b) Loop for 32 x 2 bytes with PREFETCHW (c) any left over bytes loop (a) was already eliding the last 64 bytes, so PREALLOC was safe. The fix was removing PREFETCW from (b). Another potential issue (applicable to configs with 32 or 128 byte L1 cache line) is that PREALLOC assumes 64 byte cache line and may not do the right thing specially for 32b. While it would be easy to adapt, there are no known configs with those lie sizes, so for now, just compile out PREALLOC in such cases. Signed-off-by: Eugeniy Paltsev Cc: stable@vger.kernel.org #4.4+ Signed-off-by: Vineet Gupta [vgupta: rewrote changelog, used asm .macro vs. "C" macro] Signed-off-by: Greg Kroah-Hartman --- arch/arc/lib/memset-archs.S | 40 +++++++++++++++++++++++++++++-------- 1 file changed, 32 insertions(+), 8 deletions(-) diff --git a/arch/arc/lib/memset-archs.S b/arch/arc/lib/memset-archs.S index 62ad4bcb841a..f230bb7092fd 100644 --- a/arch/arc/lib/memset-archs.S +++ b/arch/arc/lib/memset-archs.S @@ -7,11 +7,39 @@ */ #include +#include -#undef PREALLOC_NOT_AVAIL +/* + * The memset implementation below is optimized to use prefetchw and prealloc + * instruction in case of CPU with 64B L1 data cache line (L1_CACHE_SHIFT == 6) + * If you want to implement optimized memset for other possible L1 data cache + * line lengths (32B and 128B) you should rewrite code carefully checking + * we don't call any prefetchw/prealloc instruction for L1 cache lines which + * don't belongs to memset area. 
+ */ + +#if L1_CACHE_SHIFT == 6 + +.macro PREALLOC_INSTR reg, off + prealloc [\reg, \off] +.endm + +.macro PREFETCHW_INSTR reg, off + prefetchw [\reg, \off] +.endm + +#else + +.macro PREALLOC_INSTR +.endm + +.macro PREFETCHW_INSTR +.endm + +#endif ENTRY_CFI(memset) - prefetchw [r0] ; Prefetch the write location + PREFETCHW_INSTR r0, 0 ; Prefetch the first write location mov.f 0, r2 ;;; if size is zero jz.d [blink] @@ -48,11 +76,8 @@ ENTRY_CFI(memset) lpnz @.Lset64bytes ;; LOOP START -#ifdef PREALLOC_NOT_AVAIL - prefetchw [r3, 64] ;Prefetch the next write location -#else - prealloc [r3, 64] -#endif + PREALLOC_INSTR r3, 64 ; alloc next line w/o fetching + #ifdef CONFIG_ARC_HAS_LL64 std.ab r4, [r3, 8] std.ab r4, [r3, 8] @@ -85,7 +110,6 @@ ENTRY_CFI(memset) lsr.f lp_count, r2, 5 ;Last remaining max 124 bytes lpnz .Lset32bytes ;; LOOP START - prefetchw [r3, 32] ;Prefetch the next write location #ifdef CONFIG_ARC_HAS_LL64 std.ab r4, [r3, 8] std.ab r4, [r3, 8] From 2f0d2f3ace1f752e868020fa21665570b8b5418c Mon Sep 17 00:00:00 2001 From: Eugeniy Paltsev Date: Wed, 19 Dec 2018 19:16:16 +0300 Subject: [PATCH 036/104] ARC: adjust memblock_reserve of kernel memory commit a3010a0465383300f909f62b8a83f83ffa7b2517 upstream. In setup_arch_memory we reserve the memory area wherein the kernel is located. Current implementation may reserve more memory than it actually required in case of CONFIG_LINUX_LINK_BASE is not equal to CONFIG_LINUX_RAM_BASE. This happens because we calculate start of the reserved region relatively to the CONFIG_LINUX_RAM_BASE and end of the region relatively to the CONFIG_LINUX_RAM_BASE. For example in case of HSDK board we wasted 256MiB of physical memory: ------------------->8------------------------------ Memory: 770416K/1048576K available (5496K kernel code, 240K rwdata, 1064K rodata, 2200K init, 275K bss, 278160K reserved, 0K cma-reserved) ------------------->8------------------------------ Fix that. Fixes: 9ed68785f7f2b ("ARC: mm: Decouple RAM base address from kernel link addr") Cc: stable@vger.kernel.org #4.14+ Signed-off-by: Eugeniy Paltsev Signed-off-by: Vineet Gupta Signed-off-by: Greg Kroah-Hartman --- arch/arc/mm/init.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/arc/mm/init.c b/arch/arc/mm/init.c index ba145065c579..f890b2f9f82f 100644 --- a/arch/arc/mm/init.c +++ b/arch/arc/mm/init.c @@ -138,7 +138,8 @@ void __init setup_arch_memory(void) */ memblock_add_node(low_mem_start, low_mem_sz, 0); - memblock_reserve(low_mem_start, __pa(_end) - low_mem_start); + memblock_reserve(CONFIG_LINUX_LINK_BASE, + __pa(_end) - CONFIG_LINUX_LINK_BASE); #ifdef CONFIG_BLK_DEV_INITRD if (initrd_start) From 8cbca17381ac78e0e287a99fd3575114b6143345 Mon Sep 17 00:00:00 2001 From: Eugeniy Paltsev Date: Mon, 17 Dec 2018 12:54:23 +0300 Subject: [PATCH 037/104] ARC: perf: map generic branches to correct hardware condition commit 3affbf0e154ee351add6fcc254c59c3f3947fa8f upstream. So far we've mapped branches to "ijmp" which also counts conditional branches NOT taken. This makes us different from other architectures such as ARM which seem to be counting only taken branches. So use "ijmptak" hardware condition which only counts (all jump instructions that are taken) 'ijmptak' event is available on both ARCompact and ARCv2 ISA based cores. 
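The perf fix above is a one-entry change in a sparse table that maps generic perf event ids to hardware condition names, using a designated-initializer array of strings where NULL means "not supported". The stand-alone sketch below shows the idiom; the event ids are reduced to a toy enum and only the condition names are taken from the patch.

#include <stdio.h>

enum { PERF_COUNT_HW_INSTRUCTIONS, PERF_COUNT_HW_BRANCH_INSTRUCTIONS,
       PERF_COUNT_HW_BRANCH_MISSES, PERF_COUNT_MAX };

/* Sparse map: unnamed slots stay NULL, so absent events are easy to detect. */
static const char * const ev_hw_map[PERF_COUNT_MAX] = {
    [PERF_COUNT_HW_INSTRUCTIONS]        = "iall",
    [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = "ijmptak", /* taken jumps only */
    [PERF_COUNT_HW_BRANCH_MISSES]       = "bpmp",
};

int main(void)
{
    for (int ev = 0; ev < PERF_COUNT_MAX; ev++)
        printf("event %d -> %s\n", ev, ev_hw_map[ev] ? ev_hw_map[ev] : "<unsupported>");
    return 0;
}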
Signed-off-by: Eugeniy Paltsev Cc: stable@vger.kernel.org Signed-off-by: Vineet Gupta [vgupta: reworked changelog] Signed-off-by: Greg Kroah-Hartman --- arch/arc/include/asm/perf_event.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/arc/include/asm/perf_event.h b/arch/arc/include/asm/perf_event.h index 9185541035cc..6958545390f0 100644 --- a/arch/arc/include/asm/perf_event.h +++ b/arch/arc/include/asm/perf_event.h @@ -103,7 +103,8 @@ static const char * const arc_pmu_ev_hw_map[] = { /* counts condition */ [PERF_COUNT_HW_INSTRUCTIONS] = "iall", - [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = "ijmp", /* Excludes ZOL jumps */ + /* All jump instructions that are taken */ + [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = "ijmptak", [PERF_COUNT_ARC_BPOK] = "bpok", /* NP-NT, PT-T, PNT-NT */ #ifdef CONFIG_ISA_ARCV2 [PERF_COUNT_HW_BRANCH_MISSES] = "bpmp", From b563764443a302aab197550870ca57fda65662b8 Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Tue, 8 Jan 2019 12:44:57 +0100 Subject: [PATCH 038/104] s390/mm: always force a load of the primary ASCE on context switch commit a38662084c8bdb829ff486468c7ea801c13fcc34 upstream. The ASCE of an mm_struct can be modified after a task has been created, e.g. via crst_table_downgrade for a compat process. The active_mm logic to avoid the switch_mm call if the next task is a kernel thread can lead to a situation where switch_mm is called where 'prev == next' is true but 'prev->context.asce == next->context.asce' is not. This can lead to a situation where a CPU uses the outdated ASCE to run a task. The result can be a crash, endless loops and really subtle problem due to TLBs being created with an invalid ASCE. Cc: stable@kernel.org # v3.15+ Fixes: 53e857f30867 ("s390/mm,tlb: race of lazy TLB flush vs. recreation") Reported-by: Heiko Carstens Reviewed-by: Heiko Carstens Signed-off-by: Martin Schwidefsky Signed-off-by: Greg Kroah-Hartman --- arch/s390/include/asm/mmu_context.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/arch/s390/include/asm/mmu_context.h b/arch/s390/include/asm/mmu_context.h index f1ab9420ccfb..09b61d0e491f 100644 --- a/arch/s390/include/asm/mmu_context.h +++ b/arch/s390/include/asm/mmu_context.h @@ -89,8 +89,6 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next, { int cpu = smp_processor_id(); - if (prev == next) - return; S390_lowcore.user_asce = next->context.asce; cpumask_set_cpu(cpu, &next->context.cpu_attach_mask); /* Clear previous user-ASCE from CR1 and CR7 */ @@ -102,7 +100,8 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next, __ctl_load(S390_lowcore.vdso_asce, 7, 7); clear_cpu_flag(CIF_ASCE_SECONDARY); } - cpumask_clear_cpu(cpu, &prev->context.cpu_attach_mask); + if (prev != next) + cpumask_clear_cpu(cpu, &prev->context.cpu_attach_mask); } #define finish_arch_post_lock_switch finish_arch_post_lock_switch From e0d573a08f234722fe21a5729d2e945b775c5201 Mon Sep 17 00:00:00 2001 From: Christian Borntraeger Date: Fri, 9 Nov 2018 09:21:47 +0100 Subject: [PATCH 039/104] s390/early: improve machine detection commit 03aa047ef2db4985e444af6ee1c1dd084ad9fb4c upstream. Right now the early machine detection code check stsi 3.2.2 for "KVM" and set MACHINE_IS_VM if this is different. As the console detection uses diagnose 8 if MACHINE_IS_VM returns true this will crash Linux early for any non z/VM system that sets a different value than KVM. So instead of assuming z/VM, do not set any of MACHINE_IS_LPAR, MACHINE_IS_VM, or MACHINE_IS_KVM. 
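For context on the detection above: stsi 3.2.2 returns the control program identifier as EBCDIC text, which is why the code compares raw byte prefixes; the \xd2\xe5\xd4 in the hunk below is "KVM" and \xa9\x61\xe5\xd4 is "z/VM" in EBCDIC. The stand-alone sketch below models the prefix classification, including the new fall-through for unknown hypervisors; the sample buffers are made up.

#include <stdio.h>
#include <string.h>

/* Byte prefixes as used by the detection code: EBCDIC "KVM" and "z/VM". */
static const unsigned char cpi_kvm[] = { 0xd2, 0xe5, 0xd4 };
static const unsigned char cpi_zvm[] = { 0xa9, 0x61, 0xe5, 0xd4 };

static const char *classify(const unsigned char *cpi)
{
    if (!memcmp(cpi, cpi_kvm, sizeof(cpi_kvm)))
        return "KVM guest";
    if (!memcmp(cpi, cpi_zvm, sizeof(cpi_zvm)))
        return "z/VM guest";
    return "unknown hypervisor (leave machine flags unset)";
}

int main(void)
{
    const unsigned char kvm_cpi[8]   = { 0xd2, 0xe5, 0xd4, 0x40, 0x40, 0x40, 0x40, 0x40 };
    const unsigned char other_cpi[8] = { 0xc8, 0xe8, 0xd7, 0x40, 0x40, 0x40, 0x40, 0x40 };

    printf("%s\n", classify(kvm_cpi));
    printf("%s\n", classify(other_cpi));
    return 0;
}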
CC: stable@vger.kernel.org Reviewed-by: Heiko Carstens Signed-off-by: Christian Borntraeger Signed-off-by: Martin Schwidefsky Signed-off-by: Greg Kroah-Hartman --- arch/s390/kernel/early.c | 4 ++-- arch/s390/kernel/setup.c | 2 ++ 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/arch/s390/kernel/early.c b/arch/s390/kernel/early.c index 5b28b434f8a1..e7e6608b996c 100644 --- a/arch/s390/kernel/early.c +++ b/arch/s390/kernel/early.c @@ -64,10 +64,10 @@ static noinline __init void detect_machine_type(void) if (stsi(vmms, 3, 2, 2) || !vmms->count) return; - /* Running under KVM? If not we assume z/VM */ + /* Detect known hypervisors */ if (!memcmp(vmms->vm[0].cpi, "\xd2\xe5\xd4", 3)) S390_lowcore.machine_flags |= MACHINE_FLAG_KVM; - else + else if (!memcmp(vmms->vm[0].cpi, "\xa9\x61\xe5\xd4", 4)) S390_lowcore.machine_flags |= MACHINE_FLAG_VM; } diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index c637c12f9e37..a0097f8bada8 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -882,6 +882,8 @@ void __init setup_arch(char **cmdline_p) pr_info("Linux is running under KVM in 64-bit mode\n"); else if (MACHINE_IS_LPAR) pr_info("Linux is running natively in 64-bit mode\n"); + else + pr_info("Linux is running as a guest in 64-bit mode\n"); /* Have one command line that is parsed and saved in /proc/cmdline */ /* boot_command_line has been already set up in early.c */ From 049c7b068dd15fedfb863bcfbde0241acbbb808f Mon Sep 17 00:00:00 2001 From: Gerald Schaefer Date: Wed, 9 Jan 2019 13:00:03 +0100 Subject: [PATCH 040/104] s390/smp: fix CPU hotplug deadlock with CPU rescan commit b7cb707c373094ce4008d4a6ac9b6b366ec52da5 upstream. smp_rescan_cpus() is called without the device_hotplug_lock, which can lead to a dedlock when a new CPU is found and immediately set online by a udev rule. This was observed on an older kernel version, where the cpu_hotplug_begin() loop was still present, and it resulted in hanging chcpu and systemd-udev processes. This specific deadlock will not show on current kernels. However, there may be other possible deadlocks, and since smp_rescan_cpus() can still trigger a CPU hotplug operation, the device_hotplug_lock should be held. For reference, this was the deadlock with the old cpu_hotplug_begin() loop: chcpu (rescan) systemd-udevd echo 1 > /sys/../rescan -> smp_rescan_cpus() -> (*) get_online_cpus() (increases refcount) -> smp_add_present_cpu() (new CPU found) -> register_cpu() -> device_add() -> udev "add" event triggered -----------> udev rule sets CPU online -> echo 1 > /sys/.../online -> lock_device_hotplug_sysfs() (this is missing in rescan path) -> device_online() -> (**) device_lock(new CPU dev) -> cpu_up() -> cpu_hotplug_begin() (loops until refcount == 0) -> deadlock with (*) -> bus_probe_device() -> device_attach() -> device_lock(new CPU dev) -> deadlock with (**) Fix this by taking the device_hotplug_lock in the CPU rescan path. Cc: Signed-off-by: Gerald Schaefer Signed-off-by: Martin Schwidefsky Signed-off-by: Greg Kroah-Hartman --- arch/s390/kernel/smp.c | 4 ++++ drivers/s390/char/sclp_config.c | 2 ++ 2 files changed, 6 insertions(+) diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c index 2f8f7d7dd9a8..e5735ab30488 100644 --- a/arch/s390/kernel/smp.c +++ b/arch/s390/kernel/smp.c @@ -1152,7 +1152,11 @@ static ssize_t __ref rescan_store(struct device *dev, { int rc; + rc = lock_device_hotplug_sysfs(); + if (rc) + return rc; rc = smp_rescan_cpus(); + unlock_device_hotplug(); return rc ? 
rc : count; } static DEVICE_ATTR_WO(rescan); diff --git a/drivers/s390/char/sclp_config.c b/drivers/s390/char/sclp_config.c index 194ffd5c8580..039b2074db7e 100644 --- a/drivers/s390/char/sclp_config.c +++ b/drivers/s390/char/sclp_config.c @@ -60,7 +60,9 @@ static void sclp_cpu_capability_notify(struct work_struct *work) static void __ref sclp_cpu_change_notify(struct work_struct *work) { + lock_device_hotplug(); smp_rescan_cpus(); + unlock_device_hotplug(); } static void sclp_conf_receiver_fn(struct evbuf_header *evbuf) From 0479bdbf55d8ded806ca6b877f1e81d5733fe802 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Wed, 16 Jan 2019 10:46:16 -0600 Subject: [PATCH 041/104] misc: ibmvsm: Fix potential NULL pointer dereference commit e25df7812c91f62581301f9a7ac102acf92e4937 upstream. There is a potential NULL pointer dereference in case kzalloc() fails and returns NULL. Fix this by adding a NULL check on *session* Also, update the function header with information about the expected return on failure and remove unnecessary variable rc. This issue was detected with the help of Coccinelle. Fixes: 0eca353e7ae7 ("misc: IBM Virtual Management Channel Driver (VMC)") Cc: stable@vger.kernel.org Signed-off-by: Gustavo A. R. Silva Signed-off-by: Greg Kroah-Hartman --- drivers/misc/ibmvmc.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/misc/ibmvmc.c b/drivers/misc/ibmvmc.c index b8aaa684c397..2ed23c99f59f 100644 --- a/drivers/misc/ibmvmc.c +++ b/drivers/misc/ibmvmc.c @@ -820,21 +820,24 @@ static int ibmvmc_send_msg(struct crq_server_adapter *adapter, * * Return: * 0 - Success + * Non-zero - Failure */ static int ibmvmc_open(struct inode *inode, struct file *file) { struct ibmvmc_file_session *session; - int rc = 0; pr_debug("%s: inode = 0x%lx, file = 0x%lx, state = 0x%x\n", __func__, (unsigned long)inode, (unsigned long)file, ibmvmc.state); session = kzalloc(sizeof(*session), GFP_KERNEL); + if (!session) + return -ENOMEM; + session->file = file; file->private_data = session; - return rc; + return 0; } /** From d3faea2d152eba15ccf5f6e7e84ddb0c49f5dfca Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Wed, 9 Jan 2019 13:02:36 -0600 Subject: [PATCH 042/104] char/mwave: fix potential Spectre v1 vulnerability commit 701956d4018e5d5438570e39e8bda47edd32c489 upstream. ipcnum is indirectly controlled by user-space, hence leading to a potential exploitation of the Spectre variant 1 vulnerability. This issue was detected with the help of Smatch: drivers/char/mwave/mwavedd.c:299 mwave_ioctl() warn: potential spectre issue 'pDrvData->IPCs' [w] (local cap) Fix this by sanitizing ipcnum before using it to index pDrvData->IPCs. Notice that given that speculation windows are large, the policy is to kill the speculation on the first load and not worry if it can be completed with a dependent load/store [1]. [1] https://marc.info/?l=linux-kernel&m=152449131114778&w=2 Cc: stable@vger.kernel.org Signed-off-by: Gustavo A. R. 
Silva Signed-off-by: Greg Kroah-Hartman --- drivers/char/mwave/mwavedd.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/char/mwave/mwavedd.c b/drivers/char/mwave/mwavedd.c index b5e3103c1175..e43c876a9223 100644 --- a/drivers/char/mwave/mwavedd.c +++ b/drivers/char/mwave/mwavedd.c @@ -59,6 +59,7 @@ #include #include #include +#include #include "smapi.h" #include "mwavedd.h" #include "3780i.h" @@ -289,6 +290,8 @@ static long mwave_ioctl(struct file *file, unsigned int iocmd, ipcnum); return -EINVAL; } + ipcnum = array_index_nospec(ipcnum, + ARRAY_SIZE(pDrvData->IPCs)); PRINTK_3(TRACE_MWAVE, "mwavedd::mwave_ioctl IOCTL_MW_REGISTER_IPC" " ipcnum %x entry usIntCount %x\n", @@ -317,6 +320,8 @@ static long mwave_ioctl(struct file *file, unsigned int iocmd, " Invalid ipcnum %x\n", ipcnum); return -EINVAL; } + ipcnum = array_index_nospec(ipcnum, + ARRAY_SIZE(pDrvData->IPCs)); PRINTK_3(TRACE_MWAVE, "mwavedd::mwave_ioctl IOCTL_MW_GET_IPC" " ipcnum %x, usIntCount %x\n", @@ -383,6 +388,8 @@ static long mwave_ioctl(struct file *file, unsigned int iocmd, ipcnum); return -EINVAL; } + ipcnum = array_index_nospec(ipcnum, + ARRAY_SIZE(pDrvData->IPCs)); mutex_lock(&mwave_mutex); if (pDrvData->IPCs[ipcnum].bIsEnabled == true) { pDrvData->IPCs[ipcnum].bIsEnabled = false; From 4a559dfe660378507ccda1aeb556cb89a47be7eb Mon Sep 17 00:00:00 2001 From: Liming Sun Date: Fri, 18 Jan 2019 13:12:06 -0500 Subject: [PATCH 043/104] mmc: dw_mmc-bluefield: : Fix the license information commit f3716b8ae9347797b73896725f192c3a7b0069b5 upstream. The SPDX license identifier and the boiler plate text are contradicting. Only the SPDX license identifier is needed. The other one is removed. Fixes: 86958dcc5ad7 ("mmc: dw_mmc-bluefield: Add driver extension") Cc: stable@vger.kernel.org Reviewed-by: David Woods Signed-off-by: Liming Sun Signed-off-by: Ulf Hansson Signed-off-by: Greg Kroah-Hartman --- drivers/mmc/host/dw_mmc-bluefield.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/drivers/mmc/host/dw_mmc-bluefield.c b/drivers/mmc/host/dw_mmc-bluefield.c index 54c3fbb4a391..db56d4f58aaa 100644 --- a/drivers/mmc/host/dw_mmc-bluefield.c +++ b/drivers/mmc/host/dw_mmc-bluefield.c @@ -1,11 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 /* * Copyright (C) 2018 Mellanox Technologies. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. */ #include From 75a08b9a0fcb5e87bbc8e0429a16665d045cd7de Mon Sep 17 00:00:00 2001 From: Remi Pommarel Date: Fri, 11 Jan 2019 00:01:35 +0100 Subject: [PATCH 044/104] mmc: meson-gx: Free irq in release() callback commit bb364890323cca6e43f13e86d190ebf34a7d8cea upstream. Because the irq was requested through device managed resources API (devm_request_threaded_irq()) it was freed after meson_mmc_remove() completion, thus after mmc_free_host() has reclaimed meson_host memory. As this irq is IRQF_SHARED, while using CONFIG_DEBUG_SHIRQ, its handler get called by free_irq(). So meson_mmc_irq() was called after the meson_host memory reclamation and was using invalid memory. We ended up with the following scenario: device_release_driver() meson_mmc_remove() mmc_free_host() /* Freeing host memory */ ... 
devres_release_all() devm_irq_release() __free_irq() meson_mmc_irq() /* Uses freed memory */ To avoid this, the irq is released in meson_mmc_remove() and in mseon_mmc_probe() error path before mmc_free_host() gets called. Reported-by: Elie Roudninski Signed-off-by: Remi Pommarel Cc: stable@vger.kernel.org Signed-off-by: Ulf Hansson Signed-off-by: Greg Kroah-Hartman --- drivers/mmc/host/meson-gx-mmc.c | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/drivers/mmc/host/meson-gx-mmc.c b/drivers/mmc/host/meson-gx-mmc.c index c201c378537e..ef9deaa361c7 100644 --- a/drivers/mmc/host/meson-gx-mmc.c +++ b/drivers/mmc/host/meson-gx-mmc.c @@ -174,6 +174,8 @@ struct meson_host { struct sd_emmc_desc *descs; dma_addr_t descs_dma_addr; + int irq; + bool vqmmc_enabled; }; @@ -1181,7 +1183,7 @@ static int meson_mmc_probe(struct platform_device *pdev) struct resource *res; struct meson_host *host; struct mmc_host *mmc; - int ret, irq; + int ret; mmc = mmc_alloc_host(sizeof(struct meson_host), &pdev->dev); if (!mmc) @@ -1228,8 +1230,8 @@ static int meson_mmc_probe(struct platform_device *pdev) goto free_host; } - irq = platform_get_irq(pdev, 0); - if (irq <= 0) { + host->irq = platform_get_irq(pdev, 0); + if (host->irq <= 0) { dev_err(&pdev->dev, "failed to get interrupt resource.\n"); ret = -EINVAL; goto free_host; @@ -1283,9 +1285,8 @@ static int meson_mmc_probe(struct platform_device *pdev) writel(IRQ_CRC_ERR | IRQ_TIMEOUTS | IRQ_END_OF_CHAIN, host->regs + SD_EMMC_IRQ_EN); - ret = devm_request_threaded_irq(&pdev->dev, irq, meson_mmc_irq, - meson_mmc_irq_thread, IRQF_SHARED, - NULL, host); + ret = request_threaded_irq(host->irq, meson_mmc_irq, + meson_mmc_irq_thread, IRQF_SHARED, NULL, host); if (ret) goto err_init_clk; @@ -1303,7 +1304,7 @@ static int meson_mmc_probe(struct platform_device *pdev) if (host->bounce_buf == NULL) { dev_err(host->dev, "Unable to map allocate DMA bounce buffer.\n"); ret = -ENOMEM; - goto err_init_clk; + goto err_free_irq; } host->descs = dma_alloc_coherent(host->dev, SD_EMMC_DESC_BUF_LEN, @@ -1322,6 +1323,8 @@ static int meson_mmc_probe(struct platform_device *pdev) err_bounce_buf: dma_free_coherent(host->dev, host->bounce_buf_size, host->bounce_buf, host->bounce_dma_addr); +err_free_irq: + free_irq(host->irq, host); err_init_clk: clk_disable_unprepare(host->mmc_clk); err_core_clk: @@ -1339,6 +1342,7 @@ static int meson_mmc_remove(struct platform_device *pdev) /* disable interrupts */ writel(0, host->regs + SD_EMMC_IRQ_EN); + free_irq(host->irq, host); dma_free_coherent(host->dev, SD_EMMC_DESC_BUF_LEN, host->descs, host->descs_dma_addr); From 3209eeded8635c8a96781f2dd362d460682a1cec Mon Sep 17 00:00:00 2001 From: Michael Straube Date: Mon, 7 Jan 2019 18:28:58 +0100 Subject: [PATCH 045/104] staging: rtl8188eu: Add device code for D-Link DWA-121 rev B1 commit 5f74a8cbb38d10615ed46bc3e37d9a4c9af8045a upstream. This device was added to the stand-alone driver on github. Add it to the staging driver as well. 
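Returning to the meson-gx IRQ fix (patch 044) above: the underlying rule is that a callback which can still fire must be unregistered before the memory it dereferences is freed, and a devm-managed IRQ is released too late for that when the handler's context lives in the mmc host structure. The toy user-space model below shows the ordering; the registry stands in for the IRQ core and is not a real kernel API.

#include <stdio.h>
#include <stdlib.h>

struct host { int counter; };

/* Toy registry standing in for a shared IRQ line. */
static void (*registered_cb)(struct host *);
static struct host *registered_ctx;

static void irq_handler(struct host *h) { h->counter++; }

static void register_cb(void (*cb)(struct host *), struct host *ctx)
{
    registered_cb = cb;
    registered_ctx = ctx;
}

static void unregister_cb(void)
{
    if (registered_cb)
        registered_cb(registered_ctx); /* mimics CONFIG_DEBUG_SHIRQ firing the handler on free */
    registered_cb = NULL;
    registered_ctx = NULL;
}

int main(void)
{
    struct host *h = calloc(1, sizeof(*h));

    register_cb(irq_handler, h);

    /* Correct teardown order: unregister first (handler may still run), then free. */
    unregister_cb();
    free(h);

    /* The buggy order, free(h) before unregister_cb(), is a use after free. */
    return 0;
}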
Link: https://github.com/lwfinger/rtl8188eu/commit/a0619a07cd1e Signed-off-by: Michael Straube Acked-by: Larry Finger Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/staging/rtl8188eu/os_dep/usb_intf.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/staging/rtl8188eu/os_dep/usb_intf.c b/drivers/staging/rtl8188eu/os_dep/usb_intf.c index 28cbd6b3d26c..dfee6985efa6 100644 --- a/drivers/staging/rtl8188eu/os_dep/usb_intf.c +++ b/drivers/staging/rtl8188eu/os_dep/usb_intf.c @@ -35,6 +35,7 @@ static const struct usb_device_id rtw_usb_id_tbl[] = { {USB_DEVICE(0x2001, 0x330F)}, /* DLink DWA-125 REV D1 */ {USB_DEVICE(0x2001, 0x3310)}, /* Dlink DWA-123 REV D1 */ {USB_DEVICE(0x2001, 0x3311)}, /* DLink GO-USB-N150 REV B1 */ + {USB_DEVICE(0x2001, 0x331B)}, /* D-Link DWA-121 rev B1 */ {USB_DEVICE(0x2357, 0x010c)}, /* TP-Link TL-WN722N v2 */ {USB_DEVICE(0x0df6, 0x0076)}, /* Sitecom N150 v2 */ {USB_DEVICE(USB_VENDER_ID_REALTEK, 0xffef)}, /* Rosewill RNX-N150NUB */ From 80250b48803ab0aa3eb8e0d798a4742b26ef84d9 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Sun, 20 Jan 2019 10:46:58 +0100 Subject: [PATCH 046/104] tty: Handle problem if line discipline does not have receive_buf commit 27cfb3a53be46a54ec5e0bd04e51995b74c90343 upstream. Some tty line disciplines do not have a receive buf callback, so properly check for that before calling it. If they do not have this callback, just eat the character quietly, as we can't fail this call. Reported-by: Jann Horn Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/tty/tty_io.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/tty/tty_io.c b/drivers/tty/tty_io.c index 052ec16a4e84..e7d192ebecd7 100644 --- a/drivers/tty/tty_io.c +++ b/drivers/tty/tty_io.c @@ -2188,7 +2188,8 @@ static int tiocsti(struct tty_struct *tty, char __user *p) ld = tty_ldisc_ref_wait(tty); if (!ld) return -EIO; - ld->ops->receive_buf(tty, &ch, &mbz, 1); + if (ld->ops->receive_buf) + ld->ops->receive_buf(tty, &ch, &mbz, 1); tty_ldisc_deref(ld); return 0; } From 6d15ef2c912ab0ba3602322ea3227467212a4ca0 Mon Sep 17 00:00:00 2001 From: Samir Virmani Date: Wed, 16 Jan 2019 10:28:07 -0800 Subject: [PATCH 047/104] uart: Fix crash in uart_write and uart_put_char commit aff9cf5955185d1f183227e46c5f8673fa483813 upstream. We were experiencing a crash similar to the one reported as part of commit:a5ba1d95e46e ("uart: fix race between uart_put_char() and uart_shutdown()") in our testbed as well. We continue to observe the same crash after integrating the commit a5ba1d95e46e ("uart: fix race between uart_put_char() and uart_shutdown()") On reviewing the change, the port lock should be taken prior to checking for if (!circ->buf) in fn. __uart_put_char and other fns. that update the buffer uart_state->xmit. 
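The traceback below shows the resulting crash; the rule restated by the serial core fix is that state->xmit.buf is torn down under the port lock, so it may only be inspected after the lock has been taken. The minimal pthread sketch below shows the corrected ordering; it illustrates the locking discipline only and is not the serial core.

#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

static pthread_mutex_t port_lock = PTHREAD_MUTEX_INITIALIZER;
static char *xmit_buf;               /* freed by shutdown_port() under port_lock */

static int put_char(char c)
{
    int ret = 0;

    pthread_mutex_lock(&port_lock);  /* take the lock BEFORE looking at xmit_buf */
    if (xmit_buf) {
        xmit_buf[0] = c;
        ret = 1;
    }
    pthread_mutex_unlock(&port_lock);
    return ret;
}

static void shutdown_port(void)
{
    pthread_mutex_lock(&port_lock);
    free(xmit_buf);
    xmit_buf = NULL;
    pthread_mutex_unlock(&port_lock);
}

int main(void)
{
    xmit_buf = malloc(64);
    printf("queued: %d\n", put_char('x'));
    shutdown_port();
    printf("queued after shutdown: %d\n", put_char('y'));
    return 0;
}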
Traceback: [11/27/2018 06:24:32.4870] Unable to handle kernel NULL pointer dereference at virtual address 0000003b [11/27/2018 06:24:32.4950] PC is at memcpy+0x48/0x180 [11/27/2018 06:24:32.4950] LR is at uart_write+0x74/0x120 [11/27/2018 06:24:32.4950] pc : [] lr : [] pstate: 000001c5 [11/27/2018 06:24:32.4950] sp : ffffffc076433d30 [11/27/2018 06:24:32.4950] x29: ffffffc076433d30 x28: 0000000000000140 [11/27/2018 06:24:32.4950] x27: ffffffc0009b9d5e x26: ffffffc07ce36580 [11/27/2018 06:24:32.4950] x25: 0000000000000000 x24: 0000000000000140 [11/27/2018 06:24:32.4950] x23: ffffffc000891200 x22: ffffffc01fc34000 [11/27/2018 06:24:32.4950] x21: 0000000000000fff x20: 0000000000000076 [11/27/2018 06:24:32.4950] x19: 0000000000000076 x18: 0000000000000000 [11/27/2018 06:24:32.4950] x17: 000000000047cf08 x16: ffffffc000099e68 [11/27/2018 06:24:32.4950] x15: 0000000000000018 x14: 776d726966205948 [11/27/2018 06:24:32.4950] x13: 50203a6c6974755f x12: 74647075205d3333 [11/27/2018 06:24:32.4950] x11: 3a35323a36203831 x10: 30322f37322f3131 [11/27/2018 06:24:32.4950] x9 : 5b205d303638342e x8 : 746164206f742070 [11/27/2018 06:24:32.4950] x7 : 7520736920657261 x6 : 000000000000003b [11/27/2018 06:24:32.4950] x5 : 000000000000817a x4 : 0000000000000008 [11/27/2018 06:24:32.4950] x3 : 2f37322f31312a5b x2 : 000000000000006e [11/27/2018 06:24:32.4950] x1 : ffffffc0009b9cf0 x0 : 000000000000003b [11/27/2018 06:24:32.4950] CPU2: stopping [11/27/2018 06:24:32.4950] CPU: 2 PID: 0 Comm: swapper/2 Tainted: P D O 4.1.51 #3 [11/27/2018 06:24:32.4950] Hardware name: Broadcom-v8A (DT) [11/27/2018 06:24:32.4950] Call trace: [11/27/2018 06:24:32.4950] [] dump_backtrace+0x0/0x150 [11/27/2018 06:24:32.4950] [] show_stack+0x14/0x20 [11/27/2018 06:24:32.4950] [] dump_stack+0x90/0xb0 [11/27/2018 06:24:32.4950] [] handle_IPI+0x18c/0x1a0 [11/27/2018 06:24:32.4950] [] gic_handle_irq+0x88/0x90 Fixes: a5ba1d95e46e ("uart: fix race between uart_put_char() and uart_shutdown()") Cc: stable Signed-off-by: Samir Virmani Acked-by: Tycho Andersen Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/serial_core.c | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/drivers/tty/serial/serial_core.c b/drivers/tty/serial/serial_core.c index ad126f51d549..7fe679413188 100644 --- a/drivers/tty/serial/serial_core.c +++ b/drivers/tty/serial/serial_core.c @@ -550,10 +550,12 @@ static int uart_put_char(struct tty_struct *tty, unsigned char c) int ret = 0; circ = &state->xmit; - if (!circ->buf) - return 0; - port = uart_port_lock(state, flags); + if (!circ->buf) { + uart_port_unlock(port, flags); + return 0; + } + if (port && uart_circ_chars_free(circ) != 0) { circ->buf[circ->head] = c; circ->head = (circ->head + 1) & (UART_XMIT_SIZE - 1); @@ -586,11 +588,13 @@ static int uart_write(struct tty_struct *tty, return -EL3HLT; } - circ = &state->xmit; - if (!circ->buf) - return 0; - port = uart_port_lock(state, flags); + circ = &state->xmit; + if (!circ->buf) { + uart_port_unlock(port, flags); + return 0; + } + while (port) { c = CIRC_SPACE_TO_END(circ->head, circ->tail, UART_XMIT_SIZE); if (count < c) From 71d1a74f36a7b39623abfc71159b1b835c0750ab Mon Sep 17 00:00:00 2001 From: Paul Fulghum Date: Tue, 1 Jan 2019 12:28:53 -0800 Subject: [PATCH 048/104] tty/n_hdlc: fix __might_sleep warning commit fc01d8c61ce02c034e67378cd3e645734bc18c8c upstream. Fix __might_sleep warning[1] in tty/n_hdlc.c read due to copy_to_user call while current is TASK_INTERRUPTIBLE. 
This is a false positive since the code path does not depend on current state remaining TASK_INTERRUPTIBLE. The loop breaks out and sets TASK_RUNNING after calling copy_to_user. This patch supresses the warning by setting TASK_RUNNING before calling copy_to_user. [1] https://syzkaller.appspot.com/bug?id=17d5de7f1fcab794cb8c40032f893f52de899324 Signed-off-by: Paul Fulghum Reported-by: syzbot Cc: Tetsuo Handa Cc: Alan Cox Cc: stable Acked-by: Arnd Bergmann Signed-off-by: Greg Kroah-Hartman --- drivers/tty/n_hdlc.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/tty/n_hdlc.c b/drivers/tty/n_hdlc.c index dabb391909aa..bb63519db7ae 100644 --- a/drivers/tty/n_hdlc.c +++ b/drivers/tty/n_hdlc.c @@ -597,6 +597,7 @@ static ssize_t n_hdlc_tty_read(struct tty_struct *tty, struct file *file, /* too large for caller's buffer */ ret = -EOVERFLOW; } else { + __set_current_state(TASK_RUNNING); if (copy_to_user(buf, rbuf->buf, rbuf->count)) ret = -EFAULT; else From bfe482b9b299d4a79c76d6c7879b1faee85111b1 Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov Date: Fri, 4 Jan 2019 15:19:42 +0100 Subject: [PATCH 049/104] hv_balloon: avoid touching uninitialized struct page during tail onlining commit da8ced360ca8ad72d8f41f5c8fcd5b0e63e1555f upstream. Hyper-V memory hotplug protocol has 2M granularity and in Linux x86 we use 128M. To deal with it we implement partial section onlining by registering custom page onlining callback (hv_online_page()). Later, when more memory arrives we try to online the 'tail' (see hv_bring_pgs_online()). It was found that in some cases this 'tail' onlining causes issues: BUG: Bad page state in process kworker/0:2 pfn:109e3a page:ffffe08344278e80 count:0 mapcount:1 mapping:0000000000000000 index:0x0 flags: 0xfffff80000000() raw: 000fffff80000000 dead000000000100 dead000000000200 0000000000000000 raw: 0000000000000000 0000000000000000 0000000000000000 0000000000000000 page dumped because: nonzero mapcount ... Workqueue: events hot_add_req [hv_balloon] Call Trace: dump_stack+0x5c/0x80 bad_page.cold.112+0x7f/0xb2 free_pcppages_bulk+0x4b8/0x690 free_unref_page+0x54/0x70 hv_page_online_one+0x5c/0x80 [hv_balloon] hot_add_req.cold.24+0x182/0x835 [hv_balloon] ... Turns out that we now have deferred struct page initialization for memory hotplug so e.g. memory_block_action() in drivers/base/memory.c does pages_correctly_probed() check and in that check it avoids inspecting struct pages and checks sections instead. But in Hyper-V balloon driver we do PageReserved(pfn_to_page()) check and this is now wrong. Switch to checking online_section_nr() instead. Signed-off-by: Vitaly Kuznetsov Cc: stable@kernel.org Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/hv/hv_balloon.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/hv/hv_balloon.c b/drivers/hv/hv_balloon.c index b1b788082793..d2a735ac9ba1 100644 --- a/drivers/hv/hv_balloon.c +++ b/drivers/hv/hv_balloon.c @@ -888,12 +888,14 @@ static unsigned long handle_pg_range(unsigned long pg_start, pfn_cnt -= pgs_ol; /* * Check if the corresponding memory block is already - * online by checking its last previously backed page. - * In case it is we need to bring rest (which was not - * backed previously) online too. + * online. It is possible to observe struct pages still + * being uninitialized here so check section instead. + * In case the section is online we need to bring the + * rest of pfns (which were not backed previously) + * online too. 
*/ if (start_pfn > has->start_pfn && - !PageReserved(pfn_to_page(start_pfn - 1))) + online_section_nr(pfn_to_section_nr(start_pfn))) hv_bring_pgs_online(has, start_pfn, pgs_ol); } From a912e16faeda0f047aa334d062b70f20126f48bc Mon Sep 17 00:00:00 2001 From: Dexuan Cui Date: Mon, 17 Dec 2018 20:16:09 +0000 Subject: [PATCH 050/104] Drivers: hv: vmbus: Check for ring when getting debug info commit ba50bf1ce9a51fc97db58b96d01306aa70bc3979 upstream. fc96df16a1ce is good and can already fix the "return stack garbage" issue, but let's also improve hv_ringbuffer_get_debuginfo(), which would silently return stack garbage, if people forget to check channel->state or ring_info->ring_buffer, when using the function in the future. Having an error check in the function would eliminate the potential risk. Add a Fixes tag to indicate the patch depdendency. Fixes: fc96df16a1ce ("Drivers: hv: vmbus: Return -EINVAL for the sys files for unopened channels") Cc: stable@vger.kernel.org Cc: K. Y. Srinivasan Cc: Haiyang Zhang Signed-off-by: Stephen Hemminger Signed-off-by: Dexuan Cui Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/hv/ring_buffer.c | 29 +++++++------ drivers/hv/vmbus_drv.c | 91 +++++++++++++++++++++++++++------------- include/linux/hyperv.h | 5 ++- 3 files changed, 78 insertions(+), 47 deletions(-) diff --git a/drivers/hv/ring_buffer.c b/drivers/hv/ring_buffer.c index 3e90eb91db45..6cb45f256107 100644 --- a/drivers/hv/ring_buffer.c +++ b/drivers/hv/ring_buffer.c @@ -164,26 +164,25 @@ hv_get_ringbuffer_availbytes(const struct hv_ring_buffer_info *rbi, } /* Get various debug metrics for the specified ring buffer. */ -void hv_ringbuffer_get_debuginfo(const struct hv_ring_buffer_info *ring_info, - struct hv_ring_buffer_debug_info *debug_info) +int hv_ringbuffer_get_debuginfo(const struct hv_ring_buffer_info *ring_info, + struct hv_ring_buffer_debug_info *debug_info) { u32 bytes_avail_towrite; u32 bytes_avail_toread; - if (ring_info->ring_buffer) { - hv_get_ringbuffer_availbytes(ring_info, - &bytes_avail_toread, - &bytes_avail_towrite); + if (!ring_info->ring_buffer) + return -EINVAL; - debug_info->bytes_avail_toread = bytes_avail_toread; - debug_info->bytes_avail_towrite = bytes_avail_towrite; - debug_info->current_read_index = - ring_info->ring_buffer->read_index; - debug_info->current_write_index = - ring_info->ring_buffer->write_index; - debug_info->current_interrupt_mask = - ring_info->ring_buffer->interrupt_mask; - } + hv_get_ringbuffer_availbytes(ring_info, + &bytes_avail_toread, + &bytes_avail_towrite); + debug_info->bytes_avail_toread = bytes_avail_toread; + debug_info->bytes_avail_towrite = bytes_avail_towrite; + debug_info->current_read_index = ring_info->ring_buffer->read_index; + debug_info->current_write_index = ring_info->ring_buffer->write_index; + debug_info->current_interrupt_mask + = ring_info->ring_buffer->interrupt_mask; + return 0; } EXPORT_SYMBOL_GPL(hv_ringbuffer_get_debuginfo); diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c index 2c6d5c7a4445..9aa18f387a34 100644 --- a/drivers/hv/vmbus_drv.c +++ b/drivers/hv/vmbus_drv.c @@ -313,12 +313,16 @@ static ssize_t out_intr_mask_show(struct device *dev, { struct hv_device *hv_dev = device_to_hv_device(dev); struct hv_ring_buffer_debug_info outbound; + int ret; if (!hv_dev->channel) return -ENODEV; - if (hv_dev->channel->state != CHANNEL_OPENED_STATE) - return -EINVAL; - hv_ringbuffer_get_debuginfo(&hv_dev->channel->outbound, &outbound); + + ret = 
hv_ringbuffer_get_debuginfo(&hv_dev->channel->outbound, + &outbound); + if (ret < 0) + return ret; + return sprintf(buf, "%d\n", outbound.current_interrupt_mask); } static DEVICE_ATTR_RO(out_intr_mask); @@ -328,12 +332,15 @@ static ssize_t out_read_index_show(struct device *dev, { struct hv_device *hv_dev = device_to_hv_device(dev); struct hv_ring_buffer_debug_info outbound; + int ret; if (!hv_dev->channel) return -ENODEV; - if (hv_dev->channel->state != CHANNEL_OPENED_STATE) - return -EINVAL; - hv_ringbuffer_get_debuginfo(&hv_dev->channel->outbound, &outbound); + + ret = hv_ringbuffer_get_debuginfo(&hv_dev->channel->outbound, + &outbound); + if (ret < 0) + return ret; return sprintf(buf, "%d\n", outbound.current_read_index); } static DEVICE_ATTR_RO(out_read_index); @@ -344,12 +351,15 @@ static ssize_t out_write_index_show(struct device *dev, { struct hv_device *hv_dev = device_to_hv_device(dev); struct hv_ring_buffer_debug_info outbound; + int ret; if (!hv_dev->channel) return -ENODEV; - if (hv_dev->channel->state != CHANNEL_OPENED_STATE) - return -EINVAL; - hv_ringbuffer_get_debuginfo(&hv_dev->channel->outbound, &outbound); + + ret = hv_ringbuffer_get_debuginfo(&hv_dev->channel->outbound, + &outbound); + if (ret < 0) + return ret; return sprintf(buf, "%d\n", outbound.current_write_index); } static DEVICE_ATTR_RO(out_write_index); @@ -360,12 +370,15 @@ static ssize_t out_read_bytes_avail_show(struct device *dev, { struct hv_device *hv_dev = device_to_hv_device(dev); struct hv_ring_buffer_debug_info outbound; + int ret; if (!hv_dev->channel) return -ENODEV; - if (hv_dev->channel->state != CHANNEL_OPENED_STATE) - return -EINVAL; - hv_ringbuffer_get_debuginfo(&hv_dev->channel->outbound, &outbound); + + ret = hv_ringbuffer_get_debuginfo(&hv_dev->channel->outbound, + &outbound); + if (ret < 0) + return ret; return sprintf(buf, "%d\n", outbound.bytes_avail_toread); } static DEVICE_ATTR_RO(out_read_bytes_avail); @@ -376,12 +389,15 @@ static ssize_t out_write_bytes_avail_show(struct device *dev, { struct hv_device *hv_dev = device_to_hv_device(dev); struct hv_ring_buffer_debug_info outbound; + int ret; if (!hv_dev->channel) return -ENODEV; - if (hv_dev->channel->state != CHANNEL_OPENED_STATE) - return -EINVAL; - hv_ringbuffer_get_debuginfo(&hv_dev->channel->outbound, &outbound); + + ret = hv_ringbuffer_get_debuginfo(&hv_dev->channel->outbound, + &outbound); + if (ret < 0) + return ret; return sprintf(buf, "%d\n", outbound.bytes_avail_towrite); } static DEVICE_ATTR_RO(out_write_bytes_avail); @@ -391,12 +407,15 @@ static ssize_t in_intr_mask_show(struct device *dev, { struct hv_device *hv_dev = device_to_hv_device(dev); struct hv_ring_buffer_debug_info inbound; + int ret; if (!hv_dev->channel) return -ENODEV; - if (hv_dev->channel->state != CHANNEL_OPENED_STATE) - return -EINVAL; - hv_ringbuffer_get_debuginfo(&hv_dev->channel->inbound, &inbound); + + ret = hv_ringbuffer_get_debuginfo(&hv_dev->channel->inbound, &inbound); + if (ret < 0) + return ret; + return sprintf(buf, "%d\n", inbound.current_interrupt_mask); } static DEVICE_ATTR_RO(in_intr_mask); @@ -406,12 +425,15 @@ static ssize_t in_read_index_show(struct device *dev, { struct hv_device *hv_dev = device_to_hv_device(dev); struct hv_ring_buffer_debug_info inbound; + int ret; if (!hv_dev->channel) return -ENODEV; - if (hv_dev->channel->state != CHANNEL_OPENED_STATE) - return -EINVAL; - hv_ringbuffer_get_debuginfo(&hv_dev->channel->inbound, &inbound); + + ret = hv_ringbuffer_get_debuginfo(&hv_dev->channel->inbound, &inbound); + if (ret < 0) + 
return ret; + return sprintf(buf, "%d\n", inbound.current_read_index); } static DEVICE_ATTR_RO(in_read_index); @@ -421,12 +443,15 @@ static ssize_t in_write_index_show(struct device *dev, { struct hv_device *hv_dev = device_to_hv_device(dev); struct hv_ring_buffer_debug_info inbound; + int ret; if (!hv_dev->channel) return -ENODEV; - if (hv_dev->channel->state != CHANNEL_OPENED_STATE) - return -EINVAL; - hv_ringbuffer_get_debuginfo(&hv_dev->channel->inbound, &inbound); + + ret = hv_ringbuffer_get_debuginfo(&hv_dev->channel->inbound, &inbound); + if (ret < 0) + return ret; + return sprintf(buf, "%d\n", inbound.current_write_index); } static DEVICE_ATTR_RO(in_write_index); @@ -437,12 +462,15 @@ static ssize_t in_read_bytes_avail_show(struct device *dev, { struct hv_device *hv_dev = device_to_hv_device(dev); struct hv_ring_buffer_debug_info inbound; + int ret; if (!hv_dev->channel) return -ENODEV; - if (hv_dev->channel->state != CHANNEL_OPENED_STATE) - return -EINVAL; - hv_ringbuffer_get_debuginfo(&hv_dev->channel->inbound, &inbound); + + ret = hv_ringbuffer_get_debuginfo(&hv_dev->channel->inbound, &inbound); + if (ret < 0) + return ret; + return sprintf(buf, "%d\n", inbound.bytes_avail_toread); } static DEVICE_ATTR_RO(in_read_bytes_avail); @@ -453,12 +481,15 @@ static ssize_t in_write_bytes_avail_show(struct device *dev, { struct hv_device *hv_dev = device_to_hv_device(dev); struct hv_ring_buffer_debug_info inbound; + int ret; if (!hv_dev->channel) return -ENODEV; - if (hv_dev->channel->state != CHANNEL_OPENED_STATE) - return -EINVAL; - hv_ringbuffer_get_debuginfo(&hv_dev->channel->inbound, &inbound); + + ret = hv_ringbuffer_get_debuginfo(&hv_dev->channel->inbound, &inbound); + if (ret < 0) + return ret; + return sprintf(buf, "%d\n", inbound.bytes_avail_towrite); } static DEVICE_ATTR_RO(in_write_bytes_avail); diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h index 5185a16b19ba..bbde887ed393 100644 --- a/include/linux/hyperv.h +++ b/include/linux/hyperv.h @@ -1166,8 +1166,9 @@ struct hv_ring_buffer_debug_info { u32 bytes_avail_towrite; }; -void hv_ringbuffer_get_debuginfo(const struct hv_ring_buffer_info *ring_info, - struct hv_ring_buffer_debug_info *debug_info); + +int hv_ringbuffer_get_debuginfo(const struct hv_ring_buffer_info *ring_info, + struct hv_ring_buffer_debug_info *debug_info); /* Vmbus interface */ #define vmbus_driver_register(driver) \ From f4abbb16ed9add95be72e85b9b5db8529e736b74 Mon Sep 17 00:00:00 2001 From: Nicolas Pitre Date: Thu, 10 Jan 2019 16:33:55 -0500 Subject: [PATCH 051/104] vgacon: unconfuse vc_origin when using soft scrollback commit bfd8d8fe98b8792f362cd210a7873969f8d2fc04 upstream. When CONFIG_VGACON_SOFT_SCROLLBACK is selected, the VGA display memory index and vc_visible_origin don't change when scrollback is activated. The actual screen content is saved away and the scrollbackdata is copied over it. However the vt code, and /dev/vcs devices in particular, still expect vc_origin to always point at the actual screen content not the displayed scrollback content. So adjust vc_origin to point at the saved screen content when scrollback is active and set it back to vc_visible_origin when restoring the screen. This fixes /dev/vcsa that return scrollback content when they shouldn't (onli /dev/vcsa without a number should), and also fixes /dev/vcsu that should return scrollback content when scrollback is active but currently doesn't. An unnecessary call to vga_set_mem_top() is also removed. 
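For illustration only, a rough sketch of the invariant this restores. The three functions below are made up for the example; vc_origin, vc_visible_origin, vc_screenbuf and vgacon_save_screen() are the real fields and helper. Anything that reads the "actual screen", such as a /dev/vcs-style reader, goes through vc_origin, so while scrollback is displayed vc_origin has to track the saved live contents rather than the visible history:

  /* hypothetical reader: what a /dev/vcs-style interface conceptually does */
  static void read_current_screen(struct vc_data *c, void *dst, size_t len)
  {
          memcpy(dst, (void *)c->vc_origin, len);  /* must be the live text */
  }

  /* roughly what the hunks below arrange when scrollback starts ... */
  static void enter_scrollback_view(struct vc_data *c)
  {
          vgacon_save_screen(c);                          /* live text -> vc_screenbuf */
          c->vc_origin = (unsigned long)c->vc_screenbuf;  /* readers keep seeing it */
  }

  /* ... and when the display goes back to the live screen */
  static void leave_scrollback_view(struct vc_data *c)
  {
          c->vc_origin = c->vc_visible_origin;            /* screen and readers agree again */
  }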
Signed-off-by: Nicolas Pitre Cc: stable@vger.kernel.org # v4.19+ Signed-off-by: Greg Kroah-Hartman --- drivers/video/console/vgacon.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/video/console/vgacon.c b/drivers/video/console/vgacon.c index 09731b2f6815..c6b3bdbbdbc9 100644 --- a/drivers/video/console/vgacon.c +++ b/drivers/video/console/vgacon.c @@ -271,6 +271,7 @@ static void vgacon_scrollback_update(struct vc_data *c, int t, int count) static void vgacon_restore_screen(struct vc_data *c) { + c->vc_origin = c->vc_visible_origin; vgacon_scrollback_cur->save = 0; if (!vga_is_gfx && !vgacon_scrollback_cur->restore) { @@ -287,8 +288,7 @@ static void vgacon_scrolldelta(struct vc_data *c, int lines) int start, end, count, soff; if (!lines) { - c->vc_visible_origin = c->vc_origin; - vga_set_mem_top(c); + vgacon_restore_screen(c); return; } @@ -298,6 +298,7 @@ static void vgacon_scrolldelta(struct vc_data *c, int lines) if (!vgacon_scrollback_cur->save) { vgacon_cursor(c, CM_ERASE); vgacon_save_screen(c); + c->vc_origin = (unsigned long)c->vc_screenbuf; vgacon_scrollback_cur->save = 1; } @@ -335,7 +336,7 @@ static void vgacon_scrolldelta(struct vc_data *c, int lines) int copysize; int diff = c->vc_rows - count; - void *d = (void *) c->vc_origin; + void *d = (void *) c->vc_visible_origin; void *s = (void *) c->vc_screenbuf; count *= c->vc_size_row; From 07b9e5e35e8f4189a0a3b87beaca1094d5c18b24 Mon Sep 17 00:00:00 2001 From: Pavel Shilovsky Date: Thu, 17 Jan 2019 08:21:24 -0800 Subject: [PATCH 052/104] CIFS: Fix possible hang during async MTU reads and writes commit acc58d0bab55a50e02c25f00bd6a210ee121595f upstream. When doing MTU i/o we need to leave some credits for possible reopen requests and other operations happening in parallel. Currently we leave 1 credit which is not enough even for reopen only: we need at least 2 credits if durable handle reconnect fails. Also there may be other operations at the same time including compounding ones which require 3 credits at a time each. Fix this by leaving 8 credits which is big enough to cover most scenarios. Was able to reproduce this when server was configured to give out fewer credits than usual. The proper fix would be to reconnect a file handle first and then obtain credits for an MTU request but this leads to bigger code changes and should happen in other patches. Cc: Signed-off-by: Pavel Shilovsky Signed-off-by: Steve French Signed-off-by: Greg Kroah-Hartman --- fs/cifs/smb2ops.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c index f44bb4a304e9..0c2bbeab5f64 100644 --- a/fs/cifs/smb2ops.c +++ b/fs/cifs/smb2ops.c @@ -154,14 +154,14 @@ smb2_wait_mtu_credits(struct TCP_Server_Info *server, unsigned int size, scredits = server->credits; /* can deadlock with reopen */ - if (scredits == 1) { + if (scredits <= 8) { *num = SMB2_MAX_BUFFER_SIZE; *credits = 0; break; } - /* leave one credit for a possible reopen */ - scredits--; + /* leave some credits for reopen and other ops */ + scredits -= 8; *num = min_t(unsigned int, size, scredits * SMB2_MAX_BUFFER_SIZE); From 0380ed9b1cd39a582cd14b21072d59fb66bc7d50 Mon Sep 17 00:00:00 2001 From: Pavel Shilovsky Date: Thu, 17 Jan 2019 15:29:26 -0800 Subject: [PATCH 053/104] CIFS: Fix credits calculations for reads with errors commit 8004c78c68e894e4fd5ac3c22cc22eb7dc24cabc upstream. Currently we mark MID as malformed if we get an error from server in a read response. 
This leads to not properly processing credits in the readv callback. Fix this by marking such a response as normal received response and process it appropriately. Cc: Signed-off-by: Pavel Shilovsky Reviewed-by: Ronnie Sahlberg Signed-off-by: Steve French Signed-off-by: Greg Kroah-Hartman --- fs/cifs/cifssmb.c | 35 +++++++++++++++++++++++------------ 1 file changed, 23 insertions(+), 12 deletions(-) diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c index 5657b79dbc99..269471c8f42b 100644 --- a/fs/cifs/cifssmb.c +++ b/fs/cifs/cifssmb.c @@ -1458,18 +1458,26 @@ cifs_discard_remaining_data(struct TCP_Server_Info *server) } static int -cifs_readv_discard(struct TCP_Server_Info *server, struct mid_q_entry *mid) +__cifs_readv_discard(struct TCP_Server_Info *server, struct mid_q_entry *mid, + bool malformed) { int length; - struct cifs_readdata *rdata = mid->callback_data; length = cifs_discard_remaining_data(server); - dequeue_mid(mid, rdata->result); + dequeue_mid(mid, malformed); mid->resp_buf = server->smallbuf; server->smallbuf = NULL; return length; } +static int +cifs_readv_discard(struct TCP_Server_Info *server, struct mid_q_entry *mid) +{ + struct cifs_readdata *rdata = mid->callback_data; + + return __cifs_readv_discard(server, mid, rdata->result); +} + int cifs_readv_receive(struct TCP_Server_Info *server, struct mid_q_entry *mid) { @@ -1511,12 +1519,23 @@ cifs_readv_receive(struct TCP_Server_Info *server, struct mid_q_entry *mid) return -1; } + /* set up first two iov for signature check and to get credits */ + rdata->iov[0].iov_base = buf; + rdata->iov[0].iov_len = 4; + rdata->iov[1].iov_base = buf + 4; + rdata->iov[1].iov_len = server->total_read - 4; + cifs_dbg(FYI, "0: iov_base=%p iov_len=%zu\n", + rdata->iov[0].iov_base, rdata->iov[0].iov_len); + cifs_dbg(FYI, "1: iov_base=%p iov_len=%zu\n", + rdata->iov[1].iov_base, rdata->iov[1].iov_len); + /* Was the SMB read successful? */ rdata->result = server->ops->map_error(buf, false); if (rdata->result != 0) { cifs_dbg(FYI, "%s: server returned error %d\n", __func__, rdata->result); - return cifs_readv_discard(server, mid); + /* normal error on read response */ + return __cifs_readv_discard(server, mid, false); } /* Is there enough to get to the rest of the READ_RSP header? */ @@ -1560,14 +1579,6 @@ cifs_readv_receive(struct TCP_Server_Info *server, struct mid_q_entry *mid) server->total_read += length; } - /* set up first iov for signature check */ - rdata->iov[0].iov_base = buf; - rdata->iov[0].iov_len = 4; - rdata->iov[1].iov_base = buf + 4; - rdata->iov[1].iov_len = server->total_read - 4; - cifs_dbg(FYI, "0: iov_base=%p iov_len=%u\n", - rdata->iov[0].iov_base, server->total_read); - /* how much data is in the response? */ #ifdef CONFIG_CIFS_SMB_DIRECT use_rdma_mr = rdata->mr; From 2ae6fedbd5cb4144f5cf40681b9a3ea5464d31bd Mon Sep 17 00:00:00 2001 From: Pavel Shilovsky Date: Fri, 18 Jan 2019 15:38:11 -0800 Subject: [PATCH 054/104] CIFS: Fix credit calculation for encrypted reads with errors commit ec678eae746dd25766a61c4095e2b649d3b20b09 upstream. We do need to account for credits received in error responses to read requests on encrypted sessions. 
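As in the previous change, the fix is mostly an ordering one. Sketched with simplified, hypothetical names (the real code uses server->ops->map_error(), rdata->iov[] and the discard/dequeue paths), the granted credits travel in the SMB2 response header, so the header has to be wired up for the completion path before the status check is allowed to bail out:

  /* before: an error status short-circuited the receive ... */
  if (map_error(buf))
          return discard_response(mid);   /* ... and the credits in the header were lost */

  /* after: expose the header first, then look at the status */
  rdata->iov[0].iov_base = buf;           /* 4-byte transport length */
  rdata->iov[0].iov_len  = 4;
  rdata->iov[1].iov_base = buf + 4;       /* SMB2 header carrying the granted credits */
  rdata->iov[1].iov_len  = header_len - 4;

  if (map_error(buf))
          return discard_response(mid);   /* credits are still picked up by the callback */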
Cc: Signed-off-by: Pavel Shilovsky Reviewed-by: Ronnie Sahlberg Signed-off-by: Steve French Signed-off-by: Greg Kroah-Hartman --- fs/cifs/smb2ops.c | 24 ++++++++++++++---------- 1 file changed, 14 insertions(+), 10 deletions(-) diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c index 0c2bbeab5f64..ffb55c174482 100644 --- a/fs/cifs/smb2ops.c +++ b/fs/cifs/smb2ops.c @@ -2901,11 +2901,23 @@ handle_read_data(struct TCP_Server_Info *server, struct mid_q_entry *mid, server->ops->is_status_pending(buf, server, 0)) return -1; - rdata->result = server->ops->map_error(buf, false); + /* set up first two iov to get credits */ + rdata->iov[0].iov_base = buf; + rdata->iov[0].iov_len = 4; + rdata->iov[1].iov_base = buf + 4; + rdata->iov[1].iov_len = + min_t(unsigned int, buf_len, server->vals->read_rsp_size) - 4; + cifs_dbg(FYI, "0: iov_base=%p iov_len=%zu\n", + rdata->iov[0].iov_base, rdata->iov[0].iov_len); + cifs_dbg(FYI, "1: iov_base=%p iov_len=%zu\n", + rdata->iov[1].iov_base, rdata->iov[1].iov_len); + + rdata->result = server->ops->map_error(buf, true); if (rdata->result != 0) { cifs_dbg(FYI, "%s: server returned error %d\n", __func__, rdata->result); - dequeue_mid(mid, rdata->result); + /* normal error on read response */ + dequeue_mid(mid, false); return 0; } @@ -2978,14 +2990,6 @@ handle_read_data(struct TCP_Server_Info *server, struct mid_q_entry *mid, return 0; } - /* set up first iov for signature check */ - rdata->iov[0].iov_base = buf; - rdata->iov[0].iov_len = 4; - rdata->iov[1].iov_base = buf + 4; - rdata->iov[1].iov_len = server->vals->read_rsp_size - 4; - cifs_dbg(FYI, "0: iov_base=%p iov_len=%zu\n", - rdata->iov[0].iov_base, server->vals->read_rsp_size); - length = rdata->copy_into_pages(server, rdata, &iter); kfree(bvec); From 779c65bb77391b9e096a0cd1d22c039e9a133911 Mon Sep 17 00:00:00 2001 From: Pavel Shilovsky Date: Fri, 18 Jan 2019 17:25:36 -0800 Subject: [PATCH 055/104] CIFS: Do not reconnect TCP session in add_credits() commit ef68e831840c40c7d01b328b3c0f5d8c4796c232 upstream. When executing add_credits() we currently call cifs_reconnect() if the number of credits is zero and there are no requests in flight. In this case we may call cifs_reconnect() recursively twice and cause memory corruption given the following sequence of functions: mid1.callback() -> add_credits() -> cifs_reconnect() -> -> mid2.callback() -> add_credits() -> cifs_reconnect(). Fix this by avoiding to call cifs_reconnect() in add_credits() and checking for zero credits in the demultiplex thread. 
Cc: Signed-off-by: Pavel Shilovsky Reviewed-by: Ronnie Sahlberg Signed-off-by: Steve French Signed-off-by: Greg Kroah-Hartman --- fs/cifs/connect.c | 21 +++++++++++++++++++++ fs/cifs/smb2ops.c | 32 +++++++++++++++++++++++++------- 2 files changed, 46 insertions(+), 7 deletions(-) diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index 52d71b64c0c6..d0bba175117c 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -533,6 +533,21 @@ server_unresponsive(struct TCP_Server_Info *server) return false; } +static inline bool +zero_credits(struct TCP_Server_Info *server) +{ + int val; + + spin_lock(&server->req_lock); + val = server->credits + server->echo_credits + server->oplock_credits; + if (server->in_flight == 0 && val == 0) { + spin_unlock(&server->req_lock); + return true; + } + spin_unlock(&server->req_lock); + return false; +} + static int cifs_readv_from_socket(struct TCP_Server_Info *server, struct msghdr *smb_msg) { @@ -545,6 +560,12 @@ cifs_readv_from_socket(struct TCP_Server_Info *server, struct msghdr *smb_msg) for (total_read = 0; msg_data_left(smb_msg); total_read += length) { try_to_freeze(); + /* reconnect if no credits and no requests in flight */ + if (zero_credits(server)) { + cifs_reconnect(server); + return -ECONNABORTED; + } + if (server_unresponsive(server)) return -ECONNABORTED; if (cifs_rdma_enabled(server) && server->smbd_conn) diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c index ffb55c174482..237d7281ada3 100644 --- a/fs/cifs/smb2ops.c +++ b/fs/cifs/smb2ops.c @@ -34,6 +34,7 @@ #include "cifs_ioctl.h" #include "smbdirect.h" +/* Change credits for different ops and return the total number of credits */ static int change_conf(struct TCP_Server_Info *server) { @@ -41,17 +42,15 @@ change_conf(struct TCP_Server_Info *server) server->oplock_credits = server->echo_credits = 0; switch (server->credits) { case 0: - return -1; + return 0; case 1: server->echoes = false; server->oplocks = false; - cifs_dbg(VFS, "disabling echoes and oplocks\n"); break; case 2: server->echoes = true; server->oplocks = false; server->echo_credits = 1; - cifs_dbg(FYI, "disabling oplocks\n"); break; default: server->echoes = true; @@ -64,14 +63,15 @@ change_conf(struct TCP_Server_Info *server) server->echo_credits = 1; } server->credits -= server->echo_credits + server->oplock_credits; - return 0; + return server->credits + server->echo_credits + server->oplock_credits; } static void smb2_add_credits(struct TCP_Server_Info *server, const unsigned int add, const int optype) { - int *val, rc = 0; + int *val, rc = -1; + spin_lock(&server->req_lock); val = server->ops->get_credits_field(server, optype); *val += add; @@ -95,8 +95,26 @@ smb2_add_credits(struct TCP_Server_Info *server, const unsigned int add, } spin_unlock(&server->req_lock); wake_up(&server->request_q); - if (rc) - cifs_reconnect(server); + + if (server->tcpStatus == CifsNeedReconnect) + return; + + switch (rc) { + case -1: + /* change_conf hasn't been executed */ + break; + case 0: + cifs_dbg(VFS, "Possible client or server bug - zero credits\n"); + break; + case 1: + cifs_dbg(VFS, "disabling echoes and oplocks\n"); + break; + case 2: + cifs_dbg(FYI, "disabling oplocks\n"); + break; + default: + cifs_dbg(FYI, "add %u credits total=%d\n", add, rc); + } } static void From 06d9f987201feeada1e48e501db5f4a202541db3 Mon Sep 17 00:00:00 2001 From: Ronnie Sahlberg Date: Wed, 23 Jan 2019 16:20:38 +1000 Subject: [PATCH 056/104] smb3: add credits we receive from oplock/break PDUs commit 2e5700bdde438ed708b36d8acd0398dc73cbf759 upstream. 
Otherwise we gradually leak credits leading to potential hung session. Signed-off-by: Ronnie Sahlberg CC: Stable Reviewed-by: Pavel Shilovsky Signed-off-by: Steve French Signed-off-by: Greg Kroah-Hartman --- fs/cifs/smb2misc.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/fs/cifs/smb2misc.c b/fs/cifs/smb2misc.c index 6a9c47541c53..7b8b58fb4d3f 100644 --- a/fs/cifs/smb2misc.c +++ b/fs/cifs/smb2misc.c @@ -648,6 +648,13 @@ smb2_is_valid_oplock_break(char *buffer, struct TCP_Server_Info *server) if (rsp->sync_hdr.Command != SMB2_OPLOCK_BREAK) return false; + if (rsp->sync_hdr.CreditRequest) { + spin_lock(&server->req_lock); + server->credits += le16_to_cpu(rsp->sync_hdr.CreditRequest); + spin_unlock(&server->req_lock); + wake_up(&server->request_q); + } + if (rsp->StructureSize != smb2_rsp_struct_sizes[SMB2_OPLOCK_BREAK_HE]) { if (le16_to_cpu(rsp->StructureSize) == 44) From 865a07956db55120d69876a202ce0fa35f46b218 Mon Sep 17 00:00:00 2001 From: Tom Panfil Date: Fri, 11 Jan 2019 17:49:40 -0800 Subject: [PATCH 057/104] Input: xpad - add support for SteelSeries Stratus Duo commit fe2bfd0d40c935763812973ce15f5764f1c12833 upstream. Add support for the SteelSeries Stratus Duo, a wireless Xbox 360 controller. The Stratus Duo ships with a USB dongle to enable wireless connectivity, but it can also function as a wired controller by connecting it directly to a PC via USB, hence the need for two USD PIDs. 0x1430 is the dongle, and 0x1431 is the controller. Signed-off-by: Tom Panfil Cc: stable@vger.kernel.org Signed-off-by: Dmitry Torokhov Signed-off-by: Greg Kroah-Hartman --- drivers/input/joystick/xpad.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/input/joystick/xpad.c b/drivers/input/joystick/xpad.c index cfc8b94527b9..aa4e431cbcd3 100644 --- a/drivers/input/joystick/xpad.c +++ b/drivers/input/joystick/xpad.c @@ -252,6 +252,8 @@ static const struct xpad_device { { 0x0f30, 0x0202, "Joytech Advanced Controller", 0, XTYPE_XBOX }, { 0x0f30, 0x8888, "BigBen XBMiniPad Controller", 0, XTYPE_XBOX }, { 0x102c, 0xff0c, "Joytech Wireless Advanced Controller", 0, XTYPE_XBOX }, + { 0x1038, 0x1430, "SteelSeries Stratus Duo", 0, XTYPE_XBOX360 }, + { 0x1038, 0x1431, "SteelSeries Stratus Duo", 0, XTYPE_XBOX360 }, { 0x11c9, 0x55f0, "Nacon GC-100XF", 0, XTYPE_XBOX360 }, { 0x12ab, 0x0004, "Honey Bee Xbox360 dancepad", MAP_DPAD_TO_BUTTONS, XTYPE_XBOX360 }, { 0x12ab, 0x0301, "PDP AFTERGLOW AX.1", 0, XTYPE_XBOX360 }, @@ -428,6 +430,7 @@ static const struct usb_device_id xpad_table[] = { XPAD_XBOXONE_VENDOR(0x0e6f), /* 0x0e6f X-Box One controllers */ XPAD_XBOX360_VENDOR(0x0f0d), /* Hori Controllers */ XPAD_XBOXONE_VENDOR(0x0f0d), /* Hori Controllers */ + XPAD_XBOX360_VENDOR(0x1038), /* SteelSeries Controllers */ XPAD_XBOX360_VENDOR(0x11c9), /* Nacon GC100XF */ XPAD_XBOX360_VENDOR(0x12ab), /* X-Box 360 dance pads */ XPAD_XBOX360_VENDOR(0x1430), /* RedOctane X-Box 360 controllers */ From 71b1af87749b917a2b6fd4afa0b8a9019f1259a4 Mon Sep 17 00:00:00 2001 From: Deepa Dinamani Date: Sun, 13 Jan 2019 22:28:05 -0800 Subject: [PATCH 058/104] Input: input_event - provide override for sparc64 commit 2e746942ebacf1565caa72cf980745e5ce297c48 upstream. The usec part of the timeval is defined as __kernel_suseconds_t tv_usec; /* microseconds */ Arnd noticed that sparc64 is the only architecture that defines __kernel_suseconds_t as int rather than long. This breaks the current y2038 fix for kernel as we only access and define the timeval struct for non-kernel use cases. 
But, this was hidden by an another typo in the use of __KERNEL__ qualifier. Fix the typo, and provide an override for sparc64. Fixes: 152194fe9c3f ("Input: extend usable life of event timestamps to 2106 on 32 bit systems") Reported-by: Arnd Bergmann Signed-off-by: Deepa Dinamani Cc: stable@vger.kernel.org Signed-off-by: Dmitry Torokhov Signed-off-by: Greg Kroah-Hartman --- include/uapi/linux/input.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/include/uapi/linux/input.h b/include/uapi/linux/input.h index fb78f6f500f3..ffab958bc512 100644 --- a/include/uapi/linux/input.h +++ b/include/uapi/linux/input.h @@ -26,13 +26,17 @@ */ struct input_event { -#if (__BITS_PER_LONG != 32 || !defined(__USE_TIME_BITS64)) && !defined(__KERNEL) +#if (__BITS_PER_LONG != 32 || !defined(__USE_TIME_BITS64)) && !defined(__KERNEL__) struct timeval time; #define input_event_sec time.tv_sec #define input_event_usec time.tv_usec #else __kernel_ulong_t __sec; +#ifdef CONFIG_SPARC64 + unsigned int __usec; +#else __kernel_ulong_t __usec; +#endif #define input_event_sec __sec #define input_event_usec __usec #endif From 92fbac528fd0727eddf60476d73869383602fab0 Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Mon, 14 Jan 2019 13:54:55 -0800 Subject: [PATCH 059/104] Input: uinput - fix undefined behavior in uinput_validate_absinfo() commit d77651a227f8920dd7ec179b84e400cce844eeb3 upstream. An integer overflow may arise in uinput_validate_absinfo() if "max - min" can't be represented by an "int". We should check for overflow before trying to use the result. Reported-by: Kyungtae Kim Reviewed-by: Peter Hutterer Cc: stable@vger.kernel.org Signed-off-by: Dmitry Torokhov Signed-off-by: Greg Kroah-Hartman --- drivers/input/misc/uinput.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/input/misc/uinput.c b/drivers/input/misc/uinput.c index 8ec483e8688b..26ec603fe220 100644 --- a/drivers/input/misc/uinput.c +++ b/drivers/input/misc/uinput.c @@ -39,6 +39,7 @@ #include #include #include +#include #include #include "../input-compat.h" @@ -405,7 +406,7 @@ static int uinput_open(struct inode *inode, struct file *file) static int uinput_validate_absinfo(struct input_dev *dev, unsigned int code, const struct input_absinfo *abs) { - int min, max; + int min, max, range; min = abs->minimum; max = abs->maximum; @@ -417,7 +418,7 @@ static int uinput_validate_absinfo(struct input_dev *dev, unsigned int code, return -EINVAL; } - if (abs->flat > max - min) { + if (!check_sub_overflow(max, min, &range) && abs->flat > range) { printk(KERN_DEBUG "%s: abs_flat #%02x out of range: %d (min:%d/max:%d)\n", UINPUT_NAME, code, abs->flat, min, max); From 3cb00cfa3d37e46b3c8ee4af97fb52d29b07fbc0 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Mon, 14 Jan 2019 14:07:19 -0800 Subject: [PATCH 060/104] acpi/nfit: Block function zero DSMs commit 5e9e38d0db1d29efed1dd4cf9a70115d33521be7 upstream. In preparation for using function number 0 as an error value, prevent it from being considered a valid function value by acpi_nfit_ctl(). 
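For context (illustrative only, not part of the change): in the ACPI _DSM convention, function index 0 is the discovery call. It returns a bitmask of the implemented functions rather than executing a command, so masking bit 0 out of the advertised set costs nothing and frees 0 up as a sentinel. A hypothetical helper showing the idea:

  static bool dsm_func_supported(unsigned long dsm_mask, unsigned int func)
  {
          dsm_mask &= ~1UL;               /* function 0 is the query, never a command */
          return test_bit(func, &dsm_mask);
  }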
Cc: Cc: stuart hayes Fixes: e02fb7264d8a ("nfit: add Microsoft NVDIMM DSM command set...") Reported-by: Jeff Moyer Reviewed-by: Jeff Moyer Signed-off-by: Dan Williams Signed-off-by: Greg Kroah-Hartman --- drivers/acpi/nfit/core.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/acpi/nfit/core.c b/drivers/acpi/nfit/core.c index 75b331f8a16a..1697fbc4a035 100644 --- a/drivers/acpi/nfit/core.c +++ b/drivers/acpi/nfit/core.c @@ -1764,6 +1764,13 @@ static int acpi_nfit_add_dimm(struct acpi_nfit_desc *acpi_desc, return 0; } + /* + * Function 0 is the command interrogation function, don't + * export it to potential userspace use, and enable it to be + * used as an error value in acpi_nfit_ctl(). + */ + dsm_mask &= ~1UL; + guid = to_nfit_uuid(nfit_mem->family); for_each_set_bit(i, &dsm_mask, BITS_PER_LONG) if (acpi_check_dsm(adev_dimm->handle, guid, From b18931c5fe0de20d61bf1dca8a472ee649636fe3 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Sat, 19 Jan 2019 10:55:04 -0800 Subject: [PATCH 061/104] acpi/nfit: Fix command-supported detection commit 11189c1089da413aa4b5fd6be4c4d47c78968819 upstream. The _DSM function number validation only happens to succeed when the generic Linux command number translation corresponds with a DSM-family-specific function number. This breaks NVDIMM-N implementations that correctly implement _LSR, _LSW, and _LSI, but do not happen to publish support for DSM function numbers 4, 5, and 6. Recall that the support for _LS{I,R,W} family of methods results in the DIMM being marked as supporting those command numbers at acpi_nfit_register_dimms() time. The DSM function mask is only used for ND_CMD_CALL support of non-NVDIMM_FAMILY_INTEL devices. Fixes: 31eca76ba2fc ("nfit, libnvdimm: limited/whitelisted dimm command...") Cc: Link: https://github.com/pmem/ndctl/issues/78 Reported-by: Sujith Pandel Tested-by: Sujith Pandel Reviewed-by: Vishal Verma Reviewed-by: Jeff Moyer Signed-off-by: Dan Williams Signed-off-by: Greg Kroah-Hartman --- drivers/acpi/nfit/core.c | 54 +++++++++++++++++++++++++++++----------- 1 file changed, 40 insertions(+), 14 deletions(-) diff --git a/drivers/acpi/nfit/core.c b/drivers/acpi/nfit/core.c index 1697fbc4a035..ea59c01ce8db 100644 --- a/drivers/acpi/nfit/core.c +++ b/drivers/acpi/nfit/core.c @@ -391,6 +391,32 @@ static u8 nfit_dsm_revid(unsigned family, unsigned func) return id; } +static int cmd_to_func(struct nfit_mem *nfit_mem, unsigned int cmd, + struct nd_cmd_pkg *call_pkg) +{ + if (call_pkg) { + int i; + + if (nfit_mem->family != call_pkg->nd_family) + return -ENOTTY; + + for (i = 0; i < ARRAY_SIZE(call_pkg->nd_reserved2); i++) + if (call_pkg->nd_reserved2[i]) + return -EINVAL; + return call_pkg->nd_command; + } + + /* Linux ND commands == NVDIMM_FAMILY_INTEL function numbers */ + if (nfit_mem->family == NVDIMM_FAMILY_INTEL) + return cmd; + + /* + * Force function number validation to fail since 0 is never + * published as a valid function in dsm_mask. 
+ */ + return 0; +} + int acpi_nfit_ctl(struct nvdimm_bus_descriptor *nd_desc, struct nvdimm *nvdimm, unsigned int cmd, void *buf, unsigned int buf_len, int *cmd_rc) { @@ -404,30 +430,23 @@ int acpi_nfit_ctl(struct nvdimm_bus_descriptor *nd_desc, struct nvdimm *nvdimm, unsigned long cmd_mask, dsm_mask; u32 offset, fw_status = 0; acpi_handle handle; - unsigned int func; const guid_t *guid; - int rc, i; + int func, rc, i; if (cmd_rc) *cmd_rc = -EINVAL; - func = cmd; - if (cmd == ND_CMD_CALL) { - call_pkg = buf; - func = call_pkg->nd_command; - - for (i = 0; i < ARRAY_SIZE(call_pkg->nd_reserved2); i++) - if (call_pkg->nd_reserved2[i]) - return -EINVAL; - } if (nvdimm) { struct acpi_device *adev = nfit_mem->adev; if (!adev) return -ENOTTY; - if (call_pkg && nfit_mem->family != call_pkg->nd_family) - return -ENOTTY; + if (cmd == ND_CMD_CALL) + call_pkg = buf; + func = cmd_to_func(nfit_mem, cmd, call_pkg); + if (func < 0) + return func; dimm_name = nvdimm_name(nvdimm); cmd_name = nvdimm_cmd_name(cmd); cmd_mask = nvdimm_cmd_mask(nvdimm); @@ -438,6 +457,7 @@ int acpi_nfit_ctl(struct nvdimm_bus_descriptor *nd_desc, struct nvdimm *nvdimm, } else { struct acpi_device *adev = to_acpi_dev(acpi_desc); + func = cmd; cmd_name = nvdimm_bus_cmd_name(cmd); cmd_mask = nd_desc->cmd_mask; dsm_mask = cmd_mask; @@ -452,7 +472,13 @@ int acpi_nfit_ctl(struct nvdimm_bus_descriptor *nd_desc, struct nvdimm *nvdimm, if (!desc || (cmd && (desc->out_num + desc->in_num == 0))) return -ENOTTY; - if (!test_bit(cmd, &cmd_mask) || !test_bit(func, &dsm_mask)) + /* + * Check for a valid command. For ND_CMD_CALL, we also have to + * make sure that the DSM function is supported. + */ + if (cmd == ND_CMD_CALL && !test_bit(func, &dsm_mask)) + return -ENOTTY; + else if (!test_bit(cmd, &cmd_mask)) return -ENOTTY; in_obj.type = ACPI_TYPE_PACKAGE; From eba68bd456724a6128f25ad5c2f1573c9fae51d3 Mon Sep 17 00:00:00 2001 From: Marc Gonzalez Date: Tue, 22 Jan 2019 18:29:22 +0100 Subject: [PATCH 062/104] scsi: ufs: Use explicit access size in ufshcd_dump_regs commit d67247566450cf89a693307c9bc9f05a32d96cea upstream. memcpy_fromio() doesn't provide any control over access size. For example, on arm64, it is implemented using readb and readq. This may trigger a synchronous external abort: [ 3.729943] Internal error: synchronous external abort: 96000210 [#1] PREEMPT SMP [ 3.737000] Modules linked in: [ 3.744371] CPU: 2 PID: 1 Comm: swapper/0 Tainted: G S 4.20.0-rc4 #16 [ 3.747413] Hardware name: Qualcomm Technologies, Inc. 
MSM8998 v1 MTP (DT) [ 3.755295] pstate: 00000005 (nzcv daif -PAN -UAO) [ 3.761978] pc : __memcpy_fromio+0x68/0x80 [ 3.766718] lr : ufshcd_dump_regs+0x50/0xb0 [ 3.770767] sp : ffff00000807ba00 [ 3.774830] x29: ffff00000807ba00 x28: 00000000fffffffb [ 3.778344] x27: ffff0000089db068 x26: ffff8000f6e58000 [ 3.783728] x25: 000000000000000e x24: 0000000000000800 [ 3.789023] x23: ffff8000f6e587c8 x22: 0000000000000800 [ 3.794319] x21: ffff000008908368 x20: ffff8000f6e1ab80 [ 3.799615] x19: 000000000000006c x18: ffffffffffffffff [ 3.804910] x17: 0000000000000000 x16: 0000000000000000 [ 3.810206] x15: ffff000009199648 x14: ffff000089244187 [ 3.815502] x13: ffff000009244195 x12: ffff0000091ab000 [ 3.820797] x11: 0000000005f5e0ff x10: ffff0000091998a0 [ 3.826093] x9 : 0000000000000000 x8 : ffff8000f6e1ac00 [ 3.831389] x7 : 0000000000000000 x6 : 0000000000000068 [ 3.836676] x5 : ffff8000f6e1abe8 x4 : 0000000000000000 [ 3.841971] x3 : ffff00000928c868 x2 : ffff8000f6e1abec [ 3.847267] x1 : ffff00000928c868 x0 : ffff8000f6e1abe8 [ 3.852567] Process swapper/0 (pid: 1, stack limit = 0x(____ptrval____)) [ 3.857900] Call trace: [ 3.864473] __memcpy_fromio+0x68/0x80 [ 3.866683] ufs_qcom_dump_dbg_regs+0x1c0/0x370 [ 3.870522] ufshcd_print_host_regs+0x168/0x190 [ 3.874946] ufshcd_init+0xd4c/0xde0 [ 3.879459] ufshcd_pltfrm_init+0x3c8/0x550 [ 3.883264] ufs_qcom_probe+0x24/0x60 [ 3.887188] platform_drv_probe+0x50/0xa0 Assuming aligned 32-bit registers, let's use readl, after making sure that 'offset' and 'len' are indeed multiples of 4. Fixes: ba80917d9932d ("scsi: ufs: ufshcd_dump_regs to use memcpy_fromio") Cc: Signed-off-by: Marc Gonzalez Acked-by: Tomas Winkler Reviewed-by: Jeffrey Hugo Reviewed-by: Bjorn Andersson Tested-by: Evan Green Signed-off-by: Martin K. Petersen Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/ufs/ufshcd.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c index 0b81d9d03357..12ddb5928a73 100644 --- a/drivers/scsi/ufs/ufshcd.c +++ b/drivers/scsi/ufs/ufshcd.c @@ -109,13 +109,19 @@ int ufshcd_dump_regs(struct ufs_hba *hba, size_t offset, size_t len, const char *prefix) { - u8 *regs; + u32 *regs; + size_t pos; + + if (offset % 4 != 0 || len % 4 != 0) /* keep readl happy */ + return -EINVAL; regs = kzalloc(len, GFP_KERNEL); if (!regs) return -ENOMEM; - memcpy_fromio(regs, hba->mmio_base + offset, len); + for (pos = 0; pos < len; pos += 4) + regs[pos / 4] = ufshcd_readl(hba, offset + pos); + ufshcd_hex_dump(prefix, regs, len); kfree(regs); From 5b779f8417733e7b321b50d18116e15230da12e9 Mon Sep 17 00:00:00 2001 From: Joe Thornber Date: Tue, 15 Jan 2019 13:27:01 -0500 Subject: [PATCH 063/104] dm thin: fix passdown_double_checking_shared_status() commit d445bd9cec1a850c2100fcf53684c13b3fd934f2 upstream. Commit 00a0ea33b495 ("dm thin: do not queue freed thin mapping for next stage processing") changed process_prepared_discard_passdown_pt1() to increment all the blocks being discarded until after the passdown had completed to avoid them being prematurely reused. IO issued to a thin device that breaks sharing with a snapshot, followed by a discard issued to snapshot(s) that previously shared the block(s), results in passdown_double_checking_shared_status() being called to iterate through the blocks double checking their reference count is zero and issuing the passdown if so. So a side effect of commit 00a0ea33b495 is passdown_double_checking_shared_status() was broken. 
Fix this by checking if the block reference count is greater than 1. Also, rename dm_pool_block_is_used() to dm_pool_block_is_shared(). Fixes: 00a0ea33b495 ("dm thin: do not queue freed thin mapping for next stage processing") Cc: stable@vger.kernel.org # 4.9+ Reported-by: ryan.p.norwood@gmail.com Signed-off-by: Joe Thornber Signed-off-by: Mike Snitzer Signed-off-by: Greg Kroah-Hartman --- drivers/md/dm-thin-metadata.c | 4 ++-- drivers/md/dm-thin-metadata.h | 2 +- drivers/md/dm-thin.c | 10 +++++----- 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/md/dm-thin-metadata.c b/drivers/md/dm-thin-metadata.c index 20b0776e39ef..ed3caceaed07 100644 --- a/drivers/md/dm-thin-metadata.c +++ b/drivers/md/dm-thin-metadata.c @@ -1678,7 +1678,7 @@ int dm_thin_remove_range(struct dm_thin_device *td, return r; } -int dm_pool_block_is_used(struct dm_pool_metadata *pmd, dm_block_t b, bool *result) +int dm_pool_block_is_shared(struct dm_pool_metadata *pmd, dm_block_t b, bool *result) { int r; uint32_t ref_count; @@ -1686,7 +1686,7 @@ int dm_pool_block_is_used(struct dm_pool_metadata *pmd, dm_block_t b, bool *resu down_read(&pmd->root_lock); r = dm_sm_get_count(pmd->data_sm, b, &ref_count); if (!r) - *result = (ref_count != 0); + *result = (ref_count > 1); up_read(&pmd->root_lock); return r; diff --git a/drivers/md/dm-thin-metadata.h b/drivers/md/dm-thin-metadata.h index 35e954ea20a9..f6be0d733c20 100644 --- a/drivers/md/dm-thin-metadata.h +++ b/drivers/md/dm-thin-metadata.h @@ -195,7 +195,7 @@ int dm_pool_get_metadata_dev_size(struct dm_pool_metadata *pmd, int dm_pool_get_data_dev_size(struct dm_pool_metadata *pmd, dm_block_t *result); -int dm_pool_block_is_used(struct dm_pool_metadata *pmd, dm_block_t b, bool *result); +int dm_pool_block_is_shared(struct dm_pool_metadata *pmd, dm_block_t b, bool *result); int dm_pool_inc_data_range(struct dm_pool_metadata *pmd, dm_block_t b, dm_block_t e); int dm_pool_dec_data_range(struct dm_pool_metadata *pmd, dm_block_t b, dm_block_t e); diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c index 1f225a1e08dd..c30a7850b2da 100644 --- a/drivers/md/dm-thin.c +++ b/drivers/md/dm-thin.c @@ -1048,7 +1048,7 @@ static void passdown_double_checking_shared_status(struct dm_thin_new_mapping *m * passdown we have to check that these blocks are now unused. */ int r = 0; - bool used = true; + bool shared = true; struct thin_c *tc = m->tc; struct pool *pool = tc->pool; dm_block_t b = m->data_block, e, end = m->data_block + m->virt_end - m->virt_begin; @@ -1058,11 +1058,11 @@ static void passdown_double_checking_shared_status(struct dm_thin_new_mapping *m while (b != end) { /* find start of unmapped run */ for (; b < end; b++) { - r = dm_pool_block_is_used(pool->pmd, b, &used); + r = dm_pool_block_is_shared(pool->pmd, b, &shared); if (r) goto out; - if (!used) + if (!shared) break; } @@ -1071,11 +1071,11 @@ static void passdown_double_checking_shared_status(struct dm_thin_new_mapping *m /* find end of run */ for (e = b + 1; e != end; e++) { - r = dm_pool_block_is_used(pool->pmd, e, &used); + r = dm_pool_block_is_shared(pool->pmd, e, &shared); if (r) goto out; - if (used) + if (shared) break; } From b911f1dcb60da14fe61bf3be37adc49f6b5b278e Mon Sep 17 00:00:00 2001 From: Milan Broz Date: Wed, 9 Jan 2019 11:57:14 +0100 Subject: [PATCH 064/104] dm crypt: fix parsing of extended IV arguments commit 1856b9f7bcc8e9bdcccc360aabb56fbd4dd6c565 upstream. 
The dm-crypt cipher specification in a mapping table is defined as: cipher[:keycount]-chainmode-ivmode[:ivopts] or (new crypt API format): capi:cipher_api_spec-ivmode[:ivopts] For ESSIV, the parameter includes hash specification, for example: aes-cbc-essiv:sha256 The implementation expected that additional IV option to never include another dash '-' character. But, with SHA3, there are names like sha3-256; so the mapping table parser fails: dmsetup create test --table "0 8 crypt aes-cbc-essiv:sha3-256 9c1185a5c5e9fc54612808977ee8f5b9e 0 /dev/sdb 0" or (new crypt API format) dmsetup create test --table "0 8 crypt capi:cbc(aes)-essiv:sha3-256 9c1185a5c5e9fc54612808977ee8f5b9e 0 /dev/sdb 0" device-mapper: crypt: Ignoring unexpected additional cipher options device-mapper: table: 253:0: crypt: Error creating IV device-mapper: ioctl: error adding target to table Fix the dm-crypt constructor to ignore additional dash in IV options and also remove a bogus warning (that is ignored anyway). Cc: stable@vger.kernel.org # 4.12+ Signed-off-by: Milan Broz Signed-off-by: Mike Snitzer Signed-off-by: Greg Kroah-Hartman --- drivers/md/dm-crypt.c | 25 +++++++++++++++++-------- 1 file changed, 17 insertions(+), 8 deletions(-) diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c index f2ec882f96be..5921ecc670c1 100644 --- a/drivers/md/dm-crypt.c +++ b/drivers/md/dm-crypt.c @@ -2405,9 +2405,21 @@ static int crypt_ctr_cipher_new(struct dm_target *ti, char *cipher_in, char *key * capi:cipher_api_spec-iv:ivopts */ tmp = &cipher_in[strlen("capi:")]; - cipher_api = strsep(&tmp, "-"); - *ivmode = strsep(&tmp, ":"); - *ivopts = tmp; + + /* Separate IV options if present, it can contain another '-' in hash name */ + *ivopts = strrchr(tmp, ':'); + if (*ivopts) { + **ivopts = '\0'; + (*ivopts)++; + } + /* Parse IV mode */ + *ivmode = strrchr(tmp, '-'); + if (*ivmode) { + **ivmode = '\0'; + (*ivmode)++; + } + /* The rest is crypto API spec */ + cipher_api = tmp; if (*ivmode && !strcmp(*ivmode, "lmk")) cc->tfms_count = 64; @@ -2477,11 +2489,8 @@ static int crypt_ctr_cipher_old(struct dm_target *ti, char *cipher_in, char *key goto bad_mem; chainmode = strsep(&tmp, "-"); - *ivopts = strsep(&tmp, "-"); - *ivmode = strsep(&*ivopts, ":"); - - if (tmp) - DMWARN("Ignoring unexpected additional cipher options"); + *ivmode = strsep(&tmp, ":"); + *ivopts = tmp; /* * For compatibility with the original dm-crypt mapping format, if From c1bfae3403675429a28a9f9f05c3c18e85737699 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Tue, 15 Jan 2019 12:09:09 -0500 Subject: [PATCH 065/104] drm/amdgpu: Add APTX quirk for Lenovo laptop commit f15f3eb26e8d9d25ea2330ed1273473df2f039df upstream. Needs ATPX rather than _PR3 for dGPU power control. 
Bug: https://bugzilla.kernel.org/show_bug.cgi?id=202263 Reviewed-by: Jim Qu Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/amd/amdgpu/amdgpu_atpx_handler.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atpx_handler.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_atpx_handler.c index a028661d9e20..92b11de19581 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atpx_handler.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atpx_handler.c @@ -576,6 +576,7 @@ static const struct amdgpu_px_quirk amdgpu_px_quirk_list[] = { { 0x1002, 0x6900, 0x1028, 0x0812, AMDGPU_PX_QUIRK_FORCE_ATPX }, { 0x1002, 0x6900, 0x1028, 0x0813, AMDGPU_PX_QUIRK_FORCE_ATPX }, { 0x1002, 0x6900, 0x1025, 0x125A, AMDGPU_PX_QUIRK_FORCE_ATPX }, + { 0x1002, 0x6900, 0x17AA, 0x3806, AMDGPU_PX_QUIRK_FORCE_ATPX }, { 0, 0, 0, 0, 0 }, }; From 6d3dabbdf46e9eb16b771465c8da4f0d5763e15d Mon Sep 17 00:00:00 2001 From: Alexander Popov Date: Mon, 21 Jan 2019 15:48:40 +0300 Subject: [PATCH 066/104] KVM: x86: Fix single-step debugging commit 5cc244a20b86090c087073c124284381cdf47234 upstream. The single-step debugging of KVM guests on x86 is broken: if we run gdb 'stepi' command at the breakpoint when the guest interrupts are enabled, RIP always jumps to native_apic_mem_write(). Then other nasty effects follow. Long investigation showed that on Jun 7, 2017 the commit c8401dda2f0a00cd25c0 ("KVM: x86: fix singlestepping over syscall") introduced the kvm_run.debug corruption: kvm_vcpu_do_singlestep() can be called without X86_EFLAGS_TF set. Let's fix it. Please consider that for -stable. Signed-off-by: Alexander Popov Cc: stable@vger.kernel.org Fixes: c8401dda2f0a00cd25c0 ("KVM: x86: fix singlestepping over syscall") Signed-off-by: Paolo Bonzini Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/x86.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 956eecd227f8..5bd616c8824b 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -6277,8 +6277,7 @@ restart: toggle_interruptibility(vcpu, ctxt->interruptibility); vcpu->arch.emulate_regs_need_sync_to_vcpu = false; kvm_rip_write(vcpu, ctxt->eip); - if (r == EMULATE_DONE && - (ctxt->tf || (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP))) + if (r == EMULATE_DONE && ctxt->tf) kvm_vcpu_do_singlestep(vcpu, &r); if (!ctxt->have_exception || exception_type(ctxt->exception.vector) == EXCPT_TRAP) From b2598858ac219d68fac3b67883e56be59057a61e Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Wed, 23 Jan 2019 09:22:39 -0800 Subject: [PATCH 067/104] KVM: x86: Fix PV IPIs for 32-bit KVM host commit 1ed199a41c70ad7bfaee8b14f78e791fcf43b278 upstream. The recognition of the KVM_HC_SEND_IPI hypercall was unintentionally wrapped in "#ifdef CONFIG_X86_64", causing 32-bit KVM hosts to reject any and all PV IPI requests despite advertising the feature. This results in all KVM paravirtualized guests hanging during SMP boot due to IPIs never being delivered. 
Fixes: 4180bf1b655a ("KVM: X86: Implement "send IPI" hypercall") Cc: stable@vger.kernel.org Cc: Wanpeng Li Signed-off-by: Sean Christopherson Signed-off-by: Paolo Bonzini Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/x86.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 5bd616c8824b..5a9a3ebe8fba 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -6867,10 +6867,10 @@ int kvm_emulate_hypercall(struct kvm_vcpu *vcpu) case KVM_HC_CLOCK_PAIRING: ret = kvm_pv_clock_pairing(vcpu, a0, a1); break; +#endif case KVM_HC_SEND_IPI: ret = kvm_pv_send_ipi(vcpu->kvm, a0, a1, a2, a3, op_64_bit); break; -#endif default: ret = -KVM_ENOSYS; break; From bbb8c5c75f6e99138bb1b44c7ed1f083f1d88acd Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Wed, 23 Jan 2019 09:22:40 -0800 Subject: [PATCH 068/104] KVM: x86: WARN_ONCE if sending a PV IPI returns a fatal error commit de81c2f912ef57917bdc6d63b410c534c3e07982 upstream. KVM hypercalls return a negative value error code in case of a fatal error, e.g. when the hypercall isn't supported or was made with invalid parameters. WARN_ONCE on fatal errors when sending PV IPIs as any such error all but guarantees an SMP system will hang due to a missing IPI. Fixes: aaffcfd1e82d ("KVM: X86: Implement PV IPIs in linux guest") Cc: stable@vger.kernel.org Cc: Wanpeng Li Signed-off-by: Sean Christopherson Signed-off-by: Paolo Bonzini Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/kvm.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c index d9b71924c23c..7f89d609095a 100644 --- a/arch/x86/kernel/kvm.c +++ b/arch/x86/kernel/kvm.c @@ -457,6 +457,7 @@ static void __send_ipi_mask(const struct cpumask *mask, int vector) #else u64 ipi_bitmap = 0; #endif + long ret; if (cpumask_empty(mask)) return; @@ -482,8 +483,9 @@ static void __send_ipi_mask(const struct cpumask *mask, int vector) } else if (apic_id < min + KVM_IPI_CLUSTER_SIZE) { max = apic_id < max ? max : apic_id; } else { - kvm_hypercall4(KVM_HC_SEND_IPI, (unsigned long)ipi_bitmap, + ret = kvm_hypercall4(KVM_HC_SEND_IPI, (unsigned long)ipi_bitmap, (unsigned long)(ipi_bitmap >> BITS_PER_LONG), min, icr); + WARN_ONCE(ret < 0, "KVM: failed to send PV IPI: %ld", ret); min = max = apic_id; ipi_bitmap = 0; } @@ -491,8 +493,9 @@ static void __send_ipi_mask(const struct cpumask *mask, int vector) } if (ipi_bitmap) { - kvm_hypercall4(KVM_HC_SEND_IPI, (unsigned long)ipi_bitmap, + ret = kvm_hypercall4(KVM_HC_SEND_IPI, (unsigned long)ipi_bitmap, (unsigned long)(ipi_bitmap >> BITS_PER_LONG), min, icr); + WARN_ONCE(ret < 0, "KVM: failed to send PV IPI: %ld", ret); } local_irq_restore(flags); From d58f5e638b18430593442083ee428d3783a185fc Mon Sep 17 00:00:00 2001 From: Tom Roeder Date: Thu, 24 Jan 2019 13:48:20 -0800 Subject: [PATCH 069/104] kvm: x86/vmx: Use kzalloc for cached_vmcs12 commit 3a33d030daaa7c507e1c12d5adcf828248429593 upstream. This changes the allocation of cached_vmcs12 to use kzalloc instead of kmalloc. This removes the information leak found by Syzkaller (see Reported-by) in this case and prevents similar leaks from happening based on cached_vmcs12. It also changes vmx_get_nested_state to copy out the full 4k VMCS12_SIZE in copy_to_user rather than only the size of the struct. Tested: rebuilt against head, booted, and ran the syszkaller repro https://syzkaller.appspot.com/text?tag=ReproC&x=174efca3400000 without observing any problems. 
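A minimal sketch of the leak pattern being closed (sizes and names are hypothetical, not the KVM code itself): whenever the copy-out length is a fixed on-wire size larger than the portion of the buffer that was actually written, the allocation itself has to be zeroed, otherwise the tail bytes are stale heap contents:

  #define WIRE_SIZE 4096          /* fixed on-wire size, always copied out */

  static int copy_state_to_user(void __user *dst, const void *state)
  {
          /*
           * 'state' must come from kzalloc(WIRE_SIZE, GFP_KERNEL): with a
           * plain kmalloc() every byte beyond what was actually written is
           * whatever the heap held before and is disclosed right here.
           */
          if (copy_to_user(dst, state, WIRE_SIZE))
                  return -EFAULT;
          return 0;
  }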
Reported-by: syzbot+ded1696f6b50b615b630@syzkaller.appspotmail.com Fixes: 8fcc4b5923af5de58b80b53a069453b135693304 Cc: stable@vger.kernel.org Signed-off-by: Tom Roeder Signed-off-by: Paolo Bonzini Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/vmx.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 841740045554..3721a3426f04 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -8290,11 +8290,11 @@ static int enter_vmx_operation(struct kvm_vcpu *vcpu) if (r < 0) goto out_vmcs02; - vmx->nested.cached_vmcs12 = kmalloc(VMCS12_SIZE, GFP_KERNEL); + vmx->nested.cached_vmcs12 = kzalloc(VMCS12_SIZE, GFP_KERNEL); if (!vmx->nested.cached_vmcs12) goto out_cached_vmcs12; - vmx->nested.cached_shadow_vmcs12 = kmalloc(VMCS12_SIZE, GFP_KERNEL); + vmx->nested.cached_shadow_vmcs12 = kzalloc(VMCS12_SIZE, GFP_KERNEL); if (!vmx->nested.cached_shadow_vmcs12) goto out_cached_shadow_vmcs12; @@ -13984,13 +13984,17 @@ static int vmx_get_nested_state(struct kvm_vcpu *vcpu, else if (enable_shadow_vmcs && !vmx->nested.sync_shadow_vmcs) copy_shadow_to_vmcs12(vmx); - if (copy_to_user(user_kvm_nested_state->data, vmcs12, sizeof(*vmcs12))) + /* + * Copy over the full allocated size of vmcs12 rather than just the size + * of the struct. + */ + if (copy_to_user(user_kvm_nested_state->data, vmcs12, VMCS12_SIZE)) return -EFAULT; if (nested_cpu_has_shadow_vmcs(vmcs12) && vmcs12->vmcs_link_pointer != -1ull) { if (copy_to_user(user_kvm_nested_state->data + VMCS12_SIZE, - get_shadow_vmcs12(vcpu), sizeof(*vmcs12))) + get_shadow_vmcs12(vcpu), VMCS12_SIZE)) return -EFAULT; } From f9203cd03125ab13a7c72c418f91ef9bdc4a92b9 Mon Sep 17 00:00:00 2001 From: KarimAllah Ahmed Date: Sat, 20 Oct 2018 23:42:59 +0200 Subject: [PATCH 070/104] KVM/nVMX: Do not validate that posted_intr_desc_addr is page aligned MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 22a7cdcae6a4a3c8974899e62851d270956f58ce upstream. The spec only requires the posted interrupt descriptor address to be 64-bytes aligned (i.e. bits[0:5] == 0). Using page_address_valid also forces the address to be page aligned. Only validate that the address does not cross the maximum physical address without enforcing a page alignment. Cc: Paolo Bonzini Cc: Radim Krčmář Cc: Thomas Gleixner Cc: Ingo Molnar Cc: Borislav Petkov Cc: H. Peter Anvin Cc: x86@kernel.org Cc: kvm@vger.kernel.org Cc: linux-kernel@vger.kernel.org Fixes: 6de84e581c0 ("nVMX x86: check posted-interrupt descriptor addresss on vmentry of L2") Signed-off-by: KarimAllah Ahmed Reviewed-by: Jim Mattson Reviewed-by: Krish Sadhuhan Signed-off-by: Radim Krčmář From: Mark Mielke Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/vmx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 3721a3426f04..39a0e34ff676 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -11733,7 +11733,7 @@ static int nested_vmx_check_apicv_controls(struct kvm_vcpu *vcpu, !nested_exit_intr_ack_set(vcpu) || (vmcs12->posted_intr_nv & 0xff00) || (vmcs12->posted_intr_desc_addr & 0x3f) || - (!page_address_valid(vcpu, vmcs12->posted_intr_desc_addr)))) + (vmcs12->posted_intr_desc_addr >> cpuid_maxphyaddr(vcpu)))) return -EINVAL; /* tpr shadow is needed by all apicv features. 
*/ From db01b8d40feb341833ac45f9173b569ec34773f3 Mon Sep 17 00:00:00 2001 From: Dave Hansen Date: Wed, 2 Jan 2019 13:56:55 -0800 Subject: [PATCH 071/104] x86/pkeys: Properly copy pkey state at fork() commit a31e184e4f69965c99c04cc5eb8a4920e0c63737 upstream. Memory protection key behavior should be the same in a child as it was in the parent before a fork. But, there is a bug that resets the state in the child at fork instead of preserving it. The creation of new mm's is a bit convoluted. At fork(), the code does: 1. memcpy() the parent mm to initialize child 2. mm_init() to initalize some select stuff stuff 3. dup_mmap() to create true copies that memcpy() did not do right For pkeys two bits of state need to be preserved across a fork: 'execute_only_pkey' and 'pkey_allocation_map'. Those are preserved by the memcpy(), but mm_init() invokes init_new_context() which overwrites 'execute_only_pkey' and 'pkey_allocation_map' with "new" values. The author of the code erroneously believed that init_new_context is *only* called at execve()-time. But, alas, init_new_context() is used at execve() and fork(). The result is that, after a fork(), the child's pkey state ends up looking like it does after an execve(), which is totally wrong. pkeys that are already allocated can be allocated again, for instance. To fix this, add code called by dup_mmap() to copy the pkey state from parent to child explicitly. Also add a comment above init_new_context() to make it more clear to the next poor sod what this code is used for. Fixes: e8c24d3a23a ("x86/pkeys: Allocation/free syscalls") Signed-off-by: Dave Hansen Signed-off-by: Thomas Gleixner Reviewed-by: Thomas Gleixner Cc: bp@alien8.de Cc: hpa@zytor.com Cc: peterz@infradead.org Cc: mpe@ellerman.id.au Cc: will.deacon@arm.com Cc: luto@kernel.org Cc: jroedel@suse.de Cc: stable@vger.kernel.org Cc: Borislav Petkov Cc: "H. Peter Anvin" Cc: Peter Zijlstra Cc: Michael Ellerman Cc: Will Deacon Cc: Andy Lutomirski Cc: Joerg Roedel Link: https://lkml.kernel.org/r/20190102215655.7A69518C@viggo.jf.intel.com Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/mmu_context.h | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h index eeeb9289c764..2252b63d38b5 100644 --- a/arch/x86/include/asm/mmu_context.h +++ b/arch/x86/include/asm/mmu_context.h @@ -178,6 +178,10 @@ static inline void switch_ldt(struct mm_struct *prev, struct mm_struct *next) void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk); +/* + * Init a new mm. Used on mm copies, like at fork() + * and on mm's that are brand-new, like at execve(). 
+ */ static inline int init_new_context(struct task_struct *tsk, struct mm_struct *mm) { @@ -228,8 +232,22 @@ do { \ } while (0) #endif +static inline void arch_dup_pkeys(struct mm_struct *oldmm, + struct mm_struct *mm) +{ +#ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS + if (!cpu_feature_enabled(X86_FEATURE_OSPKE)) + return; + + /* Duplicate the oldmm pkey state in mm: */ + mm->context.pkey_allocation_map = oldmm->context.pkey_allocation_map; + mm->context.execute_only_pkey = oldmm->context.execute_only_pkey; +#endif +} + static inline int arch_dup_mmap(struct mm_struct *oldmm, struct mm_struct *mm) { + arch_dup_pkeys(oldmm, mm); paravirt_arch_dup_mmap(oldmm, mm); return ldt_dup_context(oldmm, mm); } From 334c0e1b3cdd6dfbfd319dca47c3d0a1b17be896 Mon Sep 17 00:00:00 2001 From: Dave Hansen Date: Wed, 2 Jan 2019 13:56:57 -0800 Subject: [PATCH 072/104] x86/selftests/pkeys: Fork() to check for state being preserved commit e1812933b17be7814f51b6c310c5d1ced7a9a5f5 upstream. There was a bug where the per-mm pkey state was not being preserved across fork() in the child. fork() is performed in the pkey selftests, but all of the pkey activity is performed in the parent. The child does not perform any actions sensitive to pkey state. To make the test more sensitive to these kinds of bugs, add a fork() where the parent exits, and execution continues in the child. To achieve this let the key exhaustion test not terminate at the first allocation failure and fork after 2*NR_PKEYS loops and continue in the child. Signed-off-by: Dave Hansen Signed-off-by: Thomas Gleixner Cc: bp@alien8.de Cc: hpa@zytor.com Cc: peterz@infradead.org Cc: mpe@ellerman.id.au Cc: will.deacon@arm.com Cc: luto@kernel.org Cc: jroedel@suse.de Cc: stable@vger.kernel.org Cc: Borislav Petkov Cc: "H. Peter Anvin" Cc: Peter Zijlstra Cc: Michael Ellerman Cc: Will Deacon Cc: Andy Lutomirski Cc: Joerg Roedel Link: https://lkml.kernel.org/r/20190102215657.585704B7@viggo.jf.intel.com Signed-off-by: Greg Kroah-Hartman --- tools/testing/selftests/x86/protection_keys.c | 41 ++++++++++++++----- 1 file changed, 31 insertions(+), 10 deletions(-) diff --git a/tools/testing/selftests/x86/protection_keys.c b/tools/testing/selftests/x86/protection_keys.c index 460b4bdf4c1e..5d546dcdbc80 100644 --- a/tools/testing/selftests/x86/protection_keys.c +++ b/tools/testing/selftests/x86/protection_keys.c @@ -1133,6 +1133,21 @@ void test_pkey_syscalls_bad_args(int *ptr, u16 pkey) pkey_assert(err); } +void become_child(void) +{ + pid_t forkret; + + forkret = fork(); + pkey_assert(forkret >= 0); + dprintf3("[%d] fork() ret: %d\n", getpid(), forkret); + + if (!forkret) { + /* in the child */ + return; + } + exit(0); +} + /* Assumes that all pkeys other than 'pkey' are unallocated */ void test_pkey_alloc_exhaust(int *ptr, u16 pkey) { @@ -1141,7 +1156,7 @@ void test_pkey_alloc_exhaust(int *ptr, u16 pkey) int nr_allocated_pkeys = 0; int i; - for (i = 0; i < NR_PKEYS*2; i++) { + for (i = 0; i < NR_PKEYS*3; i++) { int new_pkey; dprintf1("%s() alloc loop: %d\n", __func__, i); new_pkey = alloc_pkey(); @@ -1152,20 +1167,26 @@ void test_pkey_alloc_exhaust(int *ptr, u16 pkey) if ((new_pkey == -1) && (errno == ENOSPC)) { dprintf2("%s() failed to allocate pkey after %d tries\n", __func__, nr_allocated_pkeys); - break; + } else { + /* + * Ensure the number of successes never + * exceeds the number of keys supported + * in the hardware. 
+ */ + pkey_assert(nr_allocated_pkeys < NR_PKEYS); + allocated_pkeys[nr_allocated_pkeys++] = new_pkey; } - pkey_assert(nr_allocated_pkeys < NR_PKEYS); - allocated_pkeys[nr_allocated_pkeys++] = new_pkey; + + /* + * Make sure that allocation state is properly + * preserved across fork(). + */ + if (i == NR_PKEYS*2) + become_child(); } dprintf3("%s()::%d\n", __func__, __LINE__); - /* - * ensure it did not reach the end of the loop without - * failure: - */ - pkey_assert(i < NR_PKEYS*2); - /* * There are 16 pkeys supported in hardware. Three are * allocated by the time we get here: From ed334be9c2ed75b6fc348f5560f69cc418750c6b Mon Sep 17 00:00:00 2001 From: Daniel Drake Date: Mon, 7 Jan 2019 11:40:24 +0800 Subject: [PATCH 073/104] x86/kaslr: Fix incorrect i8254 outb() parameters commit 7e6fc2f50a3197d0e82d1c0e86282976c9e6c8a4 upstream. The outb() function takes parameters value and port, in that order. Fix the parameters used in the kalsr i8254 fallback code. Fixes: 5bfce5ef55cb ("x86, kaslr: Provide randomness functions") Signed-off-by: Daniel Drake Signed-off-by: Thomas Gleixner Cc: bp@alien8.de Cc: hpa@zytor.com Cc: linux@endlessm.com Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/20190107034024.15005-1-drake@endlessm.com Signed-off-by: Greg Kroah-Hartman --- arch/x86/lib/kaslr.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/lib/kaslr.c b/arch/x86/lib/kaslr.c index 79778ab200e4..a53665116458 100644 --- a/arch/x86/lib/kaslr.c +++ b/arch/x86/lib/kaslr.c @@ -36,8 +36,8 @@ static inline u16 i8254(void) u16 status, timer; do { - outb(I8254_PORT_CONTROL, - I8254_CMD_READBACK | I8254_SELECT_COUNTER0); + outb(I8254_CMD_READBACK | I8254_SELECT_COUNTER0, + I8254_PORT_CONTROL); status = inb(I8254_PORT_COUNTER0); timer = inb(I8254_PORT_COUNTER0); timer |= inb(I8254_PORT_COUNTER0) << 8; From dd085f9b1dc15dfbd3e5ced275a92324f5e16a44 Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Tue, 15 Jan 2019 09:58:16 -0700 Subject: [PATCH 074/104] x86/entry/64/compat: Fix stack switching for XEN PV commit fc24d75a7f91837d7918e40719575951820b2b8f upstream. While in the native case entry into the kernel happens on the trampoline stack, PV Xen kernels get entered with the current thread stack right away. Hence source and destination stacks are identical in that case, and special care is needed. Other than in sync_regs() the copying done on the INT80 path isn't NMI / #MC safe, as either of these events occurring in the middle of the stack copying would clobber data on the (source) stack. There is similar code in interrupt_entry() and nmi(), but there is no fixup required because those code paths are unreachable in XEN PV guests. [ tglx: Sanitized subject, changelog, Fixes tag and stable mail address. 
Sigh ] Fixes: 7f2590a110b8 ("x86/entry/64: Use a per-CPU trampoline stack for IDT entries") Signed-off-by: Jan Beulich Signed-off-by: Thomas Gleixner Reviewed-by: Juergen Gross Acked-by: Andy Lutomirski Cc: Peter Anvin Cc: xen-devel@lists.xenproject.org> Cc: Boris Ostrovsky Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/5C3E1128020000780020DFAD@prv1-mh.provo.novell.com Signed-off-by: Greg Kroah-Hartman --- arch/x86/entry/entry_64_compat.S | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S index 7d0df78db727..40d2834a8101 100644 --- a/arch/x86/entry/entry_64_compat.S +++ b/arch/x86/entry/entry_64_compat.S @@ -356,7 +356,8 @@ ENTRY(entry_INT80_compat) /* Need to switch before accessing the thread stack. */ SWITCH_TO_KERNEL_CR3 scratch_reg=%rdi - movq %rsp, %rdi + /* In the Xen PV case we already run on the thread stack. */ + ALTERNATIVE "movq %rsp, %rdi", "jmp .Lint80_keep_stack", X86_FEATURE_XENPV movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp pushq 6*8(%rdi) /* regs->ss */ @@ -365,8 +366,9 @@ ENTRY(entry_INT80_compat) pushq 3*8(%rdi) /* regs->cs */ pushq 2*8(%rdi) /* regs->ip */ pushq 1*8(%rdi) /* regs->orig_ax */ - pushq (%rdi) /* pt_regs->di */ +.Lint80_keep_stack: + pushq %rsi /* pt_regs->si */ xorl %esi, %esi /* nospec si */ pushq %rdx /* pt_regs->dx */ From 21c0d1621b8d4b20f19f383de03d14095657016f Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 11 Jan 2019 14:33:16 +0100 Subject: [PATCH 075/104] posix-cpu-timers: Unbreak timer rearming commit 93ad0fc088c5b4631f796c995bdd27a082ef33a6 upstream. The recent commit which prevented a division by 0 issue in the alarm timer code broke posix CPU timers as an unwanted side effect. The reason is that the common rearm code checks for timer->it_interval being 0 now. What went unnoticed is that the posix cpu timer setup does not initialize timer->it_interval as it stores the interval in CPU timer specific storage. The reason for the separate storage is historical as the posix CPU timers always had a 64bit nanoseconds representation internally while timer->it_interval is type ktime_t which used to be a modified timespec representation on 32bit machines. Instead of reverting the offending commit and fixing the alarmtimer issue in the alarmtimer code, store the interval in timer->it_interval at CPU timer setup time so the common code check works. This also repairs the existing inconistency of the posix CPU timer code which kept a single shot timer armed despite of the interval being 0. The separate storage can be removed in mainline, but that needs to be a separate commit as the current one has to be backported to stable kernels. Fixes: 0e334db6bb4b ("posix-timers: Fix division by zero bug") Reported-by: H.J. Lu Signed-off-by: Thomas Gleixner Cc: John Stultz Cc: Peter Zijlstra Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/20190111133500.840117406@linutronix.de Signed-off-by: Greg Kroah-Hartman --- kernel/time/posix-cpu-timers.c | 1 + 1 file changed, 1 insertion(+) diff --git a/kernel/time/posix-cpu-timers.c b/kernel/time/posix-cpu-timers.c index ce32cf741b25..76801b9b481e 100644 --- a/kernel/time/posix-cpu-timers.c +++ b/kernel/time/posix-cpu-timers.c @@ -685,6 +685,7 @@ static int posix_cpu_timer_set(struct k_itimer *timer, int timer_flags, * set up the signal and overrun bookkeeping. 
*/ timer->it.cpu.incr = timespec64_to_ns(&new->it_interval); + timer->it_interval = ns_to_ktime(timer->it.cpu.incr); /* * This acts as a modification timestamp for the timer, From 6f4db68ab5ceae219fb97b667e4439cac5d820f0 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 18 Jan 2019 11:49:58 +0100 Subject: [PATCH 076/104] net: sun: cassini: Cleanup license conflict commit 56cb4e5034998b5522a657957321ca64ca2ea0a0 upstream. The recent addition of SPDX license identifiers to the files in drivers/net/ethernet/sun created a licensing conflict. The cassini driver files contain a proper license notice: * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License as * published by the Free Software Foundation; either version 2 of the * License, or (at your option) any later version. but the SPDX change added: SPDX-License-Identifier: GPL-2.0 So the file got tagged GPL v2 only while in fact it is licensed under GPL v2 or later. It's nice that people care about the SPDX tags, but they need to be more careful about it. Not everything under (the) sun belongs to ... Fix up the SPDX identifier and remove the boiler plate text as it is redundant. Fixes: c861ef83d771 ("sun: Add SPDX license tags to Sun network drivers") Signed-off-by: Thomas Gleixner Cc: Shannon Nelson Cc: Zhu Yanjun Cc: David S. Miller Cc: netdev@vger.kernel.org Cc: stable@vger.kernel.org Acked-by: Shannon Nelson Reviewed-by: Zhu Yanjun Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/sun/cassini.c | 15 +-------------- drivers/net/ethernet/sun/cassini.h | 15 +-------------- 2 files changed, 2 insertions(+), 28 deletions(-) diff --git a/drivers/net/ethernet/sun/cassini.c b/drivers/net/ethernet/sun/cassini.c index 9020b084b953..7ec4eb74fe21 100644 --- a/drivers/net/ethernet/sun/cassini.c +++ b/drivers/net/ethernet/sun/cassini.c @@ -1,22 +1,9 @@ -// SPDX-License-Identifier: GPL-2.0 +// SPDX-License-Identifier: GPL-2.0+ /* cassini.c: Sun Microsystems Cassini(+) ethernet driver. * * Copyright (C) 2004 Sun Microsystems Inc. * Copyright (C) 2003 Adrian Sun (asun@darksunrising.com) * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation; either version 2 of the - * License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, see . - * * This driver uses the sungem driver (c) David Miller * (davem@redhat.com) as its basis. * diff --git a/drivers/net/ethernet/sun/cassini.h b/drivers/net/ethernet/sun/cassini.h index 13f3860496a8..ae5f05f03f88 100644 --- a/drivers/net/ethernet/sun/cassini.h +++ b/drivers/net/ethernet/sun/cassini.h @@ -1,23 +1,10 @@ -/* SPDX-License-Identifier: GPL-2.0 */ +/* SPDX-License-Identifier: GPL-2.0+ */ /* $Id: cassini.h,v 1.16 2004/08/17 21:15:16 zaumen Exp $ * cassini.h: Definitions for Sun Microsystems Cassini(+) ethernet driver. * * Copyright (C) 2004 Sun Microsystems Inc. 
* Copyright (c) 2003 Adrian Sun (asun@darksunrising.com) * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation; either version 2 of the - * License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, see . - * * vendor id: 0x108E (Sun Microsystems, Inc.) * device id: 0xabba (Cassini) * revision ids: 0x01 = Cassini From bdcf74e735b11b9b0309e685ef8d58635f5a0cee Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Fri, 18 Jan 2019 14:08:59 +0000 Subject: [PATCH 077/104] irqchip/gic-v3-its: Align PCI Multi-MSI allocation on their size commit 8208d1708b88b412ca97f50a6d951242c88cbbac upstream. The way we allocate events works fine in most cases, except when multiple PCI devices share an ITS-visible DevID, and that one of them is trying to use MultiMSI allocation. In that case, our allocation is not guaranteed to be zero-based anymore, and we have to make sure we allocate it on a boundary that is compatible with the PCI Multi-MSI constraints. Fix this by allocating the full region upfront instead of iterating over the number of MSIs. MSI-X are always allocated one by one, so this shouldn't change anything on that front. Fixes: b48ac83d6bbc2 ("irqchip: GICv3: ITS: MSI support") Cc: stable@vger.kernel.org Reported-by: Ard Biesheuvel Tested-by: Ard Biesheuvel Signed-off-by: Marc Zyngier Signed-off-by: Greg Kroah-Hartman --- drivers/irqchip/irq-gic-v3-its.c | 25 +++++++++++++------------ 1 file changed, 13 insertions(+), 12 deletions(-) diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c index c2df341ff6fa..cf3abb8d284f 100644 --- a/drivers/irqchip/irq-gic-v3-its.c +++ b/drivers/irqchip/irq-gic-v3-its.c @@ -2267,13 +2267,14 @@ static void its_free_device(struct its_device *its_dev) kfree(its_dev); } -static int its_alloc_device_irq(struct its_device *dev, irq_hw_number_t *hwirq) +static int its_alloc_device_irq(struct its_device *dev, int nvecs, irq_hw_number_t *hwirq) { int idx; - idx = find_first_zero_bit(dev->event_map.lpi_map, - dev->event_map.nr_lpis); - if (idx == dev->event_map.nr_lpis) + idx = bitmap_find_free_region(dev->event_map.lpi_map, + dev->event_map.nr_lpis, + get_count_order(nvecs)); + if (idx < 0) return -ENOSPC; *hwirq = dev->event_map.lpi_base + idx; @@ -2369,21 +2370,21 @@ static int its_irq_domain_alloc(struct irq_domain *domain, unsigned int virq, int err; int i; - for (i = 0; i < nr_irqs; i++) { - err = its_alloc_device_irq(its_dev, &hwirq); - if (err) - return err; + err = its_alloc_device_irq(its_dev, nr_irqs, &hwirq); + if (err) + return err; - err = its_irq_gic_domain_alloc(domain, virq + i, hwirq); + for (i = 0; i < nr_irqs; i++) { + err = its_irq_gic_domain_alloc(domain, virq + i, hwirq + i); if (err) return err; irq_domain_set_hwirq_and_chip(domain, virq + i, - hwirq, &its_irq_chip, its_dev); + hwirq + i, &its_irq_chip, its_dev); irqd_set_single_target(irq_desc_get_irq_data(irq_to_desc(virq + i))); pr_debug("ID:%d pID:%d vID:%d\n", - (int)(hwirq - its_dev->event_map.lpi_base), - (int) hwirq, virq + i); + (int)(hwirq + i - its_dev->event_map.lpi_base), + (int)(hwirq + i), 
virq + i); } return 0; From 8d85aa96c54bb558fe923403084fc825bebef4a5 Mon Sep 17 00:00:00 2001 From: Manfred Schlaegl Date: Wed, 19 Dec 2018 19:39:58 +0100 Subject: [PATCH 078/104] can: dev: __can_get_echo_skb(): fix bogous check for non-existing skb by removing it commit 7b12c8189a3dc50638e7d53714c88007268d47ef upstream. This patch revert commit 7da11ba5c506 ("can: dev: __can_get_echo_skb(): print error message, if trying to echo non existing skb") After introduction of this change we encountered following new error message on various i.MX plattforms (flexcan): | flexcan 53fc8000.can can0: __can_get_echo_skb: BUG! Trying to echo non | existing skb: can_priv::echo_skb[0] The introduction of the message was a mistake because priv->echo_skb[idx] = NULL is a perfectly valid in following case: If CAN_RAW_LOOPBACK is disabled (setsockopt) in applications, the pkt_type of the tx skb's given to can_put_echo_skb is set to PACKET_LOOPBACK. In this case can_put_echo_skb will not set priv->echo_skb[idx]. It is therefore kept NULL. As additional argument for revert: The order of check and usage of idx was changed. idx is used to access an array element before checking it's boundaries. Signed-off-by: Manfred Schlaegl Fixes: 7da11ba5c506 ("can: dev: __can_get_echo_skb(): print error message, if trying to echo non existing skb") Cc: linux-stable Signed-off-by: Marc Kleine-Budde Signed-off-by: Greg Kroah-Hartman --- drivers/net/can/dev.c | 27 +++++++++++++-------------- 1 file changed, 13 insertions(+), 14 deletions(-) diff --git a/drivers/net/can/dev.c b/drivers/net/can/dev.c index 3b3f88ffab53..c05e4d50d43d 100644 --- a/drivers/net/can/dev.c +++ b/drivers/net/can/dev.c @@ -480,8 +480,6 @@ EXPORT_SYMBOL_GPL(can_put_echo_skb); struct sk_buff *__can_get_echo_skb(struct net_device *dev, unsigned int idx, u8 *len_ptr) { struct can_priv *priv = netdev_priv(dev); - struct sk_buff *skb = priv->echo_skb[idx]; - struct canfd_frame *cf; if (idx >= priv->echo_skb_max) { netdev_err(dev, "%s: BUG! Trying to access can_priv::echo_skb out of bounds (%u/max %u)\n", @@ -489,20 +487,21 @@ struct sk_buff *__can_get_echo_skb(struct net_device *dev, unsigned int idx, u8 return NULL; } - if (!skb) { - netdev_err(dev, "%s: BUG! Trying to echo non existing skb: can_priv::echo_skb[%u]\n", - __func__, idx); - return NULL; + if (priv->echo_skb[idx]) { + /* Using "struct canfd_frame::len" for the frame + * length is supported on both CAN and CANFD frames. + */ + struct sk_buff *skb = priv->echo_skb[idx]; + struct canfd_frame *cf = (struct canfd_frame *)skb->data; + u8 len = cf->len; + + *len_ptr = len; + priv->echo_skb[idx] = NULL; + + return skb; } - /* Using "struct canfd_frame::len" for the frame - * length is supported on both CAN and CANFD frames. - */ - cf = (struct canfd_frame *)skb->data; - *len_ptr = cf->len; - priv->echo_skb[idx] = NULL; - - return skb; + return NULL; } /* From 576f474fb2d3f1355298dab6e3d87ccf2da9b800 Mon Sep 17 00:00:00 2001 From: Oliver Hartkopp Date: Sun, 13 Jan 2019 19:31:43 +0100 Subject: [PATCH 079/104] can: bcm: check timer values before ktime conversion commit 93171ba6f1deffd82f381d36cb13177872d023f6 upstream. Kyungtae Kim detected a potential integer overflow in bcm_[rx|tx]_setup() when the conversion into ktime multiplies the given value with NSEC_PER_USEC (1000). Reference: https://marc.info/?l=linux-can&m=154732118819828&w=2 Add a check for the given tv_usec, so that the value stays below one second. 
Additionally limit the tv_sec value to a reasonable value for CAN related use-cases of 400 days and ensure all values to be positive. Reported-by: Kyungtae Kim Tested-by: Oliver Hartkopp Signed-off-by: Oliver Hartkopp Cc: linux-stable # >= 2.6.26 Tested-by: Kyungtae Kim Acked-by: Andre Naujoks Signed-off-by: Marc Kleine-Budde Signed-off-by: Greg Kroah-Hartman --- net/can/bcm.c | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/net/can/bcm.c b/net/can/bcm.c index 0af8f0db892a..79bb8afa9c0c 100644 --- a/net/can/bcm.c +++ b/net/can/bcm.c @@ -67,6 +67,9 @@ */ #define MAX_NFRAMES 256 +/* limit timers to 400 days for sending/timeouts */ +#define BCM_TIMER_SEC_MAX (400 * 24 * 60 * 60) + /* use of last_frames[index].flags */ #define RX_RECV 0x40 /* received data for this element */ #define RX_THR 0x80 /* element not been sent due to throttle feature */ @@ -140,6 +143,22 @@ static inline ktime_t bcm_timeval_to_ktime(struct bcm_timeval tv) return ktime_set(tv.tv_sec, tv.tv_usec * NSEC_PER_USEC); } +/* check limitations for timeval provided by user */ +static bool bcm_is_invalid_tv(struct bcm_msg_head *msg_head) +{ + if ((msg_head->ival1.tv_sec < 0) || + (msg_head->ival1.tv_sec > BCM_TIMER_SEC_MAX) || + (msg_head->ival1.tv_usec < 0) || + (msg_head->ival1.tv_usec >= USEC_PER_SEC) || + (msg_head->ival2.tv_sec < 0) || + (msg_head->ival2.tv_sec > BCM_TIMER_SEC_MAX) || + (msg_head->ival2.tv_usec < 0) || + (msg_head->ival2.tv_usec >= USEC_PER_SEC)) + return true; + + return false; +} + #define CFSIZ(flags) ((flags & CAN_FD_FRAME) ? CANFD_MTU : CAN_MTU) #define OPSIZ sizeof(struct bcm_op) #define MHSIZ sizeof(struct bcm_msg_head) @@ -873,6 +892,10 @@ static int bcm_tx_setup(struct bcm_msg_head *msg_head, struct msghdr *msg, if (msg_head->nframes < 1 || msg_head->nframes > MAX_NFRAMES) return -EINVAL; + /* check timeval limitations */ + if ((msg_head->flags & SETTIMER) && bcm_is_invalid_tv(msg_head)) + return -EINVAL; + /* check the given can_id */ op = bcm_find_op(&bo->tx_ops, msg_head, ifindex); if (op) { @@ -1053,6 +1076,10 @@ static int bcm_rx_setup(struct bcm_msg_head *msg_head, struct msghdr *msg, (!(msg_head->can_id & CAN_RTR_FLAG)))) return -EINVAL; + /* check timeval limitations */ + if ((msg_head->flags & SETTIMER) && bcm_is_invalid_tv(msg_head)) + return -EINVAL; + /* check the given can_id */ op = bcm_find_op(&bo->rx_ops, msg_head, ifindex); if (op) { From 6f4f2a443d87e964ebdbb2543cbd7df285cd6080 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Fri, 11 Jan 2019 12:20:41 +0100 Subject: [PATCH 080/104] can: flexcan: fix NULL pointer exception during bringup MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit a55234dabe1f72cf22f9197980751d37e38ba020 upstream. Commit cbffaf7aa09e ("can: flexcan: Always use last mailbox for TX") introduced a loop letting i run up to (including) ARRAY_SIZE(regs->mb) and in the body accessed regs->mb[i] which is an out-of-bounds array access that then resulted in an access to an reserved register area. Later this was changed by commit 0517961ccdf1 ("can: flexcan: Add provision for variable payload size") to iterate a bit differently but still runs one iteration too much resulting to call flexcan_get_mb(priv, priv->mb_count) which results in a WARN_ON and then a NULL pointer exception. This only affects devices compatible with "fsl,p1010-flexcan", "fsl,imx53-flexcan", "fsl,imx35-flexcan", "fsl,imx25-flexcan", "fsl,imx28-flexcan", so newer i.MX SoCs are not affected. 
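For illustration only (not part of the fix): the bug is the classic ARRAY_SIZE loop-bound
mistake, where "<=" runs one element past the end of the array (in the driver this extra
iteration trips the WARN_ON in flexcan_get_mb() and then dereferences NULL). A minimal
stand-alone sketch, with a hypothetical array standing in for regs->mb:

	/* Hypothetical example; 'mb' stands in for the mailbox array. */
	#include <stdio.h>

	#define ARRAY_SIZE(a) (sizeof(a) / sizeof((a)[0]))

	int main(void)
	{
		unsigned int mb[16];
		unsigned int i;

		/* Buggy form: "i <= ARRAY_SIZE(mb)" also touches mb[16],
		 * one element past the end of the array.
		 */
		for (i = 0; i < ARRAY_SIZE(mb); i++)	/* correct bound */
			mb[i] = 0;

		printf("cleared %zu mailboxes\n", ARRAY_SIZE(mb));
		return 0;
	}
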
Fixes: cbffaf7aa09e ("can: flexcan: Always use last mailbox for TX") Signed-off-by: Uwe Kleine-König Cc: linux-stable # >= 4.20 Signed-off-by: Marc Kleine-Budde Signed-off-by: Greg Kroah-Hartman --- drivers/net/can/flexcan.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/can/flexcan.c b/drivers/net/can/flexcan.c index 75ce11395ee8..ae219b8a7754 100644 --- a/drivers/net/can/flexcan.c +++ b/drivers/net/can/flexcan.c @@ -1004,7 +1004,7 @@ static int flexcan_chip_start(struct net_device *dev) } } else { /* clear and invalidate unused mailboxes first */ - for (i = FLEXCAN_TX_MB_RESERVED_OFF_FIFO; i <= ARRAY_SIZE(regs->mb); i++) { + for (i = FLEXCAN_TX_MB_RESERVED_OFF_FIFO; i < ARRAY_SIZE(regs->mb); i++) { priv->write(FLEXCAN_MB_CODE_RX_INACTIVE, ®s->mb[i].can_ctrl); } From 855f7e64169f4da6691b30b1357a2b070515634c Mon Sep 17 00:00:00 2001 From: Nicolas Pitre Date: Tue, 8 Jan 2019 22:54:59 -0500 Subject: [PATCH 081/104] vt: make vt_console_print() compatible with the unicode screen buffer commit 6609cff65c5b184ab889880ef5d41189611ea05f upstream. When kernel messages are printed to the console, they appear blank on the unicode screen. This is because vt_console_print() is lacking a call to vc_uniscr_putc(). However the later function assumes vc->vc_x is always up to date when called, which is not the case here as vt_console_print() uses it to mark the beginning of the display update. This patch reworks (and simplifies) vt_console_print() so that vc->vc_x is always valid and keeps the start of display update in a local variable instead, which finally allows for adding the missing vc_uniscr_putc() call. Signed-off-by: Nicolas Pitre Cc: stable@vger.kernel.org # v4.19+ Signed-off-by: Greg Kroah-Hartman --- drivers/tty/vt/vt.c | 47 +++++++++++++++------------------------------ 1 file changed, 15 insertions(+), 32 deletions(-) diff --git a/drivers/tty/vt/vt.c b/drivers/tty/vt/vt.c index 476ec4b1b86c..adf5c896cecb 100644 --- a/drivers/tty/vt/vt.c +++ b/drivers/tty/vt/vt.c @@ -2887,8 +2887,7 @@ static void vt_console_print(struct console *co, const char *b, unsigned count) unsigned char c; static DEFINE_SPINLOCK(printing_lock); const ushort *start; - ushort cnt = 0; - ushort myx; + ushort start_x, cnt; int kmsg_console; /* console busy or not yet initialized */ @@ -2901,10 +2900,6 @@ static void vt_console_print(struct console *co, const char *b, unsigned count) if (kmsg_console && vc_cons_allocated(kmsg_console - 1)) vc = vc_cons[kmsg_console - 1].d; - /* read `x' only after setting currcons properly (otherwise - the `x' macro will read the x of the foreground console). 
*/ - myx = vc->vc_x; - if (!vc_cons_allocated(fg_console)) { /* impossible */ /* printk("vt_console_print: tty %d not allocated ??\n", currcons+1); */ @@ -2919,53 +2914,41 @@ static void vt_console_print(struct console *co, const char *b, unsigned count) hide_cursor(vc); start = (ushort *)vc->vc_pos; - - /* Contrived structure to try to emulate original need_wrap behaviour - * Problems caused when we have need_wrap set on '\n' character */ + start_x = vc->vc_x; + cnt = 0; while (count--) { c = *b++; if (c == 10 || c == 13 || c == 8 || vc->vc_need_wrap) { - if (cnt > 0) { - if (con_is_visible(vc)) - vc->vc_sw->con_putcs(vc, start, cnt, vc->vc_y, vc->vc_x); - vc->vc_x += cnt; - if (vc->vc_need_wrap) - vc->vc_x--; - cnt = 0; - } + if (cnt && con_is_visible(vc)) + vc->vc_sw->con_putcs(vc, start, cnt, vc->vc_y, start_x); + cnt = 0; if (c == 8) { /* backspace */ bs(vc); start = (ushort *)vc->vc_pos; - myx = vc->vc_x; + start_x = vc->vc_x; continue; } if (c != 13) lf(vc); cr(vc); start = (ushort *)vc->vc_pos; - myx = vc->vc_x; + start_x = vc->vc_x; if (c == 10 || c == 13) continue; } + vc_uniscr_putc(vc, c); scr_writew((vc->vc_attr << 8) + c, (unsigned short *)vc->vc_pos); notify_write(vc, c); cnt++; - if (myx == vc->vc_cols - 1) { - vc->vc_need_wrap = 1; - continue; - } - vc->vc_pos += 2; - myx++; - } - if (cnt > 0) { - if (con_is_visible(vc)) - vc->vc_sw->con_putcs(vc, start, cnt, vc->vc_y, vc->vc_x); - vc->vc_x += cnt; - if (vc->vc_x == vc->vc_cols) { - vc->vc_x--; + if (vc->vc_x == vc->vc_cols - 1) { vc->vc_need_wrap = 1; + } else { + vc->vc_pos += 2; + vc->vc_x++; } } + if (cnt && con_is_visible(vc)) + vc->vc_sw->con_putcs(vc, start, cnt, vc->vc_y, start_x); set_cursor(vc); notify_update(vc); From 18ef43def81cebcdd3b7391b62de84b777814e08 Mon Sep 17 00:00:00 2001 From: Nicolas Pitre Date: Tue, 8 Jan 2019 22:55:00 -0500 Subject: [PATCH 082/104] vt: always call notifier with the console lock held commit 7e1d226345f89ad5d0216a9092c81386c89b4983 upstream. Every invocation of notify_write() and notify_update() is performed under the console lock, except for one case. Let's fix that. Signed-off-by: Nicolas Pitre Cc: stable@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/tty/vt/vt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/tty/vt/vt.c b/drivers/tty/vt/vt.c index adf5c896cecb..41ad46bf22c5 100644 --- a/drivers/tty/vt/vt.c +++ b/drivers/tty/vt/vt.c @@ -2767,8 +2767,8 @@ rescan_last_byte: con_flush(vc, draw_from, draw_to, &draw_x); vc_uniscr_debug_check(vc); console_conditional_schedule(); - console_unlock(); notify_update(vc); + console_unlock(); return n; } From 8b4dffe8261a5079952222e5920303f6840ca211 Mon Sep 17 00:00:00 2001 From: Nicolas Pitre Date: Tue, 8 Jan 2019 22:55:01 -0500 Subject: [PATCH 083/104] vt: invoke notifier on screen size change commit 0c9b1965faddad7534b6974b5b36c4ad37998f8e upstream. User space using poll() on /dev/vcs devices are not awaken when a screen size change occurs. Let's fix that. 
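As a hedged illustration of what such a user space watcher typically looks like (not part
of the patch; the device path and the POLLPRI convention for /dev/vcs updates are
assumptions here):

	#include <fcntl.h>
	#include <poll.h>
	#include <stdio.h>
	#include <unistd.h>

	int main(void)
	{
		struct pollfd pfd;
		int fd = open("/dev/vcs1", O_RDONLY);

		if (fd < 0) {
			perror("open /dev/vcs1");
			return 1;
		}

		pfd.fd = fd;
		pfd.events = POLLPRI;	/* vcs reports console updates as POLLPRI */

		/* Without this fix, a console resize did not wake this poll(). */
		if (poll(&pfd, 1, -1) > 0 && (pfd.revents & POLLPRI))
			printf("console changed (contents or size)\n");

		close(fd);
		return 0;
	}
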
Signed-off-by: Nicolas Pitre Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/tty/vt/vt.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/tty/vt/vt.c b/drivers/tty/vt/vt.c index 41ad46bf22c5..da335899527b 100644 --- a/drivers/tty/vt/vt.c +++ b/drivers/tty/vt/vt.c @@ -1275,6 +1275,7 @@ static int vc_do_resize(struct tty_struct *tty, struct vc_data *vc, if (con_is_visible(vc)) update_screen(vc); vt_event_post(VT_EVENT_RESIZE, vc->vc_num, vc->vc_num); + notify_update(vc); return err; } From ce8d0581ae33b64b86fb6cd30d4385f90ddf19d8 Mon Sep 17 00:00:00 2001 From: Neil Armstrong Date: Mon, 14 Jan 2019 16:31:18 +0100 Subject: [PATCH 084/104] drm/meson: Fix atomic mode switching regression commit ce0210c12433031aba3bbacd75f4c02ab77f2004 upstream. Since commit 2bcd3ecab773 when switching mode from X11 (ubuntu mate for example) the display gets blurry, looking like an invalid framebuffer width. This commit fixed atomic crtc modesetting in a totally wrong way and introduced a local unnecessary ->enabled crtc state. This commit reverts the crctc _begin() and _enable() changes and simply adds drm_atomic_helper_commit_tail_rpm as helper. Reported-by: Tony McKahan Suggested-by: Daniel Vetter Fixes: 2bcd3ecab773 ("drm/meson: Fixes for drm_crtc_vblank_on/off support") Signed-off-by: Neil Armstrong Acked-by: Daniel Vetter [narmstrong: fixed blank line issue from checkpatch] Link: https://patchwork.freedesktop.org/patch/msgid/20190114153118.8024-1-narmstrong@baylibre.com Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/meson/meson_crtc.c | 23 ++--------------------- drivers/gpu/drm/meson/meson_drv.c | 5 +++++ 2 files changed, 7 insertions(+), 21 deletions(-) diff --git a/drivers/gpu/drm/meson/meson_crtc.c b/drivers/gpu/drm/meson/meson_crtc.c index 191b314f9e9e..709475d5cc30 100644 --- a/drivers/gpu/drm/meson/meson_crtc.c +++ b/drivers/gpu/drm/meson/meson_crtc.c @@ -45,7 +45,6 @@ struct meson_crtc { struct drm_crtc base; struct drm_pending_vblank_event *event; struct meson_drm *priv; - bool enabled; }; #define to_meson_crtc(x) container_of(x, struct meson_crtc, base) @@ -81,7 +80,8 @@ static const struct drm_crtc_funcs meson_crtc_funcs = { }; -static void meson_crtc_enable(struct drm_crtc *crtc) +static void meson_crtc_atomic_enable(struct drm_crtc *crtc, + struct drm_crtc_state *old_state) { struct meson_crtc *meson_crtc = to_meson_crtc(crtc); struct drm_crtc_state *crtc_state = crtc->state; @@ -103,20 +103,6 @@ static void meson_crtc_enable(struct drm_crtc *crtc) drm_crtc_vblank_on(crtc); - meson_crtc->enabled = true; -} - -static void meson_crtc_atomic_enable(struct drm_crtc *crtc, - struct drm_crtc_state *old_state) -{ - struct meson_crtc *meson_crtc = to_meson_crtc(crtc); - struct meson_drm *priv = meson_crtc->priv; - - DRM_DEBUG_DRIVER("\n"); - - if (!meson_crtc->enabled) - meson_crtc_enable(crtc); - priv->viu.osd1_enabled = true; } @@ -142,8 +128,6 @@ static void meson_crtc_atomic_disable(struct drm_crtc *crtc, crtc->state->event = NULL; } - - meson_crtc->enabled = false; } static void meson_crtc_atomic_begin(struct drm_crtc *crtc, @@ -152,9 +136,6 @@ static void meson_crtc_atomic_begin(struct drm_crtc *crtc, struct meson_crtc *meson_crtc = to_meson_crtc(crtc); unsigned long flags; - if (crtc->state->enable && !meson_crtc->enabled) - meson_crtc_enable(crtc); - if (crtc->state->event) { WARN_ON(drm_crtc_vblank_get(crtc) != 0); diff --git a/drivers/gpu/drm/meson/meson_drv.c b/drivers/gpu/drm/meson/meson_drv.c index d3443125e661..bf5f294f172f 100644 --- 
a/drivers/gpu/drm/meson/meson_drv.c +++ b/drivers/gpu/drm/meson/meson_drv.c @@ -82,6 +82,10 @@ static const struct drm_mode_config_funcs meson_mode_config_funcs = { .fb_create = drm_gem_fb_create, }; +static const struct drm_mode_config_helper_funcs meson_mode_config_helpers = { + .atomic_commit_tail = drm_atomic_helper_commit_tail_rpm, +}; + static irqreturn_t meson_irq(int irq, void *arg) { struct drm_device *dev = arg; @@ -246,6 +250,7 @@ static int meson_drv_bind_master(struct device *dev, bool has_components) drm->mode_config.max_width = 3840; drm->mode_config.max_height = 2160; drm->mode_config.funcs = &meson_mode_config_funcs; + drm->mode_config.helper_private = &meson_mode_config_helpers; /* Hardware Initialization */ From 7da6cd690c436cad101b23e1e54aa3f27f55088b Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Mon, 28 Jan 2019 21:28:19 +0100 Subject: [PATCH 085/104] bpf: improve verifier branch analysis [ commit 4f7b3e82589e0de723780198ec7983e427144c0a upstream ] pathological bpf programs may try to force verifier to explode in the number of branch states: 20: (d5) if r1 s<= 0x24000028 goto pc+0 21: (b5) if r0 <= 0xe1fa20 goto pc+2 22: (d5) if r1 s<= 0x7e goto pc+0 23: (b5) if r0 <= 0xe880e000 goto pc+0 24: (c5) if r0 s< 0x2100ecf4 goto pc+0 25: (d5) if r1 s<= 0xe880e000 goto pc+1 26: (c5) if r0 s< 0xf4041810 goto pc+0 27: (d5) if r1 s<= 0x1e007e goto pc+0 28: (b5) if r0 <= 0xe86be000 goto pc+0 29: (07) r0 += 16614 30: (c5) if r0 s< 0x6d0020da goto pc+0 31: (35) if r0 >= 0x2100ecf4 goto pc+0 Teach verifier to recognize always taken and always not taken branches. This analysis is already done for == and != comparison. Expand it to all other branches. It also helps real bpf programs to be verified faster: before after bpf_lb-DLB_L3.o 2003 1940 bpf_lb-DLB_L4.o 3173 3089 bpf_lb-DUNKNOWN.o 1080 1065 bpf_lxc-DDROP_ALL.o 29584 28052 bpf_lxc-DUNKNOWN.o 36916 35487 bpf_netdev.o 11188 10864 bpf_overlay.o 6679 6643 bpf_lcx_jit.o 39555 38437 Reported-by: Anatoly Trosinenko Signed-off-by: Alexei Starovoitov Acked-by: Daniel Borkmann Acked-by: Edward Cree Signed-off-by: Daniel Borkmann Signed-off-by: Sasha Levin --- kernel/bpf/verifier.c | 93 +++++++++++++++++++++++++++++++++++++------ 1 file changed, 80 insertions(+), 13 deletions(-) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 341806668f03..3d093003c723 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -3475,6 +3475,79 @@ static void find_good_pkt_pointers(struct bpf_verifier_state *vstate, } } +/* compute branch direction of the expression "if (reg opcode val) goto target;" + * and return: + * 1 - branch will be taken and "goto target" will be executed + * 0 - branch will not be taken and fall-through to next insn + * -1 - unknown. 
Example: "if (reg < 5)" is unknown when register value range [0,10] + */ +static int is_branch_taken(struct bpf_reg_state *reg, u64 val, u8 opcode) +{ + if (__is_pointer_value(false, reg)) + return -1; + + switch (opcode) { + case BPF_JEQ: + if (tnum_is_const(reg->var_off)) + return !!tnum_equals_const(reg->var_off, val); + break; + case BPF_JNE: + if (tnum_is_const(reg->var_off)) + return !tnum_equals_const(reg->var_off, val); + break; + case BPF_JGT: + if (reg->umin_value > val) + return 1; + else if (reg->umax_value <= val) + return 0; + break; + case BPF_JSGT: + if (reg->smin_value > (s64)val) + return 1; + else if (reg->smax_value < (s64)val) + return 0; + break; + case BPF_JLT: + if (reg->umax_value < val) + return 1; + else if (reg->umin_value >= val) + return 0; + break; + case BPF_JSLT: + if (reg->smax_value < (s64)val) + return 1; + else if (reg->smin_value >= (s64)val) + return 0; + break; + case BPF_JGE: + if (reg->umin_value >= val) + return 1; + else if (reg->umax_value < val) + return 0; + break; + case BPF_JSGE: + if (reg->smin_value >= (s64)val) + return 1; + else if (reg->smax_value < (s64)val) + return 0; + break; + case BPF_JLE: + if (reg->umax_value <= val) + return 1; + else if (reg->umin_value > val) + return 0; + break; + case BPF_JSLE: + if (reg->smax_value <= (s64)val) + return 1; + else if (reg->smin_value > (s64)val) + return 0; + break; + } + + return -1; +} + /* Adjusts the register min/max values in the case that the dst_reg is the * variable register that we are working on, and src_reg is a constant or we're * simply doing a BPF_K check. @@ -3868,21 +3941,15 @@ static int check_cond_jmp_op(struct bpf_verifier_env *env, dst_reg = ®s[insn->dst_reg]; - /* detect if R == 0 where R was initialized to zero earlier */ - if (BPF_SRC(insn->code) == BPF_K && - (opcode == BPF_JEQ || opcode == BPF_JNE) && - dst_reg->type == SCALAR_VALUE && - tnum_is_const(dst_reg->var_off)) { - if ((opcode == BPF_JEQ && dst_reg->var_off.value == insn->imm) || - (opcode == BPF_JNE && dst_reg->var_off.value != insn->imm)) { - /* if (imm == imm) goto pc+off; - * only follow the goto, ignore fall-through - */ + if (BPF_SRC(insn->code) == BPF_K) { + int pred = is_branch_taken(dst_reg, insn->imm, opcode); + + if (pred == 1) { + /* only follow the goto, ignore fall-through */ *insn_idx += insn->off; return 0; - } else { - /* if (imm != imm) goto pc+off; - * only follow fall-through branch, since + } else if (pred == 0) { + /* only follow fall-through branch, since * that's where the program will go */ return 0; From 437112946263de65ac32be20a040ce77b31926b7 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Mon, 28 Jan 2019 21:28:20 +0100 Subject: [PATCH 086/104] bpf: add per-insn complexity limit [ commit ceefbc96fa5c5b975d87bf8e89ba8416f6b764d9 upstream ] malicious bpf program may try to force the verifier to remember a lot of distinct verifier states. Put a limit to number of per-insn 'struct bpf_verifier_state'. Note that hitting the limit doesn't reject the program. It potentially makes the verifier do more steps to analyze the program. It means that malicious programs will hit BPF_COMPLEXITY_LIMIT_INSNS sooner instead of spending cpu time walking long link list. 
The limit of BPF_COMPLEXITY_LIMIT_STATES==64 affects cilium progs with slight increase in number of "steps" it takes to successfully verify the programs: before after bpf_lb-DLB_L3.o 1940 1940 bpf_lb-DLB_L4.o 3089 3089 bpf_lb-DUNKNOWN.o 1065 1065 bpf_lxc-DDROP_ALL.o 28052 | 28162 bpf_lxc-DUNKNOWN.o 35487 | 35541 bpf_netdev.o 10864 10864 bpf_overlay.o 6643 6643 bpf_lcx_jit.o 38437 38437 But it also makes malicious program to be rejected in 0.4 seconds vs 6.5 Hence apply this limit to unprivileged programs only. Signed-off-by: Alexei Starovoitov Acked-by: Daniel Borkmann Acked-by: Edward Cree Signed-off-by: Daniel Borkmann Signed-off-by: Sasha Levin --- kernel/bpf/verifier.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 3d093003c723..2bbb98535b70 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -156,6 +156,7 @@ struct bpf_verifier_stack_elem { #define BPF_COMPLEXITY_LIMIT_INSNS 131072 #define BPF_COMPLEXITY_LIMIT_STACK 1024 +#define BPF_COMPLEXITY_LIMIT_STATES 64 #define BPF_MAP_PTR_UNPRIV 1UL #define BPF_MAP_PTR_POISON ((void *)((0xeB9FUL << 1) + \ @@ -4735,7 +4736,7 @@ static int is_state_visited(struct bpf_verifier_env *env, int insn_idx) struct bpf_verifier_state_list *new_sl; struct bpf_verifier_state_list *sl; struct bpf_verifier_state *cur = env->cur_state; - int i, j, err; + int i, j, err, states_cnt = 0; sl = env->explored_states[insn_idx]; if (!sl) @@ -4762,8 +4763,12 @@ static int is_state_visited(struct bpf_verifier_env *env, int insn_idx) return 1; } sl = sl->next; + states_cnt++; } + if (!env->allow_ptr_leaks && states_cnt > BPF_COMPLEXITY_LIMIT_STATES) + return 0; + /* there were no equivalent states, remember current one. * technically the current state is not proven to be safe yet, * but it will either reach outer most bpf_exit (which means it's safe) From 333a31c89ae2d991a8a54034e369526cbb384b13 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Mon, 28 Jan 2019 21:28:21 +0100 Subject: [PATCH 087/104] bpf: move {prev_,}insn_idx into verifier env [ commit c08435ec7f2bc8f4109401f696fd55159b4b40cb upstream ] Move prev_insn_idx and insn_idx from the do_check() function into the verifier environment, so they can be read inside the various helper functions for handling the instructions. It's easier to put this into the environment rather than changing all call-sites only to pass it along. insn_idx is useful in particular since this later on allows to hold state in env->insn_aux_data[env->insn_idx]. 
Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: Alexei Starovoitov Signed-off-by: Daniel Borkmann Signed-off-by: Sasha Levin --- include/linux/bpf_verifier.h | 2 + kernel/bpf/verifier.c | 75 ++++++++++++++++++------------------ 2 files changed, 40 insertions(+), 37 deletions(-) diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index 1fd6fa822d2c..b1587ffdea7b 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -186,6 +186,8 @@ struct bpf_subprog_info { * one verifier_env per bpf_check() call */ struct bpf_verifier_env { + u32 insn_idx; + u32 prev_insn_idx; struct bpf_prog *prog; /* eBPF program being verified */ const struct bpf_verifier_ops *ops; struct bpf_verifier_stack_elem *head; /* stack of verifier states to be processed */ diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 2bbb98535b70..6d11320998c6 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -4816,7 +4816,6 @@ static int do_check(struct bpf_verifier_env *env) struct bpf_insn *insns = env->prog->insnsi; struct bpf_reg_state *regs; int insn_cnt = env->prog->len, i; - int insn_idx, prev_insn_idx = 0; int insn_processed = 0; bool do_print_state = false; @@ -4835,19 +4834,19 @@ static int do_check(struct bpf_verifier_env *env) BPF_MAIN_FUNC /* callsite */, 0 /* frameno */, 0 /* subprogno, zero == main subprog */); - insn_idx = 0; + for (;;) { struct bpf_insn *insn; u8 class; int err; - if (insn_idx >= insn_cnt) { + if (env->insn_idx >= insn_cnt) { verbose(env, "invalid insn idx %d insn_cnt %d\n", - insn_idx, insn_cnt); + env->insn_idx, insn_cnt); return -EFAULT; } - insn = &insns[insn_idx]; + insn = &insns[env->insn_idx]; class = BPF_CLASS(insn->code); if (++insn_processed > BPF_COMPLEXITY_LIMIT_INSNS) { @@ -4857,7 +4856,7 @@ static int do_check(struct bpf_verifier_env *env) return -E2BIG; } - err = is_state_visited(env, insn_idx); + err = is_state_visited(env, env->insn_idx); if (err < 0) return err; if (err == 1) { @@ -4865,9 +4864,9 @@ static int do_check(struct bpf_verifier_env *env) if (env->log.level) { if (do_print_state) verbose(env, "\nfrom %d to %d: safe\n", - prev_insn_idx, insn_idx); + env->prev_insn_idx, env->insn_idx); else - verbose(env, "%d: safe\n", insn_idx); + verbose(env, "%d: safe\n", env->insn_idx); } goto process_bpf_exit; } @@ -4880,10 +4879,10 @@ static int do_check(struct bpf_verifier_env *env) if (env->log.level > 1 || (env->log.level && do_print_state)) { if (env->log.level > 1) - verbose(env, "%d:", insn_idx); + verbose(env, "%d:", env->insn_idx); else verbose(env, "\nfrom %d to %d:", - prev_insn_idx, insn_idx); + env->prev_insn_idx, env->insn_idx); print_verifier_state(env, state->frame[state->curframe]); do_print_state = false; } @@ -4894,19 +4893,20 @@ static int do_check(struct bpf_verifier_env *env) .private_data = env, }; - verbose(env, "%d: ", insn_idx); + verbose(env, "%d: ", env->insn_idx); print_bpf_insn(&cbs, insn, env->allow_ptr_leaks); } if (bpf_prog_is_dev_bound(env->prog->aux)) { - err = bpf_prog_offload_verify_insn(env, insn_idx, - prev_insn_idx); + err = bpf_prog_offload_verify_insn(env, env->insn_idx, + env->prev_insn_idx); if (err) return err; } regs = cur_regs(env); - env->insn_aux_data[insn_idx].seen = true; + env->insn_aux_data[env->insn_idx].seen = true; + if (class == BPF_ALU || class == BPF_ALU64) { err = check_alu_op(env, insn); if (err) @@ -4931,13 +4931,13 @@ static int do_check(struct bpf_verifier_env *env) /* check that memory (src_reg + off) is readable, * the 
state of dst_reg will be updated by this func */ - err = check_mem_access(env, insn_idx, insn->src_reg, insn->off, - BPF_SIZE(insn->code), BPF_READ, - insn->dst_reg, false); + err = check_mem_access(env, env->insn_idx, insn->src_reg, + insn->off, BPF_SIZE(insn->code), + BPF_READ, insn->dst_reg, false); if (err) return err; - prev_src_type = &env->insn_aux_data[insn_idx].ptr_type; + prev_src_type = &env->insn_aux_data[env->insn_idx].ptr_type; if (*prev_src_type == NOT_INIT) { /* saw a valid insn @@ -4964,10 +4964,10 @@ static int do_check(struct bpf_verifier_env *env) enum bpf_reg_type *prev_dst_type, dst_reg_type; if (BPF_MODE(insn->code) == BPF_XADD) { - err = check_xadd(env, insn_idx, insn); + err = check_xadd(env, env->insn_idx, insn); if (err) return err; - insn_idx++; + env->insn_idx++; continue; } @@ -4983,13 +4983,13 @@ static int do_check(struct bpf_verifier_env *env) dst_reg_type = regs[insn->dst_reg].type; /* check that memory (dst_reg + off) is writeable */ - err = check_mem_access(env, insn_idx, insn->dst_reg, insn->off, - BPF_SIZE(insn->code), BPF_WRITE, - insn->src_reg, false); + err = check_mem_access(env, env->insn_idx, insn->dst_reg, + insn->off, BPF_SIZE(insn->code), + BPF_WRITE, insn->src_reg, false); if (err) return err; - prev_dst_type = &env->insn_aux_data[insn_idx].ptr_type; + prev_dst_type = &env->insn_aux_data[env->insn_idx].ptr_type; if (*prev_dst_type == NOT_INIT) { *prev_dst_type = dst_reg_type; @@ -5018,9 +5018,9 @@ static int do_check(struct bpf_verifier_env *env) } /* check that memory (dst_reg + off) is writeable */ - err = check_mem_access(env, insn_idx, insn->dst_reg, insn->off, - BPF_SIZE(insn->code), BPF_WRITE, - -1, false); + err = check_mem_access(env, env->insn_idx, insn->dst_reg, + insn->off, BPF_SIZE(insn->code), + BPF_WRITE, -1, false); if (err) return err; @@ -5038,9 +5038,9 @@ static int do_check(struct bpf_verifier_env *env) } if (insn->src_reg == BPF_PSEUDO_CALL) - err = check_func_call(env, insn, &insn_idx); + err = check_func_call(env, insn, &env->insn_idx); else - err = check_helper_call(env, insn->imm, insn_idx); + err = check_helper_call(env, insn->imm, env->insn_idx); if (err) return err; @@ -5053,7 +5053,7 @@ static int do_check(struct bpf_verifier_env *env) return -EINVAL; } - insn_idx += insn->off + 1; + env->insn_idx += insn->off + 1; continue; } else if (opcode == BPF_EXIT) { @@ -5067,8 +5067,8 @@ static int do_check(struct bpf_verifier_env *env) if (state->curframe) { /* exit from nested function */ - prev_insn_idx = insn_idx; - err = prepare_func_exit(env, &insn_idx); + env->prev_insn_idx = env->insn_idx; + err = prepare_func_exit(env, &env->insn_idx); if (err) return err; do_print_state = true; @@ -5094,7 +5094,8 @@ static int do_check(struct bpf_verifier_env *env) if (err) return err; process_bpf_exit: - err = pop_stack(env, &prev_insn_idx, &insn_idx); + err = pop_stack(env, &env->prev_insn_idx, + &env->insn_idx); if (err < 0) { if (err != -ENOENT) return err; @@ -5104,7 +5105,7 @@ process_bpf_exit: continue; } } else { - err = check_cond_jmp_op(env, insn, &insn_idx); + err = check_cond_jmp_op(env, insn, &env->insn_idx); if (err) return err; } @@ -5121,8 +5122,8 @@ process_bpf_exit: if (err) return err; - insn_idx++; - env->insn_aux_data[insn_idx].seen = true; + env->insn_idx++; + env->insn_aux_data[env->insn_idx].seen = true; } else { verbose(env, "invalid BPF_LD mode\n"); return -EINVAL; @@ -5132,7 +5133,7 @@ process_bpf_exit: return -EINVAL; } - insn_idx++; + env->insn_idx++; } verbose(env, "processed %d insns (limit %d), 
stack depth ", From b855e310374063360af5fb3bbbd0cc66ca189dc0 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Mon, 28 Jan 2019 21:28:22 +0100 Subject: [PATCH 088/104] bpf: move tmp variable into ax register in interpreter [ commit 144cd91c4c2bced6eb8a7e25e590f6618a11e854 upstream ] This change moves the on-stack 64 bit tmp variable in ___bpf_prog_run() into the hidden ax register. The latter is currently only used in JITs for constant blinding as a temporary scratch register, meaning the BPF interpreter will never see the use of ax. Therefore it is safe to use it for the cases where tmp has been used earlier. This is needed to later on allow restricted hidden use of ax in both interpreter and JITs. Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: Alexei Starovoitov Signed-off-by: Daniel Borkmann Signed-off-by: Sasha Levin --- include/linux/filter.h | 3 ++- kernel/bpf/core.c | 34 +++++++++++++++++----------------- 2 files changed, 19 insertions(+), 18 deletions(-) diff --git a/include/linux/filter.h b/include/linux/filter.h index ec90d5255cf7..81420a0efdbe 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -60,7 +60,8 @@ struct sock_reuseport; * constants. See JIT pre-step in bpf_jit_blind_constants(). */ #define BPF_REG_AX MAX_BPF_REG -#define MAX_BPF_JIT_REG (MAX_BPF_REG + 1) +#define MAX_BPF_EXT_REG (MAX_BPF_REG + 1) +#define MAX_BPF_JIT_REG MAX_BPF_EXT_REG /* unused opcode to mark special call to bpf_tail_call() helper */ #define BPF_TAIL_CALL 0xf0 diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index 3f5bf1af0826..aefc62ae4a1e 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -52,6 +52,7 @@ #define DST regs[insn->dst_reg] #define SRC regs[insn->src_reg] #define FP regs[BPF_REG_FP] +#define AX regs[BPF_REG_AX] #define ARG1 regs[BPF_REG_ARG1] #define CTX regs[BPF_REG_CTX] #define IMM insn->imm @@ -971,7 +972,6 @@ bool bpf_opcode_in_insntable(u8 code) */ static u64 ___bpf_prog_run(u64 *regs, const struct bpf_insn *insn, u64 *stack) { - u64 tmp; #define BPF_INSN_2_LBL(x, y) [BPF_##x | BPF_##y] = &&x##_##y #define BPF_INSN_3_LBL(x, y, z) [BPF_##x | BPF_##y | BPF_##z] = &&x##_##y##_##z static const void *jumptable[256] = { @@ -1045,36 +1045,36 @@ select_insn: (*(s64 *) &DST) >>= IMM; CONT; ALU64_MOD_X: - div64_u64_rem(DST, SRC, &tmp); - DST = tmp; + div64_u64_rem(DST, SRC, &AX); + DST = AX; CONT; ALU_MOD_X: - tmp = (u32) DST; - DST = do_div(tmp, (u32) SRC); + AX = (u32) DST; + DST = do_div(AX, (u32) SRC); CONT; ALU64_MOD_K: - div64_u64_rem(DST, IMM, &tmp); - DST = tmp; + div64_u64_rem(DST, IMM, &AX); + DST = AX; CONT; ALU_MOD_K: - tmp = (u32) DST; - DST = do_div(tmp, (u32) IMM); + AX = (u32) DST; + DST = do_div(AX, (u32) IMM); CONT; ALU64_DIV_X: DST = div64_u64(DST, SRC); CONT; ALU_DIV_X: - tmp = (u32) DST; - do_div(tmp, (u32) SRC); - DST = (u32) tmp; + AX = (u32) DST; + do_div(AX, (u32) SRC); + DST = (u32) AX; CONT; ALU64_DIV_K: DST = div64_u64(DST, IMM); CONT; ALU_DIV_K: - tmp = (u32) DST; - do_div(tmp, (u32) IMM); - DST = (u32) tmp; + AX = (u32) DST; + do_div(AX, (u32) IMM); + DST = (u32) AX; CONT; ALU_END_TO_BE: switch (IMM) { @@ -1330,7 +1330,7 @@ STACK_FRAME_NON_STANDARD(___bpf_prog_run); /* jump table */ static unsigned int PROG_NAME(stack_size)(const void *ctx, const struct bpf_insn *insn) \ { \ u64 stack[stack_size / sizeof(u64)]; \ - u64 regs[MAX_BPF_REG]; \ + u64 regs[MAX_BPF_EXT_REG]; \ \ FP = (u64) (unsigned long) &stack[ARRAY_SIZE(stack)]; \ ARG1 = (u64) (unsigned long) ctx; \ @@ -1343,7 +1343,7 @@ static u64 
PROG_NAME_ARGS(stack_size)(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5, \ const struct bpf_insn *insn) \ { \ u64 stack[stack_size / sizeof(u64)]; \ - u64 regs[MAX_BPF_REG]; \ + u64 regs[MAX_BPF_EXT_REG]; \ \ FP = (u64) (unsigned long) &stack[ARRAY_SIZE(stack)]; \ BPF_R1 = r1; \ From 232ac70dd38bfae4899460c342aae7aa0a36aa64 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Mon, 28 Jan 2019 21:28:23 +0100 Subject: [PATCH 089/104] bpf: enable access to ax register also from verifier rewrite [ commit 9b73bfdd08e73231d6a90ae6db4b46b3fbf56c30 upstream ] Right now we are using BPF ax register in JIT for constant blinding as well as in interpreter as temporary variable. Verifier will not be able to use it simply because its use will get overridden from the former in bpf_jit_blind_insn(). However, it can be made to work in that blinding will be skipped if there is prior use in either source or destination register on the instruction. Taking constraints of ax into account, the verifier is then open to use it in rewrites under some constraints. Note, ax register already has mappings in every eBPF JIT. Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: Alexei Starovoitov Signed-off-by: Daniel Borkmann Signed-off-by: Sasha Levin --- include/linux/filter.h | 7 +------ kernel/bpf/core.c | 20 ++++++++++++++++++++ 2 files changed, 21 insertions(+), 6 deletions(-) diff --git a/include/linux/filter.h b/include/linux/filter.h index 81420a0efdbe..1a39d57eb88f 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -53,12 +53,7 @@ struct sock_reuseport; #define BPF_REG_D BPF_REG_8 /* data, callee-saved */ #define BPF_REG_H BPF_REG_9 /* hlen, callee-saved */ -/* Kernel hidden auxiliary/helper register for hardening step. - * Only used by eBPF JITs. It's nothing more than a temporary - * register that JITs use internally, only that here it's part - * of eBPF instructions that have been rewritten for blinding - * constants. See JIT pre-step in bpf_jit_blind_constants(). - */ +/* Kernel hidden auxiliary/helper register. */ #define BPF_REG_AX MAX_BPF_REG #define MAX_BPF_EXT_REG (MAX_BPF_REG + 1) #define MAX_BPF_JIT_REG MAX_BPF_EXT_REG diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index aefc62ae4a1e..474525e3a9db 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -643,6 +643,26 @@ static int bpf_jit_blind_insn(const struct bpf_insn *from, BUILD_BUG_ON(BPF_REG_AX + 1 != MAX_BPF_JIT_REG); BUILD_BUG_ON(MAX_BPF_REG + 1 != MAX_BPF_JIT_REG); + /* Constraints on AX register: + * + * AX register is inaccessible from user space. It is mapped in + * all JITs, and used here for constant blinding rewrites. It is + * typically "stateless" meaning its contents are only valid within + * the executed instruction, but not across several instructions. + * There are a few exceptions however which are further detailed + * below. + * + * Constant blinding is only used by JITs, not in the interpreter. + * The interpreter uses AX in some occasions as a local temporary + * register e.g. in DIV or MOD instructions. + * + * In restricted circumstances, the verifier can also use the AX + * register for rewrites as long as they do not interfere with + * the above cases! 
+ */ + if (from->dst_reg == BPF_REG_AX || from->src_reg == BPF_REG_AX) + goto out; + if (from->imm == 0 && (from->code == (BPF_ALU | BPF_MOV | BPF_K) || from->code == (BPF_ALU64 | BPF_MOV | BPF_K))) { From 9e57b2969d4afa8fe08dda6d658963336c97b594 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Mon, 28 Jan 2019 21:28:24 +0100 Subject: [PATCH 090/104] bpf: restrict map value pointer arithmetic for unprivileged [ commit 0d6303db7970e6f56ae700fa07e11eb510cda125 upstream ] Restrict map value pointer arithmetic for unprivileged users in that arithmetic itself must not go out of bounds as opposed to the actual access later on. Therefore after each adjust_ptr_min_max_vals() with a map value pointer as a destination it will simulate a check_map_access() of 1 byte on the destination and once that fails the program is rejected for unprivileged program loads. We use this later on for masking any pointer arithmetic with the remainder of the map value space. The likelihood of breaking any existing real-world unprivileged eBPF program is very small for this corner case. Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: Alexei Starovoitov Signed-off-by: Daniel Borkmann Signed-off-by: Sasha Levin --- kernel/bpf/verifier.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 6d11320998c6..110ca915cfb6 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -2880,6 +2880,17 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env, __update_reg_bounds(dst_reg); __reg_deduce_bounds(dst_reg); __reg_bound_offset(dst_reg); + + /* For unprivileged we require that resulting offset must be in bounds + * in order to be able to sanitize access later on. + */ + if (!env->allow_ptr_leaks && dst_reg->type == PTR_TO_MAP_VALUE && + check_map_access(env, dst, dst_reg->off, 1, false)) { + verbose(env, "R%d pointer arithmetic of map value goes out of range, prohibited for !root\n", + dst); + return -EACCES; + } + return 0; } From 5332dda94f632e56444d1c081e4683463dd64278 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Mon, 28 Jan 2019 21:28:25 +0100 Subject: [PATCH 091/104] bpf: restrict stack pointer arithmetic for unprivileged [ commit e4298d25830a866cc0f427d4bccb858e76715859 upstream ] Restrict stack pointer arithmetic for unprivileged users in that arithmetic itself must not go out of bounds as opposed to the actual access later on. Therefore after each adjust_ptr_min_max_vals() with a stack pointer as a destination we simulate a check_stack_access() of 1 byte on the destination and once that fails the program is rejected for unprivileged program loads. This is analog to map value pointer arithmetic and needed for masking later on. Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: Alexei Starovoitov Signed-off-by: Daniel Borkmann Signed-off-by: Sasha Levin --- kernel/bpf/verifier.c | 63 ++++++++++++++++++++++++++++--------------- 1 file changed, 41 insertions(+), 22 deletions(-) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 110ca915cfb6..aa2944d54e7a 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -1238,6 +1238,31 @@ static int check_stack_read(struct bpf_verifier_env *env, } } +static int check_stack_access(struct bpf_verifier_env *env, + const struct bpf_reg_state *reg, + int off, int size) +{ + /* Stack accesses must be at a fixed offset, so that we + * can determine what type of data were returned. See + * check_stack_read(). 
+ */ + if (!tnum_is_const(reg->var_off)) { + char tn_buf[48]; + + tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off); + verbose(env, "variable stack access var_off=%s off=%d size=%d", + tn_buf, off, size); + return -EACCES; + } + + if (off >= 0 || off < -MAX_BPF_STACK) { + verbose(env, "invalid stack off=%d size=%d\n", off, size); + return -EACCES; + } + + return 0; +} + /* check read/write into map element returned by bpf_map_lookup_elem() */ static int __check_map_access(struct bpf_verifier_env *env, u32 regno, int off, int size, bool zero_size_allowed) @@ -1736,24 +1761,10 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn } } else if (reg->type == PTR_TO_STACK) { - /* stack accesses must be at a fixed offset, so that we can - * determine what type of data were returned. - * See check_stack_read(). - */ - if (!tnum_is_const(reg->var_off)) { - char tn_buf[48]; - - tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off); - verbose(env, "variable stack access var_off=%s off=%d size=%d", - tn_buf, off, size); - return -EACCES; - } off += reg->var_off.value; - if (off >= 0 || off < -MAX_BPF_STACK) { - verbose(env, "invalid stack off=%d size=%d\n", off, - size); - return -EACCES; - } + err = check_stack_access(env, reg, off, size); + if (err) + return err; state = func(env, reg); err = update_stack_depth(env, state, off); @@ -2884,11 +2895,19 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env, /* For unprivileged we require that resulting offset must be in bounds * in order to be able to sanitize access later on. */ - if (!env->allow_ptr_leaks && dst_reg->type == PTR_TO_MAP_VALUE && - check_map_access(env, dst, dst_reg->off, 1, false)) { - verbose(env, "R%d pointer arithmetic of map value goes out of range, prohibited for !root\n", - dst); - return -EACCES; + if (!env->allow_ptr_leaks) { + if (dst_reg->type == PTR_TO_MAP_VALUE && + check_map_access(env, dst, dst_reg->off, 1, false)) { + verbose(env, "R%d pointer arithmetic of map value goes out of range, " + "prohibited for !root\n", dst); + return -EACCES; + } else if (dst_reg->type == PTR_TO_STACK && + check_stack_access(env, dst_reg, dst_reg->off + + dst_reg->var_off.value, 1)) { + verbose(env, "R%d stack pointer arithmetic goes out of range, " + "prohibited for !root\n", dst); + return -EACCES; + } } return 0; From 44f8fc6499807c03d30921486e059b951dcea63d Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Mon, 28 Jan 2019 21:28:26 +0100 Subject: [PATCH 092/104] bpf: restrict unknown scalars of mixed signed bounds for unprivileged [ commit 9d7eceede769f90b66cfa06ad5b357140d5141ed upstream ] For unknown scalars of mixed signed bounds, meaning their smin_value is negative and their smax_value is positive, we need to reject arithmetic with pointer to map value. For unprivileged the goal is to mask every map pointer arithmetic and this cannot reliably be done when it is unknown at verification time whether the scalar value is negative or positive. Given this is a corner case, the likelihood of breaking should be very small. 
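A hedged sketch of the pattern this change refuses for unprivileged loaders (hypothetical
program, not from the patch; the map and section names are made up and a modern
libbpf-style map definition is assumed purely for illustration):

	#include <linux/bpf.h>
	#include <bpf/bpf_helpers.h>

	struct {
		__uint(type, BPF_MAP_TYPE_ARRAY);
		__uint(max_entries, 1);
		__type(key, __u32);
		__type(value, __u64);
	} vals SEC(".maps");

	SEC("socket")
	int mixed_bounds(struct __sk_buff *skb)
	{
		__u32 key = 0;
		__u64 *v = bpf_map_lookup_elem(&vals, &key);
		long off;

		if (!v)
			return 0;

		off = *v;			/* unknown scalar read from map memory  */
		if (off < -2 || off > 2)	/* bounds now [-2, 2]: smin < 0 < smax  */
			return 0;

		/* Pointer arithmetic with a mixed-sign scalar: at verification
		 * time it is unknown whether 'off' is negative or positive, so
		 * the offset cannot be masked reliably; with this change the
		 * arithmetic itself is rejected for !root loaders.
		 */
		return *((char *)v + off);
	}

	char _license[] SEC("license") = "GPL";
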
Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: Alexei Starovoitov Signed-off-by: Daniel Borkmann Signed-off-by: Sasha Levin --- kernel/bpf/verifier.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index aa2944d54e7a..fbaa3b9e1d71 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -2712,8 +2712,8 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env, smin_ptr = ptr_reg->smin_value, smax_ptr = ptr_reg->smax_value; u64 umin_val = off_reg->umin_value, umax_val = off_reg->umax_value, umin_ptr = ptr_reg->umin_value, umax_ptr = ptr_reg->umax_value; + u32 dst = insn->dst_reg, src = insn->src_reg; u8 opcode = BPF_OP(insn->code); - u32 dst = insn->dst_reg; dst_reg = ®s[dst]; @@ -2749,6 +2749,12 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env, dst); return -EACCES; } + if (ptr_reg->type == PTR_TO_MAP_VALUE && + !env->allow_ptr_leaks && !known && (smin_val < 0) != (smax_val < 0)) { + verbose(env, "R%d has unknown scalar with mixed signed bounds, pointer arithmetic with it prohibited for !root\n", + off_reg == dst_reg ? dst : src); + return -EACCES; + } /* In case of 'scalar += pointer', dst_reg inherits pointer type and id. * The id may be overwritten later if we create a new variable offset. From 4f7f708d0e6c7ba7dc3387b3561d950a3a2ff931 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Mon, 28 Jan 2019 21:28:27 +0100 Subject: [PATCH 093/104] bpf: fix check_map_access smin_value test when pointer contains offset [ commit b7137c4eab85c1cf3d46acdde90ce1163b28c873 upstream ] In check_map_access() we probe actual bounds through __check_map_access() with offset of reg->smin_value + off for lower bound and offset of reg->umax_value + off for the upper bound. However, even though the reg->smin_value could have a negative value, the final result of the sum with off could be positive when pointer arithmetic with known and unknown scalars is combined. In this case we reject the program with an error such as "R min value is negative, either use unsigned index or do a if (index >=0) check." even though the access itself would be fine. Therefore extend the check to probe whether the actual resulting reg->smin_value + off is less than zero. Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: Alexei Starovoitov Signed-off-by: Daniel Borkmann Signed-off-by: Sasha Levin --- kernel/bpf/verifier.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index fbaa3b9e1d71..f9d5aea4891d 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -1294,13 +1294,17 @@ static int check_map_access(struct bpf_verifier_env *env, u32 regno, */ if (env->log.level) print_verifier_state(env, state); + /* The minimum value is only important with signed * comparisons where we can't assume the floor of a * value is 0. If we are using signed variables for our * index'es we need to make sure that whatever we use * will have a set floor within our range. 
*/ - if (reg->smin_value < 0) { + if (reg->smin_value < 0 && + (reg->smin_value == S64_MIN || + (off + reg->smin_value != (s64)(s32)(off + reg->smin_value)) || + reg->smin_value + off < 0)) { verbose(env, "R%d min value is negative, either use unsigned index or do a if (index >=0) check.\n", regno); return -EACCES; From f92a819b4cbef8c9527d9797110544b2055a4b96 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Mon, 28 Jan 2019 21:28:28 +0100 Subject: [PATCH 094/104] bpf: prevent out of bounds speculation on pointer arithmetic [ commit 979d63d50c0c0f7bc537bf821e056cc9fe5abd38 upstream ] Jann reported that the original commit back in b2157399cc98 ("bpf: prevent out-of-bounds speculation") was not sufficient to stop CPU from speculating out of bounds memory access: While b2157399cc98 only focussed on masking array map access for unprivileged users for tail calls and data access such that the user provided index gets sanitized from BPF program and syscall side, there is still a more generic form affected from BPF programs that applies to most maps that hold user data in relation to dynamic map access when dealing with unknown scalars or "slow" known scalars as access offset, for example: - Load a map value pointer into R6 - Load an index into R7 - Do a slow computation (e.g. with a memory dependency) that loads a limit into R8 (e.g. load the limit from a map for high latency, then mask it to make the verifier happy) - Exit if R7 >= R8 (mispredicted branch) - Load R0 = R6[R7] - Load R0 = R6[R0] For unknown scalars there are two options in the BPF verifier where we could derive knowledge from in order to guarantee safe access to the memory: i) While /<=/>= variants won't allow to derive any lower or upper bounds from the unknown scalar where it would be safe to add it to the map value pointer, it is possible through ==/!= test however. ii) another option is to transform the unknown scalar into a known scalar, for example, through ALU ops combination such as R &= followed by R |= or any similar combination where the original information from the unknown scalar would be destroyed entirely leaving R with a constant. The initial slow load still precedes the latter ALU ops on that register, so the CPU executes speculatively from that point. Once we have the known scalar, any compare operation would work then. A third option only involving registers with known scalars could be crafted as described in [0] where a CPU port (e.g. Slow Int unit) would be filled with many dependent computations such that the subsequent condition depending on its outcome has to wait for evaluation on its execution port and thereby executing speculatively if the speculated code can be scheduled on a different execution port, or any other form of mistraining as described in [1], for example. Given this is not limited to only unknown scalars, not only map but also stack access is affected since both is accessible for unprivileged users and could potentially be used for out of bounds access under speculation. In order to prevent any of these cases, the verifier is now sanitizing pointer arithmetic on the offset such that any out of bounds speculation would be masked in a way where the pointer arithmetic result in the destination register will stay unchanged, meaning offset masked into zero similar as in array_index_nospec() case. 
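A plain-C model of that masking idea (illustrative only; sanitize_off() is a made-up
name, and this is not the verifier's actual BPF rewrite, which is described below):

	#include <stdint.h>
	#include <stdio.h>

	/* Keep 'off' iff it lies in [0, limit); otherwise force it to 0 without
	 * branching, in the spirit of array_index_nospec(). The signed right
	 * shift models the BPF_ARSH instruction (arithmetic shift). Assumes a
	 * sane 'limit' well below 2^63.
	 */
	static uint64_t sanitize_off(uint64_t off, uint64_t limit)
	{
		int64_t chk = (int64_t)(limit - 1 - off);

		chk |= (int64_t)off;	/* sign bit set iff off is outside [0, limit) */
		chk = -chk;
		return off & (uint64_t)(chk >> 63);	/* all-ones mask when in bounds */
	}

	int main(void)
	{
		printf("%llu\n", (unsigned long long)sanitize_off(12, 64));	/* 12 */
		printf("%llu\n", (unsigned long long)sanitize_off(80, 64));	/* 0  */
		return 0;
	}

In the verifier itself this arithmetic is emitted as a short inlined BPF instruction
sequence rather than C, using a scratch register as discussed next.
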
With regards to implementation, there are three options that were considered: i) new insn for sanitation, ii) push/pop insn and sanitation as inlined BPF, iii) reuse of ax register and sanitation as inlined BPF. Option i) has the downside that we end up using from reserved bits in the opcode space, but also that we would require each JIT to emit masking as native arch opcodes meaning mitigation would have slow adoption till everyone implements it eventually which is counter-productive. Option ii) and iii) have both in common that a temporary register is needed in order to implement the sanitation as inlined BPF since we are not allowed to modify the source register. While a push / pop insn in ii) would be useful to have in any case, it requires once again that every JIT needs to implement it first. While possible, amount of changes needed would also be unsuitable for a -stable patch. Therefore, the path which has fewer changes, less BPF instructions for the mitigation and does not require anything to be changed in the JITs is option iii) which this work is pursuing. The ax register is already mapped to a register in all JITs (modulo arm32 where it's mapped to stack as various other BPF registers there) and used in constant blinding for JITs-only so far. It can be reused for verifier rewrites under certain constraints. The interpreter's tmp "register" has therefore been remapped into extending the register set with hidden ax register and reusing that for a number of instructions that needed the prior temporary variable internally (e.g. div, mod). This allows for zero increase in stack space usage in the interpreter, and enables (restricted) generic use in rewrites otherwise as long as such a patchlet does not make use of these instructions. The sanitation mask is dynamic and relative to the offset the map value or stack pointer currently holds. There are various cases that need to be taken under consideration for the masking, e.g. such operation could look as follows: ptr += val or val += ptr or ptr -= val. Thus, the value to be sanitized could reside either in source or in destination register, and the limit is different depending on whether the ALU op is addition or subtraction and depending on the current known and bounded offset. The limit is derived as follows: limit := max_value_size - (smin_value + off). For subtraction: limit := umax_value + off. This holds because we do not allow any pointer arithmetic that would temporarily go out of bounds or would have an unknown value with mixed signed bounds where it is unclear at verification time whether the actual runtime value would be either negative or positive. For example, we have a derived map pointer value with constant offset and bounded one, so limit based on smin_value works because the verifier requires that statically analyzed arithmetic on the pointer must be in bounds, and thus it checks if resulting smin_value + off and umax_value + off is still within map value bounds at time of arithmetic in addition to time of access. Similarly, for the case of stack access we derive the limit as follows: MAX_BPF_STACK + off for subtraction and -off for the case of addition where off := ptr_reg->off + ptr_reg->var_off.value. Subtraction is a special case for the masking which can be in form of ptr += -val, ptr -= -val, or ptr -= val. 
In the first two cases where we know that the value is negative, we need to temporarily negate the value in order to do the sanitation on a positive value where we later swap the ALU op, and restore original source register if the value was in source. The sanitation of pointer arithmetic alone is still not fully sufficient as is, since a scenario like the following could happen ... PTR += 0x1000 (e.g. K-based imm) PTR -= BIG_NUMBER_WITH_SLOW_COMPARISON PTR += 0x1000 PTR -= BIG_NUMBER_WITH_SLOW_COMPARISON [...] ... which under speculation could end up as ... PTR += 0x1000 PTR -= 0 [ truncated by mitigation ] PTR += 0x1000 PTR -= 0 [ truncated by mitigation ] [...] ... and therefore still access out of bounds. To prevent such case, the verifier is also analyzing safety for potential out of bounds access under speculative execution. Meaning, it is also simulating pointer access under truncation. We therefore "branch off" and push the current verification state after the ALU operation with known 0 to the verification stack for later analysis. Given the current path analysis succeeded it is likely that the one under speculation can be pruned. In any case, it is also subject to existing complexity limits and therefore anything beyond this point will be rejected. In terms of pruning, it needs to be ensured that the verification state from speculative execution simulation must never prune a non-speculative execution path, therefore, we mark verifier state accordingly at the time of push_stack(). If verifier detects out of bounds access under speculative execution from one of the possible paths that includes a truncation, it will reject such program. Given we mask every reg-based pointer arithmetic for unprivileged programs, we've been looking into how it could affect real-world programs in terms of size increase. As the majority of programs are targeted for privileged-only use case, we've unconditionally enabled masking (with its alu restrictions on top of it) for privileged programs for the sake of testing in order to check i) whether they get rejected in its current form, and ii) by how much the number of instructions and size will increase. We've tested this by using Katran, Cilium and test_l4lb from the kernel selftests. For Katran we've evaluated balancer_kern.o, Cilium bpf_lxc.o and an older test object bpf_lxc_opt_-DUNKNOWN.o and l4lb we've used test_l4lb.o as well as test_l4lb_noinline.o. We found that none of the programs got rejected by the verifier with this change, and that impact is rather minimal to none. balancer_kern.o had 13,904 bytes (1,738 insns) xlated and 7,797 bytes JITed before and after the change. Most complex program in bpf_lxc.o had 30,544 bytes (3,817 insns) xlated and 18,538 bytes JITed before and after and none of the other tail call programs in bpf_lxc.o had any changes either. For the older bpf_lxc_opt_-DUNKNOWN.o object we found a small increase from 20,616 bytes (2,576 insns) and 12,536 bytes JITed before to 20,664 bytes (2,582 insns) and 12,558 bytes JITed after the change. Other programs from that object file had similar small increase. Both test_l4lb.o had no change and remained at 6,544 bytes (817 insns) xlated and 3,401 bytes JITed and for test_l4lb_noinline.o constant at 5,080 bytes (634 insns) xlated and 3,313 bytes JITed. This can be explained in that LLVM typically optimizes stack based pointer arithmetic by using K-based operations and that use of dynamic map access is not overly frequent. 
However, in future we may decide to optimize the algorithm further under known guarantees from branch and value speculation. Latter seems also unclear in terms of prediction heuristics that today's CPUs apply as well as whether there could be collisions in e.g. the predictor's Value History/Pattern Table for triggering out of bounds access, thus masking is performed unconditionally at this point but could be subject to relaxation later on. We were generally also brainstorming various other approaches for mitigation, but the blocker was always lack of available registers at runtime and/or overhead for runtime tracking of limits belonging to a specific pointer. Thus, we found this to be minimally intrusive under given constraints. With that in place, a simple example with sanitized access on unprivileged load at post-verification time looks as follows: # bpftool prog dump xlated id 282 [...] 28: (79) r1 = *(u64 *)(r7 +0) 29: (79) r2 = *(u64 *)(r7 +8) 30: (57) r1 &= 15 31: (79) r3 = *(u64 *)(r0 +4608) 32: (57) r3 &= 1 33: (47) r3 |= 1 34: (2d) if r2 > r3 goto pc+19 35: (b4) (u32) r11 = (u32) 20479 | 36: (1f) r11 -= r2 | Dynamic sanitation for pointer 37: (4f) r11 |= r2 | arithmetic with registers 38: (87) r11 = -r11 | containing bounded or known 39: (c7) r11 s>>= 63 | scalars in order to prevent 40: (5f) r11 &= r2 | out of bounds speculation. 41: (0f) r4 += r11 | 42: (71) r4 = *(u8 *)(r4 +0) 43: (6f) r4 <<= r1 [...] For the case where the scalar sits in the destination register as opposed to the source register, the following code is emitted for the above example: [...] 16: (b4) (u32) r11 = (u32) 20479 17: (1f) r11 -= r2 18: (4f) r11 |= r2 19: (87) r11 = -r11 20: (c7) r11 s>>= 63 21: (5f) r2 &= r11 22: (0f) r2 += r0 23: (61) r0 = *(u32 *)(r2 +0) [...] JIT blinding example with non-conflicting use of r10: [...] d5: je 0x0000000000000106 _ d7: mov 0x0(%rax),%edi | da: mov $0xf153246,%r10d | Index load from map value and e0: xor $0xf153259,%r10 | (const blinded) mask with 0x1f. e7: and %r10,%rdi |_ ea: mov $0x2f,%r10d | f0: sub %rdi,%r10 | Sanitized addition. Both use r10 f3: or %rdi,%r10 | but do not interfere with each f6: neg %r10 | other. (Neither do these instructions f9: sar $0x3f,%r10 | interfere with the use of ax as temp fd: and %r10,%rdi | in interpreter.) 100: add %rax,%rdi |_ 103: mov 0x0(%rdi),%eax [...] Tested that it fixes Jann's reproducer, and also checked that test_verifier and test_progs suite with interpreter, JIT and JIT with hardening enabled on x86-64 and arm64 runs successfully. 
[0] Speculose: Analyzing the Security Implications of Speculative Execution in CPUs, Giorgi Maisuradze and Christian Rossow, https://arxiv.org/pdf/1801.04084.pdf [1] A Systematic Evaluation of Transient Execution Attacks and Defenses, Claudio Canella, Jo Van Bulck, Michael Schwarz, Moritz Lipp, Benjamin von Berg, Philipp Ortner, Frank Piessens, Dmitry Evtyushkin, Daniel Gruss, https://arxiv.org/pdf/1811.05441.pdf Fixes: b2157399cc98 ("bpf: prevent out-of-bounds speculation") Reported-by: Jann Horn Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: Alexei Starovoitov Signed-off-by: Daniel Borkmann Signed-off-by: Sasha Levin --- include/linux/bpf_verifier.h | 10 ++ kernel/bpf/verifier.c | 186 +++++++++++++++++++++++++++++++++-- 2 files changed, 189 insertions(+), 7 deletions(-) diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index b1587ffdea7b..2716dcbfb2f1 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -134,6 +134,7 @@ struct bpf_verifier_state { struct bpf_func_state *frame[MAX_CALL_FRAMES]; struct bpf_verifier_state *parent; u32 curframe; + bool speculative; }; /* linked list of verifier states used to prune search */ @@ -142,15 +143,24 @@ struct bpf_verifier_state_list { struct bpf_verifier_state_list *next; }; +/* Possible states for alu_state member. */ +#define BPF_ALU_SANITIZE_SRC 1U +#define BPF_ALU_SANITIZE_DST 2U +#define BPF_ALU_NEG_VALUE (1U << 2) +#define BPF_ALU_SANITIZE (BPF_ALU_SANITIZE_SRC | \ + BPF_ALU_SANITIZE_DST) + struct bpf_insn_aux_data { union { enum bpf_reg_type ptr_type; /* pointer type for load/store insns */ unsigned long map_state; /* pointer/poison value for maps */ s32 call_imm; /* saved imm field of call insn */ + u32 alu_limit; /* limit for add/sub register with pointer */ }; int ctx_field_size; /* the ctx field size for load insn, maybe 0 */ int sanitize_stack_off; /* stack slot to be cleared */ bool seen; /* this insn was processed by the verifier */ + u8 alu_state; /* used in combination with alu_limit */ }; #define MAX_USED_MAPS 64 /* max number of maps accessed by one eBPF program */ diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index f9d5aea4891d..9ab0adedb174 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -466,6 +466,7 @@ static int copy_verifier_state(struct bpf_verifier_state *dst_state, free_func_state(dst_state->frame[i]); dst_state->frame[i] = NULL; } + dst_state->speculative = src->speculative; dst_state->curframe = src->curframe; dst_state->parent = src->parent; for (i = 0; i <= src->curframe; i++) { @@ -511,7 +512,8 @@ static int pop_stack(struct bpf_verifier_env *env, int *prev_insn_idx, } static struct bpf_verifier_state *push_stack(struct bpf_verifier_env *env, - int insn_idx, int prev_insn_idx) + int insn_idx, int prev_insn_idx, + bool speculative) { struct bpf_verifier_state *cur = env->cur_state; struct bpf_verifier_stack_elem *elem; @@ -529,6 +531,7 @@ static struct bpf_verifier_state *push_stack(struct bpf_verifier_env *env, err = copy_verifier_state(&elem->st, cur); if (err) goto err; + elem->st.speculative |= speculative; if (env->stack_size > BPF_COMPLEXITY_LIMIT_STACK) { verbose(env, "BPF program is too complex\n"); goto err; @@ -2698,6 +2701,102 @@ static bool check_reg_sane_offset(struct bpf_verifier_env *env, return true; } +static struct bpf_insn_aux_data *cur_aux(struct bpf_verifier_env *env) +{ + return &env->insn_aux_data[env->insn_idx]; +} + +static int retrieve_ptr_limit(const struct bpf_reg_state *ptr_reg, + 
u32 *ptr_limit, u8 opcode, bool off_is_neg) +{ + bool mask_to_left = (opcode == BPF_ADD && off_is_neg) || + (opcode == BPF_SUB && !off_is_neg); + u32 off; + + switch (ptr_reg->type) { + case PTR_TO_STACK: + off = ptr_reg->off + ptr_reg->var_off.value; + if (mask_to_left) + *ptr_limit = MAX_BPF_STACK + off; + else + *ptr_limit = -off; + return 0; + case PTR_TO_MAP_VALUE: + if (mask_to_left) { + *ptr_limit = ptr_reg->umax_value + ptr_reg->off; + } else { + off = ptr_reg->smin_value + ptr_reg->off; + *ptr_limit = ptr_reg->map_ptr->value_size - off; + } + return 0; + default: + return -EINVAL; + } +} + +static int sanitize_ptr_alu(struct bpf_verifier_env *env, + struct bpf_insn *insn, + const struct bpf_reg_state *ptr_reg, + struct bpf_reg_state *dst_reg, + bool off_is_neg) +{ + struct bpf_verifier_state *vstate = env->cur_state; + struct bpf_insn_aux_data *aux = cur_aux(env); + bool ptr_is_dst_reg = ptr_reg == dst_reg; + u8 opcode = BPF_OP(insn->code); + u32 alu_state, alu_limit; + struct bpf_reg_state tmp; + bool ret; + + if (env->allow_ptr_leaks || BPF_SRC(insn->code) == BPF_K) + return 0; + + /* We already marked aux for masking from non-speculative + * paths, thus we got here in the first place. We only care + * to explore bad access from here. + */ + if (vstate->speculative) + goto do_sim; + + alu_state = off_is_neg ? BPF_ALU_NEG_VALUE : 0; + alu_state |= ptr_is_dst_reg ? + BPF_ALU_SANITIZE_SRC : BPF_ALU_SANITIZE_DST; + + if (retrieve_ptr_limit(ptr_reg, &alu_limit, opcode, off_is_neg)) + return 0; + + /* If we arrived here from different branches with different + * limits to sanitize, then this won't work. + */ + if (aux->alu_state && + (aux->alu_state != alu_state || + aux->alu_limit != alu_limit)) + return -EACCES; + + /* Corresponding fixup done in fixup_bpf_calls(). */ + aux->alu_state = alu_state; + aux->alu_limit = alu_limit; + +do_sim: + /* Simulate and find potential out-of-bounds access under + * speculative execution from truncation as a result of + * masking when off was not within expected range. If off + * sits in dst, then we temporarily need to move ptr there + * to simulate dst (== 0) +/-= ptr. Needed, for example, + * for cases where we use K-based arithmetic in one direction + * and truncated reg-based in the other in order to explore + * bad access. + */ + if (!ptr_is_dst_reg) { + tmp = *dst_reg; + *dst_reg = *ptr_reg; + } + ret = push_stack(env, env->insn_idx + 1, env->insn_idx, true); + if (!ptr_is_dst_reg) + *dst_reg = tmp; + return !ret ? -EFAULT : 0; +} + /* Handles arithmetic on a pointer and a scalar: computes new min/max and var_off. * Caller should also handle BPF_MOV case separately. 
* If we return -EACCES, caller may want to try again treating pointer as a @@ -2718,6 +2817,7 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env, umin_ptr = ptr_reg->umin_value, umax_ptr = ptr_reg->umax_value; u32 dst = insn->dst_reg, src = insn->src_reg; u8 opcode = BPF_OP(insn->code); + int ret; dst_reg = ®s[dst]; @@ -2772,6 +2872,11 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env, switch (opcode) { case BPF_ADD: + ret = sanitize_ptr_alu(env, insn, ptr_reg, dst_reg, smin_val < 0); + if (ret < 0) { + verbose(env, "R%d tried to add from different maps or paths\n", dst); + return ret; + } /* We can take a fixed offset as long as it doesn't overflow * the s32 'off' field */ @@ -2822,6 +2927,11 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env, } break; case BPF_SUB: + ret = sanitize_ptr_alu(env, insn, ptr_reg, dst_reg, smin_val < 0); + if (ret < 0) { + verbose(env, "R%d tried to sub from different maps or paths\n", dst); + return ret; + } if (dst_reg == off_reg) { /* scalar -= pointer. Creates an unknown scalar */ verbose(env, "R%d tried to subtract pointer from scalar\n", @@ -3997,7 +4107,8 @@ static int check_cond_jmp_op(struct bpf_verifier_env *env, } } - other_branch = push_stack(env, *insn_idx + insn->off + 1, *insn_idx); + other_branch = push_stack(env, *insn_idx + insn->off + 1, *insn_idx, + false); if (!other_branch) return -EFAULT; other_branch_regs = other_branch->frame[other_branch->curframe]->regs; @@ -4712,6 +4823,12 @@ static bool states_equal(struct bpf_verifier_env *env, if (old->curframe != cur->curframe) return false; + /* Verification state from speculative execution simulation + * must never prune a non-speculative execution one. + */ + if (old->speculative && !cur->speculative) + return false; + /* for states to be equal callsites have to be the same * and all frame states need to be equivalent */ @@ -4863,7 +4980,7 @@ static int do_check(struct bpf_verifier_env *env) if (!state) return -ENOMEM; state->curframe = 0; - state->parent = NULL; + state->speculative = false; state->frame[0] = kzalloc(sizeof(struct bpf_func_state), GFP_KERNEL); if (!state->frame[0]) { kfree(state); @@ -4903,8 +5020,10 @@ static int do_check(struct bpf_verifier_env *env) /* found equivalent state, can prune the search */ if (env->log.level) { if (do_print_state) - verbose(env, "\nfrom %d to %d: safe\n", - env->prev_insn_idx, env->insn_idx); + verbose(env, "\nfrom %d to %d%s: safe\n", + env->prev_insn_idx, env->insn_idx, + env->cur_state->speculative ? + " (speculative execution)" : ""); else verbose(env, "%d: safe\n", env->insn_idx); } @@ -4921,8 +5040,10 @@ static int do_check(struct bpf_verifier_env *env) if (env->log.level > 1) verbose(env, "%d:", env->insn_idx); else - verbose(env, "\nfrom %d to %d:", - env->prev_insn_idx, env->insn_idx); + verbose(env, "\nfrom %d to %d%s:", + env->prev_insn_idx, env->insn_idx, + env->cur_state->speculative ? 
+ " (speculative execution)" : ""); print_verifier_state(env, state->frame[state->curframe]); do_print_state = false; } @@ -5869,6 +5990,57 @@ static int fixup_bpf_calls(struct bpf_verifier_env *env) continue; } + if (insn->code == (BPF_ALU64 | BPF_ADD | BPF_X) || + insn->code == (BPF_ALU64 | BPF_SUB | BPF_X)) { + const u8 code_add = BPF_ALU64 | BPF_ADD | BPF_X; + const u8 code_sub = BPF_ALU64 | BPF_SUB | BPF_X; + struct bpf_insn insn_buf[16]; + struct bpf_insn *patch = &insn_buf[0]; + bool issrc, isneg; + u32 off_reg; + + aux = &env->insn_aux_data[i + delta]; + if (!aux->alu_state) + continue; + + isneg = aux->alu_state & BPF_ALU_NEG_VALUE; + issrc = (aux->alu_state & BPF_ALU_SANITIZE) == + BPF_ALU_SANITIZE_SRC; + + off_reg = issrc ? insn->src_reg : insn->dst_reg; + if (isneg) + *patch++ = BPF_ALU64_IMM(BPF_MUL, off_reg, -1); + *patch++ = BPF_MOV32_IMM(BPF_REG_AX, aux->alu_limit - 1); + *patch++ = BPF_ALU64_REG(BPF_SUB, BPF_REG_AX, off_reg); + *patch++ = BPF_ALU64_REG(BPF_OR, BPF_REG_AX, off_reg); + *patch++ = BPF_ALU64_IMM(BPF_NEG, BPF_REG_AX, 0); + *patch++ = BPF_ALU64_IMM(BPF_ARSH, BPF_REG_AX, 63); + if (issrc) { + *patch++ = BPF_ALU64_REG(BPF_AND, BPF_REG_AX, + off_reg); + insn->src_reg = BPF_REG_AX; + } else { + *patch++ = BPF_ALU64_REG(BPF_AND, off_reg, + BPF_REG_AX); + } + if (isneg) + insn->code = insn->code == code_add ? + code_sub : code_add; + *patch++ = *insn; + if (issrc && isneg) + *patch++ = BPF_ALU64_IMM(BPF_MUL, off_reg, -1); + cnt = patch - insn_buf; + + new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt); + if (!new_prog) + return -ENOMEM; + + delta += cnt - 1; + env->prog = prog = new_prog; + insn = new_prog->insnsi + i + delta; + continue; + } + if (insn->code != (BPF_JMP | BPF_CALL)) continue; if (insn->src_reg == BPF_PSEUDO_CALL) From eed84f94ff8d97abcbc5706f6f9427520fd60a10 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Mon, 28 Jan 2019 21:28:29 +0100 Subject: [PATCH 095/104] bpf: fix sanitation of alu op with pointer / scalar type from different paths [ commit d3bd7413e0ca40b60cf60d4003246d067cafdeda upstream ] While 979d63d50c0c ("bpf: prevent out of bounds speculation on pointer arithmetic") took care of rejecting alu op on pointer when e.g. pointer came from two different map values with different map properties such as value size, Jann reported that a case was not covered yet when a given alu op is used in both "ptr_reg += reg" and "numeric_reg += reg" from different branches where we would incorrectly try to sanitize based on the pointer's limit. Catch this corner case and reject the program instead. 
Fixes: 979d63d50c0c ("bpf: prevent out of bounds speculation on pointer arithmetic") Reported-by: Jann Horn Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: Alexei Starovoitov Signed-off-by: Daniel Borkmann Signed-off-by: Sasha Levin --- include/linux/bpf_verifier.h | 1 + kernel/bpf/verifier.c | 61 ++++++++++++++++++++++++++++-------- 2 files changed, 49 insertions(+), 13 deletions(-) diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index 2716dcbfb2f1..91393724e933 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -147,6 +147,7 @@ struct bpf_verifier_state_list { #define BPF_ALU_SANITIZE_SRC 1U #define BPF_ALU_SANITIZE_DST 2U #define BPF_ALU_NEG_VALUE (1U << 2) +#define BPF_ALU_NON_POINTER (1U << 3) #define BPF_ALU_SANITIZE (BPF_ALU_SANITIZE_SRC | \ BPF_ALU_SANITIZE_DST) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 9ab0adedb174..4d81be2d0739 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -2734,6 +2734,40 @@ static int retrieve_ptr_limit(const struct bpf_reg_state *ptr_reg, } } +static bool can_skip_alu_sanitation(const struct bpf_verifier_env *env, + const struct bpf_insn *insn) +{ + return env->allow_ptr_leaks || BPF_SRC(insn->code) == BPF_K; +} + +static int update_alu_sanitation_state(struct bpf_insn_aux_data *aux, + u32 alu_state, u32 alu_limit) +{ + /* If we arrived here from different branches with different + * state or limits to sanitize, then this won't work. + */ + if (aux->alu_state && + (aux->alu_state != alu_state || + aux->alu_limit != alu_limit)) + return -EACCES; + + /* Corresponding fixup done in fixup_bpf_calls(). */ + aux->alu_state = alu_state; + aux->alu_limit = alu_limit; + return 0; +} + +static int sanitize_val_alu(struct bpf_verifier_env *env, + struct bpf_insn *insn) +{ + struct bpf_insn_aux_data *aux = cur_aux(env); + + if (can_skip_alu_sanitation(env, insn)) + return 0; + + return update_alu_sanitation_state(aux, BPF_ALU_NON_POINTER, 0); +} + static int sanitize_ptr_alu(struct bpf_verifier_env *env, struct bpf_insn *insn, const struct bpf_reg_state *ptr_reg, @@ -2748,7 +2782,7 @@ static int sanitize_ptr_alu(struct bpf_verifier_env *env, struct bpf_reg_state tmp; bool ret; - if (env->allow_ptr_leaks || BPF_SRC(insn->code) == BPF_K) + if (can_skip_alu_sanitation(env, insn)) return 0; /* We already marked aux for masking from non-speculative @@ -2764,19 +2798,8 @@ static int sanitize_ptr_alu(struct bpf_verifier_env *env, if (retrieve_ptr_limit(ptr_reg, &alu_limit, opcode, off_is_neg)) return 0; - - /* If we arrived here from different branches with different - * limits to sanitize, then this won't work. - */ - if (aux->alu_state && - (aux->alu_state != alu_state || - aux->alu_limit != alu_limit)) + if (update_alu_sanitation_state(aux, alu_state, alu_limit)) return -EACCES; - - /* Corresponding fixup done in fixup_bpf_calls(). */ - aux->alu_state = alu_state; - aux->alu_limit = alu_limit; - do_sim: /* Simulate and find potential out-of-bounds access under * speculative execution from truncation as a result of @@ -3048,6 +3071,8 @@ static int adjust_scalar_min_max_vals(struct bpf_verifier_env *env, s64 smin_val, smax_val; u64 umin_val, umax_val; u64 insn_bitness = (BPF_CLASS(insn->code) == BPF_ALU64) ? 
64 : 32; + u32 dst = insn->dst_reg; + int ret; if (insn_bitness == 32) { /* Relevant for 32-bit RSH: Information can propagate towards @@ -3082,6 +3107,11 @@ static int adjust_scalar_min_max_vals(struct bpf_verifier_env *env, switch (opcode) { case BPF_ADD: + ret = sanitize_val_alu(env, insn); + if (ret < 0) { + verbose(env, "R%d tried to add from different pointers or scalars\n", dst); + return ret; + } if (signed_add_overflows(dst_reg->smin_value, smin_val) || signed_add_overflows(dst_reg->smax_value, smax_val)) { dst_reg->smin_value = S64_MIN; @@ -3101,6 +3131,11 @@ static int adjust_scalar_min_max_vals(struct bpf_verifier_env *env, dst_reg->var_off = tnum_add(dst_reg->var_off, src_reg.var_off); break; case BPF_SUB: + ret = sanitize_val_alu(env, insn); + if (ret < 0) { + verbose(env, "R%d tried to sub from different pointers or scalars\n", dst); + return ret; + } if (signed_sub_overflows(dst_reg->smin_value, smax_val) || signed_sub_overflows(dst_reg->smax_value, smin_val)) { /* Overflow possible, we know nothing */ From 37c9e3ee42398badb7ae1f080b6a596936497f43 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Mon, 28 Jan 2019 21:28:30 +0100 Subject: [PATCH 096/104] bpf: fix inner map masking to prevent oob under speculation [ commit 9d5564ddcf2a0f5ba3fa1c3a1f8a1b59ad309553 upstream ] During review I noticed that inner meta map setup for map in map is buggy in that it does not propagate all needed data from the reference map which the verifier is later accessing. In particular one such case is index masking to prevent out of bounds access under speculative execution due to missing the map's unpriv_array/index_mask field propagation. Fix this such that the verifier is generating the correct code for inlined lookups in case of unpriviledged use. Before patch (test_verifier's 'map in map access' dump): # bpftool prog dump xla id 3 0: (62) *(u32 *)(r10 -4) = 0 1: (bf) r2 = r10 2: (07) r2 += -4 3: (18) r1 = map[id:4] 5: (07) r1 += 272 | 6: (61) r0 = *(u32 *)(r2 +0) | 7: (35) if r0 >= 0x1 goto pc+6 | Inlined map in map lookup 8: (54) (u32) r0 &= (u32) 0 | with index masking for 9: (67) r0 <<= 3 | map->unpriv_array. 10: (0f) r0 += r1 | 11: (79) r0 = *(u64 *)(r0 +0) | 12: (15) if r0 == 0x0 goto pc+1 | 13: (05) goto pc+1 | 14: (b7) r0 = 0 | 15: (15) if r0 == 0x0 goto pc+11 16: (62) *(u32 *)(r10 -4) = 0 17: (bf) r2 = r10 18: (07) r2 += -4 19: (bf) r1 = r0 20: (07) r1 += 272 | 21: (61) r0 = *(u32 *)(r2 +0) | Index masking missing (!) 22: (35) if r0 >= 0x1 goto pc+3 | for inner map despite 23: (67) r0 <<= 3 | map->unpriv_array set. 24: (0f) r0 += r1 | 25: (05) goto pc+1 | 26: (b7) r0 = 0 | 27: (b7) r0 = 0 28: (95) exit After patch: # bpftool prog dump xla id 1 0: (62) *(u32 *)(r10 -4) = 0 1: (bf) r2 = r10 2: (07) r2 += -4 3: (18) r1 = map[id:2] 5: (07) r1 += 272 | 6: (61) r0 = *(u32 *)(r2 +0) | 7: (35) if r0 >= 0x1 goto pc+6 | Same inlined map in map lookup 8: (54) (u32) r0 &= (u32) 0 | with index masking due to 9: (67) r0 <<= 3 | map->unpriv_array. 10: (0f) r0 += r1 | 11: (79) r0 = *(u64 *)(r0 +0) | 12: (15) if r0 == 0x0 goto pc+1 | 13: (05) goto pc+1 | 14: (b7) r0 = 0 | 15: (15) if r0 == 0x0 goto pc+12 16: (62) *(u32 *)(r10 -4) = 0 17: (bf) r2 = r10 18: (07) r2 += -4 19: (bf) r1 = r0 20: (07) r1 += 272 | 21: (61) r0 = *(u32 *)(r2 +0) | 22: (35) if r0 >= 0x1 goto pc+4 | Now fixed inlined inner map 23: (54) (u32) r0 &= (u32) 0 | lookup with proper index masking 24: (67) r0 <<= 3 | for map->unpriv_array. 
25: (0f) r0 += r1 | 26: (05) goto pc+1 | 27: (b7) r0 = 0 | 28: (b7) r0 = 0 29: (95) exit Fixes: b2157399cc98 ("bpf: prevent out-of-bounds speculation") Signed-off-by: Daniel Borkmann Acked-by: Martin KaFai Lau Signed-off-by: Alexei Starovoitov Signed-off-by: Daniel Borkmann Signed-off-by: Sasha Levin --- kernel/bpf/map_in_map.c | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/kernel/bpf/map_in_map.c b/kernel/bpf/map_in_map.c index 3bfbf4464416..9670ee5ee74e 100644 --- a/kernel/bpf/map_in_map.c +++ b/kernel/bpf/map_in_map.c @@ -12,6 +12,7 @@ struct bpf_map *bpf_map_meta_alloc(int inner_map_ufd) { struct bpf_map *inner_map, *inner_map_meta; + u32 inner_map_meta_size; struct fd f; f = fdget(inner_map_ufd); @@ -35,7 +36,12 @@ struct bpf_map *bpf_map_meta_alloc(int inner_map_ufd) return ERR_PTR(-EINVAL); } - inner_map_meta = kzalloc(sizeof(*inner_map_meta), GFP_USER); + inner_map_meta_size = sizeof(*inner_map_meta); + /* In some cases verifier needs to access beyond just base map. */ + if (inner_map->ops == &array_map_ops) + inner_map_meta_size = sizeof(struct bpf_array); + + inner_map_meta = kzalloc(inner_map_meta_size, GFP_USER); if (!inner_map_meta) { fdput(f); return ERR_PTR(-ENOMEM); } @@ -45,9 +51,16 @@ struct bpf_map *bpf_map_meta_alloc(int inner_map_ufd) inner_map_meta->key_size = inner_map->key_size; inner_map_meta->value_size = inner_map->value_size; inner_map_meta->map_flags = inner_map->map_flags; - inner_map_meta->ops = inner_map->ops; inner_map_meta->max_entries = inner_map->max_entries; + /* Misc members not needed in bpf_map_meta_equal() check. */ + inner_map_meta->ops = inner_map->ops; + if (inner_map->ops == &array_map_ops) { + inner_map_meta->unpriv_array = inner_map->unpriv_array; + container_of(inner_map_meta, struct bpf_array, map)->index_mask = + container_of(inner_map, struct bpf_array, map)->index_mask; + } + fdput(f); return inner_map_meta; } From 48046a0177e6f8d91df2416d8086915c77a30b67 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Fri, 11 Jan 2019 15:18:22 +0100 Subject: [PATCH 097/104] s390/smp: Fix calling smp_call_ipl_cpu() from ipl CPU commit 60f1bf29c0b2519989927cae640cd1f50f59dc7f upstream. When calling smp_call_ipl_cpu() from the IPL CPU, we will try to read from pcpu_devices->lowcore. However, due to prefixing, that will result in reading from absolute address 0 on that CPU. We have to go via the actual lowcore instead. This means that right now, we will read lc->nodat_stack == 0 and therefore work on a very wrong stack. This BUG essentially broke rebooting under QEMU TCG (which will report a low address protection exception). And checking under KVM, it is broken there as well. With 1 VCPU it can be easily triggered. :/# echo 1 > /proc/sys/kernel/sysrq :/# echo b > /proc/sysrq-trigger [ 28.476745] sysrq: SysRq : Resetting [ 28.476793] Kernel stack overflow.
[ 28.476817] CPU: 0 PID: 424 Comm: sh Not tainted 5.0.0-rc1+ #13 [ 28.476820] Hardware name: IBM 2964 NE1 716 (KVM/Linux) [ 28.476826] Krnl PSW : 0400c00180000000 0000000000115c0c (pcpu_delegate+0x12c/0x140) [ 28.476861] R:0 T:1 IO:0 EX:0 Key:0 M:0 W:0 P:0 AS:3 CC:0 PM:0 RI:0 EA:3 [ 28.476863] Krnl GPRS: ffffffffffffffff 0000000000000000 000000000010dff8 0000000000000000 [ 28.476864] 0000000000000000 0000000000000000 0000000000ab7090 000003e0006efbf0 [ 28.476864] 000000000010dff8 0000000000000000 0000000000000000 0000000000000000 [ 28.476865] 000000007fffc000 0000000000730408 000003e0006efc58 0000000000000000 [ 28.476887] Krnl Code: 0000000000115bfe: 4170f000 la %r7,0(%r15) [ 28.476887] 0000000000115c02: 41f0a000 la %r15,0(%r10) [ 28.476887] #0000000000115c06: e370f0980024 stg %r7,152(%r15) [ 28.476887] >0000000000115c0c: c0e5fffff86e brasl %r14,114ce8 [ 28.476887] 0000000000115c12: 41f07000 la %r15,0(%r7) [ 28.476887] 0000000000115c16: a7f4ffa8 brc 15,115b66 [ 28.476887] 0000000000115c1a: 0707 bcr 0,%r7 [ 28.476887] 0000000000115c1c: 0707 bcr 0,%r7 [ 28.476901] Call Trace: [ 28.476902] Last Breaking-Event-Address: [ 28.476920] [<0000000000a01c4a>] arch_call_rest_init+0x22/0x80 [ 28.476927] Kernel panic - not syncing: Corrupt kernel stack, can't continue. [ 28.476930] CPU: 0 PID: 424 Comm: sh Not tainted 5.0.0-rc1+ #13 [ 28.476932] Hardware name: IBM 2964 NE1 716 (KVM/Linux) [ 28.476932] Call Trace: Fixes: 2f859d0dad81 ("s390/smp: reduce size of struct pcpu") Cc: stable@vger.kernel.org # 4.0+ Reported-by: Cornelia Huck Signed-off-by: David Hildenbrand Signed-off-by: Martin Schwidefsky Signed-off-by: Greg Kroah-Hartman --- arch/s390/kernel/smp.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c index e5735ab30488..da02f4087d61 100644 --- a/arch/s390/kernel/smp.c +++ b/arch/s390/kernel/smp.c @@ -371,9 +371,13 @@ void smp_call_online_cpu(void (*func)(void *), void *data) */ void smp_call_ipl_cpu(void (*func)(void *), void *data) { + struct lowcore *lc = pcpu_devices->lowcore; + + if (pcpu_devices[0].address == stap()) + lc = &S390_lowcore; + pcpu_delegate(&pcpu_devices[0], func, data, - pcpu_devices->lowcore->panic_stack - - PANIC_FRAME_OFFSET + PAGE_SIZE); + lc->panic_stack - PANIC_FRAME_OFFSET + PAGE_SIZE); } int smp_find_processor_id(u16 address) From fa9184be67a6c2f3d9ae38a832aa061c33c853cc Mon Sep 17 00:00:00 2001 From: Israel Rukshin Date: Mon, 19 Nov 2018 10:58:51 +0000 Subject: [PATCH 098/104] nvmet-rdma: Add unlikely for response allocated check commit ad1f824948e4ed886529219cf7cd717d078c630d upstream. Signed-off-by: Israel Rukshin Reviewed-by: Sagi Grimberg Reviewed-by: Max Gurtovoy Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe Cc: Raju Rangoju Signed-off-by: Greg Kroah-Hartman --- drivers/nvme/target/rdma.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/nvme/target/rdma.c b/drivers/nvme/target/rdma.c index e57f3902beb3..d39364916f99 100644 --- a/drivers/nvme/target/rdma.c +++ b/drivers/nvme/target/rdma.c @@ -196,7 +196,7 @@ nvmet_rdma_put_rsp(struct nvmet_rdma_rsp *rsp) { unsigned long flags; - if (rsp->allocated) { + if (unlikely(rsp->allocated)) { kfree(rsp); return; } From 7dbf12973d536a226acf5e18ab9ed6ff6364c876 Mon Sep 17 00:00:00 2001 From: Raju Rangoju Date: Thu, 3 Jan 2019 23:05:31 +0530 Subject: [PATCH 099/104] nvmet-rdma: fix null dereference under heavy load commit 5cbab6303b4791a3e6713dfe2c5fda6a867f9adc upstream. 
Under heavy load if we don't have any pre-allocated rsps left, we dynamically allocate a rsp, but we are not actually allocating memory for nvme_completion (rsp->req.rsp). In such a case, accessing pointer fields (req->rsp->status) in nvmet_req_init() will result in a crash. To fix this, allocate the memory for nvme_completion by calling nvmet_rdma_alloc_rsp(). Fixes: 8407879c ("nvmet-rdma: fix possible bogus dereference under heavy load") Cc: Reviewed-by: Max Gurtovoy Reviewed-by: Christoph Hellwig Signed-off-by: Raju Rangoju Signed-off-by: Sagi Grimberg Signed-off-by: Jens Axboe Signed-off-by: Greg Kroah-Hartman --- drivers/nvme/target/rdma.c | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/drivers/nvme/target/rdma.c b/drivers/nvme/target/rdma.c index d39364916f99..08f997a390d5 100644 --- a/drivers/nvme/target/rdma.c +++ b/drivers/nvme/target/rdma.c @@ -139,6 +139,10 @@ static void nvmet_rdma_recv_done(struct ib_cq *cq, struct ib_wc *wc); static void nvmet_rdma_read_data_done(struct ib_cq *cq, struct ib_wc *wc); static void nvmet_rdma_qp_event(struct ib_event *event, void *priv); static void nvmet_rdma_queue_disconnect(struct nvmet_rdma_queue *queue); +static void nvmet_rdma_free_rsp(struct nvmet_rdma_device *ndev, + struct nvmet_rdma_rsp *r); +static int nvmet_rdma_alloc_rsp(struct nvmet_rdma_device *ndev, + struct nvmet_rdma_rsp *r); static const struct nvmet_fabrics_ops nvmet_rdma_ops; @@ -182,9 +186,17 @@ nvmet_rdma_get_rsp(struct nvmet_rdma_queue *queue) spin_unlock_irqrestore(&queue->rsps_lock, flags); if (unlikely(!rsp)) { - rsp = kmalloc(sizeof(*rsp), GFP_KERNEL); + int ret; + + rsp = kzalloc(sizeof(*rsp), GFP_KERNEL); if (unlikely(!rsp)) return NULL; + ret = nvmet_rdma_alloc_rsp(queue->dev, rsp); + if (unlikely(ret)) { + kfree(rsp); + return NULL; + } + rsp->allocated = true; } @@ -197,6 +209,7 @@ nvmet_rdma_put_rsp(struct nvmet_rdma_rsp *rsp) unsigned long flags; if (unlikely(rsp->allocated)) { + nvmet_rdma_free_rsp(rsp->queue->dev, rsp); kfree(rsp); return; } From 6bab957396ce60d2c25d161657c7d3e744989fb5 Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Fri, 25 Jan 2019 19:08:58 +0100 Subject: [PATCH 100/104] Revert "mm, memory_hotplug: initialize struct pages for the full memory section" commit 4aa9fc2a435abe95a1e8d7f8c7b3d6356514b37a upstream. This reverts commit 2830bf6f05fb3e05bc4743274b806c821807a684. The underlying assumption that one sparse section belongs into a single numa node doesn't really hold. Robert Shteynfeld has reported a boot failure. The boot log was not captured but his memory layout is as follows: Early memory node ranges node 1: [mem 0x0000000000001000-0x0000000000090fff] node 1: [mem 0x0000000000100000-0x00000000dbdf8fff] node 1: [mem 0x0000000100000000-0x0000001423ffffff] node 0: [mem 0x0000001424000000-0x0000002023ffffff] This means that node0 starts in the middle of a memory section which is also in node1. memmap_init_zone tries to initialize padding of a section even when it is outside of the given pfn range because there are code paths (e.g. memory hotplug) which assume that the full worth of a memory section is always initialized. In this particular case, though, such a range is already initialized and most likely already managed by the page allocator. Scribbling over those pages corrupts the internal state and likely blows up when any of those pages gets used.
Reported-by: Robert Shteynfeld Fixes: 2830bf6f05fb ("mm, memory_hotplug: initialize struct pages for the full memory section") Cc: stable@kernel.org Signed-off-by: Michal Hocko Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- mm/page_alloc.c | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 93e73ccb4dec..9e45553cabd6 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -5538,18 +5538,6 @@ not_early: cond_resched(); } } -#ifdef CONFIG_SPARSEMEM - /* - * If the zone does not span the rest of the section then - * we should at least initialize those pages. Otherwise we - * could blow up on a poisoned page in some paths which depend - * on full sections being initialized (e.g. memory hotplug). - */ - while (end_pfn % PAGES_PER_SECTION) { - __init_single_page(pfn_to_page(end_pfn), end_pfn, zone, nid); - end_pfn++; - } -#endif } static void __meminit zone_init_free_lists(struct zone *zone) From 25ad17d692ad54c3c33b2a31e5ce2a82e38de14e Mon Sep 17 00:00:00 2001 From: Jack Pham Date: Thu, 10 Jan 2019 12:39:55 -0800 Subject: [PATCH 101/104] usb: dwc3: gadget: Clear req->needs_extra_trb flag on cleanup commit bd6742249b9ca918565e4e3abaa06665e587f4b5 upstream. OUT endpoint requests may sometimes have this flag set when preparing to be submitted to HW indicating that there is an additional TRB chained to the request for alignment purposes. If that request is removed before the controller can execute the transfer (e.g. ep_dequeue/ep_disable), the request will not go through the dwc3_gadget_ep_cleanup_completed_request() handler and will not have its needs_extra_trb flag cleared when dwc3_gadget_giveback() is called. This same request could be later requeued for a new transfer that does not require an extra TRB and if it is successfully completed, the cleanup and TRB reclamation will incorrectly process the additional TRB which belongs to the next request, and incorrectly advance the TRB dequeue pointer, thereby messing up calculation of the next request's actual/remaining count when it completes. The right thing to do here is to ensure that the flag is cleared before it is given back to the function driver. A good place to do that is in dwc3_gadget_del_and_unmap_request(). Fixes: c6267a51639b ("usb: dwc3: gadget: align transfers to wMaxPacketSize") Cc: stable@vger.kernel.org Signed-off-by: Jack Pham Signed-off-by: Felipe Balbi [jackp: backport to <= 4.20: replaced 'needs_extra_trb' with 'unaligned' and 'zero' members in patch and reworded commit text] Signed-off-by: Jack Pham Signed-off-by: Greg Kroah-Hartman --- drivers/usb/dwc3/gadget.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c index 558949b826d0..d8bf9307901e 100644 --- a/drivers/usb/dwc3/gadget.c +++ b/drivers/usb/dwc3/gadget.c @@ -177,6 +177,8 @@ static void dwc3_gadget_del_and_unmap_request(struct dwc3_ep *dep, req->started = false; list_del(&req->list); req->remaining = 0; + req->unaligned = false; + req->zero = false; if (req->request.status == -EINPROGRESS) req->request.status = status; From d4a6ac28d44a8ed2fdc80745d01136d81383d7ed Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 20 Dec 2018 17:16:53 +0100 Subject: [PATCH 102/104] ide: fix a typo in the settings proc file name commit f8ff6c732d35904d773043f979b844ef330c701b upstream.
Fixes: ec7d9c9ce8 ("ide: replace ->proc_fops with ->proc_show") Reported-by: kernel test robot Signed-off-by: Christoph Hellwig Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- drivers/ide/ide-proc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/ide/ide-proc.c b/drivers/ide/ide-proc.c index 45c997430332..0e51803de0e7 100644 --- a/drivers/ide/ide-proc.c +++ b/drivers/ide/ide-proc.c @@ -544,7 +544,7 @@ void ide_proc_port_register_devices(ide_hwif_t *hwif) drive->proc = proc_mkdir(drive->name, parent); if (drive->proc) { ide_add_proc_entries(drive->proc, generic_drive_entries, drive); - proc_create_data("setting", S_IFREG|S_IRUSR|S_IWUSR, + proc_create_data("settings", S_IFREG|S_IRUSR|S_IWUSR, drive->proc, &ide_settings_proc_fops, drive); } From 3a3b6a6b15dbd99d83052070fda2ae2bfdd66dc6 Mon Sep 17 00:00:00 2001 From: Deepa Dinamani Date: Thu, 24 Jan 2019 00:29:20 -0800 Subject: [PATCH 103/104] Input: input_event - fix the CONFIG_SPARC64 mixup commit 141e5dcaa7356077028b4cd48ec351a38c70e5e5 upstream. Arnd Bergmann pointed out that CONFIG_* cannot be used in a uapi header. Override with an equivalent conditional. Fixes: 2e746942ebac ("Input: input_event - provide override for sparc64") Fixes: 152194fe9c3f ("Input: extend usable life of event timestamps to 2106 on 32 bit systems") Signed-off-by: Deepa Dinamani Signed-off-by: Dmitry Torokhov Signed-off-by: Greg Kroah-Hartman --- include/uapi/linux/input.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/uapi/linux/input.h b/include/uapi/linux/input.h index ffab958bc512..f056b2a00d5c 100644 --- a/include/uapi/linux/input.h +++ b/include/uapi/linux/input.h @@ -32,7 +32,7 @@ struct input_event { #define input_event_usec time.tv_usec #else __kernel_ulong_t __sec; -#ifdef CONFIG_SPARC64 +#if defined(__sparc__) && defined(__arch64__) unsigned int __usec; #else __kernel_ulong_t __usec; From dffbba4348e9686d6bf42d54eb0f2cd1c4fb3520 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Thu, 31 Jan 2019 08:14:42 +0100 Subject: [PATCH 104/104] Linux 4.19.19 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 9f37a8a9feb9..39c4e7c3c13c 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 VERSION = 4 PATCHLEVEL = 19 -SUBLEVEL = 18 +SUBLEVEL = 19 EXTRAVERSION = NAME = "People's Front"