From ba46d8fab00a8e1538df241681d9161c8ec85778 Mon Sep 17 00:00:00 2001
From: Ilya Dryomov <idryomov@gmail.com>
Date: Tue, 21 Mar 2017 13:44:28 +0100
Subject: [PATCH 01/27] libceph: force GFP_NOIO for socket allocations

commit 633ee407b9d15a75ac9740ba9d3338815e1fcb95 upstream.

sock_alloc_inode() allocates socket+inode and socket_wq with
GFP_KERNEL, which is not allowed on the writeback path:

    Workqueue: ceph-msgr con_work [libceph]
    ffff8810871cb018 0000000000000046 0000000000000000 ffff881085d40000
    0000000000012b00 ffff881025cad428 ffff8810871cbfd8 0000000000012b00
    ffff880102fc1000 ffff881085d40000 ffff8810871cb038 ffff8810871cb148
    Call Trace:
    [<ffffffff816dd629>] schedule+0x29/0x70
    [<ffffffff816e066d>] schedule_timeout+0x1bd/0x200
    [<ffffffff81093ffc>] ? ttwu_do_wakeup+0x2c/0x120
    [<ffffffff81094266>] ? ttwu_do_activate.constprop.135+0x66/0x70
    [<ffffffff816deb5f>] wait_for_completion+0xbf/0x180
    [<ffffffff81097cd0>] ? try_to_wake_up+0x390/0x390
    [<ffffffff81086335>] flush_work+0x165/0x250
    [<ffffffff81082940>] ? worker_detach_from_pool+0xd0/0xd0
    [<ffffffffa03b65b1>] xlog_cil_force_lsn+0x81/0x200 [xfs]
    [<ffffffff816d6b42>] ? __slab_free+0xee/0x234
    [<ffffffffa03b4b1d>] _xfs_log_force_lsn+0x4d/0x2c0 [xfs]
    [<ffffffff811adc1e>] ? lookup_page_cgroup_used+0xe/0x30
    [<ffffffffa039a723>] ? xfs_reclaim_inode+0xa3/0x330 [xfs]
    [<ffffffffa03b4dcf>] xfs_log_force_lsn+0x3f/0xf0 [xfs]
    [<ffffffffa039a723>] ? xfs_reclaim_inode+0xa3/0x330 [xfs]
    [<ffffffffa03a62c6>] xfs_iunpin_wait+0xc6/0x1a0 [xfs]
    [<ffffffff810aa250>] ? wake_atomic_t_function+0x40/0x40
    [<ffffffffa039a723>] xfs_reclaim_inode+0xa3/0x330 [xfs]
    [<ffffffffa039ac07>] xfs_reclaim_inodes_ag+0x257/0x3d0 [xfs]
    [<ffffffffa039bb13>] xfs_reclaim_inodes_nr+0x33/0x40 [xfs]
    [<ffffffffa03ab745>] xfs_fs_free_cached_objects+0x15/0x20 [xfs]
    [<ffffffff811c0c18>] super_cache_scan+0x178/0x180
    [<ffffffff8115912e>] shrink_slab_node+0x14e/0x340
    [<ffffffff811afc3b>] ? mem_cgroup_iter+0x16b/0x450
    [<ffffffff8115af70>] shrink_slab+0x100/0x140
    [<ffffffff8115e425>] do_try_to_free_pages+0x335/0x490
    [<ffffffff8115e7f9>] try_to_free_pages+0xb9/0x1f0
    [<ffffffff816d56e4>] ? __alloc_pages_direct_compact+0x69/0x1be
    [<ffffffff81150cba>] __alloc_pages_nodemask+0x69a/0xb40
    [<ffffffff8119743e>] alloc_pages_current+0x9e/0x110
    [<ffffffff811a0ac5>] new_slab+0x2c5/0x390
    [<ffffffff816d71c4>] __slab_alloc+0x33b/0x459
    [<ffffffff815b906d>] ? sock_alloc_inode+0x2d/0xd0
    [<ffffffff8164bda1>] ? inet_sendmsg+0x71/0xc0
    [<ffffffff815b906d>] ? sock_alloc_inode+0x2d/0xd0
    [<ffffffff811a21f2>] kmem_cache_alloc+0x1a2/0x1b0
    [<ffffffff815b906d>] sock_alloc_inode+0x2d/0xd0
    [<ffffffff811d8566>] alloc_inode+0x26/0xa0
    [<ffffffff811da04a>] new_inode_pseudo+0x1a/0x70
    [<ffffffff815b933e>] sock_alloc+0x1e/0x80
    [<ffffffff815ba855>] __sock_create+0x95/0x220
    [<ffffffff815baa04>] sock_create_kern+0x24/0x30
    [<ffffffffa04794d9>] con_work+0xef9/0x2050 [libceph]
    [<ffffffffa04aa9ec>] ? rbd_img_request_submit+0x4c/0x60 [rbd]
    [<ffffffff81084c19>] process_one_work+0x159/0x4f0
    [<ffffffff8108561b>] worker_thread+0x11b/0x530
    [<ffffffff81085500>] ? create_worker+0x1d0/0x1d0
    [<ffffffff8108b6f9>] kthread+0xc9/0xe0
    [<ffffffff8108b630>] ? flush_kthread_worker+0x90/0x90
    [<ffffffff816e1b98>] ret_from_fork+0x58/0x90
    [<ffffffff8108b630>] ? flush_kthread_worker+0x90/0x90

Use memalloc_noio_{save,restore}() to temporarily force GFP_NOIO here.

Link: http://tracker.ceph.com/issues/19309
Reported-by: Sergey Jerusalimov <wintchester@gmail.com>
Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
Reviewed-by: Jeff Layton <jlayton@redhat.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/ceph/messenger.c | 6 ++++++
 1 file changed, 6 insertions(+)
diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c
index b8d927c56494..a6b2f2138c9d 100644
--- a/net/ceph/messenger.c
+++ b/net/ceph/messenger.c
@@ -7,6 +7,7 @@
 #include <linux/kthread.h>
 #include <linux/net.h>
 #include <linux/nsproxy.h>
+#include <linux/sched.h>
 #include <linux/slab.h>
 #include <linux/socket.h>
 #include <linux/string.h>
@@ -478,11 +479,16 @@ static int ceph_tcp_connect(struct ceph_connection *con)
 {
 	struct sockaddr_storage *paddr = &con->peer_addr.in_addr;
 	struct socket *sock;
+	unsigned int noio_flag;
 	int ret;
 
 	BUG_ON(con->sock);
+
+	/* sock_create_kern() allocates with GFP_KERNEL */
+	noio_flag = memalloc_noio_save();
 	ret = sock_create_kern(read_pnet(&con->msgr->net), paddr->ss_family,
 			       SOCK_STREAM, IPPROTO_TCP, &sock);
+	memalloc_noio_restore(noio_flag);
 	if (ret)
 		return ret;
 	sock->sk->sk_allocation = GFP_NOFS;

From 1eed198ce16b6e05c05ee381e5d90fac35ea67a7 Mon Sep 17 00:00:00 2001
From: Ross Lagerwall <ross.lagerwall@citrix.com>
Date: Mon, 12 Dec 2016 14:35:13 +0000
Subject: [PATCH 02/27] xen/setup: Don't relocate p2m over existing one

commit 7ecec8503af37de6be4f96b53828d640a968705f upstream.

When relocating the p2m, take special care not to relocate it so
that is overlaps with the current location of the p2m/initrd. This is
needed since the full extent of the current location is not marked as a
reserved region in the e820.

This was seen to happen to a dom0 with a large initial p2m and a small
reserved region in the middle of the initial p2m.

Signed-off-by: Ross Lagerwall <ross.lagerwall@citrix.com>
Reviewed-by: Juergen Gross <jgross@suse.com>
Signed-off-by: Juergen Gross <jgross@suse.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/xen/setup.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c
index e345891450c3..df8844a1853a 100644
--- a/arch/x86/xen/setup.c
+++ b/arch/x86/xen/setup.c
@@ -713,10 +713,9 @@ static void __init xen_reserve_xen_mfnlist(void)
 		size = PFN_PHYS(xen_start_info->nr_p2m_frames);
 	}
 
-	if (!xen_is_e820_reserved(start, size)) {
-		memblock_reserve(start, size);
+	memblock_reserve(start, size);
+	if (!xen_is_e820_reserved(start, size))
 		return;
-	}
 
 #ifdef CONFIG_X86_32
 	/*
@@ -727,6 +726,7 @@ static void __init xen_reserve_xen_mfnlist(void)
 	BUG();
 #else
 	xen_relocate_p2m();
+	memblock_free(start, size);
 #endif
 }
 

From 18639c4bad72218954e728e9ca65c33b13ba673a Mon Sep 17 00:00:00 2001
From: James Bottomley <James.Bottomley@HansenPartnership.com>
Date: Sun, 1 Jan 2017 09:39:24 -0800
Subject: [PATCH 03/27] scsi: mpt3sas: fix hang on ata passthrough commands

commit ffb58456589443ca572221fabbdef3db8483a779 upstream.

mpt3sas has a firmware failure where it can only handle one pass through
ATA command at a time.  If another comes in, contrary to the SAT
standard, it will hang until the first one completes (causing long
commands like secure erase to timeout).  The original fix was to block
the device when an ATA command came in, but this caused a regression
with

commit 669f044170d8933c3d66d231b69ea97cb8447338
Author: Bart Van Assche <bart.vanassche@sandisk.com>
Date:   Tue Nov 22 16:17:13 2016 -0800

    scsi: srp_transport: Move queuecommand() wait code to SCSI core

So fix the original fix of the secure erase timeout by properly
returning SAM_STAT_BUSY like the SAT recommends.  The original patch
also had a concurrency problem since scsih_qcmd is lockless at that
point (this is fixed by using atomic bitops to set and test the flag).

[mkp: addressed feedback wrt. test_bit and fixed whitespace]

Fixes: 18f6084a989ba1b (mpt3sas: Fix secure erase premature termination)
Signed-off-by: James Bottomley <James.Bottomley@HansenPartnership.com>
Acked-by: Sreekanth Reddy <Sreekanth.Reddy@broadcom.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reported-by: Ingo Molnar <mingo@kernel.org>
Tested-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
Cc: Joe Korty <joe.korty@ccur.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/scsi/mpt3sas/mpt3sas_base.h  | 12 +++++++++
 drivers/scsi/mpt3sas/mpt3sas_scsih.c | 40 ++++++++++++++++++----------
 2 files changed, 38 insertions(+), 14 deletions(-)

diff --git a/drivers/scsi/mpt3sas/mpt3sas_base.h b/drivers/scsi/mpt3sas/mpt3sas_base.h
index 92648a5ea2d2..63f5965acc89 100644
--- a/drivers/scsi/mpt3sas/mpt3sas_base.h
+++ b/drivers/scsi/mpt3sas/mpt3sas_base.h
@@ -390,6 +390,7 @@ struct MPT3SAS_TARGET {
  * @eedp_enable: eedp support enable bit
  * @eedp_type: 0(type_1), 1(type_2), 2(type_3)
  * @eedp_block_length: block size
+ * @ata_command_pending: SATL passthrough outstanding for device
  */
 struct MPT3SAS_DEVICE {
 	struct MPT3SAS_TARGET *sas_target;
@@ -398,6 +399,17 @@ struct MPT3SAS_DEVICE {
 	u8	configured_lun;
 	u8	block;
 	u8	tlr_snoop_check;
+	/*
+	 * Bug workaround for SATL handling: the mpt2/3sas firmware
+	 * doesn't return BUSY or TASK_SET_FULL for subsequent
+	 * commands while a SATL pass through is in operation as the
+	 * spec requires, it simply does nothing with them until the
+	 * pass through completes, causing them possibly to timeout if
+	 * the passthrough is a long executing command (like format or
+	 * secure erase).  This variable allows us to do the right
+	 * thing while a SATL command is pending.
+	 */
+	unsigned long ata_command_pending;
 };
 
 #define MPT3_CMD_NOT_USED	0x8000	/* free */
diff --git a/drivers/scsi/mpt3sas/mpt3sas_scsih.c b/drivers/scsi/mpt3sas/mpt3sas_scsih.c
index f6a8e9958e75..8a5fbdb45cfd 100644
--- a/drivers/scsi/mpt3sas/mpt3sas_scsih.c
+++ b/drivers/scsi/mpt3sas/mpt3sas_scsih.c
@@ -3707,9 +3707,18 @@ _scsih_temp_threshold_events(struct MPT3SAS_ADAPTER *ioc,
 	}
 }
 
-static inline bool ata_12_16_cmd(struct scsi_cmnd *scmd)
+static int _scsih_set_satl_pending(struct scsi_cmnd *scmd, bool pending)
 {
-	return (scmd->cmnd[0] == ATA_12 || scmd->cmnd[0] == ATA_16);
+	struct MPT3SAS_DEVICE *priv = scmd->device->hostdata;
+
+	if (scmd->cmnd[0] != ATA_12 && scmd->cmnd[0] != ATA_16)
+		return 0;
+
+	if (pending)
+		return test_and_set_bit(0, &priv->ata_command_pending);
+
+	clear_bit(0, &priv->ata_command_pending);
+	return 0;
 }
 
 /**
@@ -3733,9 +3742,7 @@ _scsih_flush_running_cmds(struct MPT3SAS_ADAPTER *ioc)
 		if (!scmd)
 			continue;
 		count++;
-		if (ata_12_16_cmd(scmd))
-			scsi_internal_device_unblock(scmd->device,
-							SDEV_RUNNING);
+		_scsih_set_satl_pending(scmd, false);
 		mpt3sas_base_free_smid(ioc, smid);
 		scsi_dma_unmap(scmd);
 		if (ioc->pci_error_recovery)
@@ -3866,13 +3873,6 @@ scsih_qcmd(struct Scsi_Host *shost, struct scsi_cmnd *scmd)
 	if (ioc->logging_level & MPT_DEBUG_SCSI)
 		scsi_print_command(scmd);
 
-	/*
-	 * Lock the device for any subsequent command until command is
-	 * done.
-	 */
-	if (ata_12_16_cmd(scmd))
-		scsi_internal_device_block(scmd->device);
-
 	sas_device_priv_data = scmd->device->hostdata;
 	if (!sas_device_priv_data || !sas_device_priv_data->sas_target) {
 		scmd->result = DID_NO_CONNECT << 16;
@@ -3886,6 +3886,19 @@ scsih_qcmd(struct Scsi_Host *shost, struct scsi_cmnd *scmd)
 		return 0;
 	}
 
+	/*
+	 * Bug work around for firmware SATL handling.  The loop
+	 * is based on atomic operations and ensures consistency
+	 * since we're lockless at this point
+	 */
+	do {
+		if (test_bit(0, &sas_device_priv_data->ata_command_pending)) {
+			scmd->result = SAM_STAT_BUSY;
+			scmd->scsi_done(scmd);
+			return 0;
+		}
+	} while (_scsih_set_satl_pending(scmd, true));
+
 	sas_target_priv_data = sas_device_priv_data->sas_target;
 
 	/* invalid device handle */
@@ -4445,8 +4458,7 @@ _scsih_io_done(struct MPT3SAS_ADAPTER *ioc, u16 smid, u8 msix_index, u32 reply)
 	if (scmd == NULL)
 		return 1;
 
-	if (ata_12_16_cmd(scmd))
-		scsi_internal_device_unblock(scmd->device, SDEV_RUNNING);
+	_scsih_set_satl_pending(scmd, false);
 
 	mpi_request = mpt3sas_base_get_msg_frame(ioc, smid);
 

From a92f411914cad6532e82e4607bc4075a5ffaa366 Mon Sep 17 00:00:00 2001
From: peter chang <dpf@google.com>
Date: Wed, 15 Feb 2017 14:11:54 -0800
Subject: [PATCH 04/27] scsi: sg: check length passed to SG_NEXT_CMD_LEN

commit bf33f87dd04c371ea33feb821b60d63d754e3124 upstream.

The user can control the size of the next command passed along, but the
value passed to the ioctl isn't checked against the usable max command
size.

Signed-off-by: Peter Chang <dpf@google.com>
Acked-by: Douglas Gilbert <dgilbert@interlog.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/scsi/sg.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/scsi/sg.c b/drivers/scsi/sg.c
index dedcff9cabb5..6514636431ab 100644
--- a/drivers/scsi/sg.c
+++ b/drivers/scsi/sg.c
@@ -1008,6 +1008,8 @@ sg_ioctl(struct file *filp, unsigned int cmd_in, unsigned long arg)
 		result = get_user(val, ip);
 		if (result)
 			return result;
+		if (val > SG_MAX_CDB_SIZE)
+			return -ENOMEM;
 		sfp->next_cmd_len = (val > 0) ? val : 0;
 		return 0;
 	case SG_GET_VERSION_NUM:

From 75a03869c93a443ae068eae9aca0c0df8b33dff5 Mon Sep 17 00:00:00 2001
From: John Garry <john.garry@huawei.com>
Date: Thu, 16 Mar 2017 23:07:28 +0800
Subject: [PATCH 05/27] scsi: libsas: fix ata xfer length

commit 9702c67c6066f583b629cf037d2056245bb7a8e6 upstream.

The total ata xfer length may not be calculated properly, in that we do
not use the proper method to get an sg element dma length.

According to the code comment, sg_dma_len() should be used after
dma_map_sg() is called.

This issue was found by turning on the SMMUv3 in front of the hisi_sas
controller in hip07. Multiple sg elements were being combined into a
single element, but the original first element length was being use as
the total xfer length.

Fixes: ff2aeb1eb64c8a4770a6 ("libata: convert to chained sg")
Signed-off-by: John Garry <john.garry@huawei.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/scsi/libsas/sas_ata.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/scsi/libsas/sas_ata.c b/drivers/scsi/libsas/sas_ata.c
index 9c706d8c1441..6f5e2720ffad 100644
--- a/drivers/scsi/libsas/sas_ata.c
+++ b/drivers/scsi/libsas/sas_ata.c
@@ -218,7 +218,7 @@ static unsigned int sas_ata_qc_issue(struct ata_queued_cmd *qc)
 		task->num_scatter = qc->n_elem;
 	} else {
 		for_each_sg(qc->sg, sg, qc->n_elem, si)
-			xfer += sg->length;
+			xfer += sg_dma_len(sg);
 
 		task->total_xfer_len = xfer;
 		task->num_scatter = si;

From a90d7447e4a154ad26e3b9e09a0878680be49339 Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Fri, 24 Mar 2017 17:07:57 +0100
Subject: [PATCH 06/27] ALSA: seq: Fix race during FIFO resize

commit 2d7d54002e396c180db0c800c1046f0a3c471597 upstream.

When a new event is queued while processing to resize the FIFO in
snd_seq_fifo_clear(), it may lead to a use-after-free, as the old pool
that is being queued gets removed.  For avoiding this race, we need to
close the pool to be deleted and sync its usage before actually
deleting it.

The issue was spotted by syzkaller.

Reported-by: Dmitry Vyukov <dvyukov@google.com>
Signed-off-by: Takashi Iwai <tiwai@suse.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 sound/core/seq/seq_fifo.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/sound/core/seq/seq_fifo.c b/sound/core/seq/seq_fifo.c
index 3f4efcb85df5..3490d21ab9e7 100644
--- a/sound/core/seq/seq_fifo.c
+++ b/sound/core/seq/seq_fifo.c
@@ -265,6 +265,10 @@ int snd_seq_fifo_resize(struct snd_seq_fifo *f, int poolsize)
 	/* NOTE: overflow flag is not cleared */
 	spin_unlock_irqrestore(&f->lock, flags);
 
+	/* close the old pool and wait until all users are gone */
+	snd_seq_pool_mark_closing(oldpool);
+	snd_use_lock_sync(&f->use_lock);
+
 	/* release cells in old pool */
 	for (cell = oldhead; cell; cell = next) {
 		next = cell->next;

From ce3dcfdbff04bab023806ef7a342c657ec08915d Mon Sep 17 00:00:00 2001
From: Hui Wang <hui.wang@canonical.com>
Date: Fri, 31 Mar 2017 10:31:40 +0800
Subject: [PATCH 07/27] ALSA: hda - fix a problem for lineout on a Dell AIO
 machine

commit 2f726aec19a9d2c63bec9a8a53a3910ffdcd09f8 upstream.

On this Dell AIO machine, the lineout jack does not work.

We found the pin 0x1a is assigned to lineout on this machine, and in
the past, we applied ALC298_FIXUP_DELL1_MIC_NO_PRESENCE to fix the
heaset-set mic problem for this machine, this fixup will redefine
the pin 0x1a to headphone-mic, as a result the lineout doesn't
work anymore.

After consulting with Dell, they told us this machine doesn't support
microphone via headset jack, so we add a new fixup which only defines
the pin 0x18 as the headset-mic.

[rearranged the fixup insertion position by tiwai in order to make the
 merge with other branches easier -- tiwai]

Fixes: 59ec4b57bcae ("ALSA: hda - Fix headset mic detection problem for two dell machines")
Signed-off-by: Hui Wang <hui.wang@canonical.com>
Signed-off-by: Takashi Iwai <tiwai@suse.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 sound/pci/hda/patch_realtek.c | 12 +++++++++++-
 1 file changed, 11 insertions(+), 1 deletion(-)

diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c
index 1d4f34379f56..46a34039ecdc 100644
--- a/sound/pci/hda/patch_realtek.c
+++ b/sound/pci/hda/patch_realtek.c
@@ -4831,6 +4831,7 @@ enum {
 	ALC292_FIXUP_DISABLE_AAMIX,
 	ALC293_FIXUP_DISABLE_AAMIX_MULTIJACK,
 	ALC298_FIXUP_DELL1_MIC_NO_PRESENCE,
+	ALC298_FIXUP_DELL_AIO_MIC_NO_PRESENCE,
 	ALC275_FIXUP_DELL_XPS,
 	ALC256_FIXUP_DELL_XPS_13_HEADPHONE_NOISE,
 	ALC293_FIXUP_LENOVO_SPK_NOISE,
@@ -5429,6 +5430,15 @@ static const struct hda_fixup alc269_fixups[] = {
 		.chained = true,
 		.chain_id = ALC269_FIXUP_HEADSET_MODE
 	},
+	[ALC298_FIXUP_DELL_AIO_MIC_NO_PRESENCE] = {
+		.type = HDA_FIXUP_PINS,
+		.v.pins = (const struct hda_pintbl[]) {
+			{ 0x18, 0x01a1913c }, /* use as headset mic, without its own jack detect */
+			{ }
+		},
+		.chained = true,
+		.chain_id = ALC269_FIXUP_HEADSET_MODE
+	},
 	[ALC275_FIXUP_DELL_XPS] = {
 		.type = HDA_FIXUP_VERBS,
 		.v.verbs = (const struct hda_verb[]) {
@@ -5501,7 +5511,7 @@ static const struct hda_fixup alc269_fixups[] = {
 		.type = HDA_FIXUP_FUNC,
 		.v.func = alc298_fixup_speaker_volume,
 		.chained = true,
-		.chain_id = ALC298_FIXUP_DELL1_MIC_NO_PRESENCE,
+		.chain_id = ALC298_FIXUP_DELL_AIO_MIC_NO_PRESENCE,
 	},
 	[ALC256_FIXUP_DELL_INSPIRON_7559_SUBWOOFER] = {
 		.type = HDA_FIXUP_PINS,

From ab48ab614b8c83f3a3b0f83f7882b1d2766962d3 Mon Sep 17 00:00:00 2001
From: Songjun Wu <songjun.wu@microchip.com>
Date: Fri, 24 Feb 2017 15:10:43 +0800
Subject: [PATCH 08/27] ASoC: atmel-classd: fix audio clock rate

commit cd3ac9affc43b44f49d7af70d275f0bd426ba643 upstream.

Fix the audio clock rate according to the datasheet.

Reported-by: Dushara Jayasinghe <dushara@successful.com.au>
Signed-off-by: Songjun Wu <songjun.wu@microchip.com>
Acked-by: Nicolas Ferre <nicolas.ferre@microchip.com>
Signed-off-by: Mark Brown <broonie@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 sound/soc/atmel/atmel-classd.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sound/soc/atmel/atmel-classd.c b/sound/soc/atmel/atmel-classd.c
index 8276675730ef..78a985629607 100644
--- a/sound/soc/atmel/atmel-classd.c
+++ b/sound/soc/atmel/atmel-classd.c
@@ -343,7 +343,7 @@ static int atmel_classd_codec_dai_digital_mute(struct snd_soc_dai *codec_dai,
 }
 
 #define CLASSD_ACLK_RATE_11M2896_MPY_8 (112896 * 100 * 8)
-#define CLASSD_ACLK_RATE_12M288_MPY_8  (12228 * 1000 * 8)
+#define CLASSD_ACLK_RATE_12M288_MPY_8  (12288 * 1000 * 8)
 
 static struct {
 	int rate;

From 3342857ac074768e14e361392ac09fbbd70d840e Mon Sep 17 00:00:00 2001
From: Josh Poimboeuf <jpoimboe@redhat.com>
Date: Thu, 16 Mar 2017 08:56:28 -0500
Subject: [PATCH 09/27] ACPI: Fix incompatibility with mcount-based function
 graph tracing

commit 61b79e16c68d703dde58c25d3935d67210b7d71b upstream.

Paul Menzel reported a warning:

  WARNING: CPU: 0 PID: 774 at /build/linux-ROBWaj/linux-4.9.13/kernel/trace/trace_functions_graph.c:233 ftrace_return_to_handler+0x1aa/0x1e0
  Bad frame pointer: expected f6919d98, received f6919db0
    from func acpi_pm_device_sleep_wake return to c43b6f9d

The warning means that function graph tracing is broken for the
acpi_pm_device_sleep_wake() function.  That's because the ACPI Makefile
unconditionally sets the '-Os' gcc flag to optimize for size.  That's an
issue because mcount-based function graph tracing is incompatible with
'-Os' on x86, thanks to the following gcc bug:

  https://gcc.gnu.org/bugzilla/show_bug.cgi?id=42109

I have another patch pending which will ensure that mcount-based
function graph tracing is never used with CONFIG_CC_OPTIMIZE_FOR_SIZE on
x86.

But this patch is needed in addition to that one because the ACPI
Makefile overrides that config option for no apparent reason.  It has
had this flag since the beginning of git history, and there's no related
comment, so I don't know why it's there.  As far as I can tell, there's
no reason for it to be there.  The appropriate behavior is for it to
honor CONFIG_CC_OPTIMIZE_FOR_{SIZE,PERFORMANCE} like the rest of the
kernel.

Reported-by: Paul Menzel <pmenzel@molgen.mpg.de>
Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
Acked-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/acpi/Makefile | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/acpi/Makefile b/drivers/acpi/Makefile
index 675eaf337178..b9cebca376f9 100644
--- a/drivers/acpi/Makefile
+++ b/drivers/acpi/Makefile
@@ -2,7 +2,6 @@
 # Makefile for the Linux ACPI interpreter
 #
 
-ccflags-y			:= -Os
 ccflags-$(CONFIG_ACPI_DEBUG)	+= -DACPI_DEBUG_OUTPUT
 
 #

From 566a8711a7dd11960fa0bf3a4fd89c742eb359f3 Mon Sep 17 00:00:00 2001
From: Joerg Roedel <jroedel@suse.de>
Date: Wed, 22 Mar 2017 18:33:25 +0100
Subject: [PATCH 10/27] ACPI: Do not create a platform_device for
 IOAPIC/IOxAPIC

commit 08f63d97749185fab942a3a47ed80f5bd89b8b7d upstream.

No platform-device is required for IO(x)APICs, so don't even
create them.

[ rjw: This fixes a problem with leaking platform device objects
  after IOAPIC/IOxAPIC hot-removal events.]

Signed-off-by: Joerg Roedel <jroedel@suse.de>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/acpi/acpi_platform.c | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/drivers/acpi/acpi_platform.c b/drivers/acpi/acpi_platform.c
index 296b7a14893a..5365ff6e69c1 100644
--- a/drivers/acpi/acpi_platform.c
+++ b/drivers/acpi/acpi_platform.c
@@ -24,9 +24,11 @@
 ACPI_MODULE_NAME("platform");
 
 static const struct acpi_device_id forbidden_id_list[] = {
-	{"PNP0000", 0},	/* PIC */
-	{"PNP0100", 0},	/* Timer */
-	{"PNP0200", 0},	/* AT DMA Controller */
+	{"PNP0000",  0},	/* PIC */
+	{"PNP0100",  0},	/* Timer */
+	{"PNP0200",  0},	/* AT DMA Controller */
+	{"ACPI0009", 0},	/* IOxAPIC */
+	{"ACPI000A", 0},	/* IOAPIC */
 	{"", 0},
 };
 

From 74b8fc017d7689d1a60c9e234b2cfe3550b7f414 Mon Sep 17 00:00:00 2001
From: Richard Genoud <richard.genoud@gmail.com>
Date: Mon, 20 Mar 2017 11:52:41 +0100
Subject: [PATCH 11/27] tty/serial: atmel: fix race condition (TX+DMA)

commit 31ca2c63fdc0aee725cbd4f207c1256f5deaabde upstream.

If uart_flush_buffer() is called between atmel_tx_dma() and
atmel_complete_tx_dma(), the circular buffer has been cleared, but not
atmel_port->tx_len.
That leads to a circular buffer overflow (dumping (UART_XMIT_SIZE -
atmel_port->tx_len) bytes).

Tested-by: Nicolas Ferre <nicolas.ferre@microchip.com>
Signed-off-by: Richard Genoud <richard.genoud@gmail.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/tty/serial/atmel_serial.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/drivers/tty/serial/atmel_serial.c b/drivers/tty/serial/atmel_serial.c
index a0f911641b04..156a262b6b65 100644
--- a/drivers/tty/serial/atmel_serial.c
+++ b/drivers/tty/serial/atmel_serial.c
@@ -1987,6 +1987,11 @@ static void atmel_flush_buffer(struct uart_port *port)
 		atmel_uart_writel(port, ATMEL_PDC_TCR, 0);
 		atmel_port->pdc_tx.ofs = 0;
 	}
+	/*
+	 * in uart_flush_buffer(), the xmit circular buffer has just
+	 * been cleared, so we have to reset tx_len accordingly.
+	 */
+	atmel_port->tx_len = 0;
 }
 
 /*

From 0a1757cfa5ba3b46f6ee7a74ddb7a5c0bd5d7c2f Mon Sep 17 00:00:00 2001
From: Nicolas Ferre <nicolas.ferre@microchip.com>
Date: Mon, 20 Mar 2017 16:38:57 +0100
Subject: [PATCH 12/27] tty/serial: atmel: fix TX path in atmel_console_write()

commit 497e1e16f45c70574dc9922c7f75c642c2162119 upstream.

A side effect of 89d8232411a8 ("tty/serial: atmel_serial: BUG: stop DMA
from transmitting in stop_tx") is that the console can be called with
TX path disabled. Then the system would hang trying to push charecters
out in atmel_console_putchar().

Signed-off-by: Nicolas Ferre <nicolas.ferre@microchip.com>
Fixes: 89d8232411a8 ("tty/serial: atmel_serial: BUG: stop DMA from transmitting in stop_tx")
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/tty/serial/atmel_serial.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/tty/serial/atmel_serial.c b/drivers/tty/serial/atmel_serial.c
index 156a262b6b65..a15070a7fcd6 100644
--- a/drivers/tty/serial/atmel_serial.c
+++ b/drivers/tty/serial/atmel_serial.c
@@ -2504,6 +2504,9 @@ static void atmel_console_write(struct console *co, const char *s, u_int count)
 	pdc_tx = atmel_uart_readl(port, ATMEL_PDC_PTSR) & ATMEL_PDC_TXTEN;
 	atmel_uart_writel(port, ATMEL_PDC_PTCR, ATMEL_PDC_TXTDIS);
 
+	/* Make sure that tx path is actually able to send characters */
+	atmel_uart_writel(port, ATMEL_US_CR, ATMEL_US_TXEN);
+
 	uart_console_write(port, s, count, atmel_console_putchar);
 
 	/*

From eac3ab3e69151c21a0a71ec8711600022cc12fa3 Mon Sep 17 00:00:00 2001
From: Alan Stern <stern@rowland.harvard.edu>
Date: Fri, 24 Mar 2017 13:38:28 -0400
Subject: [PATCH 13/27] USB: fix linked-list corruption in rh_call_control()

commit 1633682053a7ee8058e10c76722b9b28e97fb73f upstream.

Using KASAN, Dmitry found a bug in the rh_call_control() routine: If
buffer allocation fails, the routine returns immediately without
unlinking its URB from the control endpoint, eventually leading to
linked-list corruption.

This patch fixes the problem by jumping to the end of the routine
(where the URB is unlinked) when an allocation failure occurs.

Signed-off-by: Alan Stern <stern@rowland.harvard.edu>
Reported-and-tested-by: Dmitry Vyukov <dvyukov@google.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/core/hcd.c | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/drivers/usb/core/hcd.c b/drivers/usb/core/hcd.c
index 5724d7c41e29..ca2cbdb3aa67 100644
--- a/drivers/usb/core/hcd.c
+++ b/drivers/usb/core/hcd.c
@@ -499,8 +499,10 @@ static int rh_call_control (struct usb_hcd *hcd, struct urb *urb)
 	 */
 	tbuf_size =  max_t(u16, sizeof(struct usb_hub_descriptor), wLength);
 	tbuf = kzalloc(tbuf_size, GFP_KERNEL);
-	if (!tbuf)
-		return -ENOMEM;
+	if (!tbuf) {
+		status = -ENOMEM;
+		goto err_alloc;
+	}
 
 	bufp = tbuf;
 
@@ -705,6 +707,7 @@ error:
 	}
 
 	kfree(tbuf);
+ err_alloc:
 
 	/* any errors get returned through the urb completion */
 	spin_lock_irq(&hcd_root_hub_lock);

From 3eb392056aeb4a0beca5fcead9ad3d6b6ff0816e Mon Sep 17 00:00:00 2001
From: Peter Xu <peterx@redhat.com>
Date: Wed, 15 Mar 2017 16:01:17 +0800
Subject: [PATCH 14/27] KVM: x86: clear bus pointer when destroyed
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

commit df630b8c1e851b5e265dc2ca9c87222e342c093b upstream.

When releasing the bus, let's clear the bus pointers to mark it out. If
any further device unregister happens on this bus, we know that we're
done if we found the bus being released already.

Signed-off-by: Peter Xu <peterx@redhat.com>
Signed-off-by: Radim Krčmář <rkrcmar@redhat.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 virt/kvm/kvm_main.c | 12 +++++++++++-
 1 file changed, 11 insertions(+), 1 deletion(-)

diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 336ed267c407..1ac5b7be7282 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -654,8 +654,10 @@ static void kvm_destroy_vm(struct kvm *kvm)
 	list_del(&kvm->vm_list);
 	spin_unlock(&kvm_lock);
 	kvm_free_irq_routing(kvm);
-	for (i = 0; i < KVM_NR_BUSES; i++)
+	for (i = 0; i < KVM_NR_BUSES; i++) {
 		kvm_io_bus_destroy(kvm->buses[i]);
+		kvm->buses[i] = NULL;
+	}
 	kvm_coalesced_mmio_free(kvm);
 #if defined(CONFIG_MMU_NOTIFIER) && defined(KVM_ARCH_WANT_MMU_NOTIFIER)
 	mmu_notifier_unregister(&kvm->mmu_notifier, kvm->mm);
@@ -3376,6 +3378,14 @@ int kvm_io_bus_unregister_dev(struct kvm *kvm, enum kvm_bus bus_idx,
 	struct kvm_io_bus *new_bus, *bus;
 
 	bus = kvm->buses[bus_idx];
+
+	/*
+	 * It's possible the bus being released before hand. If so,
+	 * we're done here.
+	 */
+	if (!bus)
+		return 0;
+
 	r = -ENOENT;
 	for (i = 0; i < bus->dev_count; i++)
 		if (bus->range[i].dev == dev) {

From ef55c3df5dbd60eb3daab7797feac850bd1e6fe3 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Michel=20D=C3=A4nzer?= <michel.daenzer@amd.com>
Date: Fri, 24 Mar 2017 19:01:09 +0900
Subject: [PATCH 15/27] drm/radeon: Override fpfn for all VRAM placements in
 radeon_evict_flags
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

commit ce4b4f228e51219b0b79588caf73225b08b5b779 upstream.

We were accidentally only overriding the first VRAM placement. For BOs
with the RADEON_GEM_NO_CPU_ACCESS flag set,
radeon_ttm_placement_from_domain creates a second VRAM placment with
fpfn == 0. If VRAM is almost full, the first VRAM placement with
fpfn > 0 may not work, but the second one with fpfn == 0 always will
(the BO's current location trivially satisfies it). Because "moving"
the BO to its current location puts it back on the LRU list, this
results in an infinite loop.

Fixes: 2a85aedd117c ("drm/radeon: Try evicting from CPU accessible to
                      inaccessible VRAM first")
Reported-by: Zachary Michaels <zmichaels@oblong.com>
Reported-and-Tested-by: Julien Isorce <jisorce@oblong.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Michel Dänzer <michel.daenzer@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/gpu/drm/radeon/radeon_ttm.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/radeon/radeon_ttm.c b/drivers/gpu/drm/radeon/radeon_ttm.c
index 35310336dd0a..d684e2b79d2b 100644
--- a/drivers/gpu/drm/radeon/radeon_ttm.c
+++ b/drivers/gpu/drm/radeon/radeon_ttm.c
@@ -213,8 +213,8 @@ static void radeon_evict_flags(struct ttm_buffer_object *bo,
 			rbo->placement.num_busy_placement = 0;
 			for (i = 0; i < rbo->placement.num_placement; i++) {
 				if (rbo->placements[i].flags & TTM_PL_FLAG_VRAM) {
-					if (rbo->placements[0].fpfn < fpfn)
-						rbo->placements[0].fpfn = fpfn;
+					if (rbo->placements[i].fpfn < fpfn)
+						rbo->placements[i].fpfn = fpfn;
 				} else {
 					rbo->placement.busy_placement =
 						&rbo->placements[i];

From 47e2fe17d14d1a7da3a405b1f364c094c271d35c Mon Sep 17 00:00:00 2001
From: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
Date: Fri, 31 Mar 2017 15:11:55 -0700
Subject: [PATCH 16/27] mm, hugetlb: use pte_present() instead of pmd_present()
 in follow_huge_pmd()

commit c9d398fa237882ea07167e23bcfc5e6847066518 upstream.

I found the race condition which triggers the following bug when
move_pages() and soft offline are called on a single hugetlb page
concurrently.

    Soft offlining page 0x119400 at 0x700000000000
    BUG: unable to handle kernel paging request at ffffea0011943820
    IP: follow_huge_pmd+0x143/0x190
    PGD 7ffd2067
    PUD 7ffd1067
    PMD 0
        [61163.582052] Oops: 0000 [#1] SMP
    Modules linked in: binfmt_misc ppdev virtio_balloon parport_pc pcspkr i2c_piix4 parport i2c_core acpi_cpufreq ip_tables xfs libcrc32c ata_generic pata_acpi virtio_blk 8139too crc32c_intel ata_piix serio_raw libata virtio_pci 8139cp virtio_ring virtio mii floppy dm_mirror dm_region_hash dm_log dm_mod [last unloaded: cap_check]
    CPU: 0 PID: 22573 Comm: iterate_numa_mo Tainted: P           OE   4.11.0-rc2-mm1+ #2
    Hardware name: Red Hat KVM, BIOS 0.5.1 01/01/2011
    RIP: 0010:follow_huge_pmd+0x143/0x190
    RSP: 0018:ffffc90004bdbcd0 EFLAGS: 00010202
    RAX: 0000000465003e80 RBX: ffffea0004e34d30 RCX: 00003ffffffff000
    RDX: 0000000011943800 RSI: 0000000000080001 RDI: 0000000465003e80
    RBP: ffffc90004bdbd18 R08: 0000000000000000 R09: ffff880138d34000
    R10: ffffea0004650000 R11: 0000000000c363b0 R12: ffffea0011943800
    R13: ffff8801b8d34000 R14: ffffea0000000000 R15: 000077ff80000000
    FS:  00007fc977710740(0000) GS:ffff88007dc00000(0000) knlGS:0000000000000000
    CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
    CR2: ffffea0011943820 CR3: 000000007a746000 CR4: 00000000001406f0
    Call Trace:
     follow_page_mask+0x270/0x550
     SYSC_move_pages+0x4ea/0x8f0
     SyS_move_pages+0xe/0x10
     do_syscall_64+0x67/0x180
     entry_SYSCALL64_slow_path+0x25/0x25
    RIP: 0033:0x7fc976e03949
    RSP: 002b:00007ffe72221d88 EFLAGS: 00000246 ORIG_RAX: 0000000000000117
    RAX: ffffffffffffffda RBX: 0000000000000000 RCX: 00007fc976e03949
    RDX: 0000000000c22390 RSI: 0000000000001400 RDI: 0000000000005827
    RBP: 00007ffe72221e00 R08: 0000000000c2c3a0 R09: 0000000000000004
    R10: 0000000000c363b0 R11: 0000000000000246 R12: 0000000000400650
    R13: 00007ffe72221ee0 R14: 0000000000000000 R15: 0000000000000000
    Code: 81 e4 ff ff 1f 00 48 21 c2 49 c1 ec 0c 48 c1 ea 0c 4c 01 e2 49 bc 00 00 00 00 00 ea ff ff 48 c1 e2 06 49 01 d4 f6 45 bc 04 74 90 <49> 8b 7c 24 20 40 f6 c7 01 75 2b 4c 89 e7 8b 47 1c 85 c0 7e 2a
    RIP: follow_huge_pmd+0x143/0x190 RSP: ffffc90004bdbcd0
    CR2: ffffea0011943820
    ---[ end trace e4f81353a2d23232 ]---
    Kernel panic - not syncing: Fatal exception
    Kernel Offset: disabled

This bug is triggered when pmd_present() returns true for non-present
hugetlb, so fixing the present check in follow_huge_pmd() prevents it.
Using pmd_present() to determine present/non-present for hugetlb is not
correct, because pmd_present() checks multiple bits (not only
_PAGE_PRESENT) for historical reason and it can misjudge hugetlb state.

Fixes: e66f17ff7177 ("mm/hugetlb: take page table lock in follow_huge_pmd()")
Link: http://lkml.kernel.org/r/1490149898-20231-1-git-send-email-n-horiguchi@ah.jp.nec.com
Signed-off-by: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
Acked-by: Hillf Danton <hillf.zj@alibaba-inc.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: Michal Hocko <mhocko@kernel.org>
Cc: "Kirill A. Shutemov" <kirill.shutemov@linux.intel.com>
Cc: Mike Kravetz <mike.kravetz@oracle.com>
Cc: Christian Borntraeger <borntraeger@de.ibm.com>
Cc: Gerald Schaefer <gerald.schaefer@de.ibm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 mm/hugetlb.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index ea11123a9249..7294301d8495 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -4362,6 +4362,7 @@ follow_huge_pmd(struct mm_struct *mm, unsigned long address,
 {
 	struct page *page = NULL;
 	spinlock_t *ptl;
+	pte_t pte;
 retry:
 	ptl = pmd_lockptr(mm, pmd);
 	spin_lock(ptl);
@@ -4371,12 +4372,13 @@ retry:
 	 */
 	if (!pmd_huge(*pmd))
 		goto out;
-	if (pmd_present(*pmd)) {
+	pte = huge_ptep_get((pte_t *)pmd);
+	if (pte_present(pte)) {
 		page = pmd_page(*pmd) + ((address & ~PMD_MASK) >> PAGE_SHIFT);
 		if (flags & FOLL_GET)
 			get_page(page);
 	} else {
-		if (is_hugetlb_entry_migration(huge_ptep_get((pte_t *)pmd))) {
+		if (is_hugetlb_entry_migration(pte)) {
 			spin_unlock(ptl);
 			__migration_entry_wait(mm, (pte_t *)pmd, ptl);
 			goto retry;

From 6280ac931a23d3fa40cd26057576abcf90a4f22d Mon Sep 17 00:00:00 2001
From: Felix Fietkau <nbd@nbd.name>
Date: Thu, 19 Jan 2017 12:28:22 +0100
Subject: [PATCH 17/27] MIPS: Lantiq: Fix cascaded IRQ setup

commit 6c356eda225e3ee134ed4176b9ae3a76f793f4dd upstream.

With the IRQ stack changes integrated, the XRX200 devices started
emitting a constant stream of kernel messages like this:

[  565.415310] Spurious IRQ: CAUSE=0x1100c300

This is caused by IP0 getting handled by plat_irq_dispatch() rather than
its vectored interrupt handler, which is fixed by commit de856416e714
("MIPS: IRQ Stack: Fix erroneous jal to plat_irq_dispatch").

Fix plat_irq_dispatch() to handle non-vectored IPI interrupts correctly
by setting up IP2-6 as proper chained IRQ handlers and calling do_IRQ
for all MIPS CPU interrupts.

Signed-off-by: Felix Fietkau <nbd@nbd.name>
Acked-by: John Crispin <john@phrozen.org>
Cc: linux-mips@linux-mips.org
Patchwork: https://patchwork.linux-mips.org/patch/15077/
[james.hogan@imgtec.com: tweaked commit message]
Signed-off-by: James Hogan <james.hogan@imgtec.com>
Signed-off-by: Amit Pundir <amit.pundir@linaro.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/mips/lantiq/irq.c | 36 ++++++++++++++++--------------------
 1 file changed, 16 insertions(+), 20 deletions(-)

diff --git a/arch/mips/lantiq/irq.c b/arch/mips/lantiq/irq.c
index 2e7f60c9fc5d..51cdc46a87e2 100644
--- a/arch/mips/lantiq/irq.c
+++ b/arch/mips/lantiq/irq.c
@@ -269,6 +269,11 @@ static void ltq_hw5_irqdispatch(void)
 DEFINE_HWx_IRQDISPATCH(5)
 #endif
 
+static void ltq_hw_irq_handler(struct irq_desc *desc)
+{
+	ltq_hw_irqdispatch(irq_desc_get_irq(desc) - 2);
+}
+
 #ifdef CONFIG_MIPS_MT_SMP
 void __init arch_init_ipiirq(int irq, struct irqaction *action)
 {
@@ -313,23 +318,19 @@ static struct irqaction irq_call = {
 asmlinkage void plat_irq_dispatch(void)
 {
 	unsigned int pending = read_c0_status() & read_c0_cause() & ST0_IM;
-	unsigned int i;
+	int irq;
 
-	if ((MIPS_CPU_TIMER_IRQ == 7) && (pending & CAUSEF_IP7)) {
-		do_IRQ(MIPS_CPU_TIMER_IRQ);
-		goto out;
-	} else {
-		for (i = 0; i < MAX_IM; i++) {
-			if (pending & (CAUSEF_IP2 << i)) {
-				ltq_hw_irqdispatch(i);
-				goto out;
-			}
-		}
+	if (!pending) {
+		spurious_interrupt();
+		return;
 	}
-	pr_alert("Spurious IRQ: CAUSE=0x%08x\n", read_c0_status());
 
-out:
-	return;
+	pending >>= CAUSEB_IP;
+	while (pending) {
+		irq = fls(pending) - 1;
+		do_IRQ(MIPS_CPU_IRQ_BASE + irq);
+		pending &= ~BIT(irq);
+	}
 }
 
 static int icu_map(struct irq_domain *d, unsigned int irq, irq_hw_number_t hw)
@@ -354,11 +355,6 @@ static const struct irq_domain_ops irq_domain_ops = {
 	.map = icu_map,
 };
 
-static struct irqaction cascade = {
-	.handler = no_action,
-	.name = "cascade",
-};
-
 int __init icu_of_init(struct device_node *node, struct device_node *parent)
 {
 	struct device_node *eiu_node;
@@ -390,7 +386,7 @@ int __init icu_of_init(struct device_node *node, struct device_node *parent)
 	mips_cpu_irq_init();
 
 	for (i = 0; i < MAX_IM; i++)
-		setup_irq(i + 2, &cascade);
+		irq_set_chained_handler(i + 2, ltq_hw_irq_handler);
 
 	if (cpu_has_vint) {
 		pr_info("Setting up vectored interrupts\n");

From b3ed3864912e8809e228ddea259e8e0fa1deadf5 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= <uwe@kleine-koenig.org>
Date: Sat, 2 Jul 2016 17:28:08 +0200
Subject: [PATCH 18/27] rtc: s35390a: fix reading out alarm
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

commit f87e904ddd8f0ef120e46045b0addeb1cc88354e upstream.

There are several issues fixed in this patch:

 - When alarm isn't enabled, set .enabled to zero instead of returning
   -EINVAL.
 - Ignore how IRQ1 is configured when determining if IRQ2 is on.
 - The three alarm registers have an enable flag which must be
   evaluated.
 - The chip always triggers when the seconds register gets 0.

Note that the rtc framework however doesn't handle the result correctly
because it doesn't check wday being initialized and so interprets an
alarm being set for 10:00 AM in three days as 10:00 AM tomorrow (or
today if that's not over yet).

Signed-off-by: Uwe Kleine-König <uwe@kleine-koenig.org>
Signed-off-by: Alexandre Belloni <alexandre.belloni@free-electrons.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/rtc/rtc-s35390a.c | 40 ++++++++++++++++++++++++++++++---------
 1 file changed, 31 insertions(+), 9 deletions(-)

diff --git a/drivers/rtc/rtc-s35390a.c b/drivers/rtc/rtc-s35390a.c
index f40afdd0e5f5..6507a01cf9ad 100644
--- a/drivers/rtc/rtc-s35390a.c
+++ b/drivers/rtc/rtc-s35390a.c
@@ -242,6 +242,8 @@ static int s35390a_set_alarm(struct i2c_client *client, struct rtc_wkalrm *alm)
 
 	if (alm->time.tm_wday != -1)
 		buf[S35390A_ALRM_BYTE_WDAY] = bin2bcd(alm->time.tm_wday) | 0x80;
+	else
+		buf[S35390A_ALRM_BYTE_WDAY] = 0;
 
 	buf[S35390A_ALRM_BYTE_HOURS] = s35390a_hr2reg(s35390a,
 			alm->time.tm_hour) | 0x80;
@@ -269,23 +271,43 @@ static int s35390a_read_alarm(struct i2c_client *client, struct rtc_wkalrm *alm)
 	if (err < 0)
 		return err;
 
-	if (bitrev8(sts) != S35390A_INT2_MODE_ALARM)
-		return -EINVAL;
+	if ((bitrev8(sts) & S35390A_INT2_MODE_MASK) != S35390A_INT2_MODE_ALARM) {
+		/*
+		 * When the alarm isn't enabled, the register to configure
+		 * the alarm time isn't accessible.
+		 */
+		alm->enabled = 0;
+		return 0;
+	} else {
+		alm->enabled = 1;
+	}
 
 	err = s35390a_get_reg(s35390a, S35390A_CMD_INT2_REG1, buf, sizeof(buf));
 	if (err < 0)
 		return err;
 
 	/* This chip returns the bits of each byte in reverse order */
-	for (i = 0; i < 3; ++i) {
+	for (i = 0; i < 3; ++i)
 		buf[i] = bitrev8(buf[i]);
-		buf[i] &= ~0x80;
-	}
 
-	alm->time.tm_wday = bcd2bin(buf[S35390A_ALRM_BYTE_WDAY]);
-	alm->time.tm_hour = s35390a_reg2hr(s35390a,
-						buf[S35390A_ALRM_BYTE_HOURS]);
-	alm->time.tm_min = bcd2bin(buf[S35390A_ALRM_BYTE_MINS]);
+	/*
+	 * B0 of the three matching registers is an enable flag. Iff it is set
+	 * the configured value is used for matching.
+	 */
+	if (buf[S35390A_ALRM_BYTE_WDAY] & 0x80)
+		alm->time.tm_wday =
+			bcd2bin(buf[S35390A_ALRM_BYTE_WDAY] & ~0x80);
+
+	if (buf[S35390A_ALRM_BYTE_HOURS] & 0x80)
+		alm->time.tm_hour =
+			s35390a_reg2hr(s35390a,
+				       buf[S35390A_ALRM_BYTE_HOURS] & ~0x80);
+
+	if (buf[S35390A_ALRM_BYTE_MINS] & 0x80)
+		alm->time.tm_min = bcd2bin(buf[S35390A_ALRM_BYTE_MINS] & ~0x80);
+
+	/* alarm triggers always at s=0 */
+	alm->time.tm_sec = 0;
 
 	dev_dbg(&client->dev, "%s: alm is mins=%d, hours=%d, wday=%d\n",
 			__func__, alm->time.tm_min, alm->time.tm_hour,

From fdd4bc9313e59a1757cfc8ac5836cff55ec03eeb Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= <uwe@kleine-koenig.org>
Date: Mon, 3 Apr 2017 23:32:38 +0200
Subject: [PATCH 19/27] rtc: s35390a: make sure all members in the output are
 set
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The rtc core calls the .read_alarm with all fields initialized to 0. As
the s35390a driver doesn't touch some fields the returned date is
interpreted as a date in January 1900. So make sure all fields are set
to -1; some of them are then overwritten with the right data depending
on the hardware state.

In mainline this is done by commit d68778b80dd7 ("rtc: initialize output
parameter for read alarm to "uninitialized"") in the core. This is
considered to dangerous for stable as it might have side effects for
other rtc drivers that might for example rely on alarm->time.tm_sec
being initialized to 0.

Signed-off-by: Uwe Kleine-König <uwe@kleine-koenig.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/rtc/rtc-s35390a.c | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

diff --git a/drivers/rtc/rtc-s35390a.c b/drivers/rtc/rtc-s35390a.c
index 6507a01cf9ad..47b88bbe4ce7 100644
--- a/drivers/rtc/rtc-s35390a.c
+++ b/drivers/rtc/rtc-s35390a.c
@@ -267,6 +267,20 @@ static int s35390a_read_alarm(struct i2c_client *client, struct rtc_wkalrm *alm)
 	char buf[3], sts;
 	int i, err;
 
+	/*
+	 * initialize all members to -1 to signal the core that they are not
+	 * defined by the hardware.
+	 */
+	alm->time.tm_sec = -1;
+	alm->time.tm_min = -1;
+	alm->time.tm_hour = -1;
+	alm->time.tm_mday = -1;
+	alm->time.tm_mon = -1;
+	alm->time.tm_year = -1;
+	alm->time.tm_wday = -1;
+	alm->time.tm_yday = -1;
+	alm->time.tm_isdst = -1;
+
 	err = s35390a_get_reg(s35390a, S35390A_CMD_STATUS2, &sts, sizeof(sts));
 	if (err < 0)
 		return err;

From a55ae9d1937b0bf4004e5416cfa15750cd6d2b22 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= <uwe@kleine-koenig.org>
Date: Sat, 2 Jul 2016 17:28:09 +0200
Subject: [PATCH 20/27] rtc: s35390a: implement reset routine as suggested by
 the reference
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

commit 8e6583f1b5d1f5f129b873f1428b7e414263d847 upstream.

There were two deviations from the reference manual: you have to wait
half a second when POC is active and you might have to repeat
initialization when POC or BLD are still set after the sequence.

Note however that as POC and BLD are cleared by read the driver might
not be able to detect that a reset is necessary. I don't have a good
idea how to fix this.

Additionally report the value read from STATUS1 to the caller. This
prepares the next patch.

Signed-off-by: Uwe Kleine-König <uwe@kleine-koenig.org>
Signed-off-by: Alexandre Belloni <alexandre.belloni@free-electrons.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/rtc/rtc-s35390a.c | 65 +++++++++++++++++++++++++++++++++------
 1 file changed, 55 insertions(+), 10 deletions(-)

diff --git a/drivers/rtc/rtc-s35390a.c b/drivers/rtc/rtc-s35390a.c
index 47b88bbe4ce7..c7c1fce69635 100644
--- a/drivers/rtc/rtc-s35390a.c
+++ b/drivers/rtc/rtc-s35390a.c
@@ -15,6 +15,7 @@
 #include <linux/bitrev.h>
 #include <linux/bcd.h>
 #include <linux/slab.h>
+#include <linux/delay.h>
 
 #define S35390A_CMD_STATUS1	0
 #define S35390A_CMD_STATUS2	1
@@ -94,19 +95,63 @@ static int s35390a_get_reg(struct s35390a *s35390a, int reg, char *buf, int len)
 	return 0;
 }
 
-static int s35390a_reset(struct s35390a *s35390a)
+/*
+ * Returns <0 on error, 0 if rtc is setup fine and 1 if the chip was reset.
+ * To keep the information if an irq is pending, pass the value read from
+ * STATUS1 to the caller.
+ */
+static int s35390a_reset(struct s35390a *s35390a, char *status1)
 {
-	char buf[1];
+	char buf;
+	int ret;
+	unsigned initcount = 0;
 
-	if (s35390a_get_reg(s35390a, S35390A_CMD_STATUS1, buf, sizeof(buf)) < 0)
-		return -EIO;
+	ret = s35390a_get_reg(s35390a, S35390A_CMD_STATUS1, status1, 1);
+	if (ret < 0)
+		return ret;
 
-	if (!(buf[0] & (S35390A_FLAG_POC | S35390A_FLAG_BLD)))
+	if (*status1 & S35390A_FLAG_POC)
+		/*
+		 * Do not communicate for 0.5 seconds since the power-on
+		 * detection circuit is in operation.
+		 */
+		msleep(500);
+	else if (!(*status1 & S35390A_FLAG_BLD))
+		/*
+		 * If both POC and BLD are unset everything is fine.
+		 */
 		return 0;
 
-	buf[0] |= (S35390A_FLAG_RESET | S35390A_FLAG_24H);
-	buf[0] &= 0xf0;
-	return s35390a_set_reg(s35390a, S35390A_CMD_STATUS1, buf, sizeof(buf));
+	/*
+	 * At least one of POC and BLD are set, so reinitialise chip. Keeping
+	 * this information in the hardware to know later that the time isn't
+	 * valid is unfortunately not possible because POC and BLD are cleared
+	 * on read. So the reset is best done now.
+	 *
+	 * The 24H bit is kept over reset, so set it already here.
+	 */
+initialize:
+	*status1 = S35390A_FLAG_24H;
+	buf = S35390A_FLAG_RESET | S35390A_FLAG_24H;
+	ret = s35390a_set_reg(s35390a, S35390A_CMD_STATUS1, &buf, 1);
+
+	if (ret < 0)
+		return ret;
+
+	ret = s35390a_get_reg(s35390a, S35390A_CMD_STATUS1, &buf, 1);
+	if (ret < 0)
+		return ret;
+
+	if (buf & (S35390A_FLAG_POC | S35390A_FLAG_BLD)) {
+		/* Try up to five times to reset the chip */
+		if (initcount < 5) {
+			++initcount;
+			goto initialize;
+		} else
+			return -EIO;
+	}
+
+	return 1;
 }
 
 static int s35390a_disable_test_mode(struct s35390a *s35390a)
@@ -367,7 +412,7 @@ static int s35390a_probe(struct i2c_client *client,
 	unsigned int i;
 	struct s35390a *s35390a;
 	struct rtc_time tm;
-	char buf[1];
+	char buf[1], status1;
 
 	if (!i2c_check_functionality(client->adapter, I2C_FUNC_I2C)) {
 		err = -ENODEV;
@@ -396,7 +441,7 @@ static int s35390a_probe(struct i2c_client *client,
 		}
 	}
 
-	err = s35390a_reset(s35390a);
+	err = s35390a_reset(s35390a, &status1);
 	if (err < 0) {
 		dev_err(&client->dev, "error resetting chip\n");
 		goto exit_dummy;

From 3a1246b46df5210164ee43d4c5c560d0dc9ed2ce Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= <uwe@kleine-koenig.org>
Date: Sat, 2 Jul 2016 17:28:10 +0200
Subject: [PATCH 21/27] rtc: s35390a: improve irq handling
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

commit 3bd32722c827d00eafe8e6d5b83e9f3148ea7c7e upstream.

On some QNAP NAS devices the rtc can wake the machine. Several people
noticed that once the machine was woken this way it fails to shut down.
That's because the driver fails to acknowledge the interrupt and so it
keeps active and restarts the machine immediatly after shutdown. See
https://bugs.debian.org/794266 for a bug report.

Doing this correctly requires to interpret the INT2 flag of the first read
of the STATUS1 register because this bit is cleared by read.

Note this is not maximally robust though because a pending irq isn't
detected when the STATUS1 register was already read (and so INT2 is not
set) but the irq was not disabled. But that is a hardware imposed problem
that cannot easily be fixed by software.

Signed-off-by: Uwe Kleine-König <uwe@kleine-koenig.org>
Signed-off-by: Alexandre Belloni <alexandre.belloni@free-electrons.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/rtc/rtc-s35390a.c | 48 +++++++++++++++++++++++++--------------
 1 file changed, 31 insertions(+), 17 deletions(-)

diff --git a/drivers/rtc/rtc-s35390a.c b/drivers/rtc/rtc-s35390a.c
index c7c1fce69635..00662dd28d66 100644
--- a/drivers/rtc/rtc-s35390a.c
+++ b/drivers/rtc/rtc-s35390a.c
@@ -35,10 +35,14 @@
 #define S35390A_ALRM_BYTE_HOURS	1
 #define S35390A_ALRM_BYTE_MINS	2
 
+/* flags for STATUS1 */
 #define S35390A_FLAG_POC	0x01
 #define S35390A_FLAG_BLD	0x02
+#define S35390A_FLAG_INT2	0x04
 #define S35390A_FLAG_24H	0x40
 #define S35390A_FLAG_RESET	0x80
+
+/* flag for STATUS2 */
 #define S35390A_FLAG_TEST	0x01
 
 #define S35390A_INT2_MODE_MASK		0xF0
@@ -408,11 +412,11 @@ static struct i2c_driver s35390a_driver;
 static int s35390a_probe(struct i2c_client *client,
 			 const struct i2c_device_id *id)
 {
-	int err;
+	int err, err_reset;
 	unsigned int i;
 	struct s35390a *s35390a;
 	struct rtc_time tm;
-	char buf[1], status1;
+	char buf, status1;
 
 	if (!i2c_check_functionality(client->adapter, I2C_FUNC_I2C)) {
 		err = -ENODEV;
@@ -441,29 +445,35 @@ static int s35390a_probe(struct i2c_client *client,
 		}
 	}
 
-	err = s35390a_reset(s35390a, &status1);
-	if (err < 0) {
+	err_reset = s35390a_reset(s35390a, &status1);
+	if (err_reset < 0) {
+		err = err_reset;
 		dev_err(&client->dev, "error resetting chip\n");
 		goto exit_dummy;
 	}
 
-	err = s35390a_disable_test_mode(s35390a);
-	if (err < 0) {
-		dev_err(&client->dev, "error disabling test mode\n");
-		goto exit_dummy;
-	}
-
-	err = s35390a_get_reg(s35390a, S35390A_CMD_STATUS1, buf, sizeof(buf));
-	if (err < 0) {
-		dev_err(&client->dev, "error checking 12/24 hour mode\n");
-		goto exit_dummy;
-	}
-	if (buf[0] & S35390A_FLAG_24H)
+	if (status1 & S35390A_FLAG_24H)
 		s35390a->twentyfourhour = 1;
 	else
 		s35390a->twentyfourhour = 0;
 
-	if (s35390a_get_datetime(client, &tm) < 0)
+	if (status1 & S35390A_FLAG_INT2) {
+		/* disable alarm (and maybe test mode) */
+		buf = 0;
+		err = s35390a_set_reg(s35390a, S35390A_CMD_STATUS2, &buf, 1);
+		if (err < 0) {
+			dev_err(&client->dev, "error disabling alarm");
+			goto exit_dummy;
+		}
+	} else {
+		err = s35390a_disable_test_mode(s35390a);
+		if (err < 0) {
+			dev_err(&client->dev, "error disabling test mode\n");
+			goto exit_dummy;
+		}
+	}
+
+	if (err_reset > 0 || s35390a_get_datetime(client, &tm) < 0)
 		dev_warn(&client->dev, "clock needs to be set\n");
 
 	device_set_wakeup_capable(&client->dev, 1);
@@ -476,6 +486,10 @@ static int s35390a_probe(struct i2c_client *client,
 		err = PTR_ERR(s35390a->rtc);
 		goto exit_dummy;
 	}
+
+	if (status1 & S35390A_FLAG_INT2)
+		rtc_update_irq(s35390a->rtc, 1, RTC_AF);
+
 	return 0;
 
 exit_dummy:

From 42462d23e60b89a3c2f7d8d63f5f4e464ba77727 Mon Sep 17 00:00:00 2001
From: David Hildenbrand <david@redhat.com>
Date: Thu, 23 Mar 2017 18:24:19 +0100
Subject: [PATCH 22/27] KVM: kvm_io_bus_unregister_dev() should never fail

commit 90db10434b163e46da413d34db8d0e77404cc645 upstream.

No caller currently checks the return value of
kvm_io_bus_unregister_dev(). This is evil, as all callers silently go on
freeing their device. A stale reference will remain in the io_bus,
getting at least used again, when the iobus gets teared down on
kvm_destroy_vm() - leading to use after free errors.

There is nothing the callers could do, except retrying over and over
again.

So let's simply remove the bus altogether, print an error and make
sure no one can access this broken bus again (returning -ENOMEM on any
attempt to access it).

Fixes: e93f8a0f821e ("KVM: convert io_bus to SRCU")
Reported-by: Dmitry Vyukov <dvyukov@google.com>
Reviewed-by: Cornelia Huck <cornelia.huck@de.ibm.com>
Signed-off-by: David Hildenbrand <david@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/kvm_host.h |  4 ++--
 virt/kvm/eventfd.c       |  3 ++-
 virt/kvm/kvm_main.c      | 40 +++++++++++++++++++++++-----------------
 3 files changed, 27 insertions(+), 20 deletions(-)

diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index c923350ca20a..d7ce4e3280db 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -182,8 +182,8 @@ int kvm_io_bus_read(struct kvm_vcpu *vcpu, enum kvm_bus bus_idx, gpa_t addr,
 		    int len, void *val);
 int kvm_io_bus_register_dev(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr,
 			    int len, struct kvm_io_device *dev);
-int kvm_io_bus_unregister_dev(struct kvm *kvm, enum kvm_bus bus_idx,
-			      struct kvm_io_device *dev);
+void kvm_io_bus_unregister_dev(struct kvm *kvm, enum kvm_bus bus_idx,
+			       struct kvm_io_device *dev);
 
 #ifdef CONFIG_KVM_ASYNC_PF
 struct kvm_async_pf {
diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c
index 46dbc0a7dfc1..49001fa84ead 100644
--- a/virt/kvm/eventfd.c
+++ b/virt/kvm/eventfd.c
@@ -868,7 +868,8 @@ kvm_deassign_ioeventfd_idx(struct kvm *kvm, enum kvm_bus bus_idx,
 			continue;
 
 		kvm_io_bus_unregister_dev(kvm, bus_idx, &p->dev);
-		kvm->buses[bus_idx]->ioeventfd_count--;
+		if (kvm->buses[bus_idx])
+			kvm->buses[bus_idx]->ioeventfd_count--;
 		ioeventfd_release(p);
 		ret = 0;
 		break;
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 1ac5b7be7282..cb092bd9965b 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -655,7 +655,8 @@ static void kvm_destroy_vm(struct kvm *kvm)
 	spin_unlock(&kvm_lock);
 	kvm_free_irq_routing(kvm);
 	for (i = 0; i < KVM_NR_BUSES; i++) {
-		kvm_io_bus_destroy(kvm->buses[i]);
+		if (kvm->buses[i])
+			kvm_io_bus_destroy(kvm->buses[i]);
 		kvm->buses[i] = NULL;
 	}
 	kvm_coalesced_mmio_free(kvm);
@@ -3273,6 +3274,8 @@ int kvm_io_bus_write(struct kvm_vcpu *vcpu, enum kvm_bus bus_idx, gpa_t addr,
 	};
 
 	bus = srcu_dereference(vcpu->kvm->buses[bus_idx], &vcpu->kvm->srcu);
+	if (!bus)
+		return -ENOMEM;
 	r = __kvm_io_bus_write(vcpu, bus, &range, val);
 	return r < 0 ? r : 0;
 }
@@ -3290,6 +3293,8 @@ int kvm_io_bus_write_cookie(struct kvm_vcpu *vcpu, enum kvm_bus bus_idx,
 	};
 
 	bus = srcu_dereference(vcpu->kvm->buses[bus_idx], &vcpu->kvm->srcu);
+	if (!bus)
+		return -ENOMEM;
 
 	/* First try the device referenced by cookie. */
 	if ((cookie >= 0) && (cookie < bus->dev_count) &&
@@ -3340,6 +3345,8 @@ int kvm_io_bus_read(struct kvm_vcpu *vcpu, enum kvm_bus bus_idx, gpa_t addr,
 	};
 
 	bus = srcu_dereference(vcpu->kvm->buses[bus_idx], &vcpu->kvm->srcu);
+	if (!bus)
+		return -ENOMEM;
 	r = __kvm_io_bus_read(vcpu, bus, &range, val);
 	return r < 0 ? r : 0;
 }
@@ -3352,6 +3359,9 @@ int kvm_io_bus_register_dev(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr,
 	struct kvm_io_bus *new_bus, *bus;
 
 	bus = kvm->buses[bus_idx];
+	if (!bus)
+		return -ENOMEM;
+
 	/* exclude ioeventfd which is limited by maximum fd */
 	if (bus->dev_count - bus->ioeventfd_count > NR_IOBUS_DEVS - 1)
 		return -ENOSPC;
@@ -3371,45 +3381,41 @@ int kvm_io_bus_register_dev(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr,
 }
 
 /* Caller must hold slots_lock. */
-int kvm_io_bus_unregister_dev(struct kvm *kvm, enum kvm_bus bus_idx,
-			      struct kvm_io_device *dev)
+void kvm_io_bus_unregister_dev(struct kvm *kvm, enum kvm_bus bus_idx,
+			       struct kvm_io_device *dev)
 {
-	int i, r;
+	int i;
 	struct kvm_io_bus *new_bus, *bus;
 
 	bus = kvm->buses[bus_idx];
-
-	/*
-	 * It's possible the bus being released before hand. If so,
-	 * we're done here.
-	 */
 	if (!bus)
-		return 0;
+		return;
 
-	r = -ENOENT;
 	for (i = 0; i < bus->dev_count; i++)
 		if (bus->range[i].dev == dev) {
-			r = 0;
 			break;
 		}
 
-	if (r)
-		return r;
+	if (i == bus->dev_count)
+		return;
 
 	new_bus = kmalloc(sizeof(*bus) + ((bus->dev_count - 1) *
 			  sizeof(struct kvm_io_range)), GFP_KERNEL);
-	if (!new_bus)
-		return -ENOMEM;
+	if (!new_bus)  {
+		pr_err("kvm: failed to shrink bus, removing it completely\n");
+		goto broken;
+	}
 
 	memcpy(new_bus, bus, sizeof(*bus) + i * sizeof(struct kvm_io_range));
 	new_bus->dev_count--;
 	memcpy(new_bus->range + i, bus->range + i + 1,
 	       (new_bus->dev_count - i) * sizeof(struct kvm_io_range));
 
+broken:
 	rcu_assign_pointer(kvm->buses[bus_idx], new_bus);
 	synchronize_srcu_expedited(&kvm->srcu);
 	kfree(bus);
-	return r;
+	return;
 }
 
 static struct notifier_block kvm_cpu_notifier = {

From 063d30f187f5c492aa4a6cca88b8afa08f5a170c Mon Sep 17 00:00:00 2001
From: Alexandre Belloni <alexandre.belloni@free-electrons.com>
Date: Tue, 25 Oct 2016 11:37:59 +0200
Subject: [PATCH 23/27] power: reset: at91-poweroff: timely shutdown LPDDR
 memories

commit 0b0408745e7ff24757cbfd571d69026c0ddb803c upstream.

LPDDR memories can only handle up to 400 uncontrolled power off. Ensure the
proper power off sequence is used before shutting down the platform.

Signed-off-by: Alexandre Belloni <alexandre.belloni@free-electrons.com>
Signed-off-by: Sebastian Reichel <sre@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/power/reset/at91-poweroff.c | 54 ++++++++++++++++++++++++++++-
 1 file changed, 53 insertions(+), 1 deletion(-)

diff --git a/drivers/power/reset/at91-poweroff.c b/drivers/power/reset/at91-poweroff.c
index e9e24df35f26..2579f025b90b 100644
--- a/drivers/power/reset/at91-poweroff.c
+++ b/drivers/power/reset/at91-poweroff.c
@@ -14,9 +14,12 @@
 #include <linux/io.h>
 #include <linux/module.h>
 #include <linux/of.h>
+#include <linux/of_address.h>
 #include <linux/platform_device.h>
 #include <linux/printk.h>
 
+#include <soc/at91/at91sam9_ddrsdr.h>
+
 #define AT91_SHDW_CR	0x00		/* Shut Down Control Register */
 #define AT91_SHDW_SHDW		BIT(0)			/* Shut Down command */
 #define AT91_SHDW_KEY		(0xa5 << 24)		/* KEY Password */
@@ -50,6 +53,7 @@ static const char *shdwc_wakeup_modes[] = {
 
 static void __iomem *at91_shdwc_base;
 static struct clk *sclk;
+static void __iomem *mpddrc_base;
 
 static void __init at91_wakeup_status(void)
 {
@@ -73,6 +77,29 @@ static void at91_poweroff(void)
 	writel(AT91_SHDW_KEY | AT91_SHDW_SHDW, at91_shdwc_base + AT91_SHDW_CR);
 }
 
+static void at91_lpddr_poweroff(void)
+{
+	asm volatile(
+		/* Align to cache lines */
+		".balign 32\n\t"
+
+		/* Ensure AT91_SHDW_CR is in the TLB by reading it */
+		"	ldr	r6, [%2, #" __stringify(AT91_SHDW_CR) "]\n\t"
+
+		/* Power down SDRAM0 */
+		"	str	%1, [%0, #" __stringify(AT91_DDRSDRC_LPR) "]\n\t"
+		/* Shutdown CPU */
+		"	str	%3, [%2, #" __stringify(AT91_SHDW_CR) "]\n\t"
+
+		"	b	.\n\t"
+		:
+		: "r" (mpddrc_base),
+		  "r" cpu_to_le32(AT91_DDRSDRC_LPDDR2_PWOFF),
+		  "r" (at91_shdwc_base),
+		  "r" cpu_to_le32(AT91_SHDW_KEY | AT91_SHDW_SHDW)
+		: "r0");
+}
+
 static int at91_poweroff_get_wakeup_mode(struct device_node *np)
 {
 	const char *pm;
@@ -124,6 +151,8 @@ static void at91_poweroff_dt_set_wakeup_mode(struct platform_device *pdev)
 static int __init at91_poweroff_probe(struct platform_device *pdev)
 {
 	struct resource *res;
+	struct device_node *np;
+	u32 ddr_type;
 	int ret;
 
 	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
@@ -150,12 +179,30 @@ static int __init at91_poweroff_probe(struct platform_device *pdev)
 
 	pm_power_off = at91_poweroff;
 
+	np = of_find_compatible_node(NULL, NULL, "atmel,sama5d3-ddramc");
+	if (!np)
+		return 0;
+
+	mpddrc_base = of_iomap(np, 0);
+	of_node_put(np);
+
+	if (!mpddrc_base)
+		return 0;
+
+	ddr_type = readl(mpddrc_base + AT91_DDRSDRC_MDR) & AT91_DDRSDRC_MD;
+	if ((ddr_type == AT91_DDRSDRC_MD_LPDDR2) ||
+	    (ddr_type == AT91_DDRSDRC_MD_LPDDR3))
+		pm_power_off = at91_lpddr_poweroff;
+	else
+		iounmap(mpddrc_base);
+
 	return 0;
 }
 
 static int __exit at91_poweroff_remove(struct platform_device *pdev)
 {
-	if (pm_power_off == at91_poweroff)
+	if (pm_power_off == at91_poweroff ||
+	    pm_power_off == at91_lpddr_poweroff)
 		pm_power_off = NULL;
 
 	clk_disable_unprepare(sclk);
@@ -163,6 +210,11 @@ static int __exit at91_poweroff_remove(struct platform_device *pdev)
 	return 0;
 }
 
+static const struct of_device_id at91_ramc_of_match[] = {
+	{ .compatible = "atmel,sama5d3-ddramc", },
+	{ /* sentinel */ }
+};
+
 static const struct of_device_id at91_poweroff_of_match[] = {
 	{ .compatible = "atmel,at91sam9260-shdwc", },
 	{ .compatible = "atmel,at91sam9rl-shdwc", },

From 2cbd78f4239bd28b86c6ff8e3b7867db72762f1a Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@suse.com>
Date: Wed, 8 Mar 2017 07:38:05 +1100
Subject: [PATCH 24/27] blk: improve order of bio handling in
 generic_make_request()

commit 79bd99596b7305ab08109a8bf44a6a4511dbf1cd upstream.

To avoid recursion on the kernel stack when stacked block devices
are in use, generic_make_request() will, when called recursively,
queue new requests for later handling.  They will be handled when the
make_request_fn for the current bio completes.

If any bios are submitted by a make_request_fn, these will ultimately
be handled seqeuntially.  If the handling of one of those generates
further requests, they will be added to the end of the queue.

This strict first-in-first-out behaviour can lead to deadlocks in
various ways, normally because a request might need to wait for a
previous request to the same device to complete.  This can happen when
they share a mempool, and can happen due to interdependencies
particular to the device.  Both md and dm have examples where this happens.

These deadlocks can be erradicated by more selective ordering of bios.
Specifically by handling them in depth-first order.  That is: when the
handling of one bio generates one or more further bios, they are
handled immediately after the parent, before any siblings of the
parent.  That way, when generic_make_request() calls make_request_fn
for some particular device, we can be certain that all previously
submited requests for that device have been completely handled and are
not waiting for anything in the queue of requests maintained in
generic_make_request().

An easy way to achieve this would be to use a last-in-first-out stack
instead of a queue.  However this will change the order of consecutive
bios submitted by a make_request_fn, which could have unexpected consequences.
Instead we take a slightly more complex approach.
A fresh queue is created for each call to a make_request_fn.  After it completes,
any bios for a different device are placed on the front of the main queue, followed
by any bios for the same device, followed by all bios that were already on
the queue before the make_request_fn was called.
This provides the depth-first approach without reordering bios on the same level.

This, by itself, it not enough to remove all deadlocks.  It just makes
it possible for drivers to take the extra step required themselves.

To avoid deadlocks, drivers must never risk waiting for a request
after submitting one to generic_make_request.  This includes never
allocing from a mempool twice in the one call to a make_request_fn.

A common pattern in drivers is to call bio_split() in a loop, handling
the first part and then looping around to possibly split the next part.
Instead, a driver that finds it needs to split a bio should queue
(with generic_make_request) the second part, handle the first part,
and then return.  The new code in generic_make_request will ensure the
requests to underlying bios are processed first, then the second bio
that was split off.  If it splits again, the same process happens.  In
each case one bio will be completely handled before the next one is attempted.

With this is place, it should be possible to disable the
punt_bios_to_recover() recovery thread for many block devices, and
eventually it may be possible to remove it completely.

Ref: http://www.spinics.net/lists/raid/msg54680.html
Tested-by: Jinpu Wang <jinpu.wang@profitbricks.com>
Inspired-by: Lars Ellenberg <lars.ellenberg@linbit.com>
Signed-off-by: NeilBrown <neilb@suse.com>
Signed-off-by: Jens Axboe <axboe@fb.com>
[jwang: backport to 4.4]
Signed-off-by: Jack Wang <jinpu.wang@profitbricks.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 block/blk-core.c | 25 ++++++++++++++++++++-----
 1 file changed, 20 insertions(+), 5 deletions(-)

diff --git a/block/blk-core.c b/block/blk-core.c
index 4fab5d610805..7a58a22d1595 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -2063,18 +2063,33 @@ blk_qc_t generic_make_request(struct bio *bio)
 		struct request_queue *q = bdev_get_queue(bio->bi_bdev);
 
 		if (likely(blk_queue_enter(q, __GFP_DIRECT_RECLAIM) == 0)) {
+			struct bio_list lower, same, hold;
+
+			/* Create a fresh bio_list for all subordinate requests */
+			hold = bio_list_on_stack;
+			bio_list_init(&bio_list_on_stack);
 
 			ret = q->make_request_fn(q, bio);
 
 			blk_queue_exit(q);
-
-			bio = bio_list_pop(current->bio_list);
+			/* sort new bios into those for a lower level
+			 * and those for the same level
+			 */
+			bio_list_init(&lower);
+			bio_list_init(&same);
+			while ((bio = bio_list_pop(&bio_list_on_stack)) != NULL)
+				if (q == bdev_get_queue(bio->bi_bdev))
+					bio_list_add(&same, bio);
+				else
+					bio_list_add(&lower, bio);
+			/* now assemble so we handle the lowest level first */
+			bio_list_merge(&bio_list_on_stack, &lower);
+			bio_list_merge(&bio_list_on_stack, &same);
+			bio_list_merge(&bio_list_on_stack, &hold);
 		} else {
-			struct bio *bio_next = bio_list_pop(current->bio_list);
-
 			bio_io_error(bio);
-			bio = bio_next;
 		}
+		bio = bio_list_pop(current->bio_list);
 	} while (bio);
 	current->bio_list = NULL; /* deactivate */
 

From 5cca175b6cda16b68b18967210872327b1cadf4f Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@suse.com>
Date: Fri, 10 Mar 2017 17:00:47 +1100
Subject: [PATCH 25/27] blk: Ensure users for current->bio_list can see the
 full list.

commit f5fe1b51905df7cfe4fdfd85c5fb7bc5b71a094f upstream.

Commit 79bd99596b73 ("blk: improve order of bio handling in generic_make_request()")
changed current->bio_list so that it did not contain *all* of the
queued bios, but only those submitted by the currently running
make_request_fn.

There are two places which walk the list and requeue selected bios,
and others that check if the list is empty.  These are no longer
correct.

So redefine current->bio_list to point to an array of two lists, which
contain all queued bios, and adjust various code to test or walk both
lists.

Signed-off-by: NeilBrown <neilb@suse.com>
Fixes: 79bd99596b73 ("blk: improve order of bio handling in generic_make_request()")
Signed-off-by: Jens Axboe <axboe@fb.com>
[jwang: backport to 4.4]
Signed-off-by: Jack Wang <jinpu.wang@profitbricks.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
[bwh: Restore changes in device-mapper from upstream version]
Signed-off-by: Ben Hutchings <ben.hutchings@codethink.co.uk>
---
 block/bio.c         | 12 +++++++++---
 block/blk-core.c    | 31 +++++++++++++++++++------------
 drivers/md/dm.c     | 27 +++++++++++++++------------
 drivers/md/raid1.c  |  3 ++-
 drivers/md/raid10.c |  3 ++-
 5 files changed, 47 insertions(+), 29 deletions(-)

diff --git a/block/bio.c b/block/bio.c
index 46e2cc1d4016..14263fab94d3 100644
--- a/block/bio.c
+++ b/block/bio.c
@@ -373,10 +373,14 @@ static void punt_bios_to_rescuer(struct bio_set *bs)
 	bio_list_init(&punt);
 	bio_list_init(&nopunt);
 
-	while ((bio = bio_list_pop(current->bio_list)))
+	while ((bio = bio_list_pop(&current->bio_list[0])))
 		bio_list_add(bio->bi_pool == bs ? &punt : &nopunt, bio);
+	current->bio_list[0] = nopunt;
 
-	*current->bio_list = nopunt;
+	bio_list_init(&nopunt);
+	while ((bio = bio_list_pop(&current->bio_list[1])))
+		bio_list_add(bio->bi_pool == bs ? &punt : &nopunt, bio);
+	current->bio_list[1] = nopunt;
 
 	spin_lock(&bs->rescue_lock);
 	bio_list_merge(&bs->rescue_list, &punt);
@@ -464,7 +468,9 @@ struct bio *bio_alloc_bioset(gfp_t gfp_mask, int nr_iovecs, struct bio_set *bs)
 		 * we retry with the original gfp_flags.
 		 */
 
-		if (current->bio_list && !bio_list_empty(current->bio_list))
+		if (current->bio_list &&
+		    (!bio_list_empty(&current->bio_list[0]) ||
+		     !bio_list_empty(&current->bio_list[1])))
 			gfp_mask &= ~__GFP_DIRECT_RECLAIM;
 
 		p = mempool_alloc(bs->bio_pool, gfp_mask);
diff --git a/block/blk-core.c b/block/blk-core.c
index 7a58a22d1595..ef083e7a37c5 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -2021,7 +2021,14 @@ end_io:
  */
 blk_qc_t generic_make_request(struct bio *bio)
 {
-	struct bio_list bio_list_on_stack;
+	/*
+	 * bio_list_on_stack[0] contains bios submitted by the current
+	 * make_request_fn.
+	 * bio_list_on_stack[1] contains bios that were submitted before
+	 * the current make_request_fn, but that haven't been processed
+	 * yet.
+	 */
+	struct bio_list bio_list_on_stack[2];
 	blk_qc_t ret = BLK_QC_T_NONE;
 
 	if (!generic_make_request_checks(bio))
@@ -2038,7 +2045,7 @@ blk_qc_t generic_make_request(struct bio *bio)
 	 * should be added at the tail
 	 */
 	if (current->bio_list) {
-		bio_list_add(current->bio_list, bio);
+		bio_list_add(&current->bio_list[0], bio);
 		goto out;
 	}
 
@@ -2057,17 +2064,17 @@ blk_qc_t generic_make_request(struct bio *bio)
 	 * bio_list, and call into ->make_request() again.
 	 */
 	BUG_ON(bio->bi_next);
-	bio_list_init(&bio_list_on_stack);
-	current->bio_list = &bio_list_on_stack;
+	bio_list_init(&bio_list_on_stack[0]);
+	current->bio_list = bio_list_on_stack;
 	do {
 		struct request_queue *q = bdev_get_queue(bio->bi_bdev);
 
 		if (likely(blk_queue_enter(q, __GFP_DIRECT_RECLAIM) == 0)) {
-			struct bio_list lower, same, hold;
+			struct bio_list lower, same;
 
 			/* Create a fresh bio_list for all subordinate requests */
-			hold = bio_list_on_stack;
-			bio_list_init(&bio_list_on_stack);
+			bio_list_on_stack[1] = bio_list_on_stack[0];
+			bio_list_init(&bio_list_on_stack[0]);
 
 			ret = q->make_request_fn(q, bio);
 
@@ -2077,19 +2084,19 @@ blk_qc_t generic_make_request(struct bio *bio)
 			 */
 			bio_list_init(&lower);
 			bio_list_init(&same);
-			while ((bio = bio_list_pop(&bio_list_on_stack)) != NULL)
+			while ((bio = bio_list_pop(&bio_list_on_stack[0])) != NULL)
 				if (q == bdev_get_queue(bio->bi_bdev))
 					bio_list_add(&same, bio);
 				else
 					bio_list_add(&lower, bio);
 			/* now assemble so we handle the lowest level first */
-			bio_list_merge(&bio_list_on_stack, &lower);
-			bio_list_merge(&bio_list_on_stack, &same);
-			bio_list_merge(&bio_list_on_stack, &hold);
+			bio_list_merge(&bio_list_on_stack[0], &lower);
+			bio_list_merge(&bio_list_on_stack[0], &same);
+			bio_list_merge(&bio_list_on_stack[0], &bio_list_on_stack[1]);
 		} else {
 			bio_io_error(bio);
 		}
-		bio = bio_list_pop(current->bio_list);
+		bio = bio_list_pop(&bio_list_on_stack[0]);
 	} while (bio);
 	current->bio_list = NULL; /* deactivate */
 
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index 397f0454100b..320eb3c4bb6b 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -1481,26 +1481,29 @@ static void flush_current_bio_list(struct blk_plug_cb *cb, bool from_schedule)
 	struct dm_offload *o = container_of(cb, struct dm_offload, cb);
 	struct bio_list list;
 	struct bio *bio;
+	int i;
 
 	INIT_LIST_HEAD(&o->cb.list);
 
 	if (unlikely(!current->bio_list))
 		return;
 
-	list = *current->bio_list;
-	bio_list_init(current->bio_list);
+	for (i = 0; i < 2; i++) {
+		list = current->bio_list[i];
+		bio_list_init(&current->bio_list[i]);
 
-	while ((bio = bio_list_pop(&list))) {
-		struct bio_set *bs = bio->bi_pool;
-		if (unlikely(!bs) || bs == fs_bio_set) {
-			bio_list_add(current->bio_list, bio);
-			continue;
+		while ((bio = bio_list_pop(&list))) {
+			struct bio_set *bs = bio->bi_pool;
+			if (unlikely(!bs) || bs == fs_bio_set) {
+				bio_list_add(&current->bio_list[i], bio);
+				continue;
+			}
+
+			spin_lock(&bs->rescue_lock);
+			bio_list_add(&bs->rescue_list, bio);
+			queue_work(bs->rescue_workqueue, &bs->rescue_work);
+			spin_unlock(&bs->rescue_lock);
 		}
-
-		spin_lock(&bs->rescue_lock);
-		bio_list_add(&bs->rescue_list, bio);
-		queue_work(bs->rescue_workqueue, &bs->rescue_work);
-		spin_unlock(&bs->rescue_lock);
 	}
 }
 
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index 515554c7365b..9be39988bf06 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -877,7 +877,8 @@ static sector_t wait_barrier(struct r1conf *conf, struct bio *bio)
 				     ((conf->start_next_window <
 				       conf->next_resync + RESYNC_SECTORS) &&
 				      current->bio_list &&
-				      !bio_list_empty(current->bio_list))),
+				     (!bio_list_empty(&current->bio_list[0]) ||
+				      !bio_list_empty(&current->bio_list[1])))),
 				    conf->resync_lock);
 		conf->nr_waiting--;
 	}
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index a92979e704e3..e5ee4e9e0ea5 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -946,7 +946,8 @@ static void wait_barrier(struct r10conf *conf)
 				    !conf->barrier ||
 				    (conf->nr_pending &&
 				     current->bio_list &&
-				     !bio_list_empty(current->bio_list)),
+				     (!bio_list_empty(&current->bio_list[0]) ||
+				      !bio_list_empty(&current->bio_list[1]))),
 				    conf->resync_lock);
 		conf->nr_waiting--;
 	}

From 84bd21a708b83a24d26cd0010ea94106c96557de Mon Sep 17 00:00:00 2001
From: "Jason A. Donenfeld" <Jason@zx2c4.com>
Date: Thu, 23 Mar 2017 12:24:43 +0100
Subject: [PATCH 26/27] padata: avoid race in reordering

commit de5540d088fe97ad583cc7d396586437b32149a5 upstream.

Under extremely heavy uses of padata, crashes occur, and with list
debugging turned on, this happens instead:

[87487.298728] WARNING: CPU: 1 PID: 882 at lib/list_debug.c:33
__list_add+0xae/0x130
[87487.301868] list_add corruption. prev->next should be next
(ffffb17abfc043d0), but was ffff8dba70872c80. (prev=ffff8dba70872b00).
[87487.339011]  [<ffffffff9a53d075>] dump_stack+0x68/0xa3
[87487.342198]  [<ffffffff99e119a1>] ? console_unlock+0x281/0x6d0
[87487.345364]  [<ffffffff99d6b91f>] __warn+0xff/0x140
[87487.348513]  [<ffffffff99d6b9aa>] warn_slowpath_fmt+0x4a/0x50
[87487.351659]  [<ffffffff9a58b5de>] __list_add+0xae/0x130
[87487.354772]  [<ffffffff9add5094>] ? _raw_spin_lock+0x64/0x70
[87487.357915]  [<ffffffff99eefd66>] padata_reorder+0x1e6/0x420
[87487.361084]  [<ffffffff99ef0055>] padata_do_serial+0xa5/0x120

padata_reorder calls list_add_tail with the list to which its adding
locked, which seems correct:

spin_lock(&squeue->serial.lock);
list_add_tail(&padata->list, &squeue->serial.list);
spin_unlock(&squeue->serial.lock);

This therefore leaves only place where such inconsistency could occur:
if padata->list is added at the same time on two different threads.
This pdata pointer comes from the function call to
padata_get_next(pd), which has in it the following block:

next_queue = per_cpu_ptr(pd->pqueue, cpu);
padata = NULL;
reorder = &next_queue->reorder;
if (!list_empty(&reorder->list)) {
       padata = list_entry(reorder->list.next,
                           struct padata_priv, list);
       spin_lock(&reorder->lock);
       list_del_init(&padata->list);
       atomic_dec(&pd->reorder_objects);
       spin_unlock(&reorder->lock);

       pd->processed++;

       goto out;
}
out:
return padata;

I strongly suspect that the problem here is that two threads can race
on reorder list. Even though the deletion is locked, call to
list_entry is not locked, which means it's feasible that two threads
pick up the same padata object and subsequently call list_add_tail on
them at the same time. The fix is thus be hoist that lock outside of
that block.

Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
Acked-by: Steffen Klassert <steffen.klassert@secunet.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 kernel/padata.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/kernel/padata.c b/kernel/padata.c
index b38bea9c466a..401227e3967c 100644
--- a/kernel/padata.c
+++ b/kernel/padata.c
@@ -189,19 +189,20 @@ static struct padata_priv *padata_get_next(struct parallel_data *pd)
 
 	reorder = &next_queue->reorder;
 
+	spin_lock(&reorder->lock);
 	if (!list_empty(&reorder->list)) {
 		padata = list_entry(reorder->list.next,
 				    struct padata_priv, list);
 
-		spin_lock(&reorder->lock);
 		list_del_init(&padata->list);
 		atomic_dec(&pd->reorder_objects);
-		spin_unlock(&reorder->lock);
 
 		pd->processed++;
 
+		spin_unlock(&reorder->lock);
 		goto out;
 	}
+	spin_unlock(&reorder->lock);
 
 	if (__this_cpu_read(pd->pqueue->cpu_index) == next_queue->cpu_index) {
 		padata = ERR_PTR(-ENODATA);

From 8f8ee9706b0a64a3506b9d9789ace7c44f3d817d Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Sat, 8 Apr 2017 09:53:53 +0200
Subject: [PATCH 27/27] Linux 4.4.60

---
 Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Makefile b/Makefile
index 083724c6ca4d..fb7c2b40753d 100644
--- a/Makefile
+++ b/Makefile
@@ -1,6 +1,6 @@
 VERSION = 4
 PATCHLEVEL = 4
-SUBLEVEL = 59
+SUBLEVEL = 60
 EXTRAVERSION =
 NAME = Blurry Fish Butt