Merge tag 'mvebu-soc-3.16-2' of git://git.infradead.org/linux-mvebu into next/soc
Merge "mvebu SoC changes for v3.16 (incremental #2)" from Jason Cooper <jason@lakedaemon.net>:

 - mvebu
    - fix coherency on big-endian in -next
    - hardware IO coherency
    - L2/PCIe deadlock workaround
    - small coherency cleanups

* tag 'mvebu-soc-3.16-2' of git://git.infradead.org/linux-mvebu:
  ARM: mvebu: returns ll_get_cpuid() to ll_get_coherency_cpumask()
  ARM: mvebu: improve comments in coherency_ll.S
  ARM: mvebu: fix indentation of assembly instructions in coherency_ll.S
  ARM: mvebu: fix big endian booting after coherency code rework
  ARM: mvebu: coherency: fix registration of PCI bus notifier when !PCI
  ARM: mvebu: implement L2/PCIe deadlock workaround
  ARM: mvebu: use hardware I/O coherency also for PCI devices

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
This commit is contained in:
commit
37f5f4e173
2 changed files with 106 additions and 33 deletions
|
|
@ -29,8 +29,10 @@
|
||||||
#include <linux/slab.h>
|
#include <linux/slab.h>
|
||||||
#include <linux/mbus.h>
|
#include <linux/mbus.h>
|
||||||
#include <linux/clk.h>
|
#include <linux/clk.h>
|
||||||
|
#include <linux/pci.h>
|
||||||
#include <asm/smp_plat.h>
|
#include <asm/smp_plat.h>
|
||||||
#include <asm/cacheflush.h>
|
#include <asm/cacheflush.h>
|
||||||
|
#include <asm/mach/map.h>
|
||||||
#include "armada-370-xp.h"
|
#include "armada-370-xp.h"
|
||||||
#include "coherency.h"
|
#include "coherency.h"
|
||||||
#include "mvebu-soc-id.h"
|
#include "mvebu-soc-id.h"
|
||||||
|
|
@ -274,8 +276,8 @@ static struct dma_map_ops mvebu_hwcc_dma_ops = {
|
||||||
.set_dma_mask = arm_dma_set_mask,
|
.set_dma_mask = arm_dma_set_mask,
|
||||||
};
|
};
|
||||||
|
|
||||||
static int mvebu_hwcc_platform_notifier(struct notifier_block *nb,
|
static int mvebu_hwcc_notifier(struct notifier_block *nb,
|
||||||
unsigned long event, void *__dev)
|
unsigned long event, void *__dev)
|
||||||
{
|
{
|
||||||
struct device *dev = __dev;
|
struct device *dev = __dev;
|
||||||
|
|
||||||
|
|
@ -286,8 +288,8 @@ static int mvebu_hwcc_platform_notifier(struct notifier_block *nb,
|
||||||
return NOTIFY_OK;
|
return NOTIFY_OK;
|
||||||
}
|
}
|
||||||
|
|
||||||
static struct notifier_block mvebu_hwcc_platform_nb = {
|
static struct notifier_block mvebu_hwcc_nb = {
|
||||||
.notifier_call = mvebu_hwcc_platform_notifier,
|
.notifier_call = mvebu_hwcc_notifier,
|
||||||
};
|
};
|
||||||
|
|
||||||
static void __init armada_370_coherency_init(struct device_node *np)
|
static void __init armada_370_coherency_init(struct device_node *np)
|
||||||
|
|
@ -308,9 +310,47 @@ static void __init armada_370_coherency_init(struct device_node *np)
|
||||||
set_cpu_coherent();
|
set_cpu_coherent();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* This ioremap hook is used on Armada 375/38x to ensure that PCIe
|
||||||
|
* memory areas are mapped as MT_UNCACHED instead of MT_DEVICE. This
|
||||||
|
* is needed as a workaround for a deadlock issue between the PCIe
|
||||||
|
* interface and the cache controller.
|
||||||
|
*/
|
||||||
|
static void __iomem *
|
||||||
|
armada_pcie_wa_ioremap_caller(phys_addr_t phys_addr, size_t size,
|
||||||
|
unsigned int mtype, void *caller)
|
||||||
|
{
|
||||||
|
struct resource pcie_mem;
|
||||||
|
|
||||||
|
mvebu_mbus_get_pcie_mem_aperture(&pcie_mem);
|
||||||
|
|
||||||
|
if (pcie_mem.start <= phys_addr && (phys_addr + size) <= pcie_mem.end)
|
||||||
|
mtype = MT_UNCACHED;
|
||||||
|
|
||||||
|
return __arm_ioremap_caller(phys_addr, size, mtype, caller);
|
||||||
|
}
|
||||||
|
|
||||||
static void __init armada_375_380_coherency_init(struct device_node *np)
|
static void __init armada_375_380_coherency_init(struct device_node *np)
|
||||||
{
|
{
|
||||||
|
struct device_node *cache_dn;
|
||||||
|
|
||||||
coherency_cpu_base = of_iomap(np, 0);
|
coherency_cpu_base = of_iomap(np, 0);
|
||||||
|
arch_ioremap_caller = armada_pcie_wa_ioremap_caller;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Add the PL310 property "arm,io-coherent". This makes sure the
|
||||||
|
* outer sync operation is not used, which allows us to
|
||||||
|
* work around the system erratum that causes deadlocks when
|
||||||
|
* doing PCIe in an SMP situation on Armada 375 and Armada
|
||||||
|
* 38x.
|
||||||
|
*/
|
||||||
|
for_each_compatible_node(cache_dn, NULL, "arm,pl310-cache") {
|
||||||
|
struct property *p;
|
||||||
|
|
||||||
|
p = kzalloc(sizeof(*p), GFP_KERNEL);
|
||||||
|
p->name = kstrdup("arm,io-coherent", GFP_KERNEL);
|
||||||
|
of_add_property(cache_dn, p);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static int coherency_type(void)
|
static int coherency_type(void)
|
||||||
|
|
@ -375,9 +415,21 @@ static int __init coherency_late_init(void)
|
||||||
}
|
}
|
||||||
|
|
||||||
bus_register_notifier(&platform_bus_type,
|
bus_register_notifier(&platform_bus_type,
|
||||||
&mvebu_hwcc_platform_nb);
|
&mvebu_hwcc_nb);
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
postcore_initcall(coherency_late_init);
|
postcore_initcall(coherency_late_init);
|
||||||
|
|
||||||
|
#if IS_ENABLED(CONFIG_PCI)
|
||||||
|
static int __init coherency_pci_init(void)
|
||||||
|
{
|
||||||
|
if (coherency_available())
|
||||||
|
bus_register_notifier(&pci_bus_type,
|
||||||
|
&mvebu_hwcc_nb);
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
arch_initcall(coherency_pci_init);
|
||||||
|
#endif
|
||||||
|
|
|
||||||
|
|
@ -24,52 +24,69 @@
|
||||||
#include <asm/cp15.h>
|
#include <asm/cp15.h>
|
||||||
|
|
||||||
.text
|
.text
|
||||||
/* Returns with the coherency address in r1 (r0 is untouched)*/
|
/* Returns the coherency base address in r1 (r0 is untouched) */
|
||||||
ENTRY(ll_get_coherency_base)
|
ENTRY(ll_get_coherency_base)
|
||||||
mrc p15, 0, r1, c1, c0, 0
|
mrc p15, 0, r1, c1, c0, 0
|
||||||
tst r1, #CR_M @ Check MMU bit enabled
|
tst r1, #CR_M @ Check MMU bit enabled
|
||||||
bne 1f
|
bne 1f
|
||||||
|
|
||||||
/* use physical address of the coherency register */
|
/*
|
||||||
|
* MMU is disabled, use the physical address of the coherency
|
||||||
|
* base address.
|
||||||
|
*/
|
||||||
adr r1, 3f
|
adr r1, 3f
|
||||||
ldr r3, [r1]
|
ldr r3, [r1]
|
||||||
ldr r1, [r1, r3]
|
ldr r1, [r1, r3]
|
||||||
b 2f
|
b 2f
|
||||||
1:
|
1:
|
||||||
/* use virtual address of the coherency register */
|
/*
|
||||||
|
* MMU is enabled, use the virtual address of the coherency
|
||||||
|
* base address.
|
||||||
|
*/
|
||||||
ldr r1, =coherency_base
|
ldr r1, =coherency_base
|
||||||
ldr r1, [r1]
|
ldr r1, [r1]
|
||||||
2:
|
2:
|
||||||
mov pc, lr
|
mov pc, lr
|
||||||
ENDPROC(ll_get_coherency_base)
|
ENDPROC(ll_get_coherency_base)
|
||||||
|
|
||||||
/* Returns with the CPU ID in r3 (r0 is untouched)*/
|
/*
|
||||||
ENTRY(ll_get_cpuid)
|
* Returns the coherency CPU mask in r3 (r0 is untouched). This
|
||||||
|
* coherency CPU mask can be used with the coherency fabric
|
||||||
|
* configuration and control registers. Note that the mask is already
|
||||||
|
* endian-swapped as appropriate so that the calling functions do not
|
||||||
|
* have to care about endianness issues while accessing the coherency
|
||||||
|
* fabric registers.
|
||||||
|
*/
|
||||||
|
ENTRY(ll_get_coherency_cpumask)
|
||||||
mrc 15, 0, r3, cr0, cr0, 5
|
mrc 15, 0, r3, cr0, cr0, 5
|
||||||
and r3, r3, #15
|
and r3, r3, #15
|
||||||
mov r2, #(1 << 24)
|
mov r2, #(1 << 24)
|
||||||
lsl r3, r2, r3
|
lsl r3, r2, r3
|
||||||
ARM_BE8(rev r1, r1)
|
ARM_BE8(rev r3, r3)
|
||||||
mov pc, lr
|
mov pc, lr
|
||||||
ENDPROC(ll_get_cpuid)
|
ENDPROC(ll_get_coherency_cpumask)
|
||||||
|
|
||||||
/* ll_add_cpu_to_smp_group, ll_enable_coherency and
|
/*
|
||||||
* ll_disable_coherency use strex/ldrex whereas MMU can be off. The
|
* ll_add_cpu_to_smp_group(), ll_enable_coherency() and
|
||||||
* Armada XP SoC has an exclusive monitor that can track transactions
|
* ll_disable_coherency() use the strex/ldrex instructions while the
|
||||||
* to Device and/or SO and as such also when MMU is disabled the
|
* MMU can be disabled. The Armada XP SoC has an exclusive monitor
|
||||||
* exclusive transactions will be functional
|
* that tracks transactions to Device and/or SO memory and thanks to
|
||||||
|
* that, exclusive transactions are functional even when the MMU is
|
||||||
|
* disabled.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
ENTRY(ll_add_cpu_to_smp_group)
|
ENTRY(ll_add_cpu_to_smp_group)
|
||||||
/*
|
/*
|
||||||
* r0 being untouched in ll_get_coherency_base and
|
* As r0 is not modified by ll_get_coherency_base() and
|
||||||
* ll_get_cpuid, we can use it to save lr modifing it with the
|
* ll_get_coherency_cpumask(), we use it to temporarily save lr
|
||||||
* following bl
|
* and avoid it being modified by the branch and link
|
||||||
|
* calls. This function is used very early in the secondary
|
||||||
|
* CPU boot, and no stack is available at this point.
|
||||||
*/
|
*/
|
||||||
mov r0, lr
|
mov r0, lr
|
||||||
bl ll_get_coherency_base
|
bl ll_get_coherency_base
|
||||||
bl ll_get_cpuid
|
bl ll_get_coherency_cpumask
|
||||||
mov lr, r0
|
mov lr, r0
|
||||||
add r0, r1, #ARMADA_XP_CFB_CFG_REG_OFFSET
|
add r0, r1, #ARMADA_XP_CFB_CFG_REG_OFFSET
|
||||||
1:
|
1:
|
||||||
ldrex r2, [r0]
|
ldrex r2, [r0]
|
||||||
|
|
@ -82,13 +99,15 @@ ENDPROC(ll_add_cpu_to_smp_group)
|
||||||
|
|
||||||
ENTRY(ll_enable_coherency)
|
ENTRY(ll_enable_coherency)
|
||||||
/*
|
/*
|
||||||
* r0 being untouched in ll_get_coherency_base and
|
* As r0 is not modified by ll_get_coherency_base() and
|
||||||
* ll_get_cpuid, we can use it to save lr modifing it with the
|
* ll_get_coherency_cpumask(), we use it to temporarily save lr
|
||||||
* following bl
|
* and avoid it being modified by the branch and link
|
||||||
|
* calls. This function is used very early in the secondary
|
||||||
|
* CPU boot, and no stack is available at this point.
|
||||||
*/
|
*/
|
||||||
mov r0, lr
|
mov r0, lr
|
||||||
bl ll_get_coherency_base
|
bl ll_get_coherency_base
|
||||||
bl ll_get_cpuid
|
bl ll_get_coherency_cpumask
|
||||||
mov lr, r0
|
mov lr, r0
|
||||||
add r0, r1, #ARMADA_XP_CFB_CTL_REG_OFFSET
|
add r0, r1, #ARMADA_XP_CFB_CTL_REG_OFFSET
|
||||||
1:
|
1:
|
||||||
|
|
@ -104,14 +123,16 @@ ENDPROC(ll_enable_coherency)
|
||||||
|
|
||||||
ENTRY(ll_disable_coherency)
|
ENTRY(ll_disable_coherency)
|
||||||
/*
|
/*
|
||||||
* r0 being untouched in ll_get_coherency_base and
|
* As r0 is not modified by ll_get_coherency_base() and
|
||||||
* ll_get_cpuid, we can use it to save lr modifing it with the
|
* ll_get_coherency_cpumask(), we use it to temporarily save lr
|
||||||
* following bl
|
* and avoid it being modified by the branch and link
|
||||||
|
* calls. This function is used very early in the secondary
|
||||||
|
* CPU boot, and no stack is available at this point.
|
||||||
*/
|
*/
|
||||||
mov r0, lr
|
mov r0, lr
|
||||||
bl ll_get_coherency_base
|
bl ll_get_coherency_base
|
||||||
bl ll_get_cpuid
|
bl ll_get_coherency_cpumask
|
||||||
mov lr, r0
|
mov lr, r0
|
||||||
add r0, r1, #ARMADA_XP_CFB_CTL_REG_OFFSET
|
add r0, r1, #ARMADA_XP_CFB_CTL_REG_OFFSET
|
||||||
1:
|
1:
|
||||||
ldrex r2, [r0]
|
ldrex r2, [r0]
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue