diff --git a/Documentation/arm64/silicon-errata.rst b/Documentation/arm64/silicon-errata.rst index 4ff8a9379ba9..cb04b3baaa33 100644 --- a/Documentation/arm64/silicon-errata.rst +++ b/Documentation/arm64/silicon-errata.rst @@ -94,6 +94,8 @@ stable kernels. +----------------+-----------------+-----------------+-----------------------------+ | ARM | Cortex-A510 | #2051678 | ARM64_ERRATUM_2051678 | +----------------+-----------------+-----------------+-----------------------------+ +| ARM | Cortex-A510 | #2454944 | ARM64_ERRATUM_2454944 | ++----------------+-----------------+-----------------+-----------------------------+ | ARM | Cortex-A710 | #2054223 | ARM64_ERRATUM_2054223 | +----------------+-----------------+-----------------+-----------------------------+ | ARM | Neoverse-N1 | #1188873,1418040| ARM64_ERRATUM_1418040 | diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 1196fe7686a7..90a8f9925ce8 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -713,6 +713,36 @@ config ARM64_ERRATUM_2067961 If unsure, say Y. +config ARM64_ERRATUM_2454944 + bool "Cortex-A510: 2454944: Unmodified cache line might be written back to memory" + select ARCH_HAS_TEARDOWN_DMA_OPS + default y + help + This option adds the workaround for ARM Cortex-A510 erratum 2454944. + + Affected Cortex-A510 core might write unmodified cache lines back to + memory, which breaks the assumptions upon which software coherency + management for non-coherent DMA relies. If a cache line is + speculatively fetched while a non-coherent device is writing directly + to DRAM, and subsequently written back by natural eviction, data + written by the device in the intervening period can be lost. + + The workaround is to enforce as far as reasonably possible that all + non-coherent DMA transfers are bounced and/or remapped to minimise + the chance that any Cacheable alias exists through which speculative + cache fills could occur. + + This is quite involved and has unavoidable performance impact on + affected systems. + +config ARM64_ERRATUM_2454944_DEBUG + bool "Extra debug checks for Cortex-A510 2454944" + depends on ARM64_ERRATUM_2454944 + default y + help + Enable additional checks and warnings to detect and mitigate driver + bugs breaking the remapping workaround. + config CAVIUM_ERRATUM_22375 bool "Cavium erratum 22375, 24313" default y diff --git a/arch/arm64/include/asm/cpucaps.h b/arch/arm64/include/asm/cpucaps.h index ed39d674de5f..cbe87b1bc4a8 100644 --- a/arch/arm64/include/asm/cpucaps.h +++ b/arch/arm64/include/asm/cpucaps.h @@ -72,7 +72,8 @@ #define ARM64_WORKAROUND_TSB_FLUSH_FAILURE 61 #define ARM64_SPECTRE_BHB 62 -/* kabi: reserve 63 - 76 for future cpu capabilities */ +/* kabi: reserve 63 - 74 for future cpu capabilities */ +#define ARM64_WORKAROUND_NO_DMA_ALIAS 75 #define ARM64_NCAPS 76 #endif /* __ASM_CPUCAPS_H */ diff --git a/arch/arm64/include/asm/page.h b/arch/arm64/include/asm/page.h index ed1b9dcf12b2..ced097af888d 100644 --- a/arch/arm64/include/asm/page.h +++ b/arch/arm64/include/asm/page.h @@ -43,6 +43,19 @@ typedef struct page *pgtable_t; extern int pfn_valid(unsigned long); +#ifdef CONFIG_ARM64_ERRATUM_2454944_DEBUG +#include + +void page_check_nc(struct page *page, int order); + +static inline void arch_free_page(struct page *page, int order) +{ + if (cpus_have_const_cap(ARM64_WORKAROUND_NO_DMA_ALIAS)) + page_check_nc(page, order); +} +#define HAVE_ARCH_FREE_PAGE +#endif + #include #endif /* !__ASSEMBLY__ */ diff --git a/arch/arm64/include/asm/vmalloc.h b/arch/arm64/include/asm/vmalloc.h index 2ca708ab9b20..b5a0614e48c0 100644 --- a/arch/arm64/include/asm/vmalloc.h +++ b/arch/arm64/include/asm/vmalloc.h @@ -1,4 +1,8 @@ #ifndef _ASM_ARM64_VMALLOC_H #define _ASM_ARM64_VMALLOC_H +#include + +#define arch_disable_lazy_vunmap cpus_have_const_cap(ARM64_WORKAROUND_NO_DMA_ALIAS) + #endif /* _ASM_ARM64_VMALLOC_H */ diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c index 57ece7965bd6..d4848a3f88c4 100644 --- a/arch/arm64/kernel/cpu_errata.c +++ b/arch/arm64/kernel/cpu_errata.c @@ -555,6 +555,14 @@ const struct arm64_cpu_capabilities arm64_errata[] = { .capability = ARM64_WORKAROUND_TSB_FLUSH_FAILURE, ERRATA_MIDR_RANGE_LIST(tsb_flush_fail_cpus), }, +#endif +#ifdef CONFIG_ARM64_ERRATUM_2454944 + { + .desc = "ARM erratum 2454944", + .capability = ARM64_WORKAROUND_NO_DMA_ALIAS, + ERRATA_MIDR_RANGE(MIDR_CORTEX_A510, 0, 0, 1, 1), + MIDR_FIXED(MIDR_CPU_VAR_REV(1, 1), BIT(25)), + }, #endif { } diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S index 047eac4046e0..1431d840d119 100644 --- a/arch/arm64/kernel/head.S +++ b/arch/arm64/kernel/head.S @@ -125,9 +125,13 @@ SYM_CODE_END(primary_entry) SYM_CODE_START_LOCAL(preserve_boot_args) mov x21, x0 // x21=FDT - adr_l x0, boot_args // record the contents of - stp x21, x1, [x0] // x0 .. x3 at kernel entry - stp x2, x3, [x0, #16] + adr_l x0, boot_args +#ifdef CONFIG_ARM64_ERRATUM_2454944 + dc ivac, x0 // Cortex-A510 CWG is 64 bytes, so plenty + dsb sy +#endif + stp x21, x1, [x0] // record the contents of + stp x2, x3, [x0, #16] // x0 .. x3 at kernel entry dmb sy // needed before dc ivac with // MMU off @@ -282,8 +286,17 @@ SYM_FUNC_START_LOCAL(__create_page_tables) * the kernel image, and thus are clean to the PoC per the boot * protocol. */ +#ifndef CONFIG_ARM64_ERRATUM_2454944 adrp x0, init_pg_dir adrp x1, init_pg_end +#else + /* + * However if we can't even trust "clean" cache lines shadowing rodata, + * then nuke the entire image. It's the only way to be sure. + */ + adrp x0, _text + adrp x1, _end +#endif sub x1, x1, x0 bl __inval_dcache_area @@ -531,6 +544,10 @@ SYM_FUNC_END(init_kernel_el) */ SYM_FUNC_START_LOCAL(set_cpu_boot_mode_flag) adr_l x1, __boot_cpu_mode +#ifdef CONFIG_ARM64_ERRATUM_2454944 + dc ivac, x1 + dsb sy +#endif cmp w0, #BOOT_CPU_MODE_EL2 b.ne 1f add x1, x1, #4 diff --git a/arch/arm64/mm/dma-mapping.c b/arch/arm64/mm/dma-mapping.c index e75e5e75b192..bd46ad81d061 100644 --- a/arch/arm64/mm/dma-mapping.c +++ b/arch/arm64/mm/dma-mapping.c @@ -31,7 +31,599 @@ void arch_dma_prep_coherent(struct page *page, size_t size) __dma_flush_area(page_address(page), size); } +#ifdef CONFIG_ARM64_ERRATUM_2454944 +#include +#include +#include + +#ifdef CONFIG_ARM64_ERRATUM_2454944_DEBUG +#include + +static atomic_t pages_remapped; + +static int __init remap_debugfs_register(void) +{ + debugfs_create_atomic_t("pages_remapped_nc", 0444, NULL, &pages_remapped); + return 0; +} +arch_initcall(remap_debugfs_register); +#endif + +/* + * Nobody should be using these software bits on linear map addresses, right? + * This is categorically the worst, but oh well, needs must... + */ +#define REFCOUNT_INC (1UL << 55) +#define PTE_REFCOUNT(pte) (((pte) >> 55) & 0xf) + +static int pte_set_nc(pte_t *ptep, unsigned long addr, void *data) +{ + pteval_t old_pte, new_pte, pte; + unsigned int refcount; + + pte = pte_val(READ_ONCE(*ptep)); + do { + /* Avoid racing against the transient invalid state */ + old_pte = pte | PTE_VALID; + new_pte = old_pte + REFCOUNT_INC; + refcount = PTE_REFCOUNT(pte); + if (WARN_ON(refcount == 15)) + return -EINVAL; + if (refcount == 0) { + new_pte &= ~(PTE_ATTRINDX_MASK | PTE_VALID); + new_pte |= PTE_ATTRINDX(MT_NORMAL_NC); + } + pte = cmpxchg_relaxed(&pte_val(*ptep), old_pte, new_pte); + } while (pte != old_pte); + + *(unsigned int *)data = refcount; + if (refcount) + return 0; + +#ifdef CONFIG_ARM64_ERRATUM_2454944_DEBUG + atomic_inc(&pages_remapped); +#endif + flush_tlb_kernel_range(addr, addr + PAGE_SIZE); + WRITE_ONCE(*ptep, __pte(new_pte | PTE_VALID)); + return 0; +} + +static int pte_clear_nc(pte_t *ptep, unsigned long addr, void *data) +{ + pteval_t old_pte, new_pte, pte; + unsigned int refcount; + + pte = pte_val(READ_ONCE(*ptep)); + do { + old_pte = pte | PTE_VALID; + new_pte = old_pte - REFCOUNT_INC; + refcount = PTE_REFCOUNT(pte); + if (WARN_ON(refcount == 0)) + return -EINVAL; + if (refcount == 1) { + new_pte &= ~(PTE_ATTRINDX_MASK | PTE_VALID); + new_pte |= PTE_ATTRINDX(MT_NORMAL_TAGGED); + } + pte = cmpxchg_relaxed(&pte_val(*ptep), old_pte, new_pte); + } while (pte != old_pte); + + if (refcount > 1) + return 0; + +#ifdef CONFIG_ARM64_ERRATUM_2454944_DEBUG + atomic_dec(&pages_remapped); +#endif + flush_tlb_kernel_range(addr, addr + PAGE_SIZE); + WRITE_ONCE(*ptep, __pte(new_pte | PTE_VALID)); + return 0; +} + +static int set_nc(void *addr, size_t size) +{ + unsigned int count; + int ret = apply_to_existing_page_range(&init_mm, (unsigned long)addr, + size, pte_set_nc, &count); + WARN_RATELIMIT(IS_ENABLED(CONFIG_ARM64_ERRATUM_2454944_DEBUG) && + count == 0 && page_mapped(virt_to_page(addr)), + "changing linear mapping but cacheable aliases may still exist\n"); + dsb(ishst); + isb(); + __flush_dcache_area(addr, size); + return ret; +} + +static int clear_nc(void *addr, size_t size) +{ + int ret = apply_to_existing_page_range(&init_mm, (unsigned long)addr, + size, pte_clear_nc, NULL); + dsb(ishst); + isb(); + __inval_dcache_area(addr, size); + return ret; +} + +#ifdef CONFIG_ARM64_ERRATUM_2454944_DEBUG +void page_check_nc(struct page *page, int order) +{ + pgd_t *pgdp, pgd; + p4d_t *p4dp, p4d; + pud_t *pudp, pud; + pmd_t *pmdp, pmd; + pte_t *ptep, pte; + unsigned long addr = (unsigned long)page_address(page); + int i, j; + + pgdp = pgd_offset(&init_mm, addr); + pgd = READ_ONCE(*pgdp); + + p4dp = p4d_offset(pgdp, addr); + p4d = READ_ONCE(*p4dp); + + pudp = pud_offset(p4dp, addr); + pud = READ_ONCE(*pudp); + + pmdp = pmd_offset(pudp, addr); + pmd = READ_ONCE(*pmdp); + + for (i = 0; i < (1 << order); i++) { + ptep = pte_offset_map(pmdp, addr); + pte = READ_ONCE(*ptep); + j = PTE_REFCOUNT(pte_val(pte)); + WARN(j, "Non-Cacheable page leaked! I'm fixing it up but it means you have a bug elsewhere"); + while (j--) + pte_clear_nc(ptep, addr, NULL); + addr += PAGE_SIZE; + } +} +#endif /* CONFIG_ARM64_ERRATUM_2454944_DEBUG */ + +static phys_addr_t __arm64_noalias_map(struct device *dev, phys_addr_t phys, + size_t size, enum dma_data_direction dir, + unsigned long attrs, bool bounce) +{ + bounce = bounce || (phys | size) & ~PAGE_MASK; + if (bounce) { + phys = swiotlb_tbl_map_single(dev, phys, size, PAGE_ALIGN(size), + dir, attrs); + if (phys == DMA_MAPPING_ERROR) + return DMA_MAPPING_ERROR; + } + if (set_nc(phys_to_virt(phys & PAGE_MASK), PAGE_ALIGN(size))) + goto out_unmap; + + return phys; +out_unmap: + if (bounce) + swiotlb_tbl_unmap_single(dev, phys, size, PAGE_ALIGN(size), dir, + attrs | DMA_ATTR_SKIP_CPU_SYNC); + return DMA_MAPPING_ERROR; +} + +static void __arm64_noalias_unmap(struct device *dev, phys_addr_t phys, size_t size, + enum dma_data_direction dir, unsigned long attrs) +{ + clear_nc(phys_to_virt(phys & PAGE_MASK), PAGE_ALIGN(size)); + if (is_swiotlb_buffer(phys)) + swiotlb_tbl_unmap_single(dev, phys, size, PAGE_ALIGN(size), dir, attrs); +} + +static void __arm64_noalias_sync_for_device(struct device *dev, phys_addr_t phys, + size_t size, enum dma_data_direction dir) +{ + if (is_swiotlb_buffer(phys)) + swiotlb_tbl_sync_single(dev, phys, size, dir, SYNC_FOR_DEVICE); + else + arch_sync_dma_for_device(phys, size, dir); +} + +static void __arm64_noalias_sync_for_cpu(struct device *dev, phys_addr_t phys, + size_t size, enum dma_data_direction dir) +{ + if (is_swiotlb_buffer(phys)) + swiotlb_tbl_sync_single(dev, phys, size, dir, SYNC_FOR_CPU); + else + arch_sync_dma_for_cpu(phys, size, dir); +} + +static void *arm64_noalias_alloc(struct device *dev, size_t size, + dma_addr_t *dma_addr, gfp_t gfp, unsigned long attrs) +{ + struct page *page; + void *ret; + + if (attrs & DMA_ATTR_NO_WARN) + gfp |= __GFP_NOWARN; + + size = PAGE_ALIGN(size); + page = dma_direct_alloc_pages(dev, size, dma_addr, 0, gfp & ~__GFP_ZERO); + if (!page) + return NULL; + + ret = page_address(page); + if (set_nc(ret, size)) { + dma_direct_free_pages(dev, size, page, *dma_addr, 0); + return NULL; + } + return ret; +} + +static void arm64_noalias_free(struct device *dev, size_t size, void *cpu_addr, + dma_addr_t dma_addr, unsigned long attrs) +{ + size = PAGE_ALIGN(size); + clear_nc(cpu_addr, size); + dma_direct_free_pages(dev, size, virt_to_page(cpu_addr), dma_addr, 0); +} + +static dma_addr_t arm64_noalias_map_page(struct device *dev, struct page *page, + unsigned long offset, size_t size, + enum dma_data_direction dir, unsigned long attrs) +{ + phys_addr_t phys = page_to_phys(page) + offset; + bool bounce = !dma_capable(dev, phys_to_dma(dev, phys), size, true); + + if (!bounce && dir == DMA_TO_DEVICE) { + arch_sync_dma_for_device(phys, size, dir); + return phys_to_dma(dev, phys); + } + + bounce = bounce || page_mapped(page); + phys = __arm64_noalias_map(dev, phys, size, dir, attrs, bounce); + if (phys == DMA_MAPPING_ERROR) + return DMA_MAPPING_ERROR; + + return phys_to_dma(dev, phys); +} + +static void arm64_noalias_unmap_page(struct device *dev, dma_addr_t dma_addr, + size_t size, enum dma_data_direction dir, + unsigned long attrs) +{ + if (dir == DMA_TO_DEVICE) + return; + __arm64_noalias_unmap(dev, dma_to_phys(dev, dma_addr), size, dir, attrs); +} + +static void arm64_noalias_unmap_sg(struct device *dev, struct scatterlist *sgl, int nents, + enum dma_data_direction dir, unsigned long attrs) +{ + struct scatterlist *sg; + int i; + + if (dir == DMA_TO_DEVICE) + return; + for_each_sg (sgl, sg, nents, i) + __arm64_noalias_unmap(dev, dma_to_phys(dev, sg->dma_address), + sg->length, dir, attrs); +} + +static int arm64_noalias_map_sg(struct device *dev, struct scatterlist *sgl, int nents, + enum dma_data_direction dir, unsigned long attrs) +{ + int i; + struct scatterlist *sg; + + for_each_sg(sgl, sg, nents, i) { + sg->dma_address = arm64_noalias_map_page(dev, sg_page(sg), sg->offset, + sg->length, dir, attrs); + if (sg->dma_address == DMA_MAPPING_ERROR) + goto out_unmap; + sg->dma_length = sg->length; + } + + return nents; + +out_unmap: + arm64_noalias_unmap_sg(dev, sgl, i, dir, attrs | DMA_ATTR_SKIP_CPU_SYNC); + return 0; +} + +static void arm64_noalias_sync_single_for_device(struct device *dev, dma_addr_t addr, + size_t size, enum dma_data_direction dir) +{ + __arm64_noalias_sync_for_device(dev, dma_to_phys(dev, addr), size, dir); +} + +static void arm64_noalias_sync_single_for_cpu(struct device *dev, dma_addr_t addr, + size_t size, enum dma_data_direction dir) +{ + __arm64_noalias_sync_for_cpu(dev, dma_to_phys(dev, addr), size, dir); +} + +static void arm64_noalias_sync_sg_for_device(struct device *dev, struct scatterlist *sgl, + int nents, enum dma_data_direction dir) +{ + struct scatterlist *sg; + int i; + + for_each_sg(sgl, sg, nents, i) + arm64_noalias_sync_single_for_device(dev, sg->dma_address, sg->length, dir); +} + +static void arm64_noalias_sync_sg_for_cpu(struct device *dev, struct scatterlist *sgl, + int nents, enum dma_data_direction dir) +{ + struct scatterlist *sg; + int i; + + for_each_sg(sgl, sg, nents, i) + arm64_noalias_sync_single_for_cpu(dev, sg->dma_address, sg->length, dir); +} + +static const struct dma_map_ops arm64_noalias_ops = { + .alloc = arm64_noalias_alloc, + .free = arm64_noalias_free, + .alloc_pages = dma_common_alloc_pages, + .free_pages = dma_common_free_pages, + .mmap = dma_common_mmap, + .get_sgtable = dma_common_get_sgtable, + .map_page = arm64_noalias_map_page, + .unmap_page = arm64_noalias_unmap_page, + .map_sg = arm64_noalias_map_sg, + .unmap_sg = arm64_noalias_unmap_sg, + .sync_single_for_cpu = arm64_noalias_sync_single_for_cpu, + .sync_single_for_device = arm64_noalias_sync_single_for_device, + .sync_sg_for_cpu = arm64_noalias_sync_sg_for_cpu, + .sync_sg_for_device = arm64_noalias_sync_sg_for_device, + .dma_supported = dma_direct_supported, + .get_required_mask = dma_direct_get_required_mask, + .max_mapping_size = swiotlb_max_mapping_size, +}; + #ifdef CONFIG_IOMMU_DMA +static const struct dma_map_ops *iommu_dma_ops; + +static void *arm64_iommu_alloc(struct device *dev, size_t size, + dma_addr_t *dma_addr, gfp_t gfp, unsigned long attrs) +{ + struct page **pages; + void *ret; + int i; + + size = PAGE_ALIGN(size); + if (!gfpflags_allow_blocking(gfp) || (attrs & DMA_ATTR_FORCE_CONTIGUOUS)) { + ret = dma_common_alloc_pages(dev, size, dma_addr, 0, gfp); + return ret ? page_address(ret) : NULL; + } + + ret = iommu_dma_ops->alloc(dev, size, dma_addr, gfp, attrs); + if (ret) { + pages = dma_common_find_pages(ret); + for (i = 0; i < size / PAGE_SIZE; i++) + if (set_nc(page_address(pages[i]), PAGE_SIZE)) + goto err; + } + return ret; + +err: + while (i--) + clear_nc(page_address(pages[i]), PAGE_SIZE); + iommu_dma_ops->free(dev, size, ret, *dma_addr, attrs); + return NULL; +} + +static void arm64_iommu_free(struct device *dev, size_t size, void *cpu_addr, + dma_addr_t dma_addr, unsigned long attrs) +{ + struct page **pages = dma_common_find_pages(cpu_addr); + int i; + + size = PAGE_ALIGN(size); + if (!pages) + return dma_common_free_pages(dev, size, virt_to_page(cpu_addr), dma_addr, 0); + + for (i = 0; i < size / PAGE_SIZE; i++) + clear_nc(page_address(pages[i]), PAGE_SIZE); + iommu_dma_ops->free(dev, size, cpu_addr, dma_addr, attrs); +} + +static dma_addr_t arm64_iommu_map_page(struct device *dev, struct page *page, + unsigned long offset, size_t size, + enum dma_data_direction dir, unsigned long attrs) +{ + phys_addr_t phys = page_to_phys(page) + offset; + dma_addr_t ret; + + if (dir == DMA_TO_DEVICE) + return iommu_dma_ops->map_page(dev, page, offset, size, dir, attrs); + + phys = __arm64_noalias_map(dev, phys, size, dir, attrs, page_mapped(page)); + if (phys == DMA_MAPPING_ERROR) + return DMA_MAPPING_ERROR; + + attrs |= DMA_ATTR_SKIP_CPU_SYNC; + ret = iommu_dma_ops->map_page(dev, phys_to_page(phys), offset_in_page(phys), + size, dir, attrs); + if (ret == DMA_MAPPING_ERROR) + __arm64_noalias_unmap(dev, phys, size, dir, attrs); + return ret; +} + +static void arm64_iommu_unmap_page(struct device *dev, dma_addr_t addr, size_t size, + enum dma_data_direction dir, unsigned long attrs) +{ + phys_addr_t phys; + + if (dir == DMA_TO_DEVICE) + return iommu_dma_ops->unmap_page(dev, addr, size, dir, attrs); + + phys = iommu_iova_to_phys(iommu_get_dma_domain(dev), addr); + iommu_dma_ops->unmap_page(dev, addr, size, dir, attrs | DMA_ATTR_SKIP_CPU_SYNC); + __arm64_noalias_unmap(dev, phys, size, dir, attrs); +} + +static int arm64_iommu_map_sg(struct device *dev, struct scatterlist *sgl, int nents, + enum dma_data_direction dir, unsigned long attrs) +{ + int i, ret; + struct scatterlist *sg; + phys_addr_t *orig_phys; + + if (dir == DMA_TO_DEVICE) + return iommu_dma_ops->map_sg(dev, sgl, nents, dir, attrs); + + orig_phys = kmalloc_array(nents, sizeof(*orig_phys), GFP_ATOMIC); + if (!orig_phys) + return 0; + + for_each_sg(sgl, sg, nents, i) { + phys_addr_t phys = sg_phys(sg); + /* + * Note we do not have the page_mapped() check here, since + * bouncing plays complete havoc with dma-buf imports. Those + * may well be mapped in userspace, but we hope and pray that + * it's via dma_mmap_attrs() so any such mappings are safely + * non-cacheable. DO NOT allow a block device or other similar + * scatterlist user to get here (disable IOMMUs if necessary), + * since we can't mitigate for both conflicting use-cases. + */ + phys = __arm64_noalias_map(dev, phys, sg->length, dir, attrs, false); + if (phys == DMA_MAPPING_ERROR) + goto out_unmap; + + orig_phys[i] = sg_phys(sg); + sg_assign_page(sg, phys_to_page(phys)); + sg->offset = offset_in_page(phys); + } + ret = iommu_dma_ops->map_sg(dev, sgl, nents, dir, attrs | DMA_ATTR_SKIP_CPU_SYNC); + if (ret <= 0) + goto out_unmap; + + for_each_sg(sgl, sg, nents, i) { + sg_assign_page(sg, phys_to_page(orig_phys[i])); + sg->offset = offset_in_page(orig_phys[i]); + } + + kfree(orig_phys); + return ret; + +out_unmap: + for_each_sg(sgl, sg, nents, i) { + __arm64_noalias_unmap(dev, sg_phys(sg), sg->length, dir, attrs); + sg_assign_page(sg, phys_to_page(orig_phys[i])); + sg->offset = offset_in_page(orig_phys[i]); + } + kfree(orig_phys); + return 0; +} + +static void arm64_iommu_unmap_sg(struct device *dev, struct scatterlist *sgl, int nents, + enum dma_data_direction dir, unsigned long attrs) +{ + struct iommu_domain *domain; + struct scatterlist *sg, *tmp; + dma_addr_t iova; + int i; + + if (dir == DMA_TO_DEVICE) + return iommu_dma_ops->unmap_sg(dev, sgl, nents, dir, attrs); + + domain = iommu_get_dma_domain(dev); + iova = sgl->dma_address; + tmp = sgl; + for_each_sg(sgl, sg, nents, i) { + phys_addr_t phys = iommu_iova_to_phys(domain, iova); + + __arm64_noalias_unmap(dev, phys, sg->length, dir, attrs); + iova += sg->length; + if (iova == tmp->dma_address + tmp->dma_length && !sg_is_last(tmp)) { + tmp = sg_next(tmp); + iova = tmp->dma_address; + } + } + iommu_dma_ops->unmap_sg(dev, sgl, nents, dir, attrs | DMA_ATTR_SKIP_CPU_SYNC); +} + +static void arm64_iommu_sync_single_for_device(struct device *dev, dma_addr_t addr, + size_t size, enum dma_data_direction dir) +{ + phys_addr_t phys = iommu_iova_to_phys(iommu_get_dma_domain(dev), addr); + + __arm64_noalias_sync_for_device(dev, phys, size, dir); +} + +static void arm64_iommu_sync_single_for_cpu(struct device *dev, dma_addr_t addr, + size_t size, enum dma_data_direction dir) +{ + phys_addr_t phys = iommu_iova_to_phys(iommu_get_dma_domain(dev), addr); + + __arm64_noalias_sync_for_cpu(dev, phys, size, dir); +} + +static void arm64_iommu_sync_sg_for_device(struct device *dev, struct scatterlist *sgl, + int nents, enum dma_data_direction dir) +{ + struct iommu_domain *domain = iommu_get_dma_domain(dev); + struct scatterlist *sg, *tmp = sgl; + dma_addr_t iova = sgl->dma_address; + int i; + + for_each_sg(sgl, sg, nents, i) { + phys_addr_t phys = iommu_iova_to_phys(domain, iova); + + __arm64_noalias_sync_for_device(dev, phys, sg->length, dir); + iova += sg->length; + if (iova == tmp->dma_address + tmp->dma_length && !sg_is_last(tmp)) { + tmp = sg_next(tmp); + iova = tmp->dma_address; + } + } +} + +static void arm64_iommu_sync_sg_for_cpu(struct device *dev, struct scatterlist *sgl, + int nents, enum dma_data_direction dir) +{ + struct iommu_domain *domain = iommu_get_dma_domain(dev); + struct scatterlist *sg, *tmp = sgl; + dma_addr_t iova = sgl->dma_address; + int i; + + for_each_sg(sgl, sg, nents, i) { + phys_addr_t phys = iommu_iova_to_phys(domain, iova); + + __arm64_noalias_sync_for_cpu(dev, phys, sg->length, dir); + iova += sg->length; + if (iova == tmp->dma_address + tmp->dma_length && !sg_is_last(tmp)) { + tmp = sg_next(tmp); + iova = tmp->dma_address; + } + } +} + +static struct dma_map_ops arm64_iommu_ops = { + .alloc = arm64_iommu_alloc, + .free = arm64_iommu_free, + .alloc_pages = dma_common_alloc_pages, + .free_pages = dma_common_free_pages, + .map_page = arm64_iommu_map_page, + .unmap_page = arm64_iommu_unmap_page, + .map_sg = arm64_iommu_map_sg, + .unmap_sg = arm64_iommu_unmap_sg, + .sync_single_for_cpu = arm64_iommu_sync_single_for_cpu, + .sync_single_for_device = arm64_iommu_sync_single_for_device, + .sync_sg_for_cpu = arm64_iommu_sync_sg_for_cpu, + .sync_sg_for_device = arm64_iommu_sync_sg_for_device, +}; + +static void arm64_init_iommu_ops(struct device *dev) +{ + const struct dma_map_ops *ops = dev->dma_ops; + + dev->dma_ops = &arm64_iommu_ops; + if (iommu_dma_ops) + return; + + iommu_dma_ops = ops; + arm64_iommu_ops.mmap = ops->mmap; + arm64_iommu_ops.get_sgtable = ops->get_sgtable; + arm64_iommu_ops.map_resource = ops->map_resource; + arm64_iommu_ops.unmap_resource = ops->unmap_resource; + arm64_iommu_ops.get_merge_boundary = ops->get_merge_boundary; +} +#endif /* CONFIG_IOMMU_DMA */ +#endif /* CONFIG_ARM64_ERRATUM_2454944*/ + +#ifdef CONFIG_ARCH_HAS_TEARDOWN_DMA_OPS void arch_teardown_dma_ops(struct device *dev) { dev->dma_ops = NULL; @@ -42,6 +634,14 @@ void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size, const struct iommu_ops *iommu, bool coherent) { int cls = cache_line_size_of_cpu(); +#ifdef CONFIG_ARM64_ERRATUM_2454944 + bool noalias = !coherent && cpus_have_cap(ARM64_WORKAROUND_NO_DMA_ALIAS); + + if (noalias) { + dev_info(dev, "applying no-alias DMA workaround\n"); + dev->dma_ops = &arm64_noalias_ops; + } +#endif WARN_TAINT(!coherent && cls > ARCH_DMA_MINALIGN, TAINT_CPU_OUT_OF_SPEC, @@ -55,6 +655,10 @@ void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size, trace_android_vh_iommu_setup_dma_ops(dev, dma_base, size); trace_android_rvh_iommu_setup_dma_ops(dev, dma_base, size); } +#if defined(CONFIG_ARM64_ERRATUM_2454944) && defined(CONFIG_IOMMU_DMA) + if (noalias && dev->dma_ops != &arm64_noalias_ops) + arm64_init_iommu_ops(dev); +#endif #ifdef CONFIG_XEN if (xen_initial_domain()) diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c index 0cbb63cf955f..275f190f0c3d 100644 --- a/arch/arm64/mm/init.c +++ b/arch/arm64/mm/init.c @@ -581,7 +581,8 @@ static void __init free_unused_memmap(void) void __init mem_init(void) { if (swiotlb_force == SWIOTLB_FORCE || - max_pfn > PFN_DOWN(arm64_dma_phys_limit)) + max_pfn > PFN_DOWN(arm64_dma_phys_limit) || + cpus_have_cap(ARM64_WORKAROUND_NO_DMA_ALIAS)) swiotlb_init(1); else swiotlb_force = SWIOTLB_NO_FORCE; diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index cce252206962..57383bcbb6cb 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -502,7 +502,7 @@ static void __init map_mem(pgd_t *pgdp) u64 i; if (rodata_full || debug_pagealloc_enabled() || - IS_ENABLED(CONFIG_KFENCE)) + IS_ENABLED(CONFIG_KFENCE) || cpus_have_cap(ARM64_WORKAROUND_NO_DMA_ALIAS)) flags = NO_BLOCK_MAPPINGS | NO_CONT_MAPPINGS; /*