arm64: Implement coherent DMA API based on swiotlb
This patch adds support for DMA API cache maintenance on SoCs without hardware device cache coherency. Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
This commit is contained in:
		
					parent
					
						
							
								3690951fc6
							
						
					
				
			
			
				commit
				
					
						7363590d2c
					
				
			
		
					 4 changed files with 253 additions and 1 deletions
				
			
		|  | @ -84,6 +84,13 @@ static inline void flush_cache_page(struct vm_area_struct *vma, | |||
| { | ||||
| } | ||||
| 
 | ||||
| /*
 | ||||
|  * Cache maintenance functions used by the DMA API. Not to be used directly. | ||||
|  */ | ||||
| extern void __dma_map_area(const void *, size_t, int); | ||||
| extern void __dma_unmap_area(const void *, size_t, int); | ||||
| extern void __dma_flush_range(const void *, const void *); | ||||
| 
 | ||||
| /*
 | ||||
|  * Copy user data from/to a page which is mapped into a different | ||||
|  * processes address space.  Really, we want to allow our "user | ||||
|  |  | |||
|  | @ -30,6 +30,8 @@ | |||
| 
 | ||||
| #define DMA_ERROR_CODE	(~(dma_addr_t)0) | ||||
| extern struct dma_map_ops *dma_ops; | ||||
| extern struct dma_map_ops coherent_swiotlb_dma_ops; | ||||
| extern struct dma_map_ops noncoherent_swiotlb_dma_ops; | ||||
| 
 | ||||
| static inline struct dma_map_ops *__generic_dma_ops(struct device *dev) | ||||
| { | ||||
|  | @ -47,6 +49,11 @@ static inline struct dma_map_ops *get_dma_ops(struct device *dev) | |||
| 		return __generic_dma_ops(dev); | ||||
| } | ||||
| 
 | ||||
| /* Record @ops as the DMA operations for @dev in dev->archdata.dma_ops. */ | ||||
| static inline void set_dma_ops(struct device *dev, struct dma_map_ops *ops) | ||||
| { | ||||
| 	dev->archdata.dma_ops = ops; | ||||
| } | ||||
| 
 | ||||
| #include <asm-generic/dma-mapping-common.h> | ||||
| 
 | ||||
| static inline dma_addr_t phys_to_dma(struct device *dev, phys_addr_t paddr) | ||||
|  |  | |||
|  | @ -166,3 +166,81 @@ ENTRY(__flush_dcache_area) | |||
| 	dsb	sy | ||||
| 	ret | ||||
| ENDPROC(__flush_dcache_area) | ||||
| 
 | ||||
| /* | ||||
|  *	__dma_inv_range(start, end) | ||||
|  *	- start   - virtual start address of region | ||||
|  *	- end     - virtual end address of region | ||||
|  * | ||||
|  *	Invalidate the D-cache lines covering [start, end). A line that is | ||||
|  *	only partly inside the region (start or end not cache line aligned) | ||||
|  *	is cleaned as well as invalidated, so that dirty data sharing the | ||||
|  *	line but lying outside the region is written back rather than | ||||
|  *	discarded, and so that the partial line at an unaligned end is not | ||||
|  *	left stale in the cache. | ||||
|  */ | ||||
| __dma_inv_range: | ||||
| 	dcache_line_size x2, x3 | ||||
| 	sub	x3, x2, #1 | ||||
| 	tst	x1, x3				// end cache line aligned? | ||||
| 	bic	x1, x1, x3 | ||||
| 	b.eq	1f | ||||
| 	dc	civac, x1			// clean & invalidate D / U line | ||||
| 1:	tst	x0, x3				// start cache line aligned? | ||||
| 	bic	x0, x0, x3 | ||||
| 	b.eq	2f | ||||
| 	dc	civac, x0			// clean & invalidate D / U line | ||||
| 	b	3f | ||||
| 2:	dc	ivac, x0			// invalidate D / U line | ||||
| 3:	add	x0, x0, x2 | ||||
| 	cmp	x0, x1 | ||||
| 	b.lo	2b | ||||
| 	dsb	sy | ||||
| 	ret | ||||
| ENDPROC(__dma_inv_range) | ||||
| 
 | ||||
| /* | ||||
|  *	__dma_clean_range(start, end) | ||||
|  *	- start   - virtual start address of region | ||||
|  *	- end     - virtual end address of region | ||||
|  * | ||||
|  *	Cleans (dc cvac) one line per iteration, stepping by the value in x2, | ||||
|  *	from start rounded down to a multiple of that step until the address | ||||
|  *	reaches end, then issues "dsb sy". The loop body runs before the | ||||
|  *	bounds check, so one line is cleaned even when start == end. | ||||
|  *	x2 is presumably the D-cache line size produced by the | ||||
|  *	dcache_line_size macro (defined outside this hunk; confirm there). | ||||
|  *	Not declared with ENTRY; __dma_map_area branches here. | ||||
|  */ | ||||
| __dma_clean_range: | ||||
| 	dcache_line_size x2, x3 | ||||
| 	sub	x3, x2, #1 | ||||
| 	bic	x0, x0, x3 | ||||
| 1:	dc	cvac, x0			// clean D / U line | ||||
| 	add	x0, x0, x2 | ||||
| 	cmp	x0, x1 | ||||
| 	b.lo	1b | ||||
| 	dsb	sy | ||||
| 	ret | ||||
| ENDPROC(__dma_clean_range) | ||||
| 
 | ||||
| /* | ||||
|  *	__dma_flush_range(start, end) | ||||
|  *	- start   - virtual start address of region | ||||
|  *	- end     - virtual end address of region | ||||
|  * | ||||
|  *	Cleans and invalidates (dc civac) one line per iteration, stepping by | ||||
|  *	the value in x2, from start rounded down to a multiple of that step | ||||
|  *	until the address reaches end, then issues "dsb sy". The loop body | ||||
|  *	runs before the bounds check, so one line is processed even when | ||||
|  *	start == end. x2 is presumably the D-cache line size produced by the | ||||
|  *	dcache_line_size macro (defined outside this hunk; confirm there). | ||||
|  *	Declared with ENTRY and called from C with two pointer arguments. | ||||
|  */ | ||||
| ENTRY(__dma_flush_range) | ||||
| 	dcache_line_size x2, x3 | ||||
| 	sub	x3, x2, #1 | ||||
| 	bic	x0, x0, x3 | ||||
| 1:	dc	civac, x0			// clean & invalidate D / U line | ||||
| 	add	x0, x0, x2 | ||||
| 	cmp	x0, x1 | ||||
| 	b.lo	1b | ||||
| 	dsb	sy | ||||
| 	ret | ||||
| ENDPROC(__dma_flush_range) | ||||
| 
 | ||||
| /* | ||||
|  *	__dma_map_area(start, size, dir) | ||||
|  *	- start	- kernel virtual start address | ||||
|  *	- size	- size of region | ||||
|  *	- dir	- DMA direction | ||||
|  * | ||||
|  *	Turns (start, size) into (start, end) by adding x0 to x1, then | ||||
|  *	branches without linking, so the chosen range routine returns | ||||
|  *	straight to this function's caller: | ||||
|  *	- dir == DMA_FROM_DEVICE: __dma_inv_range | ||||
|  *	- any other direction:    __dma_clean_range | ||||
|  */ | ||||
| ENTRY(__dma_map_area) | ||||
| 	add	x1, x1, x0 | ||||
| 	cmp	w2, #DMA_FROM_DEVICE | ||||
| 	b.eq	__dma_inv_range | ||||
| 	b	__dma_clean_range | ||||
| ENDPROC(__dma_map_area) | ||||
| 
 | ||||
| /* | ||||
|  *	__dma_unmap_area(start, size, dir) | ||||
|  *	- start	- kernel virtual start address | ||||
|  *	- size	- size of region | ||||
|  *	- dir	- DMA direction | ||||
|  * | ||||
|  *	Turns (start, size) into (start, end) by adding x0 to x1. | ||||
|  *	- dir == DMA_TO_DEVICE: returns without any cache maintenance | ||||
|  *	- any other direction:  branches to __dma_inv_range, which returns | ||||
|  *	  straight to this function's caller | ||||
|  */ | ||||
| ENTRY(__dma_unmap_area) | ||||
| 	add	x1, x1, x0 | ||||
| 	cmp	w2, #DMA_TO_DEVICE | ||||
| 	b.ne	__dma_inv_range | ||||
| 	ret | ||||
| ENDPROC(__dma_unmap_area) | ||||
|  |  | |||
|  | @ -78,7 +78,166 @@ static void __dma_free_coherent(struct device *dev, size_t size, | |||
| 	} | ||||
| } | ||||
| 
 | ||||
| static struct dma_map_ops coherent_swiotlb_dma_ops = { | ||||
| /* | ||||
|  * Allocate a DMA buffer for a device that is not cache coherent. | ||||
|  * | ||||
|  * The memory comes from __dma_alloc_coherent() (size rounded up to whole | ||||
|  * pages). The kernel alias is flushed with __dma_flush_range() and the pages | ||||
|  * are then mapped a second time through vmap() with pgprot_dmacoherent | ||||
|  * attributes; that second mapping is what the caller receives. | ||||
|  * | ||||
|  * Returns the vmap()ed address, or NULL with *dma_handle set to ~0 when the | ||||
|  * backing allocation, the temporary page array or the vmap() itself fails. | ||||
|  */ | ||||
| static void *__dma_alloc_noncoherent(struct device *dev, size_t size, | ||||
| 				     dma_addr_t *dma_handle, gfp_t flags, | ||||
| 				     struct dma_attrs *attrs) | ||||
| { | ||||
| 	struct page *first, **pages; | ||||
| 	void *linear, *remapped; | ||||
| 	int order, idx; | ||||
| 
 | ||||
| 	size = PAGE_ALIGN(size); | ||||
| 	order = get_order(size); | ||||
| 
 | ||||
| 	linear = __dma_alloc_coherent(dev, size, dma_handle, flags, attrs); | ||||
| 	if (!linear) | ||||
| 		goto no_mem; | ||||
| 
 | ||||
| 	/* scratch array of page pointers, only needed for the vmap() call */ | ||||
| 	pages = kmalloc(sizeof(struct page *) << order, flags & ~GFP_DMA); | ||||
| 	if (!pages) | ||||
| 		goto no_map; | ||||
| 
 | ||||
| 	/* remove any dirty cache lines on the kernel alias */ | ||||
| 	__dma_flush_range(linear, linear + size); | ||||
| 
 | ||||
| 	/* create a coherent mapping */ | ||||
| 	first = virt_to_page(linear); | ||||
| 	for (idx = 0; idx < (size >> PAGE_SHIFT); idx++) | ||||
| 		pages[idx] = first + idx; | ||||
| 	remapped = vmap(pages, size >> PAGE_SHIFT, VM_MAP, | ||||
| 			pgprot_dmacoherent(pgprot_default)); | ||||
| 	kfree(pages); | ||||
| 	if (remapped) | ||||
| 		return remapped; | ||||
| 
 | ||||
| no_map: | ||||
| 	__dma_free_coherent(dev, size, linear, *dma_handle, attrs); | ||||
| no_mem: | ||||
| 	*dma_handle = ~0; | ||||
| 	return NULL; | ||||
| } | ||||
| 
 | ||||
| /* | ||||
|  * Release a buffer obtained from __dma_alloc_noncoherent(): tear down the | ||||
|  * vmap()ed mapping at @vaddr, then hand the underlying memory, located through | ||||
|  * @dma_handle, back to __dma_free_coherent(). | ||||
|  */ | ||||
| static void __dma_free_noncoherent(struct device *dev, size_t size, | ||||
| 				   void *vaddr, dma_addr_t dma_handle, | ||||
| 				   struct dma_attrs *attrs) | ||||
| { | ||||
| 	void *linear; | ||||
| 
 | ||||
| 	/* kernel linear-map address of the memory behind the DMA handle */ | ||||
| 	linear = phys_to_virt(dma_to_phys(dev, dma_handle)); | ||||
| 
 | ||||
| 	vunmap(vaddr); | ||||
| 	__dma_free_coherent(dev, size, linear, dma_handle, attrs); | ||||
| } | ||||
| 
 | ||||
| /* | ||||
|  * Map a page through swiotlb_map_page(), then run __dma_map_area() on the | ||||
|  * kernel address that corresponds to the returned DMA address. | ||||
|  * Returns the DMA address produced by swiotlb_map_page(). | ||||
|  */ | ||||
| static dma_addr_t __swiotlb_map_page(struct device *dev, struct page *page, | ||||
| 				     unsigned long offset, size_t size, | ||||
| 				     enum dma_data_direction dir, | ||||
| 				     struct dma_attrs *attrs) | ||||
| { | ||||
| 	dma_addr_t handle; | ||||
| 	void *kaddr; | ||||
| 
 | ||||
| 	handle = swiotlb_map_page(dev, page, offset, size, dir, attrs); | ||||
| 	kaddr = phys_to_virt(dma_to_phys(dev, handle)); | ||||
| 	__dma_map_area(kaddr, size, dir); | ||||
| 
 | ||||
| 	return handle; | ||||
| } | ||||
| 
 | ||||
| 
 | ||||
| /* | ||||
|  * Run __dma_unmap_area() on the kernel address behind @dev_addr, then let | ||||
|  * swiotlb_unmap_page() undo the mapping. | ||||
|  */ | ||||
| static void __swiotlb_unmap_page(struct device *dev, dma_addr_t dev_addr, | ||||
| 				 size_t size, enum dma_data_direction dir, | ||||
| 				 struct dma_attrs *attrs) | ||||
| { | ||||
| 	void *kaddr = phys_to_virt(dma_to_phys(dev, dev_addr)); | ||||
| 
 | ||||
| 	__dma_unmap_area(kaddr, size, dir); | ||||
| 	swiotlb_unmap_page(dev, dev_addr, size, dir, attrs); | ||||
| } | ||||
| 
 | ||||
| /* | ||||
|  * Map a scatterlist through swiotlb_map_sg_attrs(), then run __dma_map_area() | ||||
|  * on each of the entries it reports as mapped. | ||||
|  * Returns the value returned by swiotlb_map_sg_attrs(). | ||||
|  */ | ||||
| static int __swiotlb_map_sg_attrs(struct device *dev, struct scatterlist *sgl, | ||||
| 				  int nelems, enum dma_data_direction dir, | ||||
| 				  struct dma_attrs *attrs) | ||||
| { | ||||
| 	struct scatterlist *ent; | ||||
| 	int mapped, n; | ||||
| 
 | ||||
| 	mapped = swiotlb_map_sg_attrs(dev, sgl, nelems, dir, attrs); | ||||
| 	for_each_sg(sgl, ent, mapped, n) { | ||||
| 		void *kaddr = phys_to_virt(dma_to_phys(dev, ent->dma_address)); | ||||
| 
 | ||||
| 		__dma_map_area(kaddr, ent->length, dir); | ||||
| 	} | ||||
| 
 | ||||
| 	return mapped; | ||||
| } | ||||
| 
 | ||||
| /* | ||||
|  * Run __dma_unmap_area() on each of the @nelems scatterlist entries, then let | ||||
|  * swiotlb_unmap_sg_attrs() undo the mapping. | ||||
|  */ | ||||
| static void __swiotlb_unmap_sg_attrs(struct device *dev, | ||||
| 				     struct scatterlist *sgl, int nelems, | ||||
| 				     enum dma_data_direction dir, | ||||
| 				     struct dma_attrs *attrs) | ||||
| { | ||||
| 	struct scatterlist *ent; | ||||
| 	int n; | ||||
| 
 | ||||
| 	for_each_sg(sgl, ent, nelems, n) { | ||||
| 		void *kaddr = phys_to_virt(dma_to_phys(dev, ent->dma_address)); | ||||
| 
 | ||||
| 		__dma_unmap_area(kaddr, ent->length, dir); | ||||
| 	} | ||||
| 
 | ||||
| 	swiotlb_unmap_sg_attrs(dev, sgl, nelems, dir, attrs); | ||||
| } | ||||
| 
 | ||||
| /* | ||||
|  * Run __dma_unmap_area() on the kernel address behind @dev_addr, then call | ||||
|  * swiotlb_sync_single_for_cpu(). | ||||
|  */ | ||||
| static void __swiotlb_sync_single_for_cpu(struct device *dev, | ||||
| 					  dma_addr_t dev_addr, size_t size, | ||||
| 					  enum dma_data_direction dir) | ||||
| { | ||||
| 	void *kaddr = phys_to_virt(dma_to_phys(dev, dev_addr)); | ||||
| 
 | ||||
| 	__dma_unmap_area(kaddr, size, dir); | ||||
| 	swiotlb_sync_single_for_cpu(dev, dev_addr, size, dir); | ||||
| } | ||||
| 
 | ||||
| /* | ||||
|  * Call swiotlb_sync_single_for_device(), then run __dma_map_area() on the | ||||
|  * kernel address behind @dev_addr. | ||||
|  */ | ||||
| static void __swiotlb_sync_single_for_device(struct device *dev, | ||||
| 					     dma_addr_t dev_addr, size_t size, | ||||
| 					     enum dma_data_direction dir) | ||||
| { | ||||
| 	void *kaddr; | ||||
| 
 | ||||
| 	swiotlb_sync_single_for_device(dev, dev_addr, size, dir); | ||||
| 
 | ||||
| 	kaddr = phys_to_virt(dma_to_phys(dev, dev_addr)); | ||||
| 	__dma_map_area(kaddr, size, dir); | ||||
| } | ||||
| 
 | ||||
| /* | ||||
|  * Run __dma_unmap_area() on each of the @nelems scatterlist entries, then call | ||||
|  * swiotlb_sync_sg_for_cpu() on the whole list. | ||||
|  */ | ||||
| static void __swiotlb_sync_sg_for_cpu(struct device *dev, | ||||
| 				      struct scatterlist *sgl, int nelems, | ||||
| 				      enum dma_data_direction dir) | ||||
| { | ||||
| 	struct scatterlist *ent; | ||||
| 	int n; | ||||
| 
 | ||||
| 	for_each_sg(sgl, ent, nelems, n) { | ||||
| 		void *kaddr = phys_to_virt(dma_to_phys(dev, ent->dma_address)); | ||||
| 
 | ||||
| 		__dma_unmap_area(kaddr, ent->length, dir); | ||||
| 	} | ||||
| 
 | ||||
| 	swiotlb_sync_sg_for_cpu(dev, sgl, nelems, dir); | ||||
| } | ||||
| 
 | ||||
| /* | ||||
|  * Call swiotlb_sync_sg_for_device() on the whole list, then run | ||||
|  * __dma_map_area() on each of the @nelems scatterlist entries. | ||||
|  */ | ||||
| static void __swiotlb_sync_sg_for_device(struct device *dev, | ||||
| 					 struct scatterlist *sgl, int nelems, | ||||
| 					 enum dma_data_direction dir) | ||||
| { | ||||
| 	struct scatterlist *ent; | ||||
| 	int n; | ||||
| 
 | ||||
| 	swiotlb_sync_sg_for_device(dev, sgl, nelems, dir); | ||||
| 
 | ||||
| 	for_each_sg(sgl, ent, nelems, n) { | ||||
| 		void *kaddr = phys_to_virt(dma_to_phys(dev, ent->dma_address)); | ||||
| 
 | ||||
| 		__dma_map_area(kaddr, ent->length, dir); | ||||
| 	} | ||||
| } | ||||
| 
 | ||||
| /* | ||||
|  * DMA operations for devices without hardware cache coherency. Allocation, | ||||
|  * mapping and sync callbacks are the local wrappers that add CPU cache | ||||
|  * maintenance; the capability and error checks are the plain swiotlb ones. | ||||
|  */ | ||||
| struct dma_map_ops noncoherent_swiotlb_dma_ops = { | ||||
| 	/* capability and error checks */ | ||||
| 	.dma_supported		= swiotlb_dma_supported, | ||||
| 	.mapping_error		= swiotlb_dma_mapping_error, | ||||
| 	/* buffer allocation */ | ||||
| 	.alloc			= __dma_alloc_noncoherent, | ||||
| 	.free			= __dma_free_noncoherent, | ||||
| 	/* streaming mappings */ | ||||
| 	.map_page		= __swiotlb_map_page, | ||||
| 	.unmap_page		= __swiotlb_unmap_page, | ||||
| 	.map_sg			= __swiotlb_map_sg_attrs, | ||||
| 	.unmap_sg		= __swiotlb_unmap_sg_attrs, | ||||
| 	/* ownership hand-over between CPU and device */ | ||||
| 	.sync_single_for_cpu	= __swiotlb_sync_single_for_cpu, | ||||
| 	.sync_single_for_device	= __swiotlb_sync_single_for_device, | ||||
| 	.sync_sg_for_cpu	= __swiotlb_sync_sg_for_cpu, | ||||
| 	.sync_sg_for_device	= __swiotlb_sync_sg_for_device, | ||||
| }; | ||||
| EXPORT_SYMBOL(noncoherent_swiotlb_dma_ops); | ||||
| 
 | ||||
| struct dma_map_ops coherent_swiotlb_dma_ops = { | ||||
| 	.alloc = __dma_alloc_coherent, | ||||
| 	.free = __dma_free_coherent, | ||||
| 	.map_page = swiotlb_map_page, | ||||
|  | @ -92,6 +251,7 @@ static struct dma_map_ops coherent_swiotlb_dma_ops = { | |||
| 	.dma_supported = swiotlb_dma_supported, | ||||
| 	.mapping_error = swiotlb_dma_mapping_error, | ||||
| }; | ||||
| EXPORT_SYMBOL(coherent_swiotlb_dma_ops); | ||||
| 
 | ||||
| extern int swiotlb_late_init_with_default_size(size_t default_size); | ||||
| 
 | ||||
|  |  | |||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue
	
	 Catalin Marinas
				Catalin Marinas