powerpc/vfio: Implement IOMMU driver for VFIO
VFIO implements platform independent stuff such as a PCI driver, BAR access (via read/write on a file descriptor or direct mapping when possible) and IRQ signaling. The platform dependent part includes IOMMU initialization and handling. This implements an IOMMU driver for VFIO which does mapping/unmapping pages for the guest IO and provides information about DMA window (required by a POWER guest). Cc: David Gibson <david@gibson.dropbear.id.au> Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru> Signed-off-by: Paul Mackerras <paulus@samba.org> Acked-by: Alex Williamson <alex.williamson@redhat.com> Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
This commit is contained in:
		
					parent
					
						
							
								4e13c1ac6b
							
						
					
				
			
			
				commit
				
					
						5ffd229c02
					
				
			
		
					 6 changed files with 482 additions and 0 deletions
				
			
		|  | @ -283,6 +283,69 @@ a direct pass through for VFIO_DEVICE_* ioctls.  The read/write/mmap | ||||||
| interfaces implement the device region access defined by the device's | interfaces implement the device region access defined by the device's | ||||||
| own VFIO_DEVICE_GET_REGION_INFO ioctl. | own VFIO_DEVICE_GET_REGION_INFO ioctl. | ||||||
| 
 | 
 | ||||||
|  | 
 | ||||||
|  | PPC64 sPAPR implementation note | ||||||
|  | ------------------------------------------------------------------------------- | ||||||
|  | 
 | ||||||
|  | This implementation has some specifics: | ||||||
|  | 
 | ||||||
|  | 1) Only one IOMMU group per container is supported as an IOMMU group | ||||||
|  | represents the minimal entity which isolation can be guaranteed for and | ||||||
|  | groups are allocated statically, one per a Partitionable Endpoint (PE) | ||||||
|  | (PE is often a PCI domain but not always). | ||||||
|  | 
 | ||||||
|  | 2) The hardware supports so called DMA windows - the PCI address range | ||||||
|  | within which DMA transfer is allowed, any attempt to access address space | ||||||
|  | out of the window leads to isolation of the whole PE. | ||||||
|  | 
 | ||||||
|  | 3) PPC64 guests are paravirtualized but not fully emulated. There is an API | ||||||
|  | to map/unmap pages for DMA, and it normally maps 1..32 pages per call and | ||||||
|  | currently there is no way to reduce the number of calls. In order to make things | ||||||
|  | faster, the map/unmap handling has been implemented in real mode which provides | ||||||
|  | an excellent performance but has limitations such as the inability to do | ||||||
|  | locked pages accounting in real time. | ||||||
|  | 
 | ||||||
|  | So 3 additional ioctls have been added: | ||||||
|  | 
 | ||||||
|  | 	VFIO_IOMMU_SPAPR_TCE_GET_INFO - returns the size and the start | ||||||
|  | 		of the DMA window on the PCI bus. | ||||||
|  | 
 | ||||||
|  | 	VFIO_IOMMU_ENABLE - enables the container. The locked pages accounting | ||||||
|  | 		is done at this point. This lets the user first learn what | ||||||
|  | 		the DMA window is and adjust rlimit before doing any real job. | ||||||
|  | 
 | ||||||
|  | 	VFIO_IOMMU_DISABLE - disables the container. | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | The code flow from the example above should be slightly changed: | ||||||
|  | 
 | ||||||
|  | 	..... | ||||||
|  | 	/* Add the group to the container */ | ||||||
|  | 	ioctl(group, VFIO_GROUP_SET_CONTAINER, &container); | ||||||
|  | 
 | ||||||
|  | 	/* Enable the IOMMU model we want */ | ||||||
|  | 	ioctl(container, VFIO_SET_IOMMU, VFIO_SPAPR_TCE_IOMMU) | ||||||
|  | 
 | ||||||
|  | 	/* Get additional sPAPR IOMMU info */ | ||||||
|  | 	vfio_iommu_spapr_tce_info spapr_iommu_info; | ||||||
|  | 	ioctl(container, VFIO_IOMMU_SPAPR_TCE_GET_INFO, &spapr_iommu_info); | ||||||
|  | 
 | ||||||
|  | 	if (ioctl(container, VFIO_IOMMU_ENABLE)) | ||||||
|  | 		/* Cannot enable container, may be low rlimit */ | ||||||
|  | 
 | ||||||
|  | 	/* Allocate some space and setup a DMA mapping */ | ||||||
|  | 	dma_map.vaddr = mmap(0, 1024 * 1024, PROT_READ | PROT_WRITE, | ||||||
|  | 			     MAP_PRIVATE | MAP_ANONYMOUS, 0, 0); | ||||||
|  | 
 | ||||||
|  | 	dma_map.size = 1024 * 1024; | ||||||
|  | 	dma_map.iova = 0; /* 1MB starting at 0x0 from device view */ | ||||||
|  | 	dma_map.flags = VFIO_DMA_MAP_FLAG_READ | VFIO_DMA_MAP_FLAG_WRITE; | ||||||
|  | 
 | ||||||
|  | 	/* Check here if .iova/.size are within the DMA window from spapr_iommu_info */ | ||||||
|  | 
 | ||||||
|  | 	ioctl(container, VFIO_IOMMU_MAP_DMA, &dma_map); | ||||||
|  | 	..... | ||||||
|  | 
 | ||||||
| ------------------------------------------------------------------------------- | ------------------------------------------------------------------------------- | ||||||
| 
 | 
 | ||||||
| [1] VFIO was originally an acronym for "Virtual Function I/O" in its | [1] VFIO was originally an acronym for "Virtual Function I/O" in its | ||||||
|  |  | ||||||
|  | @ -3,10 +3,16 @@ config VFIO_IOMMU_TYPE1 | ||||||
| 	depends on VFIO | 	depends on VFIO | ||||||
| 	default n | 	default n | ||||||
| 
 | 
 | ||||||
|  | config VFIO_IOMMU_SPAPR_TCE | ||||||
|  | 	tristate | ||||||
|  | 	depends on VFIO && SPAPR_TCE_IOMMU | ||||||
|  | 	default n | ||||||
|  | 
 | ||||||
| menuconfig VFIO | menuconfig VFIO | ||||||
| 	tristate "VFIO Non-Privileged userspace driver framework" | 	tristate "VFIO Non-Privileged userspace driver framework" | ||||||
| 	depends on IOMMU_API | 	depends on IOMMU_API | ||||||
| 	select VFIO_IOMMU_TYPE1 if X86 | 	select VFIO_IOMMU_TYPE1 if X86 | ||||||
|  | 	select VFIO_IOMMU_SPAPR_TCE if PPC_POWERNV | ||||||
| 	help | 	help | ||||||
| 	  VFIO provides a framework for secure userspace device drivers. | 	  VFIO provides a framework for secure userspace device drivers. | ||||||
| 	  See Documentation/vfio.txt for more details. | 	  See Documentation/vfio.txt for more details. | ||||||
|  |  | ||||||
|  | @ -1,3 +1,4 @@ | ||||||
| obj-$(CONFIG_VFIO) += vfio.o | obj-$(CONFIG_VFIO) += vfio.o | ||||||
| obj-$(CONFIG_VFIO_IOMMU_TYPE1) += vfio_iommu_type1.o | obj-$(CONFIG_VFIO_IOMMU_TYPE1) += vfio_iommu_type1.o | ||||||
|  | obj-$(CONFIG_VFIO_IOMMU_SPAPR_TCE) += vfio_iommu_spapr_tce.o | ||||||
| obj-$(CONFIG_VFIO_PCI) += pci/ | obj-$(CONFIG_VFIO_PCI) += pci/ | ||||||
|  |  | ||||||
|  | @ -1415,6 +1415,7 @@ static int __init vfio_init(void) | ||||||
| 	 * drivers. | 	 * drivers. | ||||||
| 	 */ | 	 */ | ||||||
| 	request_module_nowait("vfio_iommu_type1"); | 	request_module_nowait("vfio_iommu_type1"); | ||||||
|  | 	request_module_nowait("vfio_iommu_spapr_tce"); | ||||||
| 
 | 
 | ||||||
| 	return 0; | 	return 0; | ||||||
| 
 | 
 | ||||||
|  |  | ||||||
							
								
								
									
										377
									
								
								drivers/vfio/vfio_iommu_spapr_tce.c
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										377
									
								
								drivers/vfio/vfio_iommu_spapr_tce.c
									
										
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,377 @@ | ||||||
|  | /*
 | ||||||
|  |  * VFIO: IOMMU DMA mapping support for TCE on POWER | ||||||
|  |  * | ||||||
|  |  * Copyright (C) 2013 IBM Corp.  All rights reserved. | ||||||
|  |  *     Author: Alexey Kardashevskiy <aik@ozlabs.ru> | ||||||
|  |  * | ||||||
|  |  * This program is free software; you can redistribute it and/or modify | ||||||
|  |  * it under the terms of the GNU General Public License version 2 as | ||||||
|  |  * published by the Free Software Foundation. | ||||||
|  |  * | ||||||
|  |  * Derived from original vfio_iommu_type1.c: | ||||||
|  |  * Copyright (C) 2012 Red Hat, Inc.  All rights reserved. | ||||||
|  |  *     Author: Alex Williamson <alex.williamson@redhat.com> | ||||||
|  |  */ | ||||||
|  | 
 | ||||||
|  | #include <linux/module.h> | ||||||
|  | #include <linux/pci.h> | ||||||
|  | #include <linux/slab.h> | ||||||
|  | #include <linux/uaccess.h> | ||||||
|  | #include <linux/err.h> | ||||||
|  | #include <linux/vfio.h> | ||||||
|  | #include <asm/iommu.h> | ||||||
|  | #include <asm/tce.h> | ||||||
|  | 
 | ||||||
|  | #define DRIVER_VERSION  "0.1" | ||||||
|  | #define DRIVER_AUTHOR   "aik@ozlabs.ru" | ||||||
|  | #define DRIVER_DESC     "VFIO IOMMU SPAPR TCE" | ||||||
|  | 
 | ||||||
|  | static void tce_iommu_detach_group(void *iommu_data, | ||||||
|  | 		struct iommu_group *iommu_group); | ||||||
|  | 
 | ||||||
|  | /*
 | ||||||
|  |  * VFIO IOMMU fd for SPAPR_TCE IOMMU implementation | ||||||
|  |  * | ||||||
|  |  * This code handles mapping and unmapping of user data buffers | ||||||
|  |  * into DMA'ble space using the IOMMU | ||||||
|  |  */ | ||||||
|  | 
 | ||||||
|  | /*
 | ||||||
|  |  * The container descriptor supports only a single group per container. | ||||||
|  |  * Required by the API as the container is not supplied with the IOMMU group | ||||||
|  |  * at the moment of initialization. | ||||||
|  |  */ | ||||||
|  | struct tce_container { | ||||||
|  | 	struct mutex lock; | ||||||
|  | 	struct iommu_table *tbl; | ||||||
|  | 	bool enabled; | ||||||
|  | }; | ||||||
|  | 
 | ||||||
|  | static int tce_iommu_enable(struct tce_container *container) | ||||||
|  | { | ||||||
|  | 	int ret = 0; | ||||||
|  | 	unsigned long locked, lock_limit, npages; | ||||||
|  | 	struct iommu_table *tbl = container->tbl; | ||||||
|  | 
 | ||||||
|  | 	if (!container->tbl) | ||||||
|  | 		return -ENXIO; | ||||||
|  | 
 | ||||||
|  | 	if (!current->mm) | ||||||
|  | 		return -ESRCH; /* process exited */ | ||||||
|  | 
 | ||||||
|  | 	if (container->enabled) | ||||||
|  | 		return -EBUSY; | ||||||
|  | 
 | ||||||
|  | 	/*
 | ||||||
|  | 	 * When userspace pages are mapped into the IOMMU, they are effectively | ||||||
|  | 	 * locked memory, so, theoretically, we need to update the accounting | ||||||
|  | 	 * of locked pages on each map and unmap.  For powerpc, the map unmap | ||||||
|  | 	 * paths can be very hot, though, and the accounting would kill | ||||||
|  | 	 * performance, especially since it would be difficult to impossible | ||||||
|  | 	 * to handle the accounting in real mode only. | ||||||
|  | 	 * | ||||||
|  | 	 * To address that, rather than precisely accounting every page, we | ||||||
|  | 	 * instead account for a worst case on locked memory when the iommu is | ||||||
|  | 	 * enabled and disabled.  The worst case upper bound on locked memory | ||||||
|  | 	 * is the size of the whole iommu window, which is usually relatively | ||||||
|  | 	 * small (compared to total memory sizes) on POWER hardware. | ||||||
|  | 	 * | ||||||
|  | 	 * Also we don't have a nice way to fail on H_PUT_TCE due to ulimits, | ||||||
|  | 	 * that would effectively kill the guest at random points, much better | ||||||
|  | 	 * enforcing the limit based on the max that the guest can map. | ||||||
|  | 	 */ | ||||||
|  | 	down_write(¤t->mm->mmap_sem); | ||||||
|  | 	npages = (tbl->it_size << IOMMU_PAGE_SHIFT) >> PAGE_SHIFT; | ||||||
|  | 	locked = current->mm->locked_vm + npages; | ||||||
|  | 	lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT; | ||||||
|  | 	if (locked > lock_limit && !capable(CAP_IPC_LOCK)) { | ||||||
|  | 		pr_warn("RLIMIT_MEMLOCK (%ld) exceeded\n", | ||||||
|  | 				rlimit(RLIMIT_MEMLOCK)); | ||||||
|  | 		ret = -ENOMEM; | ||||||
|  | 	} else { | ||||||
|  | 
 | ||||||
|  | 		current->mm->locked_vm += npages; | ||||||
|  | 		container->enabled = true; | ||||||
|  | 	} | ||||||
|  | 	up_write(¤t->mm->mmap_sem); | ||||||
|  | 
 | ||||||
|  | 	return ret; | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | static void tce_iommu_disable(struct tce_container *container) | ||||||
|  | { | ||||||
|  | 	if (!container->enabled) | ||||||
|  | 		return; | ||||||
|  | 
 | ||||||
|  | 	container->enabled = false; | ||||||
|  | 
 | ||||||
|  | 	if (!container->tbl || !current->mm) | ||||||
|  | 		return; | ||||||
|  | 
 | ||||||
|  | 	down_write(¤t->mm->mmap_sem); | ||||||
|  | 	current->mm->locked_vm -= (container->tbl->it_size << | ||||||
|  | 			IOMMU_PAGE_SHIFT) >> PAGE_SHIFT; | ||||||
|  | 	up_write(¤t->mm->mmap_sem); | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | static void *tce_iommu_open(unsigned long arg) | ||||||
|  | { | ||||||
|  | 	struct tce_container *container; | ||||||
|  | 
 | ||||||
|  | 	if (arg != VFIO_SPAPR_TCE_IOMMU) { | ||||||
|  | 		pr_err("tce_vfio: Wrong IOMMU type\n"); | ||||||
|  | 		return ERR_PTR(-EINVAL); | ||||||
|  | 	} | ||||||
|  | 
 | ||||||
|  | 	container = kzalloc(sizeof(*container), GFP_KERNEL); | ||||||
|  | 	if (!container) | ||||||
|  | 		return ERR_PTR(-ENOMEM); | ||||||
|  | 
 | ||||||
|  | 	mutex_init(&container->lock); | ||||||
|  | 
 | ||||||
|  | 	return container; | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | static void tce_iommu_release(void *iommu_data) | ||||||
|  | { | ||||||
|  | 	struct tce_container *container = iommu_data; | ||||||
|  | 
 | ||||||
|  | 	WARN_ON(container->tbl && !container->tbl->it_group); | ||||||
|  | 	tce_iommu_disable(container); | ||||||
|  | 
 | ||||||
|  | 	if (container->tbl && container->tbl->it_group) | ||||||
|  | 		tce_iommu_detach_group(iommu_data, container->tbl->it_group); | ||||||
|  | 
 | ||||||
|  | 	mutex_destroy(&container->lock); | ||||||
|  | 
 | ||||||
|  | 	kfree(container); | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | static long tce_iommu_ioctl(void *iommu_data, | ||||||
|  | 				 unsigned int cmd, unsigned long arg) | ||||||
|  | { | ||||||
|  | 	struct tce_container *container = iommu_data; | ||||||
|  | 	unsigned long minsz; | ||||||
|  | 	long ret; | ||||||
|  | 
 | ||||||
|  | 	switch (cmd) { | ||||||
|  | 	case VFIO_CHECK_EXTENSION: | ||||||
|  | 		return (arg == VFIO_SPAPR_TCE_IOMMU) ? 1 : 0; | ||||||
|  | 
 | ||||||
|  | 	case VFIO_IOMMU_SPAPR_TCE_GET_INFO: { | ||||||
|  | 		struct vfio_iommu_spapr_tce_info info; | ||||||
|  | 		struct iommu_table *tbl = container->tbl; | ||||||
|  | 
 | ||||||
|  | 		if (WARN_ON(!tbl)) | ||||||
|  | 			return -ENXIO; | ||||||
|  | 
 | ||||||
|  | 		minsz = offsetofend(struct vfio_iommu_spapr_tce_info, | ||||||
|  | 				dma32_window_size); | ||||||
|  | 
 | ||||||
|  | 		if (copy_from_user(&info, (void __user *)arg, minsz)) | ||||||
|  | 			return -EFAULT; | ||||||
|  | 
 | ||||||
|  | 		if (info.argsz < minsz) | ||||||
|  | 			return -EINVAL; | ||||||
|  | 
 | ||||||
|  | 		info.dma32_window_start = tbl->it_offset << IOMMU_PAGE_SHIFT; | ||||||
|  | 		info.dma32_window_size = tbl->it_size << IOMMU_PAGE_SHIFT; | ||||||
|  | 		info.flags = 0; | ||||||
|  | 
 | ||||||
|  | 		if (copy_to_user((void __user *)arg, &info, minsz)) | ||||||
|  | 			return -EFAULT; | ||||||
|  | 
 | ||||||
|  | 		return 0; | ||||||
|  | 	} | ||||||
|  | 	case VFIO_IOMMU_MAP_DMA: { | ||||||
|  | 		struct vfio_iommu_type1_dma_map param; | ||||||
|  | 		struct iommu_table *tbl = container->tbl; | ||||||
|  | 		unsigned long tce, i; | ||||||
|  | 
 | ||||||
|  | 		if (!tbl) | ||||||
|  | 			return -ENXIO; | ||||||
|  | 
 | ||||||
|  | 		BUG_ON(!tbl->it_group); | ||||||
|  | 
 | ||||||
|  | 		minsz = offsetofend(struct vfio_iommu_type1_dma_map, size); | ||||||
|  | 
 | ||||||
|  | 		if (copy_from_user(¶m, (void __user *)arg, minsz)) | ||||||
|  | 			return -EFAULT; | ||||||
|  | 
 | ||||||
|  | 		if (param.argsz < minsz) | ||||||
|  | 			return -EINVAL; | ||||||
|  | 
 | ||||||
|  | 		if (param.flags & ~(VFIO_DMA_MAP_FLAG_READ | | ||||||
|  | 				VFIO_DMA_MAP_FLAG_WRITE)) | ||||||
|  | 			return -EINVAL; | ||||||
|  | 
 | ||||||
|  | 		if ((param.size & ~IOMMU_PAGE_MASK) || | ||||||
|  | 				(param.vaddr & ~IOMMU_PAGE_MASK)) | ||||||
|  | 			return -EINVAL; | ||||||
|  | 
 | ||||||
|  | 		/* iova is checked by the IOMMU API */ | ||||||
|  | 		tce = param.vaddr; | ||||||
|  | 		if (param.flags & VFIO_DMA_MAP_FLAG_READ) | ||||||
|  | 			tce |= TCE_PCI_READ; | ||||||
|  | 		if (param.flags & VFIO_DMA_MAP_FLAG_WRITE) | ||||||
|  | 			tce |= TCE_PCI_WRITE; | ||||||
|  | 
 | ||||||
|  | 		ret = iommu_tce_put_param_check(tbl, param.iova, tce); | ||||||
|  | 		if (ret) | ||||||
|  | 			return ret; | ||||||
|  | 
 | ||||||
|  | 		for (i = 0; i < (param.size >> IOMMU_PAGE_SHIFT); ++i) { | ||||||
|  | 			ret = iommu_put_tce_user_mode(tbl, | ||||||
|  | 					(param.iova >> IOMMU_PAGE_SHIFT) + i, | ||||||
|  | 					tce); | ||||||
|  | 			if (ret) | ||||||
|  | 				break; | ||||||
|  | 			tce += IOMMU_PAGE_SIZE; | ||||||
|  | 		} | ||||||
|  | 		if (ret) | ||||||
|  | 			iommu_clear_tces_and_put_pages(tbl, | ||||||
|  | 					param.iova >> IOMMU_PAGE_SHIFT,	i); | ||||||
|  | 
 | ||||||
|  | 		iommu_flush_tce(tbl); | ||||||
|  | 
 | ||||||
|  | 		return ret; | ||||||
|  | 	} | ||||||
|  | 	case VFIO_IOMMU_UNMAP_DMA: { | ||||||
|  | 		struct vfio_iommu_type1_dma_unmap param; | ||||||
|  | 		struct iommu_table *tbl = container->tbl; | ||||||
|  | 
 | ||||||
|  | 		if (WARN_ON(!tbl)) | ||||||
|  | 			return -ENXIO; | ||||||
|  | 
 | ||||||
|  | 		minsz = offsetofend(struct vfio_iommu_type1_dma_unmap, | ||||||
|  | 				size); | ||||||
|  | 
 | ||||||
|  | 		if (copy_from_user(¶m, (void __user *)arg, minsz)) | ||||||
|  | 			return -EFAULT; | ||||||
|  | 
 | ||||||
|  | 		if (param.argsz < minsz) | ||||||
|  | 			return -EINVAL; | ||||||
|  | 
 | ||||||
|  | 		/* No flag is supported now */ | ||||||
|  | 		if (param.flags) | ||||||
|  | 			return -EINVAL; | ||||||
|  | 
 | ||||||
|  | 		if (param.size & ~IOMMU_PAGE_MASK) | ||||||
|  | 			return -EINVAL; | ||||||
|  | 
 | ||||||
|  | 		ret = iommu_tce_clear_param_check(tbl, param.iova, 0, | ||||||
|  | 				param.size >> IOMMU_PAGE_SHIFT); | ||||||
|  | 		if (ret) | ||||||
|  | 			return ret; | ||||||
|  | 
 | ||||||
|  | 		ret = iommu_clear_tces_and_put_pages(tbl, | ||||||
|  | 				param.iova >> IOMMU_PAGE_SHIFT, | ||||||
|  | 				param.size >> IOMMU_PAGE_SHIFT); | ||||||
|  | 		iommu_flush_tce(tbl); | ||||||
|  | 
 | ||||||
|  | 		return ret; | ||||||
|  | 	} | ||||||
|  | 	case VFIO_IOMMU_ENABLE: | ||||||
|  | 		mutex_lock(&container->lock); | ||||||
|  | 		ret = tce_iommu_enable(container); | ||||||
|  | 		mutex_unlock(&container->lock); | ||||||
|  | 		return ret; | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | 	case VFIO_IOMMU_DISABLE: | ||||||
|  | 		mutex_lock(&container->lock); | ||||||
|  | 		tce_iommu_disable(container); | ||||||
|  | 		mutex_unlock(&container->lock); | ||||||
|  | 		return 0; | ||||||
|  | 	} | ||||||
|  | 
 | ||||||
|  | 	return -ENOTTY; | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | static int tce_iommu_attach_group(void *iommu_data, | ||||||
|  | 		struct iommu_group *iommu_group) | ||||||
|  | { | ||||||
|  | 	int ret; | ||||||
|  | 	struct tce_container *container = iommu_data; | ||||||
|  | 	struct iommu_table *tbl = iommu_group_get_iommudata(iommu_group); | ||||||
|  | 
 | ||||||
|  | 	BUG_ON(!tbl); | ||||||
|  | 	mutex_lock(&container->lock); | ||||||
|  | 
 | ||||||
|  | 	/* pr_debug("tce_vfio: Attaching group #%u to iommu %p\n",
 | ||||||
|  | 			iommu_group_id(iommu_group), iommu_group); */ | ||||||
|  | 	if (container->tbl) { | ||||||
|  | 		pr_warn("tce_vfio: Only one group per IOMMU container is allowed, existing id=%d, attaching id=%d\n", | ||||||
|  | 				iommu_group_id(container->tbl->it_group), | ||||||
|  | 				iommu_group_id(iommu_group)); | ||||||
|  | 		ret = -EBUSY; | ||||||
|  | 	} else if (container->enabled) { | ||||||
|  | 		pr_err("tce_vfio: attaching group #%u to enabled container\n", | ||||||
|  | 				iommu_group_id(iommu_group)); | ||||||
|  | 		ret = -EBUSY; | ||||||
|  | 	} else { | ||||||
|  | 		ret = iommu_take_ownership(tbl); | ||||||
|  | 		if (!ret) | ||||||
|  | 			container->tbl = tbl; | ||||||
|  | 	} | ||||||
|  | 
 | ||||||
|  | 	mutex_unlock(&container->lock); | ||||||
|  | 
 | ||||||
|  | 	return ret; | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | static void tce_iommu_detach_group(void *iommu_data, | ||||||
|  | 		struct iommu_group *iommu_group) | ||||||
|  | { | ||||||
|  | 	struct tce_container *container = iommu_data; | ||||||
|  | 	struct iommu_table *tbl = iommu_group_get_iommudata(iommu_group); | ||||||
|  | 
 | ||||||
|  | 	BUG_ON(!tbl); | ||||||
|  | 	mutex_lock(&container->lock); | ||||||
|  | 	if (tbl != container->tbl) { | ||||||
|  | 		pr_warn("tce_vfio: detaching group #%u, expected group is #%u\n", | ||||||
|  | 				iommu_group_id(iommu_group), | ||||||
|  | 				iommu_group_id(tbl->it_group)); | ||||||
|  | 	} else { | ||||||
|  | 		if (container->enabled) { | ||||||
|  | 			pr_warn("tce_vfio: detaching group #%u from enabled container, forcing disable\n", | ||||||
|  | 					iommu_group_id(tbl->it_group)); | ||||||
|  | 			tce_iommu_disable(container); | ||||||
|  | 		} | ||||||
|  | 
 | ||||||
|  | 		/* pr_debug("tce_vfio: detaching group #%u from iommu %p\n",
 | ||||||
|  | 				iommu_group_id(iommu_group), iommu_group); */ | ||||||
|  | 		container->tbl = NULL; | ||||||
|  | 		iommu_release_ownership(tbl); | ||||||
|  | 	} | ||||||
|  | 	mutex_unlock(&container->lock); | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | const struct vfio_iommu_driver_ops tce_iommu_driver_ops = { | ||||||
|  | 	.name		= "iommu-vfio-powerpc", | ||||||
|  | 	.owner		= THIS_MODULE, | ||||||
|  | 	.open		= tce_iommu_open, | ||||||
|  | 	.release	= tce_iommu_release, | ||||||
|  | 	.ioctl		= tce_iommu_ioctl, | ||||||
|  | 	.attach_group	= tce_iommu_attach_group, | ||||||
|  | 	.detach_group	= tce_iommu_detach_group, | ||||||
|  | }; | ||||||
|  | 
 | ||||||
|  | static int __init tce_iommu_init(void) | ||||||
|  | { | ||||||
|  | 	return vfio_register_iommu_driver(&tce_iommu_driver_ops); | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | static void __exit tce_iommu_cleanup(void) | ||||||
|  | { | ||||||
|  | 	vfio_unregister_iommu_driver(&tce_iommu_driver_ops); | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | module_init(tce_iommu_init); | ||||||
|  | module_exit(tce_iommu_cleanup); | ||||||
|  | 
 | ||||||
|  | MODULE_VERSION(DRIVER_VERSION); | ||||||
|  | MODULE_LICENSE("GPL v2"); | ||||||
|  | MODULE_AUTHOR(DRIVER_AUTHOR); | ||||||
|  | MODULE_DESCRIPTION(DRIVER_DESC); | ||||||
|  | 
 | ||||||
|  | @ -22,6 +22,7 @@ | ||||||
| /* Extensions */ | /* Extensions */ | ||||||
| 
 | 
 | ||||||
| #define VFIO_TYPE1_IOMMU		1 | #define VFIO_TYPE1_IOMMU		1 | ||||||
|  | #define VFIO_SPAPR_TCE_IOMMU		2 | ||||||
| 
 | 
 | ||||||
| /*
 | /*
 | ||||||
|  * The IOCTL interface is designed for extensibility by embedding the |  * The IOCTL interface is designed for extensibility by embedding the | ||||||
|  | @ -375,4 +376,37 @@ struct vfio_iommu_type1_dma_unmap { | ||||||
| 
 | 
 | ||||||
| #define VFIO_IOMMU_UNMAP_DMA _IO(VFIO_TYPE, VFIO_BASE + 14) | #define VFIO_IOMMU_UNMAP_DMA _IO(VFIO_TYPE, VFIO_BASE + 14) | ||||||
| 
 | 
 | ||||||
|  | /*
 | ||||||
|  |  * IOCTLs to enable/disable IOMMU container usage. | ||||||
|  |  * No parameters are supported. | ||||||
|  |  */ | ||||||
|  | #define VFIO_IOMMU_ENABLE	_IO(VFIO_TYPE, VFIO_BASE + 15) | ||||||
|  | #define VFIO_IOMMU_DISABLE	_IO(VFIO_TYPE, VFIO_BASE + 16) | ||||||
|  | 
 | ||||||
|  | /* -------- Additional API for SPAPR TCE (Server POWERPC) IOMMU -------- */ | ||||||
|  | 
 | ||||||
|  | /*
 | ||||||
|  |  * The SPAPR TCE info struct provides the information about the PCI bus | ||||||
|  |  * address ranges available for DMA, these values are programmed into | ||||||
|  |  * the hardware so the guest has to know that information. | ||||||
|  |  * | ||||||
|  |  * The DMA 32 bit window start is an absolute PCI bus address. | ||||||
|  |  * The IOVA address passed via map/unmap ioctls are absolute PCI bus | ||||||
|  |  * addresses too so the window works as a filter rather than an offset | ||||||
|  |  * for IOVA addresses. | ||||||
|  |  * | ||||||
|  |  * A flag will need to be added if other page sizes are supported, | ||||||
|  |  * so as defined here, it is always 4k. | ||||||
|  |  */ | ||||||
|  | struct vfio_iommu_spapr_tce_info { | ||||||
|  | 	__u32 argsz; | ||||||
|  | 	__u32 flags;			/* reserved for future use */ | ||||||
|  | 	__u32 dma32_window_start;	/* 32 bit window start (bytes) */ | ||||||
|  | 	__u32 dma32_window_size;	/* 32 bit window size (bytes) */ | ||||||
|  | }; | ||||||
|  | 
 | ||||||
|  | #define VFIO_IOMMU_SPAPR_TCE_GET_INFO	_IO(VFIO_TYPE, VFIO_BASE + 12) | ||||||
|  | 
 | ||||||
|  | /* ***************************************************************** */ | ||||||
|  | 
 | ||||||
| #endif /* _UAPIVFIO_H */ | #endif /* _UAPIVFIO_H */ | ||||||
|  |  | ||||||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue
	
	 Alexey Kardashevskiy
				Alexey Kardashevskiy