Merge branch 'master' into next
This commit is contained in:
		
				commit
				
					
						d25d6fa1a9
					
				
			
		
					 4041 changed files with 230073 additions and 108041 deletions
				
			
		
							
								
								
									
										19
									
								
								.gitignore
									
										
									
									
										vendored
									
									
								
							
							
						
						
									
										19
									
								
								.gitignore
									
										
									
									
										vendored
									
									
								
							| 
						 | 
					@ -34,13 +34,18 @@ modules.builtin
 | 
				
			||||||
#
 | 
					#
 | 
				
			||||||
# Top-level generic files
 | 
					# Top-level generic files
 | 
				
			||||||
#
 | 
					#
 | 
				
			||||||
tags
 | 
					/tags
 | 
				
			||||||
TAGS
 | 
					/TAGS
 | 
				
			||||||
vmlinux
 | 
					/linux
 | 
				
			||||||
vmlinuz
 | 
					/vmlinux
 | 
				
			||||||
System.map
 | 
					/vmlinuz
 | 
				
			||||||
Module.markers
 | 
					/System.map
 | 
				
			||||||
Module.symvers
 | 
					/Module.markers
 | 
				
			||||||
 | 
					/Module.symvers
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#
 | 
				
			||||||
 | 
					# git files that we don't want to ignore even if they are dot-files
 | 
				
			||||||
 | 
					#
 | 
				
			||||||
!.gitignore
 | 
					!.gitignore
 | 
				
			||||||
!.mailmap
 | 
					!.mailmap
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
							
								
								
									
										7
									
								
								Documentation/ABI/stable/sysfs-devices-node
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										7
									
								
								Documentation/ABI/stable/sysfs-devices-node
									
										
									
									
									
										Normal file
									
								
							| 
						 | 
					@ -0,0 +1,7 @@
 | 
				
			||||||
 | 
					What:		/sys/devices/system/node/nodeX
 | 
				
			||||||
 | 
					Date:		October 2002
 | 
				
			||||||
 | 
					Contact:	Linux Memory Management list <linux-mm@kvack.org>
 | 
				
			||||||
 | 
					Description:
 | 
				
			||||||
 | 
							When CONFIG_NUMA is enabled, this is a directory containing
 | 
				
			||||||
 | 
							information on node X such as what CPUs are local to the
 | 
				
			||||||
 | 
							node.
 | 
				
			||||||
| 
						 | 
					@ -160,7 +160,7 @@ Description:
 | 
				
			||||||
		match the driver to the device.  For example:
 | 
							match the driver to the device.  For example:
 | 
				
			||||||
		# echo "046d c315" > /sys/bus/usb/drivers/foo/remove_id
 | 
							# echo "046d c315" > /sys/bus/usb/drivers/foo/remove_id
 | 
				
			||||||
 | 
					
 | 
				
			||||||
What:		/sys/bus/usb/device/.../avoid_reset
 | 
					What:		/sys/bus/usb/device/.../avoid_reset_quirk
 | 
				
			||||||
Date:		December 2009
 | 
					Date:		December 2009
 | 
				
			||||||
Contact:	Oliver Neukum <oliver@neukum.org>
 | 
					Contact:	Oliver Neukum <oliver@neukum.org>
 | 
				
			||||||
Description:
 | 
					Description:
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -1,12 +1,12 @@
 | 
				
			||||||
			Dynamic DMA mapping
 | 
							     Dynamic DMA mapping Guide
 | 
				
			||||||
			===================
 | 
							     =========================
 | 
				
			||||||
 | 
					
 | 
				
			||||||
		 David S. Miller <davem@redhat.com>
 | 
							 David S. Miller <davem@redhat.com>
 | 
				
			||||||
		 Richard Henderson <rth@cygnus.com>
 | 
							 Richard Henderson <rth@cygnus.com>
 | 
				
			||||||
		  Jakub Jelinek <jakub@redhat.com>
 | 
							  Jakub Jelinek <jakub@redhat.com>
 | 
				
			||||||
 | 
					
 | 
				
			||||||
This document describes the DMA mapping system in terms of the pci_
 | 
					This is a guide to device driver writers on how to use the DMA API
 | 
				
			||||||
API.  For a similar API that works for generic devices, see
 | 
					with example pseudo-code.  For a concise description of the API, see
 | 
				
			||||||
DMA-API.txt.
 | 
					DMA-API.txt.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
Most of the 64bit platforms have special hardware that translates bus
 | 
					Most of the 64bit platforms have special hardware that translates bus
 | 
				
			||||||
| 
						 | 
					@ -26,12 +26,15 @@ mapped only for the time they are actually used and unmapped after the DMA
 | 
				
			||||||
transfer.
 | 
					transfer.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
The following API will work of course even on platforms where no such
 | 
					The following API will work of course even on platforms where no such
 | 
				
			||||||
hardware exists, see e.g. arch/x86/include/asm/pci.h for how it is implemented on
 | 
					hardware exists.
 | 
				
			||||||
top of the virt_to_bus interface.
 | 
					
 | 
				
			||||||
 | 
					Note that the DMA API works with any bus independent of the underlying
 | 
				
			||||||
 | 
					microprocessor architecture. You should use the DMA API rather than
 | 
				
			||||||
 | 
					the bus specific DMA API (e.g. pci_dma_*).
 | 
				
			||||||
 | 
					
 | 
				
			||||||
First of all, you should make sure
 | 
					First of all, you should make sure
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#include <linux/pci.h>
 | 
					#include <linux/dma-mapping.h>
 | 
				
			||||||
 | 
					
 | 
				
			||||||
is in your driver. This file will obtain for you the definition of the
 | 
					is in your driver. This file will obtain for you the definition of the
 | 
				
			||||||
dma_addr_t (which can hold any valid DMA address for the platform)
 | 
					dma_addr_t (which can hold any valid DMA address for the platform)
 | 
				
			||||||
| 
						 | 
					@ -78,44 +81,43 @@ for you to DMA from/to.
 | 
				
			||||||
			DMA addressing limitations
 | 
								DMA addressing limitations
 | 
				
			||||||
 | 
					
 | 
				
			||||||
Does your device have any DMA addressing limitations?  For example, is
 | 
					Does your device have any DMA addressing limitations?  For example, is
 | 
				
			||||||
your device only capable of driving the low order 24-bits of address
 | 
					your device only capable of driving the low order 24-bits of address?
 | 
				
			||||||
on the PCI bus for SAC DMA transfers?  If so, you need to inform the
 | 
					If so, you need to inform the kernel of this fact.
 | 
				
			||||||
PCI layer of this fact.
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
By default, the kernel assumes that your device can address the full
 | 
					By default, the kernel assumes that your device can address the full
 | 
				
			||||||
32-bits in a SAC cycle.  For a 64-bit DAC capable device, this needs
 | 
					32-bits.  For a 64-bit capable device, this needs to be increased.
 | 
				
			||||||
to be increased.  And for a device with limitations, as discussed in
 | 
					And for a device with limitations, as discussed in the previous
 | 
				
			||||||
the previous paragraph, it needs to be decreased.
 | 
					paragraph, it needs to be decreased.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
pci_alloc_consistent() by default will return 32-bit DMA addresses.
 | 
					Special note about PCI: PCI-X specification requires PCI-X devices to
 | 
				
			||||||
PCI-X specification requires PCI-X devices to support 64-bit
 | 
					support 64-bit addressing (DAC) for all transactions.  And at least
 | 
				
			||||||
addressing (DAC) for all transactions. And at least one platform (SGI
 | 
					one platform (SGI SN2) requires 64-bit consistent allocations to
 | 
				
			||||||
SN2) requires 64-bit consistent allocations to operate correctly when
 | 
					operate correctly when the IO bus is in PCI-X mode.
 | 
				
			||||||
the IO bus is in PCI-X mode. Therefore, like with pci_set_dma_mask(),
 | 
					 | 
				
			||||||
it's good practice to call pci_set_consistent_dma_mask() to set the
 | 
					 | 
				
			||||||
appropriate mask even if your device only supports 32-bit DMA
 | 
					 | 
				
			||||||
(default) and especially if it's a PCI-X device.
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
For correct operation, you must interrogate the PCI layer in your
 | 
					For correct operation, you must interrogate the kernel in your device
 | 
				
			||||||
device probe routine to see if the PCI controller on the machine can
 | 
					probe routine to see if the DMA controller on the machine can properly
 | 
				
			||||||
properly support the DMA addressing limitation your device has.  It is
 | 
					support the DMA addressing limitation your device has.  It is good
 | 
				
			||||||
good style to do this even if your device holds the default setting,
 | 
					style to do this even if your device holds the default setting,
 | 
				
			||||||
because this shows that you did think about these issues wrt. your
 | 
					because this shows that you did think about these issues wrt. your
 | 
				
			||||||
device.
 | 
					device.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
The query is performed via a call to pci_set_dma_mask():
 | 
					The query is performed via a call to dma_set_mask():
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	int pci_set_dma_mask(struct pci_dev *pdev, u64 device_mask);
 | 
						int dma_set_mask(struct device *dev, u64 mask);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
The query for consistent allocations is performed via a call to
 | 
					The query for consistent allocations is performed via a call to
 | 
				
			||||||
pci_set_consistent_dma_mask():
 | 
					dma_set_coherent_mask():
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	int pci_set_consistent_dma_mask(struct pci_dev *pdev, u64 device_mask);
 | 
						int dma_set_coherent_mask(struct device *dev, u64 mask);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
Here, pdev is a pointer to the PCI device struct of your device, and
 | 
					Here, dev is a pointer to the device struct of your device, and mask
 | 
				
			||||||
device_mask is a bit mask describing which bits of a PCI address your
 | 
					is a bit mask describing which bits of an address your device
 | 
				
			||||||
device supports.  It returns zero if your card can perform DMA
 | 
					supports.  It returns zero if your card can perform DMA properly on
 | 
				
			||||||
properly on the machine given the address mask you provided.
 | 
					the machine given the address mask you provided.  In general, the
 | 
				
			||||||
 | 
					device struct of your device is embedded in the bus specific device
 | 
				
			||||||
 | 
					struct of your device.  For example, a pointer to the device struct of
 | 
				
			||||||
 | 
					your PCI device is pdev->dev (pdev is a pointer to the PCI device
 | 
				
			||||||
 | 
					struct of your device).
 | 
				
			||||||
 | 
					
 | 
				
			||||||
If it returns non-zero, your device cannot perform DMA properly on
 | 
					If it returns non-zero, your device cannot perform DMA properly on
 | 
				
			||||||
this platform, and attempting to do so will result in undefined
 | 
					this platform, and attempting to do so will result in undefined
 | 
				
			||||||
| 
						 | 
					@ -133,31 +135,30 @@ of your driver reports that performance is bad or that the device is not
 | 
				
			||||||
even detected, you can ask them for the kernel messages to find out
 | 
					even detected, you can ask them for the kernel messages to find out
 | 
				
			||||||
exactly why.
 | 
					exactly why.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
The standard 32-bit addressing PCI device would do something like
 | 
					The standard 32-bit addressing device would do something like this:
 | 
				
			||||||
this:
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
	if (pci_set_dma_mask(pdev, DMA_BIT_MASK(32))) {
 | 
						if (dma_set_mask(dev, DMA_BIT_MASK(32))) {
 | 
				
			||||||
		printk(KERN_WARNING
 | 
							printk(KERN_WARNING
 | 
				
			||||||
		       "mydev: No suitable DMA available.\n");
 | 
							       "mydev: No suitable DMA available.\n");
 | 
				
			||||||
		goto ignore_this_device;
 | 
							goto ignore_this_device;
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
Another common scenario is a 64-bit capable device.  The approach
 | 
					Another common scenario is a 64-bit capable device.  The approach here
 | 
				
			||||||
here is to try for 64-bit DAC addressing, but back down to a
 | 
					is to try for 64-bit addressing, but back down to a 32-bit mask that
 | 
				
			||||||
32-bit mask should that fail.  The PCI platform code may fail the
 | 
					should not fail.  The kernel may fail the 64-bit mask not because the
 | 
				
			||||||
64-bit mask not because the platform is not capable of 64-bit
 | 
					platform is not capable of 64-bit addressing.  Rather, it may fail in
 | 
				
			||||||
addressing.  Rather, it may fail in this case simply because
 | 
					this case simply because 32-bit addressing is done more efficiently
 | 
				
			||||||
32-bit SAC addressing is done more efficiently than DAC addressing.
 | 
					than 64-bit addressing.  For example, Sparc64 PCI SAC addressing is
 | 
				
			||||||
Sparc64 is one platform which behaves in this way.
 | 
					more efficient than DAC addressing.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
Here is how you would handle a 64-bit capable device which can drive
 | 
					Here is how you would handle a 64-bit capable device which can drive
 | 
				
			||||||
all 64-bits when accessing streaming DMA:
 | 
					all 64-bits when accessing streaming DMA:
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	int using_dac;
 | 
						int using_dac;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	if (!pci_set_dma_mask(pdev, DMA_BIT_MASK(64))) {
 | 
						if (!dma_set_mask(dev, DMA_BIT_MASK(64))) {
 | 
				
			||||||
		using_dac = 1;
 | 
							using_dac = 1;
 | 
				
			||||||
	} else if (!pci_set_dma_mask(pdev, DMA_BIT_MASK(32))) {
 | 
						} else if (!dma_set_mask(dev, DMA_BIT_MASK(32))) {
 | 
				
			||||||
		using_dac = 0;
 | 
							using_dac = 0;
 | 
				
			||||||
	} else {
 | 
						} else {
 | 
				
			||||||
		printk(KERN_WARNING
 | 
							printk(KERN_WARNING
 | 
				
			||||||
| 
						 | 
					@ -170,36 +171,36 @@ the case would look like this:
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	int using_dac, consistent_using_dac;
 | 
						int using_dac, consistent_using_dac;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	if (!pci_set_dma_mask(pdev, DMA_BIT_MASK(64))) {
 | 
						if (!dma_set_mask(dev, DMA_BIT_MASK(64))) {
 | 
				
			||||||
		using_dac = 1;
 | 
							using_dac = 1;
 | 
				
			||||||
	   	consistent_using_dac = 1;
 | 
						   	consistent_using_dac = 1;
 | 
				
			||||||
		pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64));
 | 
							dma_set_coherent_mask(dev, DMA_BIT_MASK(64));
 | 
				
			||||||
	} else if (!pci_set_dma_mask(pdev, DMA_BIT_MASK(32))) {
 | 
						} else if (!dma_set_mask(dev, DMA_BIT_MASK(32))) {
 | 
				
			||||||
		using_dac = 0;
 | 
							using_dac = 0;
 | 
				
			||||||
		consistent_using_dac = 0;
 | 
							consistent_using_dac = 0;
 | 
				
			||||||
		pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32));
 | 
							dma_set_coherent_mask(dev, DMA_BIT_MASK(32));
 | 
				
			||||||
	} else {
 | 
						} else {
 | 
				
			||||||
		printk(KERN_WARNING
 | 
							printk(KERN_WARNING
 | 
				
			||||||
		       "mydev: No suitable DMA available.\n");
 | 
							       "mydev: No suitable DMA available.\n");
 | 
				
			||||||
		goto ignore_this_device;
 | 
							goto ignore_this_device;
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
pci_set_consistent_dma_mask() will always be able to set the same or a
 | 
					dma_set_coherent_mask() will always be able to set the same or a
 | 
				
			||||||
smaller mask as pci_set_dma_mask(). However for the rare case that a
 | 
					smaller mask as dma_set_mask(). However for the rare case that a
 | 
				
			||||||
device driver only uses consistent allocations, one would have to
 | 
					device driver only uses consistent allocations, one would have to
 | 
				
			||||||
check the return value from pci_set_consistent_dma_mask().
 | 
					check the return value from dma_set_coherent_mask().
 | 
				
			||||||
 | 
					
 | 
				
			||||||
Finally, if your device can only drive the low 24-bits of
 | 
					Finally, if your device can only drive the low 24-bits of
 | 
				
			||||||
address during PCI bus mastering you might do something like:
 | 
					address you might do something like:
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	if (pci_set_dma_mask(pdev, DMA_BIT_MASK(24))) {
 | 
						if (dma_set_mask(dev, DMA_BIT_MASK(24))) {
 | 
				
			||||||
		printk(KERN_WARNING
 | 
							printk(KERN_WARNING
 | 
				
			||||||
		       "mydev: 24-bit DMA addressing not available.\n");
 | 
							       "mydev: 24-bit DMA addressing not available.\n");
 | 
				
			||||||
		goto ignore_this_device;
 | 
							goto ignore_this_device;
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
When pci_set_dma_mask() is successful, and returns zero, the PCI layer
 | 
					When dma_set_mask() is successful, and returns zero, the kernel saves
 | 
				
			||||||
saves away this mask you have provided.  The PCI layer will use this
 | 
					away this mask you have provided.  The kernel will use this
 | 
				
			||||||
information later when you make DMA mappings.
 | 
					information later when you make DMA mappings.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
There is a case which we are aware of at this time, which is worth
 | 
					There is a case which we are aware of at this time, which is worth
 | 
				
			||||||
| 
						 | 
					@ -208,7 +209,7 @@ functions (for example a sound card provides playback and record
 | 
				
			||||||
functions) and the various different functions have _different_
 | 
					functions) and the various different functions have _different_
 | 
				
			||||||
DMA addressing limitations, you may wish to probe each mask and
 | 
					DMA addressing limitations, you may wish to probe each mask and
 | 
				
			||||||
only provide the functionality which the machine can handle.  It
 | 
					only provide the functionality which the machine can handle.  It
 | 
				
			||||||
is important that the last call to pci_set_dma_mask() be for the
 | 
					is important that the last call to dma_set_mask() be for the
 | 
				
			||||||
most specific mask.
 | 
					most specific mask.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
Here is pseudo-code showing how this might be done:
 | 
					Here is pseudo-code showing how this might be done:
 | 
				
			||||||
| 
						 | 
					@ -217,17 +218,17 @@ Here is pseudo-code showing how this might be done:
 | 
				
			||||||
	#define RECORD_ADDRESS_BITS	DMA_BIT_MASK(24)
 | 
						#define RECORD_ADDRESS_BITS	DMA_BIT_MASK(24)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	struct my_sound_card *card;
 | 
						struct my_sound_card *card;
 | 
				
			||||||
	struct pci_dev *pdev;
 | 
						struct device *dev;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	...
 | 
						...
 | 
				
			||||||
	if (!pci_set_dma_mask(pdev, PLAYBACK_ADDRESS_BITS)) {
 | 
						if (!dma_set_mask(dev, PLAYBACK_ADDRESS_BITS)) {
 | 
				
			||||||
		card->playback_enabled = 1;
 | 
							card->playback_enabled = 1;
 | 
				
			||||||
	} else {
 | 
						} else {
 | 
				
			||||||
		card->playback_enabled = 0;
 | 
							card->playback_enabled = 0;
 | 
				
			||||||
		printk(KERN_WARNING "%s: Playback disabled due to DMA limitations.\n",
 | 
							printk(KERN_WARNING "%s: Playback disabled due to DMA limitations.\n",
 | 
				
			||||||
		       card->name);
 | 
							       card->name);
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
	if (!pci_set_dma_mask(pdev, RECORD_ADDRESS_BITS)) {
 | 
						if (!dma_set_mask(dev, RECORD_ADDRESS_BITS)) {
 | 
				
			||||||
		card->record_enabled = 1;
 | 
							card->record_enabled = 1;
 | 
				
			||||||
	} else {
 | 
						} else {
 | 
				
			||||||
		card->record_enabled = 0;
 | 
							card->record_enabled = 0;
 | 
				
			||||||
| 
						 | 
					@ -252,8 +253,8 @@ There are two types of DMA mappings:
 | 
				
			||||||
  Think of "consistent" as "synchronous" or "coherent".
 | 
					  Think of "consistent" as "synchronous" or "coherent".
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  The current default is to return consistent memory in the low 32
 | 
					  The current default is to return consistent memory in the low 32
 | 
				
			||||||
  bits of the PCI bus space.  However, for future compatibility you
 | 
					  bits of the bus space.  However, for future compatibility you should
 | 
				
			||||||
  should set the consistent mask even if this default is fine for your
 | 
					  set the consistent mask even if this default is fine for your
 | 
				
			||||||
  driver.
 | 
					  driver.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  Good examples of what to use consistent mappings for are:
 | 
					  Good examples of what to use consistent mappings for are:
 | 
				
			||||||
| 
						 | 
					@ -285,9 +286,9 @@ There are two types of DMA mappings:
 | 
				
			||||||
	     found in PCI bridges (such as by reading a register's value
 | 
						     found in PCI bridges (such as by reading a register's value
 | 
				
			||||||
	     after writing it).
 | 
						     after writing it).
 | 
				
			||||||
 | 
					
 | 
				
			||||||
- Streaming DMA mappings which are usually mapped for one DMA transfer,
 | 
					- Streaming DMA mappings which are usually mapped for one DMA
 | 
				
			||||||
  unmapped right after it (unless you use pci_dma_sync_* below) and for which
 | 
					  transfer, unmapped right after it (unless you use dma_sync_* below)
 | 
				
			||||||
  hardware can optimize for sequential accesses.
 | 
					  and for which hardware can optimize for sequential accesses.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  Think of "streaming" as "asynchronous" or "outside the coherency
 | 
					  Think of "streaming" as "asynchronous" or "outside the coherency
 | 
				
			||||||
  domain".
 | 
					  domain".
 | 
				
			||||||
| 
						 | 
					@ -302,8 +303,8 @@ There are two types of DMA mappings:
 | 
				
			||||||
  optimizations the hardware allows.  To this end, when using
 | 
					  optimizations the hardware allows.  To this end, when using
 | 
				
			||||||
  such mappings you must be explicit about what you want to happen.
 | 
					  such mappings you must be explicit about what you want to happen.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
Neither type of DMA mapping has alignment restrictions that come
 | 
					Neither type of DMA mapping has alignment restrictions that come from
 | 
				
			||||||
from PCI, although some devices may have such restrictions.
 | 
					the underlying bus, although some devices may have such restrictions.
 | 
				
			||||||
Also, systems with caches that aren't DMA-coherent will work better
 | 
					Also, systems with caches that aren't DMA-coherent will work better
 | 
				
			||||||
when the underlying buffers don't share cache lines with other data.
 | 
					when the underlying buffers don't share cache lines with other data.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					@ -315,33 +316,27 @@ you should do:
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	dma_addr_t dma_handle;
 | 
						dma_addr_t dma_handle;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	cpu_addr = pci_alloc_consistent(pdev, size, &dma_handle);
 | 
						cpu_addr = dma_alloc_coherent(dev, size, &dma_handle, gfp);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
where pdev is a struct pci_dev *. This may be called in interrupt context.
 | 
					where dev is a struct device *. This may be called in interrupt
 | 
				
			||||||
You should use dma_alloc_coherent (see DMA-API.txt) for buses
 | 
					context with the GFP_ATOMIC flag.
 | 
				
			||||||
where devices don't have struct pci_dev (like ISA, EISA).
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
This argument is needed because the DMA translations may be bus
 | 
					 | 
				
			||||||
specific (and often is private to the bus which the device is attached
 | 
					 | 
				
			||||||
to).
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
Size is the length of the region you want to allocate, in bytes.
 | 
					Size is the length of the region you want to allocate, in bytes.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
This routine will allocate RAM for that region, so it acts similarly to
 | 
					This routine will allocate RAM for that region, so it acts similarly to
 | 
				
			||||||
__get_free_pages (but takes size instead of a page order).  If your
 | 
					__get_free_pages (but takes size instead of a page order).  If your
 | 
				
			||||||
driver needs regions sized smaller than a page, you may prefer using
 | 
					driver needs regions sized smaller than a page, you may prefer using
 | 
				
			||||||
the pci_pool interface, described below.
 | 
					the dma_pool interface, described below.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
The consistent DMA mapping interfaces, for non-NULL pdev, will by
 | 
					The consistent DMA mapping interfaces, for non-NULL dev, will by
 | 
				
			||||||
default return a DMA address which is SAC (Single Address Cycle)
 | 
					default return a DMA address which is 32-bit addressable.  Even if the
 | 
				
			||||||
addressable.  Even if the device indicates (via PCI dma mask) that it
 | 
					device indicates (via DMA mask) that it may address the upper 32-bits,
 | 
				
			||||||
may address the upper 32-bits and thus perform DAC cycles, consistent
 | 
					consistent allocation will only return > 32-bit addresses for DMA if
 | 
				
			||||||
allocation will only return > 32-bit PCI addresses for DMA if the
 | 
					the consistent DMA mask has been explicitly changed via
 | 
				
			||||||
consistent dma mask has been explicitly changed via
 | 
					dma_set_coherent_mask().  This is true of the dma_pool interface as
 | 
				
			||||||
pci_set_consistent_dma_mask().  This is true of the pci_pool interface
 | 
					well.
 | 
				
			||||||
as well.
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
pci_alloc_consistent returns two values: the virtual address which you
 | 
					dma_alloc_coherent returns two values: the virtual address which you
 | 
				
			||||||
can use to access it from the CPU and dma_handle which you pass to the
 | 
					can use to access it from the CPU and dma_handle which you pass to the
 | 
				
			||||||
card.
 | 
					card.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					@ -354,54 +349,54 @@ buffer you receive will not cross a 64K boundary.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
To unmap and free such a DMA region, you call:
 | 
					To unmap and free such a DMA region, you call:
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	pci_free_consistent(pdev, size, cpu_addr, dma_handle);
 | 
						dma_free_coherent(dev, size, cpu_addr, dma_handle);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
where pdev, size are the same as in the above call and cpu_addr and
 | 
					where dev, size are the same as in the above call and cpu_addr and
 | 
				
			||||||
dma_handle are the values pci_alloc_consistent returned to you.
 | 
					dma_handle are the values dma_alloc_coherent returned to you.
 | 
				
			||||||
This function may not be called in interrupt context.
 | 
					This function may not be called in interrupt context.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
If your driver needs lots of smaller memory regions, you can write
 | 
					If your driver needs lots of smaller memory regions, you can write
 | 
				
			||||||
custom code to subdivide pages returned by pci_alloc_consistent,
 | 
					custom code to subdivide pages returned by dma_alloc_coherent,
 | 
				
			||||||
or you can use the pci_pool API to do that.  A pci_pool is like
 | 
					or you can use the dma_pool API to do that.  A dma_pool is like
 | 
				
			||||||
a kmem_cache, but it uses pci_alloc_consistent not __get_free_pages.
 | 
					a kmem_cache, but it uses dma_alloc_coherent not __get_free_pages.
 | 
				
			||||||
Also, it understands common hardware constraints for alignment,
 | 
					Also, it understands common hardware constraints for alignment,
 | 
				
			||||||
like queue heads needing to be aligned on N byte boundaries.
 | 
					like queue heads needing to be aligned on N byte boundaries.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
Create a pci_pool like this:
 | 
					Create a dma_pool like this:
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	struct pci_pool *pool;
 | 
						struct dma_pool *pool;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	pool = pci_pool_create(name, pdev, size, align, alloc);
 | 
						pool = dma_pool_create(name, dev, size, align, alloc);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
The "name" is for diagnostics (like a kmem_cache name); pdev and size
 | 
					The "name" is for diagnostics (like a kmem_cache name); dev and size
 | 
				
			||||||
are as above.  The device's hardware alignment requirement for this
 | 
					are as above.  The device's hardware alignment requirement for this
 | 
				
			||||||
type of data is "align" (which is expressed in bytes, and must be a
 | 
					type of data is "align" (which is expressed in bytes, and must be a
 | 
				
			||||||
power of two).  If your device has no boundary crossing restrictions,
 | 
					power of two).  If your device has no boundary crossing restrictions,
 | 
				
			||||||
pass 0 for alloc; passing 4096 says memory allocated from this pool
 | 
					pass 0 for alloc; passing 4096 says memory allocated from this pool
 | 
				
			||||||
must not cross 4KByte boundaries (but at that time it may be better to
 | 
					must not cross 4KByte boundaries (but at that time it may be better to
 | 
				
			||||||
go for pci_alloc_consistent directly instead).
 | 
					go for dma_alloc_coherent directly instead).
 | 
				
			||||||
 | 
					
 | 
				
			||||||
Allocate memory from a pci pool like this:
 | 
					Allocate memory from a dma pool like this:
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	cpu_addr = pci_pool_alloc(pool, flags, &dma_handle);
 | 
						cpu_addr = dma_pool_alloc(pool, flags, &dma_handle);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
flags are SLAB_KERNEL if blocking is permitted (not in_interrupt nor
 | 
					flags are SLAB_KERNEL if blocking is permitted (not in_interrupt nor
 | 
				
			||||||
holding SMP locks), SLAB_ATOMIC otherwise.  Like pci_alloc_consistent,
 | 
					holding SMP locks), SLAB_ATOMIC otherwise.  Like dma_alloc_coherent,
 | 
				
			||||||
this returns two values, cpu_addr and dma_handle.
 | 
					this returns two values, cpu_addr and dma_handle.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
Free memory that was allocated from a pci_pool like this:
 | 
					Free memory that was allocated from a dma_pool like this:
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	pci_pool_free(pool, cpu_addr, dma_handle);
 | 
						dma_pool_free(pool, cpu_addr, dma_handle);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
where pool is what you passed to pci_pool_alloc, and cpu_addr and
 | 
					where pool is what you passed to dma_pool_alloc, and cpu_addr and
 | 
				
			||||||
dma_handle are the values pci_pool_alloc returned. This function
 | 
					dma_handle are the values dma_pool_alloc returned. This function
 | 
				
			||||||
may be called in interrupt context.
 | 
					may be called in interrupt context.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
Destroy a pci_pool by calling:
 | 
					Destroy a dma_pool by calling:
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	pci_pool_destroy(pool);
 | 
						dma_pool_destroy(pool);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
Make sure you've called pci_pool_free for all memory allocated
 | 
					Make sure you've called dma_pool_free for all memory allocated
 | 
				
			||||||
from a pool before you destroy the pool. This function may not
 | 
					from a pool before you destroy the pool. This function may not
 | 
				
			||||||
be called in interrupt context.
 | 
					be called in interrupt context.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					@ -411,15 +406,15 @@ The interfaces described in subsequent portions of this document
 | 
				
			||||||
take a DMA direction argument, which is an integer and takes on
 | 
					take a DMA direction argument, which is an integer and takes on
 | 
				
			||||||
one of the following values:
 | 
					one of the following values:
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 PCI_DMA_BIDIRECTIONAL
 | 
					 DMA_BIDIRECTIONAL
 | 
				
			||||||
 PCI_DMA_TODEVICE
 | 
					 DMA_TO_DEVICE
 | 
				
			||||||
 PCI_DMA_FROMDEVICE
 | 
					 DMA_FROM_DEVICE
 | 
				
			||||||
 PCI_DMA_NONE
 | 
					 DMA_NONE
 | 
				
			||||||
 | 
					
 | 
				
			||||||
One should provide the exact DMA direction if you know it.
 | 
					One should provide the exact DMA direction if you know it.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
PCI_DMA_TODEVICE means "from main memory to the PCI device"
 | 
					DMA_TO_DEVICE means "from main memory to the device"
 | 
				
			||||||
PCI_DMA_FROMDEVICE means "from the PCI device to main memory"
 | 
					DMA_FROM_DEVICE means "from the device to main memory"
 | 
				
			||||||
It is the direction in which the data moves during the DMA
 | 
					It is the direction in which the data moves during the DMA
 | 
				
			||||||
transfer.
 | 
					transfer.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					@ -427,12 +422,12 @@ You are _strongly_ encouraged to specify this as precisely
 | 
				
			||||||
as you possibly can.
 | 
					as you possibly can.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
If you absolutely cannot know the direction of the DMA transfer,
 | 
					If you absolutely cannot know the direction of the DMA transfer,
 | 
				
			||||||
specify PCI_DMA_BIDIRECTIONAL.  It means that the DMA can go in
 | 
					specify DMA_BIDIRECTIONAL.  It means that the DMA can go in
 | 
				
			||||||
either direction.  The platform guarantees that you may legally
 | 
					either direction.  The platform guarantees that you may legally
 | 
				
			||||||
specify this, and that it will work, but this may be at the
 | 
					specify this, and that it will work, but this may be at the
 | 
				
			||||||
cost of performance for example.
 | 
					cost of performance for example.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
The value PCI_DMA_NONE is to be used for debugging.  One can
 | 
					The value DMA_NONE is to be used for debugging.  One can
 | 
				
			||||||
hold this in a data structure before you come to know the
 | 
					hold this in a data structure before you come to know the
 | 
				
			||||||
precise direction, and this will help catch cases where your
 | 
					precise direction, and this will help catch cases where your
 | 
				
			||||||
direction tracking logic has failed to set things up properly.
 | 
					direction tracking logic has failed to set things up properly.
 | 
				
			||||||
| 
						 | 
					@ -442,21 +437,21 @@ potential platform-specific optimizations of such) is for debugging.
 | 
				
			||||||
Some platforms actually have a write permission boolean which DMA
 | 
					Some platforms actually have a write permission boolean which DMA
 | 
				
			||||||
mappings can be marked with, much like page protections in the user
 | 
					mappings can be marked with, much like page protections in the user
 | 
				
			||||||
program address space.  Such platforms can and do report errors in the
 | 
					program address space.  Such platforms can and do report errors in the
 | 
				
			||||||
kernel logs when the PCI controller hardware detects violation of the
 | 
					kernel logs when the DMA controller hardware detects violation of the
 | 
				
			||||||
permission setting.
 | 
					permission setting.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
Only streaming mappings specify a direction, consistent mappings
 | 
					Only streaming mappings specify a direction, consistent mappings
 | 
				
			||||||
implicitly have a direction attribute setting of
 | 
					implicitly have a direction attribute setting of
 | 
				
			||||||
PCI_DMA_BIDIRECTIONAL.
 | 
					DMA_BIDIRECTIONAL.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
The SCSI subsystem tells you the direction to use in the
 | 
					The SCSI subsystem tells you the direction to use in the
 | 
				
			||||||
'sc_data_direction' member of the SCSI command your driver is
 | 
					'sc_data_direction' member of the SCSI command your driver is
 | 
				
			||||||
working on.
 | 
					working on.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
For Networking drivers, it's a rather simple affair.  For transmit
 | 
					For Networking drivers, it's a rather simple affair.  For transmit
 | 
				
			||||||
packets, map/unmap them with the PCI_DMA_TODEVICE direction
 | 
					packets, map/unmap them with the DMA_TO_DEVICE direction
 | 
				
			||||||
specifier.  For receive packets, just the opposite, map/unmap them
 | 
					specifier.  For receive packets, just the opposite, map/unmap them
 | 
				
			||||||
with the PCI_DMA_FROMDEVICE direction specifier.
 | 
					with the DMA_FROM_DEVICE direction specifier.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
		  Using Streaming DMA mappings
 | 
							  Using Streaming DMA mappings
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					@ -467,43 +462,43 @@ scatterlist.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
To map a single region, you do:
 | 
					To map a single region, you do:
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	struct pci_dev *pdev = mydev->pdev;
 | 
						struct device *dev = &my_dev->dev;
 | 
				
			||||||
	dma_addr_t dma_handle;
 | 
						dma_addr_t dma_handle;
 | 
				
			||||||
	void *addr = buffer->ptr;
 | 
						void *addr = buffer->ptr;
 | 
				
			||||||
	size_t size = buffer->len;
 | 
						size_t size = buffer->len;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	dma_handle = pci_map_single(pdev, addr, size, direction);
 | 
						dma_handle = dma_map_single(dev, addr, size, direction);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
and to unmap it:
 | 
					and to unmap it:
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	pci_unmap_single(pdev, dma_handle, size, direction);
 | 
						dma_unmap_single(dev, dma_handle, size, direction);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
You should call pci_unmap_single when the DMA activity is finished, e.g.
 | 
					You should call dma_unmap_single when the DMA activity is finished, e.g.
 | 
				
			||||||
from the interrupt which told you that the DMA transfer is done.
 | 
					from the interrupt which told you that the DMA transfer is done.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
Using cpu pointers like this for single mappings has a disadvantage,
 | 
					Using cpu pointers like this for single mappings has a disadvantage,
 | 
				
			||||||
you cannot reference HIGHMEM memory in this way.  Thus, there is a
 | 
					you cannot reference HIGHMEM memory in this way.  Thus, there is a
 | 
				
			||||||
map/unmap interface pair akin to pci_{map,unmap}_single.  These
 | 
					map/unmap interface pair akin to dma_{map,unmap}_single.  These
 | 
				
			||||||
interfaces deal with page/offset pairs instead of cpu pointers.
 | 
					interfaces deal with page/offset pairs instead of cpu pointers.
 | 
				
			||||||
Specifically:
 | 
					Specifically:
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	struct pci_dev *pdev = mydev->pdev;
 | 
						struct device *dev = &my_dev->dev;
 | 
				
			||||||
	dma_addr_t dma_handle;
 | 
						dma_addr_t dma_handle;
 | 
				
			||||||
	struct page *page = buffer->page;
 | 
						struct page *page = buffer->page;
 | 
				
			||||||
	unsigned long offset = buffer->offset;
 | 
						unsigned long offset = buffer->offset;
 | 
				
			||||||
	size_t size = buffer->len;
 | 
						size_t size = buffer->len;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	dma_handle = pci_map_page(pdev, page, offset, size, direction);
 | 
						dma_handle = dma_map_page(dev, page, offset, size, direction);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	...
 | 
						...
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	pci_unmap_page(pdev, dma_handle, size, direction);
 | 
						dma_unmap_page(dev, dma_handle, size, direction);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
Here, "offset" means byte offset within the given page.
 | 
					Here, "offset" means byte offset within the given page.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
With scatterlists, you map a region gathered from several regions by:
 | 
					With scatterlists, you map a region gathered from several regions by:
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	int i, count = pci_map_sg(pdev, sglist, nents, direction);
 | 
						int i, count = dma_map_sg(dev, sglist, nents, direction);
 | 
				
			||||||
	struct scatterlist *sg;
 | 
						struct scatterlist *sg;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	for_each_sg(sglist, sg, count, i) {
 | 
						for_each_sg(sglist, sg, count, i) {
 | 
				
			||||||
| 
						 | 
					@ -527,16 +522,16 @@ accessed sg->address and sg->length as shown above.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
To unmap a scatterlist, just call:
 | 
					To unmap a scatterlist, just call:
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	pci_unmap_sg(pdev, sglist, nents, direction);
 | 
						dma_unmap_sg(dev, sglist, nents, direction);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
Again, make sure DMA activity has already finished.
 | 
					Again, make sure DMA activity has already finished.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
PLEASE NOTE:  The 'nents' argument to the pci_unmap_sg call must be
 | 
					PLEASE NOTE:  The 'nents' argument to the dma_unmap_sg call must be
 | 
				
			||||||
              the _same_ one you passed into the pci_map_sg call,
 | 
					              the _same_ one you passed into the dma_map_sg call,
 | 
				
			||||||
	      it should _NOT_ be the 'count' value _returned_ from the
 | 
						      it should _NOT_ be the 'count' value _returned_ from the
 | 
				
			||||||
              pci_map_sg call.
 | 
					              dma_map_sg call.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
Every pci_map_{single,sg} call should have its pci_unmap_{single,sg}
 | 
					Every dma_map_{single,sg} call should have its dma_unmap_{single,sg}
 | 
				
			||||||
counterpart, because the bus address space is a shared resource (although
 | 
					counterpart, because the bus address space is a shared resource (although
 | 
				
			||||||
in some ports the mapping is per each BUS so fewer devices contend for the
 | 
					in some ports the mapping is per each BUS so fewer devices contend for the
 | 
				
			||||||
same bus address space) and you could render the machine unusable by eating
 | 
					same bus address space) and you could render the machine unusable by eating
 | 
				
			||||||
| 
						 | 
					@ -547,14 +542,14 @@ the data in between the DMA transfers, the buffer needs to be synced
 | 
				
			||||||
properly in order for the cpu and device to see the most up-to-date and
 | 
					properly in order for the cpu and device to see the most up-to-date and
 | 
				
			||||||
correct copy of the DMA buffer.
 | 
					correct copy of the DMA buffer.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
So, firstly, just map it with pci_map_{single,sg}, and after each DMA
 | 
					So, firstly, just map it with dma_map_{single,sg}, and after each DMA
 | 
				
			||||||
transfer call either:
 | 
					transfer call either:
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	pci_dma_sync_single_for_cpu(pdev, dma_handle, size, direction);
 | 
						dma_sync_single_for_cpu(dev, dma_handle, size, direction);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
or:
 | 
					or:
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	pci_dma_sync_sg_for_cpu(pdev, sglist, nents, direction);
 | 
						dma_sync_sg_for_cpu(dev, sglist, nents, direction);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
as appropriate.
 | 
					as appropriate.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					@ -562,27 +557,27 @@ Then, if you wish to let the device get at the DMA area again,
 | 
				
			||||||
finish accessing the data with the cpu, and then before actually
 | 
					finish accessing the data with the cpu, and then before actually
 | 
				
			||||||
giving the buffer to the hardware call either:
 | 
					giving the buffer to the hardware call either:
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	pci_dma_sync_single_for_device(pdev, dma_handle, size, direction);
 | 
						dma_sync_single_for_device(dev, dma_handle, size, direction);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
or:
 | 
					or:
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	pci_dma_sync_sg_for_device(dev, sglist, nents, direction);
 | 
						dma_sync_sg_for_device(dev, sglist, nents, direction);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
as appropriate.
 | 
					as appropriate.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
After the last DMA transfer call one of the DMA unmap routines
 | 
					After the last DMA transfer call one of the DMA unmap routines
 | 
				
			||||||
pci_unmap_{single,sg}. If you don't touch the data from the first pci_map_*
 | 
					dma_unmap_{single,sg}. If you don't touch the data from the first dma_map_*
 | 
				
			||||||
call till pci_unmap_*, then you don't have to call the pci_dma_sync_*
 | 
					call till dma_unmap_*, then you don't have to call the dma_sync_*
 | 
				
			||||||
routines at all.
 | 
					routines at all.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
Here is pseudo code which shows a situation in which you would need
 | 
					Here is pseudo code which shows a situation in which you would need
 | 
				
			||||||
to use the pci_dma_sync_*() interfaces.
 | 
					to use the dma_sync_*() interfaces.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	my_card_setup_receive_buffer(struct my_card *cp, char *buffer, int len)
 | 
						my_card_setup_receive_buffer(struct my_card *cp, char *buffer, int len)
 | 
				
			||||||
	{
 | 
						{
 | 
				
			||||||
		dma_addr_t mapping;
 | 
							dma_addr_t mapping;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
		mapping = pci_map_single(cp->pdev, buffer, len, PCI_DMA_FROMDEVICE);
 | 
							mapping = dma_map_single(&cp->dev, buffer, len, DMA_FROM_DEVICE);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
		cp->rx_buf = buffer;
 | 
							cp->rx_buf = buffer;
 | 
				
			||||||
		cp->rx_len = len;
 | 
							cp->rx_len = len;
 | 
				
			||||||
| 
						 | 
					@ -606,25 +601,25 @@ to use the pci_dma_sync_*() interfaces.
 | 
				
			||||||
			 * the DMA transfer with the CPU first
 | 
								 * the DMA transfer with the CPU first
 | 
				
			||||||
			 * so that we see updated contents.
 | 
								 * so that we see updated contents.
 | 
				
			||||||
			 */
 | 
								 */
 | 
				
			||||||
			pci_dma_sync_single_for_cpu(cp->pdev, cp->rx_dma,
 | 
								dma_sync_single_for_cpu(&cp->dev, cp->rx_dma,
 | 
				
			||||||
						cp->rx_len,
 | 
											cp->rx_len,
 | 
				
			||||||
						    PCI_DMA_FROMDEVICE);
 | 
											DMA_FROM_DEVICE);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
			/* Now it is safe to examine the buffer. */
 | 
								/* Now it is safe to examine the buffer. */
 | 
				
			||||||
			hp = (struct my_card_header *) cp->rx_buf;
 | 
								hp = (struct my_card_header *) cp->rx_buf;
 | 
				
			||||||
			if (header_is_ok(hp)) {
 | 
								if (header_is_ok(hp)) {
 | 
				
			||||||
				pci_unmap_single(cp->pdev, cp->rx_dma, cp->rx_len,
 | 
									dma_unmap_single(&cp->dev, cp->rx_dma, cp->rx_len,
 | 
				
			||||||
						 PCI_DMA_FROMDEVICE);
 | 
											 DMA_FROM_DEVICE);
 | 
				
			||||||
				pass_to_upper_layers(cp->rx_buf);
 | 
									pass_to_upper_layers(cp->rx_buf);
 | 
				
			||||||
				make_and_setup_new_rx_buf(cp);
 | 
									make_and_setup_new_rx_buf(cp);
 | 
				
			||||||
			} else {
 | 
								} else {
 | 
				
			||||||
				/* Just sync the buffer and give it back
 | 
									/* Just sync the buffer and give it back
 | 
				
			||||||
				 * to the card.
 | 
									 * to the card.
 | 
				
			||||||
				 */
 | 
									 */
 | 
				
			||||||
				pci_dma_sync_single_for_device(cp->pdev,
 | 
									dma_sync_single_for_device(&cp->dev,
 | 
				
			||||||
							   cp->rx_dma,
 | 
												   cp->rx_dma,
 | 
				
			||||||
							   cp->rx_len,
 | 
												   cp->rx_len,
 | 
				
			||||||
							       PCI_DMA_FROMDEVICE);
 | 
												   DMA_FROM_DEVICE);
 | 
				
			||||||
				give_rx_buf_to_card(cp);
 | 
									give_rx_buf_to_card(cp);
 | 
				
			||||||
			}
 | 
								}
 | 
				
			||||||
		}
 | 
							}
 | 
				
			||||||
| 
						 | 
					@ -634,19 +629,19 @@ Drivers converted fully to this interface should not use virt_to_bus any
 | 
				
			||||||
longer, nor should they use bus_to_virt. Some drivers have to be changed a
 | 
					longer, nor should they use bus_to_virt. Some drivers have to be changed a
 | 
				
			||||||
little bit, because there is no longer an equivalent to bus_to_virt in the
 | 
					little bit, because there is no longer an equivalent to bus_to_virt in the
 | 
				
			||||||
dynamic DMA mapping scheme - you have to always store the DMA addresses
 | 
					dynamic DMA mapping scheme - you have to always store the DMA addresses
 | 
				
			||||||
returned by the pci_alloc_consistent, pci_pool_alloc, and pci_map_single
 | 
					returned by the dma_alloc_coherent, dma_pool_alloc, and dma_map_single
 | 
				
			||||||
calls (pci_map_sg stores them in the scatterlist itself if the platform
 | 
					calls (dma_map_sg stores them in the scatterlist itself if the platform
 | 
				
			||||||
supports dynamic DMA mapping in hardware) in your driver structures and/or
 | 
					supports dynamic DMA mapping in hardware) in your driver structures and/or
 | 
				
			||||||
in the card registers.
 | 
					in the card registers.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
All PCI drivers should be using these interfaces with no exceptions.
 | 
					All drivers should be using these interfaces with no exceptions.  It
 | 
				
			||||||
It is planned to completely remove virt_to_bus() and bus_to_virt() as
 | 
					is planned to completely remove virt_to_bus() and bus_to_virt() as
 | 
				
			||||||
they are entirely deprecated.  Some ports already do not provide these
 | 
					they are entirely deprecated.  Some ports already do not provide these
 | 
				
			||||||
as it is impossible to correctly support them.
 | 
					as it is impossible to correctly support them.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
		Optimizing Unmap State Space Consumption
 | 
							Optimizing Unmap State Space Consumption
 | 
				
			||||||
 | 
					
 | 
				
			||||||
On many platforms, pci_unmap_{single,page}() is simply a nop.
 | 
					On many platforms, dma_unmap_{single,page}() is simply a nop.
 | 
				
			||||||
Therefore, keeping track of the mapping address and length is a waste
 | 
					Therefore, keeping track of the mapping address and length is a waste
 | 
				
			||||||
of space.  Instead of filling your drivers up with ifdefs and the like
 | 
					of space.  Instead of filling your drivers up with ifdefs and the like
 | 
				
			||||||
to "work around" this (which would defeat the whole purpose of a
 | 
					to "work around" this (which would defeat the whole purpose of a
 | 
				
			||||||
| 
						 | 
					@ -655,7 +650,7 @@ portable API) the following facilities are provided.
 | 
				
			||||||
Actually, instead of describing the macros one by one, we'll
 | 
					Actually, instead of describing the macros one by one, we'll
 | 
				
			||||||
transform some example code.
 | 
					transform some example code.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
1) Use DECLARE_PCI_UNMAP_{ADDR,LEN} in state saving structures.
 | 
					1) Use DEFINE_DMA_UNMAP_{ADDR,LEN} in state saving structures.
 | 
				
			||||||
   Example, before:
 | 
					   Example, before:
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	struct ring_state {
 | 
						struct ring_state {
 | 
				
			||||||
| 
						 | 
					@ -668,14 +663,11 @@ transform some example code.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	struct ring_state {
 | 
						struct ring_state {
 | 
				
			||||||
		struct sk_buff *skb;
 | 
							struct sk_buff *skb;
 | 
				
			||||||
		DECLARE_PCI_UNMAP_ADDR(mapping)
 | 
							DEFINE_DMA_UNMAP_ADDR(mapping);
 | 
				
			||||||
		DECLARE_PCI_UNMAP_LEN(len)
 | 
							DEFINE_DMA_UNMAP_LEN(len);
 | 
				
			||||||
	};
 | 
						};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
   NOTE: DO NOT put a semicolon at the end of the DECLARE_*()
 | 
					2) Use dma_unmap_{addr,len}_set to set these values.
 | 
				
			||||||
         macro.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
2) Use pci_unmap_{addr,len}_set to set these values.
 | 
					 | 
				
			||||||
   Example, before:
 | 
					   Example, before:
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	ringp->mapping = FOO;
 | 
						ringp->mapping = FOO;
 | 
				
			||||||
| 
						 | 
					@ -683,21 +675,21 @@ transform some example code.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
   after:
 | 
					   after:
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	pci_unmap_addr_set(ringp, mapping, FOO);
 | 
						dma_unmap_addr_set(ringp, mapping, FOO);
 | 
				
			||||||
	pci_unmap_len_set(ringp, len, BAR);
 | 
						dma_unmap_len_set(ringp, len, BAR);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
3) Use pci_unmap_{addr,len} to access these values.
 | 
					3) Use dma_unmap_{addr,len} to access these values.
 | 
				
			||||||
   Example, before:
 | 
					   Example, before:
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	pci_unmap_single(pdev, ringp->mapping, ringp->len,
 | 
						dma_unmap_single(dev, ringp->mapping, ringp->len,
 | 
				
			||||||
			 PCI_DMA_FROMDEVICE);
 | 
								 DMA_FROM_DEVICE);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
   after:
 | 
					   after:
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	pci_unmap_single(pdev,
 | 
						dma_unmap_single(dev,
 | 
				
			||||||
			 pci_unmap_addr(ringp, mapping),
 | 
								 dma_unmap_addr(ringp, mapping),
 | 
				
			||||||
			 pci_unmap_len(ringp, len),
 | 
								 dma_unmap_len(ringp, len),
 | 
				
			||||||
			 PCI_DMA_FROMDEVICE);
 | 
								 DMA_FROM_DEVICE);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
It really should be self-explanatory.  We treat the ADDR and LEN
 | 
					It really should be self-explanatory.  We treat the ADDR and LEN
 | 
				
			||||||
separately, because it is possible for an implementation to only
 | 
					separately, because it is possible for an implementation to only
 | 
				
			||||||
| 
						 | 
					@ -732,15 +724,15 @@ to "Closing".
 | 
				
			||||||
DMA address space is limited on some architectures and an allocation
 | 
					DMA address space is limited on some architectures and an allocation
 | 
				
			||||||
failure can be determined by:
 | 
					failure can be determined by:
 | 
				
			||||||
 | 
					
 | 
				
			||||||
- checking if pci_alloc_consistent returns NULL or pci_map_sg returns 0
 | 
					- checking if dma_alloc_coherent returns NULL or dma_map_sg returns 0
 | 
				
			||||||
 | 
					
 | 
				
			||||||
- checking the returned dma_addr_t of pci_map_single and pci_map_page
 | 
					- checking the returned dma_addr_t of dma_map_single and dma_map_page
 | 
				
			||||||
  by using pci_dma_mapping_error():
 | 
					  by using dma_mapping_error():
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	dma_addr_t dma_handle;
 | 
						dma_addr_t dma_handle;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	dma_handle = pci_map_single(pdev, addr, size, direction);
 | 
						dma_handle = dma_map_single(dev, addr, size, direction);
 | 
				
			||||||
	if (pci_dma_mapping_error(pdev, dma_handle)) {
 | 
						if (dma_mapping_error(dev, dma_handle)) {
 | 
				
			||||||
		/*
 | 
							/*
 | 
				
			||||||
		 * reduce current DMA mapping usage,
 | 
							 * reduce current DMA mapping usage,
 | 
				
			||||||
		 * delay and try again later or
 | 
							 * delay and try again later or
 | 
				
			||||||
| 
						 | 
					@ -4,20 +4,18 @@
 | 
				
			||||||
        James E.J. Bottomley <James.Bottomley@HansenPartnership.com>
 | 
					        James E.J. Bottomley <James.Bottomley@HansenPartnership.com>
 | 
				
			||||||
 | 
					
 | 
				
			||||||
This document describes the DMA API.  For a more gentle introduction
 | 
					This document describes the DMA API.  For a more gentle introduction
 | 
				
			||||||
phrased in terms of the pci_ equivalents (and actual examples) see
 | 
					of the API (and actual examples) see
 | 
				
			||||||
Documentation/PCI/PCI-DMA-mapping.txt.
 | 
					Documentation/DMA-API-HOWTO.txt.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
This API is split into two pieces.  Part I describes the API and the
 | 
					This API is split into two pieces.  Part I describes the API.  Part II
 | 
				
			||||||
corresponding pci_ API.  Part II describes the extensions to the API
 | 
					describes the extensions to the API for supporting non-consistent
 | 
				
			||||||
for supporting non-consistent memory machines.  Unless you know that
 | 
					memory machines.  Unless you know that your driver absolutely has to
 | 
				
			||||||
your driver absolutely has to support non-consistent platforms (this
 | 
					support non-consistent platforms (this is usually only legacy
 | 
				
			||||||
is usually only legacy platforms) you should only use the API
 | 
					platforms) you should only use the API described in part I.
 | 
				
			||||||
described in part I.
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
Part I - pci_ and dma_ Equivalent API 
 | 
					Part I - dma_ API
 | 
				
			||||||
-------------------------------------
 | 
					-------------------------------------
 | 
				
			||||||
 | 
					
 | 
				
			||||||
To get the pci_ API, you must #include <linux/pci.h>
 | 
					 | 
				
			||||||
To get the dma_ API, you must #include <linux/dma-mapping.h>
 | 
					To get the dma_ API, you must #include <linux/dma-mapping.h>
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					@ -27,9 +25,6 @@ Part Ia - Using large dma-coherent buffers
 | 
				
			||||||
void *
 | 
					void *
 | 
				
			||||||
dma_alloc_coherent(struct device *dev, size_t size,
 | 
					dma_alloc_coherent(struct device *dev, size_t size,
 | 
				
			||||||
			     dma_addr_t *dma_handle, gfp_t flag)
 | 
								     dma_addr_t *dma_handle, gfp_t flag)
 | 
				
			||||||
void *
 | 
					 | 
				
			||||||
pci_alloc_consistent(struct pci_dev *dev, size_t size,
 | 
					 | 
				
			||||||
			     dma_addr_t *dma_handle)
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
Consistent memory is memory for which a write by either the device or
 | 
					Consistent memory is memory for which a write by either the device or
 | 
				
			||||||
the processor can immediately be read by the processor or device
 | 
					the processor can immediately be read by the processor or device
 | 
				
			||||||
| 
						 | 
					@ -53,15 +48,11 @@ The simplest way to do that is to use the dma_pool calls (see below).
 | 
				
			||||||
The flag parameter (dma_alloc_coherent only) allows the caller to
 | 
					The flag parameter (dma_alloc_coherent only) allows the caller to
 | 
				
			||||||
specify the GFP_ flags (see kmalloc) for the allocation (the
 | 
					specify the GFP_ flags (see kmalloc) for the allocation (the
 | 
				
			||||||
implementation may choose to ignore flags that affect the location of
 | 
					implementation may choose to ignore flags that affect the location of
 | 
				
			||||||
the returned memory, like GFP_DMA).  For pci_alloc_consistent, you
 | 
					the returned memory, like GFP_DMA).
 | 
				
			||||||
must assume GFP_ATOMIC behaviour.
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
void
 | 
					void
 | 
				
			||||||
dma_free_coherent(struct device *dev, size_t size, void *cpu_addr,
 | 
					dma_free_coherent(struct device *dev, size_t size, void *cpu_addr,
 | 
				
			||||||
			   dma_addr_t dma_handle)
 | 
								   dma_addr_t dma_handle)
 | 
				
			||||||
void
 | 
					 | 
				
			||||||
pci_free_consistent(struct pci_dev *dev, size_t size, void *cpu_addr,
 | 
					 | 
				
			||||||
			   dma_addr_t dma_handle)
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
Free the region of consistent memory you previously allocated.  dev,
 | 
					Free the region of consistent memory you previously allocated.  dev,
 | 
				
			||||||
size and dma_handle must all be the same as those passed into the
 | 
					size and dma_handle must all be the same as those passed into the
 | 
				
			||||||
| 
						 | 
					@ -89,10 +80,6 @@ for alignment, like queue heads needing to be aligned on N-byte boundaries.
 | 
				
			||||||
	dma_pool_create(const char *name, struct device *dev,
 | 
						dma_pool_create(const char *name, struct device *dev,
 | 
				
			||||||
			size_t size, size_t align, size_t alloc);
 | 
								size_t size, size_t align, size_t alloc);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	struct pci_pool *
 | 
					 | 
				
			||||||
	pci_pool_create(const char *name, struct pci_device *dev,
 | 
					 | 
				
			||||||
			size_t size, size_t align, size_t alloc);
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
The pool create() routines initialize a pool of dma-coherent buffers
 | 
					The pool create() routines initialize a pool of dma-coherent buffers
 | 
				
			||||||
for use with a given device.  It must be called in a context which
 | 
					for use with a given device.  It must be called in a context which
 | 
				
			||||||
can sleep.
 | 
					can sleep.
 | 
				
			||||||
| 
						 | 
					@ -108,9 +95,6 @@ from this pool must not cross 4KByte boundaries.
 | 
				
			||||||
	void *dma_pool_alloc(struct dma_pool *pool, gfp_t gfp_flags,
 | 
						void *dma_pool_alloc(struct dma_pool *pool, gfp_t gfp_flags,
 | 
				
			||||||
			dma_addr_t *dma_handle);
 | 
								dma_addr_t *dma_handle);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	void *pci_pool_alloc(struct pci_pool *pool, gfp_t gfp_flags,
 | 
					 | 
				
			||||||
			dma_addr_t *dma_handle);
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
This allocates memory from the pool; the returned memory will meet the size
 | 
					This allocates memory from the pool; the returned memory will meet the size
 | 
				
			||||||
and alignment requirements specified at creation time.  Pass GFP_ATOMIC to
 | 
					and alignment requirements specified at creation time.  Pass GFP_ATOMIC to
 | 
				
			||||||
prevent blocking, or if it's permitted (not in_interrupt, not holding SMP locks),
 | 
					prevent blocking, or if it's permitted (not in_interrupt, not holding SMP locks),
 | 
				
			||||||
| 
						 | 
					@ -122,9 +106,6 @@ pool's device.
 | 
				
			||||||
	void dma_pool_free(struct dma_pool *pool, void *vaddr,
 | 
						void dma_pool_free(struct dma_pool *pool, void *vaddr,
 | 
				
			||||||
			dma_addr_t addr);
 | 
								dma_addr_t addr);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	void pci_pool_free(struct pci_pool *pool, void *vaddr,
 | 
					 | 
				
			||||||
			dma_addr_t addr);
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
This puts memory back into the pool.  The pool is what was passed to
 | 
					This puts memory back into the pool.  The pool is what was passed to
 | 
				
			||||||
the pool allocation routine; the cpu (vaddr) and dma addresses are what
 | 
					the pool allocation routine; the cpu (vaddr) and dma addresses are what
 | 
				
			||||||
were returned when that routine allocated the memory being freed.
 | 
					were returned when that routine allocated the memory being freed.
 | 
				
			||||||
| 
						 | 
					@ -132,8 +113,6 @@ were returned when that routine allocated the memory being freed.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	void dma_pool_destroy(struct dma_pool *pool);
 | 
						void dma_pool_destroy(struct dma_pool *pool);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	void pci_pool_destroy(struct pci_pool *pool);
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
The pool destroy() routines free the resources of the pool.  They must be
 | 
					The pool destroy() routines free the resources of the pool.  They must be
 | 
				
			||||||
called in a context which can sleep.  Make sure you've freed all allocated
 | 
					called in a context which can sleep.  Make sure you've freed all allocated
 | 
				
			||||||
memory back to the pool before you destroy it.
 | 
					memory back to the pool before you destroy it.
 | 
				
			||||||
| 
						 | 
					@ -144,8 +123,6 @@ Part Ic - DMA addressing limitations
 | 
				
			||||||
 | 
					
 | 
				
			||||||
int
 | 
					int
 | 
				
			||||||
dma_supported(struct device *dev, u64 mask)
 | 
					dma_supported(struct device *dev, u64 mask)
 | 
				
			||||||
int
 | 
					 | 
				
			||||||
pci_dma_supported(struct pci_dev *hwdev, u64 mask)
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
Checks to see if the device can support DMA to the memory described by
 | 
					Checks to see if the device can support DMA to the memory described by
 | 
				
			||||||
mask.
 | 
					mask.
 | 
				
			||||||
| 
						 | 
					@ -159,8 +136,14 @@ driver writers.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
int
 | 
					int
 | 
				
			||||||
dma_set_mask(struct device *dev, u64 mask)
 | 
					dma_set_mask(struct device *dev, u64 mask)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Checks to see if the mask is possible and updates the device
 | 
				
			||||||
 | 
					parameters if it is.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Returns: 0 if successful and a negative error if not.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
int
 | 
					int
 | 
				
			||||||
pci_set_dma_mask(struct pci_device *dev, u64 mask)
 | 
					dma_set_coherent_mask(struct device *dev, u64 mask)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
Checks to see if the mask is possible and updates the device
 | 
					Checks to see if the mask is possible and updates the device
 | 
				
			||||||
parameters if it is.
 | 
					parameters if it is.
 | 
				
			||||||
| 
						 | 
					@ -187,9 +170,6 @@ Part Id - Streaming DMA mappings
 | 
				
			||||||
dma_addr_t
 | 
					dma_addr_t
 | 
				
			||||||
dma_map_single(struct device *dev, void *cpu_addr, size_t size,
 | 
					dma_map_single(struct device *dev, void *cpu_addr, size_t size,
 | 
				
			||||||
		      enum dma_data_direction direction)
 | 
							      enum dma_data_direction direction)
 | 
				
			||||||
dma_addr_t
 | 
					 | 
				
			||||||
pci_map_single(struct pci_dev *hwdev, void *cpu_addr, size_t size,
 | 
					 | 
				
			||||||
		      int direction)
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
Maps a piece of processor virtual memory so it can be accessed by the
 | 
					Maps a piece of processor virtual memory so it can be accessed by the
 | 
				
			||||||
device and returns the physical handle of the memory.
 | 
					device and returns the physical handle of the memory.
 | 
				
			||||||
| 
						 | 
					@ -198,14 +178,10 @@ The direction for both api's may be converted freely by casting.
 | 
				
			||||||
However the dma_ API uses a strongly typed enumerator for its
 | 
					However the dma_ API uses a strongly typed enumerator for its
 | 
				
			||||||
direction:
 | 
					direction:
 | 
				
			||||||
 | 
					
 | 
				
			||||||
DMA_NONE		= PCI_DMA_NONE		no direction (used for
 | 
					DMA_NONE		no direction (used for debugging)
 | 
				
			||||||
						debugging)
 | 
					DMA_TO_DEVICE		data is going from the memory to the device
 | 
				
			||||||
DMA_TO_DEVICE		= PCI_DMA_TODEVICE	data is going from the
 | 
					DMA_FROM_DEVICE		data is coming from the device to the memory
 | 
				
			||||||
						memory to the device
 | 
					DMA_BIDIRECTIONAL	direction isn't known
 | 
				
			||||||
DMA_FROM_DEVICE		= PCI_DMA_FROMDEVICE	data is coming from
 | 
					 | 
				
			||||||
						the device to the
 | 
					 | 
				
			||||||
						memory
 | 
					 | 
				
			||||||
DMA_BIDIRECTIONAL	= PCI_DMA_BIDIRECTIONAL	direction isn't known
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
Notes:  Not all memory regions in a machine can be mapped by this
 | 
					Notes:  Not all memory regions in a machine can be mapped by this
 | 
				
			||||||
API.  Further, regions that appear to be physically contiguous in
 | 
					API.  Further, regions that appear to be physically contiguous in
 | 
				
			||||||
| 
						 | 
					@ -268,9 +244,6 @@ cache lines are updated with data that the device may have changed).
 | 
				
			||||||
void
 | 
					void
 | 
				
			||||||
dma_unmap_single(struct device *dev, dma_addr_t dma_addr, size_t size,
 | 
					dma_unmap_single(struct device *dev, dma_addr_t dma_addr, size_t size,
 | 
				
			||||||
		 enum dma_data_direction direction)
 | 
							 enum dma_data_direction direction)
 | 
				
			||||||
void
 | 
					 | 
				
			||||||
pci_unmap_single(struct pci_dev *hwdev, dma_addr_t dma_addr,
 | 
					 | 
				
			||||||
		 size_t size, int direction)
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
Unmaps the region previously mapped.  All the parameters passed in
 | 
					Unmaps the region previously mapped.  All the parameters passed in
 | 
				
			||||||
must be identical to those passed in (and returned) by the mapping
 | 
					must be identical to those passed in (and returned) by the mapping
 | 
				
			||||||
| 
						 | 
					@ -280,15 +253,9 @@ dma_addr_t
 | 
				
			||||||
dma_map_page(struct device *dev, struct page *page,
 | 
					dma_map_page(struct device *dev, struct page *page,
 | 
				
			||||||
		    unsigned long offset, size_t size,
 | 
							    unsigned long offset, size_t size,
 | 
				
			||||||
		    enum dma_data_direction direction)
 | 
							    enum dma_data_direction direction)
 | 
				
			||||||
dma_addr_t
 | 
					 | 
				
			||||||
pci_map_page(struct pci_dev *hwdev, struct page *page,
 | 
					 | 
				
			||||||
		    unsigned long offset, size_t size, int direction)
 | 
					 | 
				
			||||||
void
 | 
					void
 | 
				
			||||||
dma_unmap_page(struct device *dev, dma_addr_t dma_address, size_t size,
 | 
					dma_unmap_page(struct device *dev, dma_addr_t dma_address, size_t size,
 | 
				
			||||||
	       enum dma_data_direction direction)
 | 
						       enum dma_data_direction direction)
 | 
				
			||||||
void
 | 
					 | 
				
			||||||
pci_unmap_page(struct pci_dev *hwdev, dma_addr_t dma_address,
 | 
					 | 
				
			||||||
	       size_t size, int direction)
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
API for mapping and unmapping for pages.  All the notes and warnings
 | 
					API for mapping and unmapping for pages.  All the notes and warnings
 | 
				
			||||||
for the other mapping APIs apply here.  Also, although the <offset>
 | 
					for the other mapping APIs apply here.  Also, although the <offset>
 | 
				
			||||||
| 
						 | 
					@ -299,9 +266,6 @@ cache width is.
 | 
				
			||||||
int
 | 
					int
 | 
				
			||||||
dma_mapping_error(struct device *dev, dma_addr_t dma_addr)
 | 
					dma_mapping_error(struct device *dev, dma_addr_t dma_addr)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
int
 | 
					 | 
				
			||||||
pci_dma_mapping_error(struct pci_dev *hwdev, dma_addr_t dma_addr)
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
In some circumstances dma_map_single and dma_map_page will fail to create
 | 
					In some circumstances dma_map_single and dma_map_page will fail to create
 | 
				
			||||||
a mapping. A driver can check for these errors by testing the returned
 | 
					a mapping. A driver can check for these errors by testing the returned
 | 
				
			||||||
dma address with dma_mapping_error(). A non-zero return value means the mapping
 | 
					dma address with dma_mapping_error(). A non-zero return value means the mapping
 | 
				
			||||||
| 
						 | 
					@ -311,9 +275,6 @@ reduce current DMA mapping usage or delay and try again later).
 | 
				
			||||||
	int
 | 
						int
 | 
				
			||||||
	dma_map_sg(struct device *dev, struct scatterlist *sg,
 | 
						dma_map_sg(struct device *dev, struct scatterlist *sg,
 | 
				
			||||||
		int nents, enum dma_data_direction direction)
 | 
							int nents, enum dma_data_direction direction)
 | 
				
			||||||
	int
 | 
					 | 
				
			||||||
	pci_map_sg(struct pci_dev *hwdev, struct scatterlist *sg,
 | 
					 | 
				
			||||||
		int nents, int direction)
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
Returns: the number of physical segments mapped (this may be shorter
 | 
					Returns: the number of physical segments mapped (this may be shorter
 | 
				
			||||||
than <nents> passed in if some elements of the scatter/gather list are
 | 
					than <nents> passed in if some elements of the scatter/gather list are
 | 
				
			||||||
| 
						 | 
					@ -353,9 +314,6 @@ accessed sg->address and sg->length as shown above.
 | 
				
			||||||
	void
 | 
						void
 | 
				
			||||||
	dma_unmap_sg(struct device *dev, struct scatterlist *sg,
 | 
						dma_unmap_sg(struct device *dev, struct scatterlist *sg,
 | 
				
			||||||
		int nhwentries, enum dma_data_direction direction)
 | 
							int nhwentries, enum dma_data_direction direction)
 | 
				
			||||||
	void
 | 
					 | 
				
			||||||
	pci_unmap_sg(struct pci_dev *hwdev, struct scatterlist *sg,
 | 
					 | 
				
			||||||
		int nents, int direction)
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
Unmap the previously mapped scatter/gather list.  All the parameters
 | 
					Unmap the previously mapped scatter/gather list.  All the parameters
 | 
				
			||||||
must be the same as those passed in to the scatter/gather mapping
 | 
					must be the same as those passed in to the scatter/gather mapping
 | 
				
			||||||
| 
						 | 
					@ -365,21 +323,23 @@ Note: <nents> must be the number you passed in, *not* the number of
 | 
				
			||||||
physical entries returned.
 | 
					physical entries returned.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
void
 | 
					void
 | 
				
			||||||
dma_sync_single(struct device *dev, dma_addr_t dma_handle, size_t size,
 | 
					dma_sync_single_for_cpu(struct device *dev, dma_addr_t dma_handle, size_t size,
 | 
				
			||||||
			enum dma_data_direction direction)
 | 
								enum dma_data_direction direction)
 | 
				
			||||||
void
 | 
					void
 | 
				
			||||||
pci_dma_sync_single(struct pci_dev *hwdev, dma_addr_t dma_handle,
 | 
					dma_sync_single_for_device(struct device *dev, dma_addr_t dma_handle, size_t size,
 | 
				
			||||||
			   size_t size, int direction)
 | 
					 | 
				
			||||||
void
 | 
					 | 
				
			||||||
dma_sync_sg(struct device *dev, struct scatterlist *sg, int nelems,
 | 
					 | 
				
			||||||
			   enum dma_data_direction direction)
 | 
								   enum dma_data_direction direction)
 | 
				
			||||||
void
 | 
					void
 | 
				
			||||||
pci_dma_sync_sg(struct pci_dev *hwdev, struct scatterlist *sg,
 | 
					dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg, int nelems,
 | 
				
			||||||
		       int nelems, int direction)
 | 
							    enum dma_data_direction direction)
 | 
				
			||||||
 | 
					void
 | 
				
			||||||
 | 
					dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg, int nelems,
 | 
				
			||||||
 | 
							       enum dma_data_direction direction)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
Synchronise a single contiguous or scatter/gather mapping.  All the
 | 
					Synchronise a single contiguous or scatter/gather mapping for the cpu
 | 
				
			||||||
parameters must be the same as those passed into the single mapping
 | 
					and device. With the sync_sg API, all the parameters must be the same
 | 
				
			||||||
API.
 | 
					as those passed into the single mapping API. With the sync_single API,
 | 
				
			||||||
 | 
					you can use dma_handle and size parameters that aren't identical to
 | 
				
			||||||
 | 
					those passed into the single mapping API to do a partial sync.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
Notes:  You must do this:
 | 
					Notes:  You must do this:
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					@ -461,9 +421,9 @@ void whizco_dma_map_sg_attrs(struct device *dev, dma_addr_t dma_addr,
 | 
				
			||||||
Part II - Advanced dma_ usage
 | 
					Part II - Advanced dma_ usage
 | 
				
			||||||
-----------------------------
 | 
					-----------------------------
 | 
				
			||||||
 | 
					
 | 
				
			||||||
Warning: These pieces of the DMA API have no PCI equivalent.  They
 | 
					Warning: These pieces of the DMA API should not be used in the
 | 
				
			||||||
should also not be used in the majority of cases, since they cater for
 | 
					majority of cases, since they cater for unlikely corner cases that
 | 
				
			||||||
unlikely corner cases that don't belong in usual drivers.
 | 
					don't belong in usual drivers.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
If you don't understand how cache line coherency works between a
 | 
					If you don't understand how cache line coherency works between a
 | 
				
			||||||
processor and an I/O device, you should not be using this part of the
 | 
					processor and an I/O device, you should not be using this part of the
 | 
				
			||||||
| 
						 | 
					@ -513,16 +473,6 @@ line, but it will guarantee that one or more cache lines fit exactly
 | 
				
			||||||
into the width returned by this call.  It will also always be a power
 | 
					into the width returned by this call.  It will also always be a power
 | 
				
			||||||
of two for easy alignment.
 | 
					of two for easy alignment.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
void
 | 
					 | 
				
			||||||
dma_sync_single_range(struct device *dev, dma_addr_t dma_handle,
 | 
					 | 
				
			||||||
		      unsigned long offset, size_t size,
 | 
					 | 
				
			||||||
		      enum dma_data_direction direction)
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
Does a partial sync, starting at offset and continuing for size.  You
 | 
					 | 
				
			||||||
must be careful to observe the cache alignment and width when doing
 | 
					 | 
				
			||||||
anything like this.  You must also be extra careful about accessing
 | 
					 | 
				
			||||||
memory you intend to sync partially.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
void
 | 
					void
 | 
				
			||||||
dma_cache_sync(struct device *dev, void *vaddr, size_t size,
 | 
					dma_cache_sync(struct device *dev, void *vaddr, size_t size,
 | 
				
			||||||
	       enum dma_data_direction direction)
 | 
						       enum dma_data_direction direction)
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -488,7 +488,7 @@ static void board_select_chip (struct mtd_info *mtd, int chip)
 | 
				
			||||||
				The ECC bytes must be placed immediately after the data
 | 
									The ECC bytes must be placed immediately after the data
 | 
				
			||||||
				bytes in order to make the syndrome generator work. This
 | 
									bytes in order to make the syndrome generator work. This
 | 
				
			||||||
				is contrary to the usual layout used by software ECC. The
 | 
									is contrary to the usual layout used by software ECC. The
 | 
				
			||||||
				seperation of data and out of band area is not longer
 | 
									separation of data and out of band area is not longer
 | 
				
			||||||
				possible. The nand driver code handles this layout and
 | 
									possible. The nand driver code handles this layout and
 | 
				
			||||||
				the remaining free bytes in the oob area are managed by 
 | 
									the remaining free bytes in the oob area are managed by 
 | 
				
			||||||
				the autoplacement code. Provide a matching oob-layout
 | 
									the autoplacement code. Provide a matching oob-layout
 | 
				
			||||||
| 
						 | 
					@ -560,7 +560,7 @@ static void board_select_chip (struct mtd_info *mtd, int chip)
 | 
				
			||||||
				bad blocks. They have factory marked good blocks. The marker pattern
 | 
									bad blocks. They have factory marked good blocks. The marker pattern
 | 
				
			||||||
				is erased when the block is erased to be reused. So in case of
 | 
									is erased when the block is erased to be reused. So in case of
 | 
				
			||||||
				powerloss before writing the pattern back to the chip this block 
 | 
									powerloss before writing the pattern back to the chip this block 
 | 
				
			||||||
				would be lost and added to the bad blocks. Therefor we scan the 
 | 
									would be lost and added to the bad blocks. Therefore we scan the 
 | 
				
			||||||
				chip(s) when we detect them the first time for good blocks and 
 | 
									chip(s) when we detect them the first time for good blocks and 
 | 
				
			||||||
				store this information in a bad block table before erasing any 
 | 
									store this information in a bad block table before erasing any 
 | 
				
			||||||
				of the blocks.
 | 
									of the blocks.
 | 
				
			||||||
| 
						 | 
					@ -1094,7 +1094,7 @@ in this page</entry>
 | 
				
			||||||
		manufacturers specifications. This applies similar to the spare area. 
 | 
							manufacturers specifications. This applies similar to the spare area. 
 | 
				
			||||||
	</para>
 | 
						</para>
 | 
				
			||||||
	<para>
 | 
						<para>
 | 
				
			||||||
		Therefor NAND aware filesystems must either write in page size chunks
 | 
							Therefore NAND aware filesystems must either write in page size chunks
 | 
				
			||||||
		or hold a writebuffer to collect smaller writes until they sum up to 
 | 
							or hold a writebuffer to collect smaller writes until they sum up to 
 | 
				
			||||||
		pagesize. Available NAND aware filesystems: JFFS2, YAFFS. 		
 | 
							pagesize. Available NAND aware filesystems: JFFS2, YAFFS. 		
 | 
				
			||||||
	</para>
 | 
						</para>
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -1170,7 +1170,7 @@ frames per second. If less than this number of frames is to be
 | 
				
			||||||
captured or output, applications can request frame skipping or
 | 
					captured or output, applications can request frame skipping or
 | 
				
			||||||
duplicating on the driver side. This is especially useful when using
 | 
					duplicating on the driver side. This is especially useful when using
 | 
				
			||||||
the &func-read; or &func-write;, which are not augmented by timestamps
 | 
					the &func-read; or &func-write;, which are not augmented by timestamps
 | 
				
			||||||
or sequence counters, and to avoid unneccessary data copying.</para>
 | 
					or sequence counters, and to avoid unnecessary data copying.</para>
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    <para>Finally these ioctls can be used to determine the number of
 | 
					    <para>Finally these ioctls can be used to determine the number of
 | 
				
			||||||
buffers used internally by a driver in read/write mode. For
 | 
					buffers used internally by a driver in read/write mode. For
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -55,7 +55,7 @@ captured or output, applications can request frame skipping or
 | 
				
			||||||
duplicating on the driver side. This is especially useful when using
 | 
					duplicating on the driver side. This is especially useful when using
 | 
				
			||||||
the <function>read()</function> or <function>write()</function>, which
 | 
					the <function>read()</function> or <function>write()</function>, which
 | 
				
			||||||
are not augmented by timestamps or sequence counters, and to avoid
 | 
					are not augmented by timestamps or sequence counters, and to avoid
 | 
				
			||||||
unneccessary data copying.</para>
 | 
					unnecessary data copying.</para>
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    <para>Further these ioctls can be used to determine the number of
 | 
					    <para>Further these ioctls can be used to determine the number of
 | 
				
			||||||
buffers used internally by a driver in read/write mode. For
 | 
					buffers used internally by a driver in read/write mode. For
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -221,8 +221,8 @@ branches.  These different branches are:
 | 
				
			||||||
  - main 2.6.x kernel tree
 | 
					  - main 2.6.x kernel tree
 | 
				
			||||||
  - 2.6.x.y -stable kernel tree
 | 
					  - 2.6.x.y -stable kernel tree
 | 
				
			||||||
  - 2.6.x -git kernel patches
 | 
					  - 2.6.x -git kernel patches
 | 
				
			||||||
  - 2.6.x -mm kernel patches
 | 
					 | 
				
			||||||
  - subsystem specific kernel trees and patches
 | 
					  - subsystem specific kernel trees and patches
 | 
				
			||||||
 | 
					  - the 2.6.x -next kernel tree for integration tests
 | 
				
			||||||
 | 
					
 | 
				
			||||||
2.6.x kernel tree
 | 
					2.6.x kernel tree
 | 
				
			||||||
-----------------
 | 
					-----------------
 | 
				
			||||||
| 
						 | 
					@ -232,7 +232,7 @@ process is as follows:
 | 
				
			||||||
  - As soon as a new kernel is released a two-week window is open,
 | 
					  - As soon as a new kernel is released a two-week window is open,
 | 
				
			||||||
    during this period of time maintainers can submit big diffs to
 | 
					    during this period of time maintainers can submit big diffs to
 | 
				
			||||||
    Linus, usually the patches that have already been included in the
 | 
					    Linus, usually the patches that have already been included in the
 | 
				
			||||||
    -mm kernel for a few weeks.  The preferred way to submit big changes
 | 
					    -next kernel for a few weeks.  The preferred way to submit big changes
 | 
				
			||||||
    is using git (the kernel's source management tool, more information
 | 
					    is using git (the kernel's source management tool, more information
 | 
				
			||||||
    can be found at http://git.or.cz/) but plain patches are also just
 | 
					    can be found at http://git.or.cz/) but plain patches are also just
 | 
				
			||||||
    fine.
 | 
					    fine.
 | 
				
			||||||
| 
						 | 
					@ -293,84 +293,43 @@ daily and represent the current state of Linus' tree.  They are more
 | 
				
			||||||
experimental than -rc kernels since they are generated automatically
 | 
					experimental than -rc kernels since they are generated automatically
 | 
				
			||||||
without even a cursory glance to see if they are sane.
 | 
					without even a cursory glance to see if they are sane.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
2.6.x -mm kernel patches
 | 
					 | 
				
			||||||
------------------------
 | 
					 | 
				
			||||||
These are experimental kernel patches released by Andrew Morton.  Andrew
 | 
					 | 
				
			||||||
takes all of the different subsystem kernel trees and patches and mushes
 | 
					 | 
				
			||||||
them together, along with a lot of patches that have been plucked from
 | 
					 | 
				
			||||||
the linux-kernel mailing list.  This tree serves as a proving ground for
 | 
					 | 
				
			||||||
new features and patches.  Once a patch has proved its worth in -mm for
 | 
					 | 
				
			||||||
a while Andrew or the subsystem maintainer pushes it on to Linus for
 | 
					 | 
				
			||||||
inclusion in mainline.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
It is heavily encouraged that all new patches get tested in the -mm tree
 | 
					 | 
				
			||||||
before they are sent to Linus for inclusion in the main kernel tree.  Code
 | 
					 | 
				
			||||||
which does not make an appearance in -mm before the opening of the merge
 | 
					 | 
				
			||||||
window will prove hard to merge into the mainline.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
These kernels are not appropriate for use on systems that are supposed
 | 
					 | 
				
			||||||
to be stable and they are more risky to run than any of the other
 | 
					 | 
				
			||||||
branches.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
If you wish to help out with the kernel development process, please test
 | 
					 | 
				
			||||||
and use these kernel releases and provide feedback to the linux-kernel
 | 
					 | 
				
			||||||
mailing list if you have any problems, and if everything works properly.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
In addition to all the other experimental patches, these kernels usually
 | 
					 | 
				
			||||||
also contain any changes in the mainline -git kernels available at the
 | 
					 | 
				
			||||||
time of release.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
The -mm kernels are not released on a fixed schedule, but usually a few
 | 
					 | 
				
			||||||
-mm kernels are released in between each -rc kernel (1 to 3 is common).
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
Subsystem Specific kernel trees and patches
 | 
					Subsystem Specific kernel trees and patches
 | 
				
			||||||
-------------------------------------------
 | 
					-------------------------------------------
 | 
				
			||||||
A number of the different kernel subsystem developers expose their
 | 
					The maintainers of the various kernel subsystems --- and also many
 | 
				
			||||||
development trees so that others can see what is happening in the
 | 
					kernel subsystem developers --- expose their current state of
 | 
				
			||||||
different areas of the kernel.  These trees are pulled into the -mm
 | 
					development in source repositories.  That way, others can see what is
 | 
				
			||||||
kernel releases as described above.
 | 
					happening in the different areas of the kernel.  In areas where
 | 
				
			||||||
 | 
					development is rapid, a developer may be asked to base his submissions
 | 
				
			||||||
 | 
					onto such a subsystem kernel tree so that conflicts between the
 | 
				
			||||||
 | 
					submission and other already ongoing work are avoided.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
Here is a list of some of the different kernel trees available:
 | 
					Most of these repositories are git trees, but there are also other SCMs
 | 
				
			||||||
  git trees:
 | 
					in use, or patch queues being published as quilt series.  Addresses of
 | 
				
			||||||
    - Kbuild development tree, Sam Ravnborg <sam@ravnborg.org>
 | 
					these subsystem repositories are listed in the MAINTAINERS file.  Many
 | 
				
			||||||
	git.kernel.org:/pub/scm/linux/kernel/git/sam/kbuild.git
 | 
					of them can be browsed at http://git.kernel.org/.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    - ACPI development tree, Len Brown <len.brown@intel.com>
 | 
					Before a proposed patch is committed to such a subsystem tree, it is
 | 
				
			||||||
	git.kernel.org:/pub/scm/linux/kernel/git/lenb/linux-acpi-2.6.git
 | 
					subject to review which primarily happens on mailing lists (see the
 | 
				
			||||||
 | 
					respective section below).  For several kernel subsystems, this review
 | 
				
			||||||
 | 
					process is tracked with the tool patchwork.  Patchwork offers a web
 | 
				
			||||||
 | 
					interface which shows patch postings, any comments on a patch or
 | 
				
			||||||
 | 
					revisions to it, and maintainers can mark patches as under review,
 | 
				
			||||||
 | 
					accepted, or rejected.  Most of these patchwork sites are listed at
 | 
				
			||||||
 | 
					http://patchwork.kernel.org/ or http://patchwork.ozlabs.org/.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    - Block development tree, Jens Axboe <jens.axboe@oracle.com>
 | 
					2.6.x -next kernel tree for integration tests
 | 
				
			||||||
	git.kernel.org:/pub/scm/linux/kernel/git/axboe/linux-2.6-block.git
 | 
					---------------------------------------------
 | 
				
			||||||
 | 
					Before updates from subsystem trees are merged into the mainline 2.6.x
 | 
				
			||||||
 | 
					tree, they need to be integration-tested.  For this purpose, a special
 | 
				
			||||||
 | 
					testing repository exists into which virtually all subsystem trees are
 | 
				
			||||||
 | 
					pulled on an almost daily basis:
 | 
				
			||||||
 | 
						http://git.kernel.org/?p=linux/kernel/git/sfr/linux-next.git
 | 
				
			||||||
 | 
						http://linux.f-seidel.de/linux-next/pmwiki/
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    - DRM development tree, Dave Airlie <airlied@linux.ie>
 | 
					This way, the -next kernel gives a summary outlook onto what will be
 | 
				
			||||||
	git.kernel.org:/pub/scm/linux/kernel/git/airlied/drm-2.6.git
 | 
					expected to go into the mainline kernel at the next merge period.
 | 
				
			||||||
 | 
					Adventurous testers are very welcome to runtime-test the -next kernel.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    - ia64 development tree, Tony Luck <tony.luck@intel.com>
 | 
					 | 
				
			||||||
	git.kernel.org:/pub/scm/linux/kernel/git/aegl/linux-2.6.git
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    - infiniband, Roland Dreier <rolandd@cisco.com>
 | 
					 | 
				
			||||||
	git.kernel.org:/pub/scm/linux/kernel/git/roland/infiniband.git
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    - libata, Jeff Garzik <jgarzik@pobox.com>
 | 
					 | 
				
			||||||
	git.kernel.org:/pub/scm/linux/kernel/git/jgarzik/libata-dev.git
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    - network drivers, Jeff Garzik <jgarzik@pobox.com>
 | 
					 | 
				
			||||||
	git.kernel.org:/pub/scm/linux/kernel/git/jgarzik/netdev-2.6.git
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    - pcmcia, Dominik Brodowski <linux@dominikbrodowski.net>
 | 
					 | 
				
			||||||
	git.kernel.org:/pub/scm/linux/kernel/git/brodo/pcmcia-2.6.git
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    - SCSI, James Bottomley <James.Bottomley@hansenpartnership.com>
 | 
					 | 
				
			||||||
	git.kernel.org:/pub/scm/linux/kernel/git/jejb/scsi-misc-2.6.git
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    - x86, Ingo Molnar <mingo@elte.hu>
 | 
					 | 
				
			||||||
	git://git.kernel.org/pub/scm/linux/kernel/git/x86/linux-2.6-x86.git
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
  quilt trees:
 | 
					 | 
				
			||||||
    - USB, Driver Core, and I2C, Greg Kroah-Hartman <gregkh@suse.de>
 | 
					 | 
				
			||||||
	kernel.org/pub/linux/kernel/people/gregkh/gregkh-2.6/
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
  Other kernel trees can be found listed at http://git.kernel.org/ and in
 | 
					 | 
				
			||||||
  the MAINTAINERS file.
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
Bug Reporting
 | 
					Bug Reporting
 | 
				
			||||||
-------------
 | 
					-------------
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -365,6 +365,7 @@ You can change this at module load time (for a module) with:
 | 
				
			||||||
       regshifts=<shift1>,<shift2>,...
 | 
					       regshifts=<shift1>,<shift2>,...
 | 
				
			||||||
       slave_addrs=<addr1>,<addr2>,...
 | 
					       slave_addrs=<addr1>,<addr2>,...
 | 
				
			||||||
       force_kipmid=<enable1>,<enable2>,...
 | 
					       force_kipmid=<enable1>,<enable2>,...
 | 
				
			||||||
 | 
					       kipmid_max_busy_us=<ustime1>,<ustime2>,...
 | 
				
			||||||
       unload_when_empty=[0|1]
 | 
					       unload_when_empty=[0|1]
 | 
				
			||||||
 | 
					
 | 
				
			||||||
Each of these except si_trydefaults is a list, the first item for the
 | 
					Each of these except si_trydefaults is a list, the first item for the
 | 
				
			||||||
| 
						 | 
					@ -433,6 +434,7 @@ kernel command line as:
 | 
				
			||||||
       ipmi_si.regshifts=<shift1>,<shift2>,...
 | 
					       ipmi_si.regshifts=<shift1>,<shift2>,...
 | 
				
			||||||
       ipmi_si.slave_addrs=<addr1>,<addr2>,...
 | 
					       ipmi_si.slave_addrs=<addr1>,<addr2>,...
 | 
				
			||||||
       ipmi_si.force_kipmid=<enable1>,<enable2>,...
 | 
					       ipmi_si.force_kipmid=<enable1>,<enable2>,...
 | 
				
			||||||
 | 
					       ipmi_si.kipmid_max_busy_us=<ustime1>,<ustime2>,...
 | 
				
			||||||
 | 
					
 | 
				
			||||||
It works the same as the module parameters of the same names.
 | 
					It works the same as the module parameters of the same names.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					@ -450,6 +452,16 @@ force this thread on or off.  If you force it off and don't have
 | 
				
			||||||
interrupts, the driver will run VERY slowly.  Don't blame me,
 | 
					interrupts, the driver will run VERY slowly.  Don't blame me,
 | 
				
			||||||
these interfaces suck.
 | 
					these interfaces suck.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Unfortunately, this thread can use a lot of CPU depending on the
 | 
				
			||||||
 | 
					interface's performance.  This can waste a lot of CPU and cause
 | 
				
			||||||
 | 
					various issues with detecting idle CPU and using extra power.  To
 | 
				
			||||||
 | 
					avoid this, the kipmid_max_busy_us parameter sets the maximum amount of time, in
 | 
				
			||||||
 | 
					microseconds, that kipmid will spin before sleeping for a tick.  This
 | 
				
			||||||
 | 
					value sets a balance between performance and CPU waste and needs to be
 | 
				
			||||||
 | 
					tuned to your needs.  Maybe, someday, auto-tuning will be added, but
 | 
				
			||||||
 | 
					that's not a simple thing and even the auto-tuning would need to be
 | 
				
			||||||
 | 
					tuned to the user's desired performance.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
The driver supports a hot add and remove of interfaces.  This way,
 | 
					The driver supports a hot add and remove of interfaces.  This way,
 | 
				
			||||||
interfaces can be added or removed after the kernel is up and running.
 | 
					interfaces can be added or removed after the kernel is up and running.
 | 
				
			||||||
This is done using /sys/modules/ipmi_si/parameters/hotmod, which is a
 | 
					This is done using /sys/modules/ipmi_si/parameters/hotmod, which is a
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -1,3 +1,3 @@
 | 
				
			||||||
obj-m := DocBook/ accounting/ auxdisplay/ connector/ \
 | 
					obj-m := DocBook/ accounting/ auxdisplay/ connector/ \
 | 
				
			||||||
	filesystems/configfs/ ia64/ networking/ \
 | 
						filesystems/ filesystems/configfs/ ia64/ laptops/ networking/ \
 | 
				
			||||||
	pcmcia/ spi/ video4linux/ vm/ watchdog/src/
 | 
						pcmcia/ spi/ timers/ video4linux/ vm/ watchdog/src/
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -9,10 +9,14 @@ Documentation/SubmittingPatches and elsewhere regarding submitting Linux
 | 
				
			||||||
kernel patches.
 | 
					kernel patches.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
1: Builds cleanly with applicable or modified CONFIG options =y, =m, and
 | 
					1: If you use a facility then #include the file that defines/declares
 | 
				
			||||||
 | 
					   that facility.  Don't depend on other header files pulling in ones
 | 
				
			||||||
 | 
					   that you use.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					2: Builds cleanly with applicable or modified CONFIG options =y, =m, and
 | 
				
			||||||
   =n.  No gcc warnings/errors, no linker warnings/errors.
 | 
					   =n.  No gcc warnings/errors, no linker warnings/errors.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
2: Passes allnoconfig, allmodconfig
 | 
					2b: Passes allnoconfig, allmodconfig
 | 
				
			||||||
 | 
					
 | 
				
			||||||
3: Builds on multiple CPU architectures by using local cross-compile tools
 | 
					3: Builds on multiple CPU architectures by using local cross-compile tools
 | 
				
			||||||
   or some other build farm.
 | 
					   or some other build farm.
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -14,8 +14,8 @@ Introduction
 | 
				
			||||||
 how the clocks are arranged. The first implementation used as single
 | 
					 how the clocks are arranged. The first implementation used as single
 | 
				
			||||||
 PLL to feed the ARM, memory and peripherals via a series of dividers
 | 
					 PLL to feed the ARM, memory and peripherals via a series of dividers
 | 
				
			||||||
 and muxes and this is the implementation that is documented here. A
 | 
					 and muxes and this is the implementation that is documented here. A
 | 
				
			||||||
 newer version where there is a seperate PLL and clock divider for the
 | 
					 newer version where there is a separate PLL and clock divider for the
 | 
				
			||||||
 ARM core is available as a seperate driver.
 | 
					 ARM core is available as a separate driver.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
Layout
 | 
					Layout
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
							
								
								
									
										86
									
								
								Documentation/arm/Samsung/Overview.txt
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										86
									
								
								Documentation/arm/Samsung/Overview.txt
									
										
									
									
									
										Normal file
									
								
							| 
						 | 
					@ -0,0 +1,86 @@
 | 
				
			||||||
 | 
							Samsung ARM Linux Overview
 | 
				
			||||||
 | 
							==========================
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Introduction
 | 
				
			||||||
 | 
					------------
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  The Samsung range of ARM SoCs spans many similar devices, from the initial
 | 
				
			||||||
 | 
					  ARM9 through to the newest ARM cores. This document shows an overview of
 | 
				
			||||||
 | 
					  the current kernel support, how to use it and where to find the code
 | 
				
			||||||
 | 
					  that supports this.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  The currently supported SoCs are:
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  - S3C24XX: See Documentation/arm/Samsung-S3C24XX/Overview.txt for full list
 | 
				
			||||||
 | 
					  - S3C64XX: S3C6400 and S3C6410
 | 
				
			||||||
 | 
					  - S5PC6440
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  S5PC100 and S5PC110 support is currently being merged
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					S3C24XX Systems
 | 
				
			||||||
 | 
					---------------
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  There is still documentation in Documentation/arm/Samsung-S3C24XX/ which
 | 
				
			||||||
 | 
					  deals with the architecture and drivers specific to these devices.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  See Documentation/arm/Samsung-S3C24XX/Overview.txt for more information
 | 
				
			||||||
 | 
					  on the implementation details and specific support.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Configuration
 | 
				
			||||||
 | 
					-------------
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  A number of configurations are supplied, as there is no current way of
 | 
				
			||||||
 | 
					  unifying all the SoCs into one kernel.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  s5p6440_defconfig - S5P6440 specific default configuration
 | 
				
			||||||
 | 
					  s5pc100_defconfig - S5PC100 specific default configuration
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Layout
 | 
				
			||||||
 | 
					------
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  The directory layout is currently being restructured, and consists of
 | 
				
			||||||
 | 
					  several platform directories and then the machine specific directories
 | 
				
			||||||
 | 
					  of the CPUs being built for.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  plat-samsung provides the base for all the implementations, and is the
 | 
				
			||||||
 | 
					  last in the line of include directories that are processed for the build
 | 
				
			||||||
 | 
					  specific information. It contains the base clock, GPIO and device definitions
 | 
				
			||||||
 | 
					  to get the system running.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  plat-s3c is the s3c24xx/s3c64xx platform directory, although it is currently
 | 
				
			||||||
 | 
					  involved in other builds this will be phased out once the relevant code is
 | 
				
			||||||
 | 
					  moved elsewhere.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  plat-s3c24xx is for s3c24xx specific builds, see the S3C24XX docs.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  plat-s3c64xx is for the s3c64xx specific bits, see the S3C24XX docs.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  plat-s5p is for s5p specific builds, more to be added.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  [ to finish ]
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Port Contributors
 | 
				
			||||||
 | 
					-----------------
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  Ben Dooks (BJD)
 | 
				
			||||||
 | 
					  Vincent Sanders
 | 
				
			||||||
 | 
					  Herbert Potzl
 | 
				
			||||||
 | 
					  Arnaud Patard (RTP)
 | 
				
			||||||
 | 
					  Roc Wu
 | 
				
			||||||
 | 
					  Klaus Fetscher
 | 
				
			||||||
 | 
					  Dimitry Andric
 | 
				
			||||||
 | 
					  Shannon Holland
 | 
				
			||||||
 | 
					  Guillaume Gourat (NexVision)
 | 
				
			||||||
 | 
					  Christer Weinigel (wingel) (Acer N30)
 | 
				
			||||||
 | 
					  Lucas Correia Villa Real (S3C2400 port)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Document Author
 | 
				
			||||||
 | 
					---------------
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Copyright 2009-2010 Ben Dooks <ben-linux@fluff.org>
 | 
				
			||||||
							
								
								
									
										167
									
								
								Documentation/arm/Samsung/clksrc-change-registers.awk
									
										
									
									
									
										Executable file
									
								
							
							
						
						
									
										167
									
								
								Documentation/arm/Samsung/clksrc-change-registers.awk
									
										
									
									
									
										Executable file
									
								
							| 
						 | 
					@ -0,0 +1,167 @@
 | 
				
			||||||
 | 
					#!/usr/bin/awk -f
 | 
				
			||||||
 | 
					#
 | 
				
			||||||
 | 
					# Copyright 2010 Ben Dooks <ben-linux@fluff.org>
 | 
				
			||||||
 | 
					#
 | 
				
			||||||
 | 
					# Released under GPLv2
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					# example usage
 | 
				
			||||||
 | 
					# ./clksrc-change-registers.awk arch/arm/plat-s5pc1xx/include/plat/regs-clock.h < src > dst
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					function extract_value(s)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
					    eqat = index(s, "=")
 | 
				
			||||||
 | 
					    comat = index(s, ",")
 | 
				
			||||||
 | 
					    return substr(s, eqat+2, (comat-eqat)-2)
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					function remove_brackets(b)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
					    return substr(b, 2, length(b)-2)
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					function splitdefine(l, p)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
					    r = split(l, tp)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    p[0] = tp[2]
 | 
				
			||||||
 | 
					    p[1] = remove_brackets(tp[3])
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					function find_length(f)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
					    if (0)
 | 
				
			||||||
 | 
						printf "find_length " f "\n" > "/dev/stderr"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    if (f ~ /0x1/)
 | 
				
			||||||
 | 
						return 1
 | 
				
			||||||
 | 
					    else if (f ~ /0x3/)
 | 
				
			||||||
 | 
						return 2
 | 
				
			||||||
 | 
					    else if (f ~ /0x7/)
 | 
				
			||||||
 | 
						return 3
 | 
				
			||||||
 | 
					    else if (f ~ /0xf/)
 | 
				
			||||||
 | 
						return 4
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    printf "unknown legnth " f "\n" > "/dev/stderr"
 | 
				
			||||||
 | 
					    exit
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					function find_shift(s)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
					    id = index(s, "<")
 | 
				
			||||||
 | 
					    if (id <= 0) {
 | 
				
			||||||
 | 
						printf "cannot find shift " s "\n" > "/dev/stderr"
 | 
				
			||||||
 | 
						exit
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    return substr(s, id+2)
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					BEGIN {
 | 
				
			||||||
 | 
					    if (ARGC < 2) {
 | 
				
			||||||
 | 
						print "too few arguments" > "/dev/stderr"
 | 
				
			||||||
 | 
						exit
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					# read the header file and find the mask values that we will need
 | 
				
			||||||
 | 
					# to replace and create an associative array of values
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    while (getline line < ARGV[1] > 0) {
 | 
				
			||||||
 | 
						if (line ~ /\#define.*_MASK/ &&
 | 
				
			||||||
 | 
						    !(line ~ /S5PC100_EPLL_MASK/) &&
 | 
				
			||||||
 | 
						    !(line ~ /USB_SIG_MASK/)) {
 | 
				
			||||||
 | 
						    splitdefine(line, fields)
 | 
				
			||||||
 | 
						    name = fields[0]
 | 
				
			||||||
 | 
						    if (0)
 | 
				
			||||||
 | 
							printf "MASK " line "\n" > "/dev/stderr"
 | 
				
			||||||
 | 
						    dmask[name,0] = find_length(fields[1])
 | 
				
			||||||
 | 
						    dmask[name,1] = find_shift(fields[1])
 | 
				
			||||||
 | 
						    if (0)
 | 
				
			||||||
 | 
							printf "=> '" name "' LENGTH=" dmask[name,0] " SHIFT=" dmask[name,1] "\n" > "/dev/stderr"
 | 
				
			||||||
 | 
						} else {
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    delete ARGV[1]
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/clksrc_clk.*=.*{/ {
 | 
				
			||||||
 | 
					    shift=""
 | 
				
			||||||
 | 
					    mask=""
 | 
				
			||||||
 | 
					    divshift=""
 | 
				
			||||||
 | 
					    reg_div=""
 | 
				
			||||||
 | 
					    reg_src=""
 | 
				
			||||||
 | 
					    indent=1
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    print $0
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    for(; indent >= 1;) {
 | 
				
			||||||
 | 
						if ((getline line) <= 0) {
 | 
				
			||||||
 | 
						    printf "unexpected end of file" > "/dev/stderr"
 | 
				
			||||||
 | 
						    exit 1;
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						if (line ~ /\.shift/) {
 | 
				
			||||||
 | 
						    shift = extract_value(line)
 | 
				
			||||||
 | 
						} else if (line ~ /\.mask/) {
 | 
				
			||||||
 | 
						    mask = extract_value(line)
 | 
				
			||||||
 | 
						} else if (line ~ /\.reg_divider/) {
 | 
				
			||||||
 | 
						    reg_div = extract_value(line)
 | 
				
			||||||
 | 
						} else if (line ~ /\.reg_source/) {
 | 
				
			||||||
 | 
						    reg_src = extract_value(line)
 | 
				
			||||||
 | 
						} else if (line ~ /\.divider_shift/) {
 | 
				
			||||||
 | 
						    divshift = extract_value(line)
 | 
				
			||||||
 | 
						} else if (line ~ /{/) {
 | 
				
			||||||
 | 
							indent++
 | 
				
			||||||
 | 
							print line
 | 
				
			||||||
 | 
						    } else if (line ~ /}/) {
 | 
				
			||||||
 | 
						    indent--
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						    if (indent == 0) {
 | 
				
			||||||
 | 
							if (0) {
 | 
				
			||||||
 | 
							    printf "shift '" shift   "' ='" dmask[shift,0] "'\n" > "/dev/stderr"
 | 
				
			||||||
 | 
							    printf "mask  '" mask    "'\n" > "/dev/stderr"
 | 
				
			||||||
 | 
							    printf "dshft '" divshift "'\n" > "/dev/stderr"
 | 
				
			||||||
 | 
							    printf "rdiv  '" reg_div "'\n" > "/dev/stderr"
 | 
				
			||||||
 | 
							    printf "rsrc  '" reg_src "'\n" > "/dev/stderr"
 | 
				
			||||||
 | 
							}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
							generated = mask
 | 
				
			||||||
 | 
							sub(reg_src, reg_div, generated)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
							if (0) {
 | 
				
			||||||
 | 
							    printf "/* rsrc " reg_src " */\n"
 | 
				
			||||||
 | 
							    printf "/* rdiv " reg_div " */\n"
 | 
				
			||||||
 | 
							    printf "/* shift " shift " */\n"
 | 
				
			||||||
 | 
							    printf "/* mask " mask " */\n"
 | 
				
			||||||
 | 
							    printf "/* generated " generated " */\n"
 | 
				
			||||||
 | 
							}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
							if (reg_div != "") {
 | 
				
			||||||
 | 
							    printf "\t.reg_div = { "
 | 
				
			||||||
 | 
							    printf ".reg = " reg_div ", "
 | 
				
			||||||
 | 
							    printf ".shift = " dmask[generated,1] ", "
 | 
				
			||||||
 | 
							    printf ".size = " dmask[generated,0] ", "
 | 
				
			||||||
 | 
							    printf "},\n"
 | 
				
			||||||
 | 
							}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
							printf "\t.reg_src = { "
 | 
				
			||||||
 | 
							printf ".reg = " reg_src ", "
 | 
				
			||||||
 | 
							printf ".shift = " dmask[mask,1] ", "
 | 
				
			||||||
 | 
							printf ".size = " dmask[mask,0] ", "
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
							printf "},\n"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						    print line
 | 
				
			||||||
 | 
						} else {
 | 
				
			||||||
 | 
						    print line
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						if (0)
 | 
				
			||||||
 | 
						    printf indent ":" line "\n" > "/dev/stderr"
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// && ! /clksrc_clk.*=.*{/ { print $0 }
 | 
				
			||||||
| 
						 | 
					@ -159,42 +159,7 @@ two arguments:  the CDROM device, and the slot number to which you wish
 | 
				
			||||||
to change.  If the slot number is -1, the drive is unloaded.
 | 
					to change.  If the slot number is -1, the drive is unloaded.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
4. Compilation options
 | 
					4. Common problems
 | 
				
			||||||
----------------------
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
There are a few additional options which can be set when compiling the
 | 
					 | 
				
			||||||
driver.  Most people should not need to mess with any of these; they
 | 
					 | 
				
			||||||
are listed here simply for completeness.  A compilation option can be
 | 
					 | 
				
			||||||
enabled by adding a line of the form `#define <option> 1' to the top
 | 
					 | 
				
			||||||
of ide-cd.c.  All these options are disabled by default.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
VERBOSE_IDE_CD_ERRORS
 | 
					 | 
				
			||||||
  If this is set, ATAPI error codes will be translated into textual
 | 
					 | 
				
			||||||
  descriptions.  In addition, a dump is made of the command which
 | 
					 | 
				
			||||||
  provoked the error.  This is off by default to save the memory used
 | 
					 | 
				
			||||||
  by the (somewhat long) table of error descriptions.  
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
STANDARD_ATAPI
 | 
					 | 
				
			||||||
  If this is set, the code needed to deal with certain drives which do
 | 
					 | 
				
			||||||
  not properly implement the ATAPI spec will be disabled.  If you know
 | 
					 | 
				
			||||||
  your drive implements ATAPI properly, you can turn this on to get a
 | 
					 | 
				
			||||||
  slightly smaller kernel.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
NO_DOOR_LOCKING
 | 
					 | 
				
			||||||
  If this is set, the driver will never attempt to lock the door of
 | 
					 | 
				
			||||||
  the drive.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
CDROM_NBLOCKS_BUFFER
 | 
					 | 
				
			||||||
  This sets the size of the buffer to be used for a CDROMREADAUDIO
 | 
					 | 
				
			||||||
  ioctl.  The default is 8.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
TEST
 | 
					 | 
				
			||||||
  This currently enables an additional ioctl which enables a user-mode
 | 
					 | 
				
			||||||
  program to execute an arbitrary packet command.  See the source for
 | 
					 | 
				
			||||||
  details.  This should be left off unless you know what you're doing.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
5. Common problems
 | 
					 | 
				
			||||||
------------------
 | 
					------------------
 | 
				
			||||||
 | 
					
 | 
				
			||||||
This section discusses some common problems encountered when trying to
 | 
					This section discusses some common problems encountered when trying to
 | 
				
			||||||
| 
						 | 
					@ -371,7 +336,7 @@ f. Data corruption.
 | 
				
			||||||
    expense of low system performance.
 | 
					    expense of low system performance.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
6. cdchange.c
 | 
					5. cdchange.c
 | 
				
			||||||
-------------
 | 
					-------------
 | 
				
			||||||
 | 
					
 | 
				
			||||||
/*
 | 
					/*
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
							
								
								
									
										110
									
								
								Documentation/cgroups/cgroup_event_listener.c
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										110
									
								
								Documentation/cgroups/cgroup_event_listener.c
									
										
									
									
									
										Normal file
									
								
							| 
						 | 
					@ -0,0 +1,110 @@
 | 
				
			||||||
 | 
					/*
 | 
				
			||||||
 | 
					 * cgroup_event_listener.c - Simple listener of cgroup events
 | 
				
			||||||
 | 
					 *
 | 
				
			||||||
 | 
					 * Copyright (C) Kirill A. Shutemov <kirill@shutemov.name>
 | 
				
			||||||
 | 
					 */
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#include <assert.h>
 | 
				
			||||||
 | 
					#include <errno.h>
 | 
				
			||||||
 | 
					#include <fcntl.h>
 | 
				
			||||||
 | 
					#include <libgen.h>
 | 
				
			||||||
 | 
					#include <limits.h>
 | 
				
			||||||
 | 
					#include <stdio.h>
 | 
				
			||||||
 | 
					#include <string.h>
 | 
				
			||||||
 | 
					#include <unistd.h>
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#include <sys/eventfd.h>
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#define USAGE_STR "Usage: cgroup_event_listener <path-to-control-file> <args>\n"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					int main(int argc, char **argv)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
						int efd = -1;
 | 
				
			||||||
 | 
						int cfd = -1;
 | 
				
			||||||
 | 
						int event_control = -1;
 | 
				
			||||||
 | 
						char event_control_path[PATH_MAX];
 | 
				
			||||||
 | 
						char line[LINE_MAX];
 | 
				
			||||||
 | 
						int ret;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						if (argc != 3) {
 | 
				
			||||||
 | 
							fputs(USAGE_STR, stderr);
 | 
				
			||||||
 | 
							return 1;
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						cfd = open(argv[1], O_RDONLY);
 | 
				
			||||||
 | 
						if (cfd == -1) {
 | 
				
			||||||
 | 
							fprintf(stderr, "Cannot open %s: %s\n", argv[1],
 | 
				
			||||||
 | 
									strerror(errno));
 | 
				
			||||||
 | 
							goto out;
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						ret = snprintf(event_control_path, PATH_MAX, "%s/cgroup.event_control",
 | 
				
			||||||
 | 
								dirname(argv[1]));
 | 
				
			||||||
 | 
						if (ret >= PATH_MAX) {
 | 
				
			||||||
 | 
							fputs("Path to cgroup.event_control is too long\n", stderr);
 | 
				
			||||||
 | 
							goto out;
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						event_control = open(event_control_path, O_WRONLY);
 | 
				
			||||||
 | 
						if (event_control == -1) {
 | 
				
			||||||
 | 
							fprintf(stderr, "Cannot open %s: %s\n", event_control_path,
 | 
				
			||||||
 | 
									strerror(errno));
 | 
				
			||||||
 | 
							goto out;
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						efd = eventfd(0, 0);
 | 
				
			||||||
 | 
						if (efd == -1) {
 | 
				
			||||||
 | 
							perror("eventfd() failed");
 | 
				
			||||||
 | 
							goto out;
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						ret = snprintf(line, LINE_MAX, "%d %d %s", efd, cfd, argv[2]);
 | 
				
			||||||
 | 
						if (ret >= LINE_MAX) {
 | 
				
			||||||
 | 
							fputs("Arguments string is too long\n", stderr);
 | 
				
			||||||
 | 
							goto out;
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						ret = write(event_control, line, strlen(line) + 1);
 | 
				
			||||||
 | 
						if (ret == -1) {
 | 
				
			||||||
 | 
							perror("Cannot write to cgroup.event_control");
 | 
				
			||||||
 | 
							goto out;
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						while (1) {
 | 
				
			||||||
 | 
							uint64_t result;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
							ret = read(efd, &result, sizeof(result));
 | 
				
			||||||
 | 
							if (ret == -1) {
 | 
				
			||||||
 | 
								if (errno == EINTR)
 | 
				
			||||||
 | 
									continue;
 | 
				
			||||||
 | 
								perror("Cannot read from eventfd");
 | 
				
			||||||
 | 
								break;
 | 
				
			||||||
 | 
							}
 | 
				
			||||||
 | 
							assert(ret == sizeof(result));
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
							ret = access(event_control_path, W_OK);
 | 
				
			||||||
 | 
							if ((ret == -1) && (errno == ENOENT)) {
 | 
				
			||||||
 | 
									puts("The cgroup seems to have removed.");
 | 
				
			||||||
 | 
									ret = 0;
 | 
				
			||||||
 | 
									break;
 | 
				
			||||||
 | 
							}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
							if (ret == -1) {
 | 
				
			||||||
 | 
								perror("cgroup.event_control "
 | 
				
			||||||
 | 
										"is not accessable any more");
 | 
				
			||||||
 | 
								break;
 | 
				
			||||||
 | 
							}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
							printf("%s %s: crossed\n", argv[1], argv[2]);
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					out:
 | 
				
			||||||
 | 
						if (efd >= 0)
 | 
				
			||||||
 | 
							close(efd);
 | 
				
			||||||
 | 
						if (event_control >= 0)
 | 
				
			||||||
 | 
							close(event_control);
 | 
				
			||||||
 | 
						if (cfd >= 0)
 | 
				
			||||||
 | 
							close(cfd);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						return (ret != 0);
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
| 
						 | 
					@ -22,6 +22,8 @@ CONTENTS:
 | 
				
			||||||
2. Usage Examples and Syntax
 | 
					2. Usage Examples and Syntax
 | 
				
			||||||
  2.1 Basic Usage
 | 
					  2.1 Basic Usage
 | 
				
			||||||
  2.2 Attaching processes
 | 
					  2.2 Attaching processes
 | 
				
			||||||
 | 
					  2.3 Mounting hierarchies by name
 | 
				
			||||||
 | 
					  2.4 Notification API
 | 
				
			||||||
3. Kernel API
 | 
					3. Kernel API
 | 
				
			||||||
  3.1 Overview
 | 
					  3.1 Overview
 | 
				
			||||||
  3.2 Synchronization
 | 
					  3.2 Synchronization
 | 
				
			||||||
| 
						 | 
					@ -434,6 +436,25 @@ you give a subsystem a name.
 | 
				
			||||||
The name of the subsystem appears as part of the hierarchy description
 | 
					The name of the subsystem appears as part of the hierarchy description
 | 
				
			||||||
in /proc/mounts and /proc/<pid>/cgroups.
 | 
					in /proc/mounts and /proc/<pid>/cgroups.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					2.4 Notification API
 | 
				
			||||||
 | 
					--------------------
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					There is a mechanism that allows getting notifications about the changing
 | 
				
			||||||
 | 
					status of a cgroup.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					To register a new notification handler you need to:
 | 
				
			||||||
 | 
					 - create a file descriptor for event notification using eventfd(2);
 | 
				
			||||||
 | 
					 - open a control file to be monitored (e.g. memory.usage_in_bytes);
 | 
				
			||||||
 | 
					 - write "<event_fd> <control_fd> <args>" to cgroup.event_control.
 | 
				
			||||||
 | 
					   Interpretation of args is defined by control file implementation;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					eventfd will be woken up by control file implementation or when the
 | 
				
			||||||
 | 
					cgroup is removed.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					To unregister a notification handler, just close the eventfd.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					NOTE: Support of notifications should be implemented for the control
 | 
				
			||||||
 | 
					file. See documentation for the subsystem.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
3. Kernel API
 | 
					3. Kernel API
 | 
				
			||||||
=============
 | 
					=============
 | 
				
			||||||
| 
						 | 
					@ -488,6 +509,11 @@ Each subsystem should:
 | 
				
			||||||
- add an entry in linux/cgroup_subsys.h
 | 
					- add an entry in linux/cgroup_subsys.h
 | 
				
			||||||
- define a cgroup_subsys object called <name>_subsys
 | 
					- define a cgroup_subsys object called <name>_subsys
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					If a subsystem can be compiled as a module, it should also have in its
 | 
				
			||||||
 | 
					module initcall a call to cgroup_load_subsys(), and in its exitcall a
 | 
				
			||||||
 | 
					call to cgroup_unload_subsys(). It should also set its_subsys.module =
 | 
				
			||||||
 | 
					THIS_MODULE in its .c file.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
Each subsystem may export the following methods. The only mandatory
 | 
					Each subsystem may export the following methods. The only mandatory
 | 
				
			||||||
methods are create/destroy. Any others that are null are presumed to
 | 
					methods are create/destroy. Any others that are null are presumed to
 | 
				
			||||||
be successful no-ops.
 | 
					be successful no-ops.
 | 
				
			||||||
| 
						 | 
					@ -536,10 +562,21 @@ returns an error, this will abort the attach operation.  If a NULL
 | 
				
			||||||
task is passed, then a successful result indicates that *any*
 | 
					task is passed, then a successful result indicates that *any*
 | 
				
			||||||
unspecified task can be moved into the cgroup. Note that this isn't
 | 
					unspecified task can be moved into the cgroup. Note that this isn't
 | 
				
			||||||
called on a fork. If this method returns 0 (success) then this should
 | 
					called on a fork. If this method returns 0 (success) then this should
 | 
				
			||||||
remain valid while the caller holds cgroup_mutex. If threadgroup is
 | 
					remain valid while the caller holds cgroup_mutex and it is ensured that either
 | 
				
			||||||
 | 
					attach() or cancel_attach() will be called in future. If threadgroup is
 | 
				
			||||||
true, then a successful result indicates that all threads in the given
 | 
					true, then a successful result indicates that all threads in the given
 | 
				
			||||||
thread's threadgroup can be moved together.
 | 
					thread's threadgroup can be moved together.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					void cancel_attach(struct cgroup_subsys *ss, struct cgroup *cgrp,
 | 
				
			||||||
 | 
						       struct task_struct *task, bool threadgroup)
 | 
				
			||||||
 | 
					(cgroup_mutex held by caller)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Called when a task attach operation has failed after can_attach() has succeeded.
 | 
				
			||||||
 | 
					A subsystem whose can_attach() has some side-effects should provide this
 | 
				
			||||||
 | 
					function, so that the subsystem can implement a rollback. Otherwise it is not necessary.
 | 
				
			||||||
 | 
					This will be called only for subsystems whose can_attach() operations have
 | 
				
			||||||
 | 
					succeeded.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
void attach(struct cgroup_subsys *ss, struct cgroup *cgrp,
 | 
					void attach(struct cgroup_subsys *ss, struct cgroup *cgrp,
 | 
				
			||||||
	    struct cgroup *old_cgrp, struct task_struct *task,
 | 
						    struct cgroup *old_cgrp, struct task_struct *task,
 | 
				
			||||||
	    bool threadgroup)
 | 
						    bool threadgroup)
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -168,20 +168,20 @@ Each cpuset is represented by a directory in the cgroup file system
 | 
				
			||||||
containing (on top of the standard cgroup files) the following
 | 
					containing (on top of the standard cgroup files) the following
 | 
				
			||||||
files describing that cpuset:
 | 
					files describing that cpuset:
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 - cpus: list of CPUs in that cpuset
 | 
					 - cpuset.cpus: list of CPUs in that cpuset
 | 
				
			||||||
 - mems: list of Memory Nodes in that cpuset
 | 
					 - cpuset.mems: list of Memory Nodes in that cpuset
 | 
				
			||||||
 - memory_migrate flag: if set, move pages to cpusets nodes
 | 
					 - cpuset.memory_migrate flag: if set, move pages to cpusets nodes
 | 
				
			||||||
 - cpu_exclusive flag: is cpu placement exclusive?
 | 
					 - cpuset.cpu_exclusive flag: is cpu placement exclusive?
 | 
				
			||||||
 - mem_exclusive flag: is memory placement exclusive?
 | 
					 - cpuset.mem_exclusive flag: is memory placement exclusive?
 | 
				
			||||||
 - mem_hardwall flag:  is memory allocation hardwalled
 | 
					 - cpuset.mem_hardwall flag:  is memory allocation hardwalled
 | 
				
			||||||
 - memory_pressure: measure of how much paging pressure in cpuset
 | 
					 - cpuset.memory_pressure: measure of how much paging pressure in cpuset
 | 
				
			||||||
 - memory_spread_page flag: if set, spread page cache evenly on allowed nodes
 | 
					 - cpuset.memory_spread_page flag: if set, spread page cache evenly on allowed nodes
 | 
				
			||||||
 - memory_spread_slab flag: if set, spread slab cache evenly on allowed nodes
 | 
					 - cpuset.memory_spread_slab flag: if set, spread slab cache evenly on allowed nodes
 | 
				
			||||||
 - sched_load_balance flag: if set, load balance within CPUs on that cpuset
 | 
					 - cpuset.sched_load_balance flag: if set, load balance within CPUs on that cpuset
 | 
				
			||||||
 - sched_relax_domain_level: the searching range when migrating tasks
 | 
					 - cpuset.sched_relax_domain_level: the searching range when migrating tasks
 | 
				
			||||||
 | 
					
 | 
				
			||||||
In addition, the root cpuset only has the following file:
 | 
					In addition, the root cpuset only has the following file:
 | 
				
			||||||
 - memory_pressure_enabled flag: compute memory_pressure?
 | 
					 - cpuset.memory_pressure_enabled flag: compute memory_pressure?
 | 
				
			||||||
 | 
					
 | 
				
			||||||
New cpusets are created using the mkdir system call or shell
 | 
					New cpusets are created using the mkdir system call or shell
 | 
				
			||||||
command.  The properties of a cpuset, such as its flags, allowed
 | 
					command.  The properties of a cpuset, such as its flags, allowed
 | 
				
			||||||
| 
						 | 
					@ -229,7 +229,7 @@ If a cpuset is cpu or mem exclusive, no other cpuset, other than
 | 
				
			||||||
a direct ancestor or descendant, may share any of the same CPUs or
 | 
					a direct ancestor or descendant, may share any of the same CPUs or
 | 
				
			||||||
Memory Nodes.
 | 
					Memory Nodes.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
A cpuset that is mem_exclusive *or* mem_hardwall is "hardwalled",
 | 
					A cpuset that is cpuset.mem_exclusive *or* cpuset.mem_hardwall is "hardwalled",
 | 
				
			||||||
i.e. it restricts kernel allocations for page, buffer and other data
 | 
					i.e. it restricts kernel allocations for page, buffer and other data
 | 
				
			||||||
commonly shared by the kernel across multiple users.  All cpusets,
 | 
					commonly shared by the kernel across multiple users.  All cpusets,
 | 
				
			||||||
whether hardwalled or not, restrict allocations of memory for user
 | 
					whether hardwalled or not, restrict allocations of memory for user
 | 
				
			||||||
| 
						 | 
					@ -304,15 +304,15 @@ times 1000.
 | 
				
			||||||
---------------------------
 | 
					---------------------------
 | 
				
			||||||
There are two boolean flag files per cpuset that control where the
 | 
					There are two boolean flag files per cpuset that control where the
 | 
				
			||||||
kernel allocates pages for the file system buffers and related in
 | 
					kernel allocates pages for the file system buffers and related in
 | 
				
			||||||
kernel data structures.  They are called 'memory_spread_page' and
 | 
					kernel data structures.  They are called 'cpuset.memory_spread_page' and
 | 
				
			||||||
'memory_spread_slab'.
 | 
					'cpuset.memory_spread_slab'.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
If the per-cpuset boolean flag file 'memory_spread_page' is set, then
 | 
					If the per-cpuset boolean flag file 'cpuset.memory_spread_page' is set, then
 | 
				
			||||||
the kernel will spread the file system buffers (page cache) evenly
 | 
					the kernel will spread the file system buffers (page cache) evenly
 | 
				
			||||||
over all the nodes that the faulting task is allowed to use, instead
 | 
					over all the nodes that the faulting task is allowed to use, instead
 | 
				
			||||||
of preferring to put those pages on the node where the task is running.
 | 
					of preferring to put those pages on the node where the task is running.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
If the per-cpuset boolean flag file 'memory_spread_slab' is set,
 | 
					If the per-cpuset boolean flag file 'cpuset.memory_spread_slab' is set,
 | 
				
			||||||
then the kernel will spread some file system related slab caches,
 | 
					then the kernel will spread some file system related slab caches,
 | 
				
			||||||
such as for inodes and dentries evenly over all the nodes that the
 | 
					such as for inodes and dentries evenly over all the nodes that the
 | 
				
			||||||
faulting task is allowed to use, instead of preferring to put those
 | 
					faulting task is allowed to use, instead of preferring to put those
 | 
				
			||||||
| 
						 | 
					@ -337,21 +337,21 @@ their containing tasks memory spread settings.  If memory spreading
 | 
				
			||||||
is turned off, then the currently specified NUMA mempolicy once again
 | 
					is turned off, then the currently specified NUMA mempolicy once again
 | 
				
			||||||
applies to memory page allocations.
 | 
					applies to memory page allocations.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
Both 'memory_spread_page' and 'memory_spread_slab' are boolean flag
 | 
					Both 'cpuset.memory_spread_page' and 'cpuset.memory_spread_slab' are boolean flag
 | 
				
			||||||
files.  By default they contain "0", meaning that the feature is off
 | 
					files.  By default they contain "0", meaning that the feature is off
 | 
				
			||||||
for that cpuset.  If a "1" is written to that file, then that turns
 | 
					for that cpuset.  If a "1" is written to that file, then that turns
 | 
				
			||||||
the named feature on.
 | 
					the named feature on.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
The implementation is simple.
 | 
					The implementation is simple.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
Setting the flag 'memory_spread_page' turns on a per-process flag
 | 
					Setting the flag 'cpuset.memory_spread_page' turns on a per-process flag
 | 
				
			||||||
PF_SPREAD_PAGE for each task that is in that cpuset or subsequently
 | 
					PF_SPREAD_PAGE for each task that is in that cpuset or subsequently
 | 
				
			||||||
joins that cpuset.  The page allocation calls for the page cache
 | 
					joins that cpuset.  The page allocation calls for the page cache
 | 
				
			||||||
is modified to perform an inline check for this PF_SPREAD_PAGE task
 | 
					is modified to perform an inline check for this PF_SPREAD_PAGE task
 | 
				
			||||||
flag, and if set, a call to a new routine cpuset_mem_spread_node()
 | 
					flag, and if set, a call to a new routine cpuset_mem_spread_node()
 | 
				
			||||||
returns the node to prefer for the allocation.
 | 
					returns the node to prefer for the allocation.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
Similarly, setting 'memory_spread_slab' turns on the flag
 | 
					Similarly, setting 'cpuset.memory_spread_slab' turns on the flag
 | 
				
			||||||
PF_SPREAD_SLAB, and appropriately marked slab caches will allocate
 | 
					PF_SPREAD_SLAB, and appropriately marked slab caches will allocate
 | 
				
			||||||
pages from the node returned by cpuset_mem_spread_node().
 | 
					pages from the node returned by cpuset_mem_spread_node().
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					@ -404,24 +404,24 @@ the following two situations:
 | 
				
			||||||
    system overhead on those CPUs, including avoiding task load
 | 
					    system overhead on those CPUs, including avoiding task load
 | 
				
			||||||
    balancing if that is not needed.
 | 
					    balancing if that is not needed.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
When the per-cpuset flag "sched_load_balance" is enabled (the default
 | 
					When the per-cpuset flag "cpuset.sched_load_balance" is enabled (the default
 | 
				
			||||||
setting), it requests that all the CPUs in that cpusets allowed 'cpus'
 | 
					setting), it requests that all the CPUs in that cpusets allowed 'cpuset.cpus'
 | 
				
			||||||
be contained in a single sched domain, ensuring that load balancing
 | 
					be contained in a single sched domain, ensuring that load balancing
 | 
				
			||||||
can move a task (not otherwised pinned, as by sched_setaffinity)
 | 
					can move a task (not otherwised pinned, as by sched_setaffinity)
 | 
				
			||||||
from any CPU in that cpuset to any other.
 | 
					from any CPU in that cpuset to any other.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
When the per-cpuset flag "sched_load_balance" is disabled, then the
 | 
					When the per-cpuset flag "cpuset.sched_load_balance" is disabled, then the
 | 
				
			||||||
scheduler will avoid load balancing across the CPUs in that cpuset,
 | 
					scheduler will avoid load balancing across the CPUs in that cpuset,
 | 
				
			||||||
--except-- in so far as is necessary because some overlapping cpuset
 | 
					--except-- in so far as is necessary because some overlapping cpuset
 | 
				
			||||||
has "sched_load_balance" enabled.
 | 
					has "sched_load_balance" enabled.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
So, for example, if the top cpuset has the flag "sched_load_balance"
 | 
					So, for example, if the top cpuset has the flag "cpuset.sched_load_balance"
 | 
				
			||||||
enabled, then the scheduler will have one sched domain covering all
 | 
					enabled, then the scheduler will have one sched domain covering all
 | 
				
			||||||
CPUs, and the setting of the "sched_load_balance" flag in any other
 | 
					CPUs, and the setting of the "cpuset.sched_load_balance" flag in any other
 | 
				
			||||||
cpusets won't matter, as we're already fully load balancing.
 | 
					cpusets won't matter, as we're already fully load balancing.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
Therefore in the above two situations, the top cpuset flag
 | 
					Therefore in the above two situations, the top cpuset flag
 | 
				
			||||||
"sched_load_balance" should be disabled, and only some of the smaller,
 | 
					"cpuset.sched_load_balance" should be disabled, and only some of the smaller,
 | 
				
			||||||
child cpusets have this flag enabled.
 | 
					child cpusets have this flag enabled.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
When doing this, you don't usually want to leave any unpinned tasks in
 | 
					When doing this, you don't usually want to leave any unpinned tasks in
 | 
				
			||||||
| 
						 | 
					@ -433,7 +433,7 @@ scheduler might not consider the possibility of load balancing that
 | 
				
			||||||
task to that underused CPU.
 | 
					task to that underused CPU.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
Of course, tasks pinned to a particular CPU can be left in a cpuset
 | 
					Of course, tasks pinned to a particular CPU can be left in a cpuset
 | 
				
			||||||
that disables "sched_load_balance" as those tasks aren't going anywhere
 | 
					that disables "cpuset.sched_load_balance" as those tasks aren't going anywhere
 | 
				
			||||||
else anyway.
 | 
					else anyway.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
There is an impedance mismatch here, between cpusets and sched domains.
 | 
					There is an impedance mismatch here, between cpusets and sched domains.
 | 
				
			||||||
| 
						 | 
					@ -443,19 +443,19 @@ overlap and each CPU is in at most one sched domain.
 | 
				
			||||||
It is necessary for sched domains to be flat because load balancing
 | 
					It is necessary for sched domains to be flat because load balancing
 | 
				
			||||||
across partially overlapping sets of CPUs would risk unstable dynamics
 | 
					across partially overlapping sets of CPUs would risk unstable dynamics
 | 
				
			||||||
that would be beyond our understanding.  So if each of two partially
 | 
					that would be beyond our understanding.  So if each of two partially
 | 
				
			||||||
overlapping cpusets enables the flag 'sched_load_balance', then we
 | 
					overlapping cpusets enables the flag 'cpuset.sched_load_balance', then we
 | 
				
			||||||
form a single sched domain that is a superset of both.  We won't move
 | 
					form a single sched domain that is a superset of both.  We won't move
 | 
				
			||||||
a task to a CPU outside it cpuset, but the scheduler load balancing
 | 
					a task to a CPU outside it cpuset, but the scheduler load balancing
 | 
				
			||||||
code might waste some compute cycles considering that possibility.
 | 
					code might waste some compute cycles considering that possibility.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
This mismatch is why there is not a simple one-to-one relation
 | 
					This mismatch is why there is not a simple one-to-one relation
 | 
				
			||||||
between which cpusets have the flag "sched_load_balance" enabled,
 | 
					between which cpusets have the flag "cpuset.sched_load_balance" enabled,
 | 
				
			||||||
and the sched domain configuration.  If a cpuset enables the flag, it
 | 
					and the sched domain configuration.  If a cpuset enables the flag, it
 | 
				
			||||||
will get balancing across all its CPUs, but if it disables the flag,
 | 
					will get balancing across all its CPUs, but if it disables the flag,
 | 
				
			||||||
it will only be assured of no load balancing if no other overlapping
 | 
					it will only be assured of no load balancing if no other overlapping
 | 
				
			||||||
cpuset enables the flag.
 | 
					cpuset enables the flag.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
If two cpusets have partially overlapping 'cpus' allowed, and only
 | 
					If two cpusets have partially overlapping 'cpuset.cpus' allowed, and only
 | 
				
			||||||
one of them has this flag enabled, then the other may find its
 | 
					one of them has this flag enabled, then the other may find its
 | 
				
			||||||
tasks only partially load balanced, just on the overlapping CPUs.
 | 
					tasks only partially load balanced, just on the overlapping CPUs.
 | 
				
			||||||
This is just the general case of the top_cpuset example given a few
 | 
					This is just the general case of the top_cpuset example given a few
 | 
				
			||||||
| 
						 | 
					@ -468,23 +468,23 @@ load balancing to the other CPUs.
 | 
				
			||||||
1.7.1 sched_load_balance implementation details.
 | 
					1.7.1 sched_load_balance implementation details.
 | 
				
			||||||
------------------------------------------------
 | 
					------------------------------------------------
 | 
				
			||||||
 | 
					
 | 
				
			||||||
The per-cpuset flag 'sched_load_balance' defaults to enabled (contrary
 | 
					The per-cpuset flag 'cpuset.sched_load_balance' defaults to enabled (contrary
 | 
				
			||||||
to most cpuset flags.)  When enabled for a cpuset, the kernel will
 | 
					to most cpuset flags.)  When enabled for a cpuset, the kernel will
 | 
				
			||||||
ensure that it can load balance across all the CPUs in that cpuset
 | 
					ensure that it can load balance across all the CPUs in that cpuset
 | 
				
			||||||
(makes sure that all the CPUs in the cpus_allowed of that cpuset are
 | 
					(makes sure that all the CPUs in the cpus_allowed of that cpuset are
 | 
				
			||||||
in the same sched domain.)
 | 
					in the same sched domain.)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
If two overlapping cpusets both have 'sched_load_balance' enabled,
 | 
					If two overlapping cpusets both have 'cpuset.sched_load_balance' enabled,
 | 
				
			||||||
then they will be (must be) both in the same sched domain.
 | 
					then they will be (must be) both in the same sched domain.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
If, as is the default, the top cpuset has 'sched_load_balance' enabled,
 | 
					If, as is the default, the top cpuset has 'cpuset.sched_load_balance' enabled,
 | 
				
			||||||
then by the above that means there is a single sched domain covering
 | 
					then by the above that means there is a single sched domain covering
 | 
				
			||||||
the whole system, regardless of any other cpuset settings.
 | 
					the whole system, regardless of any other cpuset settings.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
The kernel commits to user space that it will avoid load balancing
 | 
					The kernel commits to user space that it will avoid load balancing
 | 
				
			||||||
where it can.  It will pick as fine a granularity partition of sched
 | 
					where it can.  It will pick as fine a granularity partition of sched
 | 
				
			||||||
domains as it can while still providing load balancing for any set
 | 
					domains as it can while still providing load balancing for any set
 | 
				
			||||||
of CPUs allowed to a cpuset having 'sched_load_balance' enabled.
 | 
					of CPUs allowed to a cpuset having 'cpuset.sched_load_balance' enabled.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
The internal kernel cpuset to scheduler interface passes from the
 | 
					The internal kernel cpuset to scheduler interface passes from the
 | 
				
			||||||
cpuset code to the scheduler code a partition of the load balanced
 | 
					cpuset code to the scheduler code a partition of the load balanced
 | 
				
			||||||
| 
						 | 
					@ -495,9 +495,9 @@ all the CPUs that must be load balanced.
 | 
				
			||||||
The cpuset code builds a new such partition and passes it to the
 | 
					The cpuset code builds a new such partition and passes it to the
 | 
				
			||||||
scheduler sched domain setup code, to have the sched domains rebuilt
 | 
					scheduler sched domain setup code, to have the sched domains rebuilt
 | 
				
			||||||
as necessary, whenever:
 | 
					as necessary, whenever:
 | 
				
			||||||
 - the 'sched_load_balance' flag of a cpuset with non-empty CPUs changes,
 | 
					 - the 'cpuset.sched_load_balance' flag of a cpuset with non-empty CPUs changes,
 | 
				
			||||||
 - or CPUs come or go from a cpuset with this flag enabled,
 | 
					 - or CPUs come or go from a cpuset with this flag enabled,
 | 
				
			||||||
 - or 'sched_relax_domain_level' value of a cpuset with non-empty CPUs
 | 
					 - or 'cpuset.sched_relax_domain_level' value of a cpuset with non-empty CPUs
 | 
				
			||||||
   and with this flag enabled changes,
 | 
					   and with this flag enabled changes,
 | 
				
			||||||
 - or a cpuset with non-empty CPUs and with this flag enabled is removed,
 | 
					 - or a cpuset with non-empty CPUs and with this flag enabled is removed,
 | 
				
			||||||
 - or a cpu is offlined/onlined.
 | 
					 - or a cpu is offlined/onlined.
 | 
				
			||||||
| 
						 | 
					@ -542,7 +542,7 @@ As the result, task B on CPU X need to wait task A or wait load balance
 | 
				
			||||||
on the next tick.  For some applications in special situation, waiting
 | 
					on the next tick.  For some applications in special situation, waiting
 | 
				
			||||||
1 tick may be too long.
 | 
					1 tick may be too long.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
The 'sched_relax_domain_level' file allows you to request changing
 | 
					The 'cpuset.sched_relax_domain_level' file allows you to request changing
 | 
				
			||||||
this searching range as you like.  This file takes int value which
 | 
					this searching range as you like.  This file takes int value which
 | 
				
			||||||
indicates size of searching range in levels ideally as follows,
 | 
					indicates size of searching range in levels ideally as follows,
 | 
				
			||||||
otherwise initial value -1 that indicates the cpuset has no request.
 | 
					otherwise initial value -1 that indicates the cpuset has no request.
 | 
				
			||||||
| 
						 | 
					@ -559,8 +559,8 @@ The system default is architecture dependent.  The system default
 | 
				
			||||||
can be changed using the relax_domain_level= boot parameter.
 | 
					can be changed using the relax_domain_level= boot parameter.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
This file is per-cpuset and affect the sched domain where the cpuset
 | 
					This file is per-cpuset and affect the sched domain where the cpuset
 | 
				
			||||||
belongs to.  Therefore if the flag 'sched_load_balance' of a cpuset
 | 
					belongs to.  Therefore if the flag 'cpuset.sched_load_balance' of a cpuset
 | 
				
			||||||
is disabled, then 'sched_relax_domain_level' have no effect since
 | 
					is disabled, then 'cpuset.sched_relax_domain_level' have no effect since
 | 
				
			||||||
there is no sched domain belonging the cpuset.
 | 
					there is no sched domain belonging the cpuset.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
If multiple cpusets are overlapping and hence they form a single sched
 | 
					If multiple cpusets are overlapping and hence they form a single sched
 | 
				
			||||||
| 
						 | 
					@ -607,9 +607,9 @@ from one cpuset to another, then the kernel will adjust the tasks
 | 
				
			||||||
memory placement, as above, the next time that the kernel attempts
 | 
					memory placement, as above, the next time that the kernel attempts
 | 
				
			||||||
to allocate a page of memory for that task.
 | 
					to allocate a page of memory for that task.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
If a cpuset has its 'cpus' modified, then each task in that cpuset
 | 
					If a cpuset has its 'cpuset.cpus' modified, then each task in that cpuset
 | 
				
			||||||
will have its allowed CPU placement changed immediately.  Similarly,
 | 
					will have its allowed CPU placement changed immediately.  Similarly,
 | 
				
			||||||
if a tasks pid is written to another cpusets 'tasks' file, then its
 | 
					if a tasks pid is written to another cpusets 'tasks' file, then its
 | 
				
			||||||
allowed CPU placement is changed immediately.  If such a task had been
 | 
					allowed CPU placement is changed immediately.  If such a task had been
 | 
				
			||||||
bound to some subset of its cpuset using the sched_setaffinity() call,
 | 
					bound to some subset of its cpuset using the sched_setaffinity() call,
 | 
				
			||||||
the task will be allowed to run on any CPU allowed in its new cpuset,
 | 
					the task will be allowed to run on any CPU allowed in its new cpuset,
 | 
				
			||||||
| 
						 | 
					@ -622,8 +622,8 @@ and the processor placement is updated immediately.
 | 
				
			||||||
Normally, once a page is allocated (given a physical page
 | 
					Normally, once a page is allocated (given a physical page
 | 
				
			||||||
of main memory) then that page stays on whatever node it
 | 
					of main memory) then that page stays on whatever node it
 | 
				
			||||||
was allocated, so long as it remains allocated, even if the
 | 
					was allocated, so long as it remains allocated, even if the
 | 
				
			||||||
cpusets memory placement policy 'mems' subsequently changes.
 | 
					cpusets memory placement policy 'cpuset.mems' subsequently changes.
 | 
				
			||||||
If the cpuset flag file 'memory_migrate' is set true, then when
 | 
					If the cpuset flag file 'cpuset.memory_migrate' is set true, then when
 | 
				
			||||||
tasks are attached to that cpuset, any pages that task had
 | 
					tasks are attached to that cpuset, any pages that task had
 | 
				
			||||||
allocated to it on nodes in its previous cpuset are migrated
 | 
					allocated to it on nodes in its previous cpuset are migrated
 | 
				
			||||||
to the tasks new cpuset. The relative placement of the page within
 | 
					to the tasks new cpuset. The relative placement of the page within
 | 
				
			||||||
| 
						 | 
					@ -631,12 +631,12 @@ the cpuset is preserved during these migration operations if possible.
 | 
				
			||||||
For example if the page was on the second valid node of the prior cpuset
 | 
					For example if the page was on the second valid node of the prior cpuset
 | 
				
			||||||
then the page will be placed on the second valid node of the new cpuset.
 | 
					then the page will be placed on the second valid node of the new cpuset.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
Also if 'memory_migrate' is set true, then if that cpusets
 | 
					Also if 'cpuset.memory_migrate' is set true, then if that cpusets
 | 
				
			||||||
'mems' file is modified, pages allocated to tasks in that
 | 
					'cpuset.mems' file is modified, pages allocated to tasks in that
 | 
				
			||||||
cpuset, that were on nodes in the previous setting of 'mems',
 | 
					cpuset, that were on nodes in the previous setting of 'cpuset.mems',
 | 
				
			||||||
will be moved to nodes in the new setting of 'mems.'
 | 
					will be moved to nodes in the new setting of 'mems.'
 | 
				
			||||||
Pages that were not in the tasks prior cpuset, or in the cpusets
 | 
					Pages that were not in the tasks prior cpuset, or in the cpusets
 | 
				
			||||||
prior 'mems' setting, will not be moved.
 | 
					prior 'cpuset.mems' setting, will not be moved.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
There is an exception to the above.  If hotplug functionality is used
 | 
					There is an exception to the above.  If hotplug functionality is used
 | 
				
			||||||
to remove all the CPUs that are currently assigned to a cpuset,
 | 
					to remove all the CPUs that are currently assigned to a cpuset,
 | 
				
			||||||
| 
						 | 
					@ -678,8 +678,8 @@ and then start a subshell 'sh' in that cpuset:
 | 
				
			||||||
  cd /dev/cpuset
 | 
					  cd /dev/cpuset
 | 
				
			||||||
  mkdir Charlie
 | 
					  mkdir Charlie
 | 
				
			||||||
  cd Charlie
 | 
					  cd Charlie
 | 
				
			||||||
  /bin/echo 2-3 > cpus
 | 
					  /bin/echo 2-3 > cpuset.cpus
 | 
				
			||||||
  /bin/echo 1 > mems
 | 
					  /bin/echo 1 > cpuset.mems
 | 
				
			||||||
  /bin/echo $$ > tasks
 | 
					  /bin/echo $$ > tasks
 | 
				
			||||||
  sh
 | 
					  sh
 | 
				
			||||||
  # The subshell 'sh' is now running in cpuset Charlie
 | 
					  # The subshell 'sh' is now running in cpuset Charlie
 | 
				
			||||||
| 
						 | 
					@ -725,10 +725,13 @@ Now you want to do something with this cpuset.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
In this directory you can find several files:
 | 
					In this directory you can find several files:
 | 
				
			||||||
# ls
 | 
					# ls
 | 
				
			||||||
cpu_exclusive  memory_migrate      mems                      tasks
 | 
					cpuset.cpu_exclusive       cpuset.memory_spread_slab
 | 
				
			||||||
cpus           memory_pressure     notify_on_release
 | 
					cpuset.cpus                cpuset.mems
 | 
				
			||||||
mem_exclusive  memory_spread_page  sched_load_balance
 | 
					cpuset.mem_exclusive       cpuset.sched_load_balance
 | 
				
			||||||
mem_hardwall   memory_spread_slab  sched_relax_domain_level
 | 
					cpuset.mem_hardwall        cpuset.sched_relax_domain_level
 | 
				
			||||||
 | 
					cpuset.memory_migrate      notify_on_release
 | 
				
			||||||
 | 
					cpuset.memory_pressure     tasks
 | 
				
			||||||
 | 
					cpuset.memory_spread_page
 | 
				
			||||||
 | 
					
 | 
				
			||||||
Reading them will give you information about the state of this cpuset:
 | 
					Reading them will give you information about the state of this cpuset:
 | 
				
			||||||
the CPUs and Memory Nodes it can use, the processes that are using
 | 
					the CPUs and Memory Nodes it can use, the processes that are using
 | 
				
			||||||
| 
						 | 
					@ -736,13 +739,13 @@ it, its properties.  By writing to these files you can manipulate
 | 
				
			||||||
the cpuset.
 | 
					the cpuset.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
Set some flags:
 | 
					Set some flags:
 | 
				
			||||||
# /bin/echo 1 > cpu_exclusive
 | 
					# /bin/echo 1 > cpuset.cpu_exclusive
 | 
				
			||||||
 | 
					
 | 
				
			||||||
Add some cpus:
 | 
					Add some cpus:
 | 
				
			||||||
# /bin/echo 0-7 > cpus
 | 
					# /bin/echo 0-7 > cpuset.cpus
 | 
				
			||||||
 | 
					
 | 
				
			||||||
Add some mems:
 | 
					Add some mems:
 | 
				
			||||||
# /bin/echo 0-7 > mems
 | 
					# /bin/echo 0-7 > cpuset.mems
 | 
				
			||||||
 | 
					
 | 
				
			||||||
Now attach your shell to this cpuset:
 | 
					Now attach your shell to this cpuset:
 | 
				
			||||||
# /bin/echo $$ > tasks
 | 
					# /bin/echo $$ > tasks
 | 
				
			||||||
| 
						 | 
					@ -774,28 +777,28 @@ echo "/sbin/cpuset_release_agent" > /dev/cpuset/release_agent
 | 
				
			||||||
This is the syntax to use when writing in the cpus or mems files
 | 
					This is the syntax to use when writing in the cpus or mems files
 | 
				
			||||||
in cpuset directories:
 | 
					in cpuset directories:
 | 
				
			||||||
 | 
					
 | 
				
			||||||
# /bin/echo 1-4 > cpus		-> set cpus list to cpus 1,2,3,4
 | 
					# /bin/echo 1-4 > cpuset.cpus		-> set cpus list to cpus 1,2,3,4
 | 
				
			||||||
# /bin/echo 1,2,3,4 > cpus	-> set cpus list to cpus 1,2,3,4
 | 
					# /bin/echo 1,2,3,4 > cpuset.cpus	-> set cpus list to cpus 1,2,3,4
 | 
				
			||||||
 | 
					
 | 
				
			||||||
To add a CPU to a cpuset, write the new list of CPUs including the
 | 
					To add a CPU to a cpuset, write the new list of CPUs including the
 | 
				
			||||||
CPU to be added. To add 6 to the above cpuset:
 | 
					CPU to be added. To add 6 to the above cpuset:
 | 
				
			||||||
 | 
					
 | 
				
			||||||
# /bin/echo 1-4,6 > cpus	-> set cpus list to cpus 1,2,3,4,6
 | 
					# /bin/echo 1-4,6 > cpuset.cpus	-> set cpus list to cpus 1,2,3,4,6
 | 
				
			||||||
 | 
					
 | 
				
			||||||
Similarly to remove a CPU from a cpuset, write the new list of CPUs
 | 
					Similarly to remove a CPU from a cpuset, write the new list of CPUs
 | 
				
			||||||
without the CPU to be removed.
 | 
					without the CPU to be removed.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
To remove all the CPUs:
 | 
					To remove all the CPUs:
 | 
				
			||||||
 | 
					
 | 
				
			||||||
# /bin/echo "" > cpus		-> clear cpus list
 | 
					# /bin/echo "" > cpuset.cpus		-> clear cpus list
 | 
				
			||||||
 | 
					
 | 
				
			||||||
2.3 Setting flags
 | 
					2.3 Setting flags
 | 
				
			||||||
-----------------
 | 
					-----------------
 | 
				
			||||||
 | 
					
 | 
				
			||||||
The syntax is very simple:
 | 
					The syntax is very simple:
 | 
				
			||||||
 | 
					
 | 
				
			||||||
# /bin/echo 1 > cpu_exclusive 	-> set flag 'cpu_exclusive'
 | 
					# /bin/echo 1 > cpuset.cpu_exclusive 	-> set flag 'cpuset.cpu_exclusive'
 | 
				
			||||||
# /bin/echo 0 > cpu_exclusive 	-> unset flag 'cpu_exclusive'
 | 
					# /bin/echo 0 > cpuset.cpu_exclusive 	-> unset flag 'cpuset.cpu_exclusive'
 | 
				
			||||||
 | 
					
 | 
				
			||||||
2.4 Attaching processes
 | 
					2.4 Attaching processes
 | 
				
			||||||
-----------------------
 | 
					-----------------------
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -1,6 +1,6 @@
 | 
				
			||||||
Memory Resource Controller(Memcg)  Implementation Memo.
 | 
					Memory Resource Controller(Memcg)  Implementation Memo.
 | 
				
			||||||
Last Updated: 2009/1/20
 | 
					Last Updated: 2010/2
 | 
				
			||||||
Base Kernel Version: based on 2.6.29-rc2.
 | 
					Base Kernel Version: based on 2.6.33-rc7-mm(candidate for 34).
 | 
				
			||||||
 | 
					
 | 
				
			||||||
Because VM is getting complex (one of reasons is memcg...), memcg's behavior
 | 
					Because VM is getting complex (one of reasons is memcg...), memcg's behavior
 | 
				
			||||||
is complex. This is a document for memcg's internal behavior.
 | 
					is complex. This is a document for memcg's internal behavior.
 | 
				
			||||||
| 
						 | 
					@ -337,7 +337,7 @@ Under below explanation, we assume CONFIG_MEM_RES_CTRL_SWAP=y.
 | 
				
			||||||
	race and lock dependency with other cgroup subsystems.
 | 
						race and lock dependency with other cgroup subsystems.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	example)
 | 
						example)
 | 
				
			||||||
	# mount -t cgroup none /cgroup -t cpuset,memory,cpu,devices
 | 
						# mount -t cgroup none /cgroup -o cpuset,memory,cpu,devices
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	and do task move, mkdir, rmdir etc...under this.
 | 
						and do task move, mkdir, rmdir etc...under this.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					@ -348,7 +348,7 @@ Under below explanation, we assume CONFIG_MEM_RES_CTRL_SWAP=y.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	For example, test like following is good.
 | 
						For example, test like following is good.
 | 
				
			||||||
	(Shell-A)
 | 
						(Shell-A)
 | 
				
			||||||
	# mount -t cgroup none /cgroup -t memory
 | 
						# mount -t cgroup none /cgroup -o memory
 | 
				
			||||||
	# mkdir /cgroup/test
 | 
						# mkdir /cgroup/test
 | 
				
			||||||
	# echo 40M > /cgroup/test/memory.limit_in_bytes
 | 
						# echo 40M > /cgroup/test/memory.limit_in_bytes
 | 
				
			||||||
	# echo 0 > /cgroup/test/tasks
 | 
						# echo 0 > /cgroup/test/tasks
 | 
				
			||||||
| 
						 | 
					@ -378,3 +378,42 @@ Under below explanation, we assume CONFIG_MEM_RES_CTRL_SWAP=y.
 | 
				
			||||||
	#echo 50M > memory.limit_in_bytes
 | 
						#echo 50M > memory.limit_in_bytes
 | 
				
			||||||
	#echo 50M > memory.memsw.limit_in_bytes
 | 
						#echo 50M > memory.memsw.limit_in_bytes
 | 
				
			||||||
	run 51M of malloc
 | 
						run 51M of malloc
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					 9.9 Move charges at task migration
 | 
				
			||||||
 | 
						Charges associated with a task can be moved along with task migration.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						(Shell-A)
 | 
				
			||||||
 | 
						#mkdir /cgroup/A
 | 
				
			||||||
 | 
						#echo $$ >/cgroup/A/tasks
 | 
				
			||||||
 | 
						run some programs which use some amount of memory in /cgroup/A.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						(Shell-B)
 | 
				
			||||||
 | 
						#mkdir /cgroup/B
 | 
				
			||||||
 | 
						#echo 1 >/cgroup/B/memory.move_charge_at_immigrate
 | 
				
			||||||
 | 
						#echo "pid of the program running in group A" >/cgroup/B/tasks
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						You can see charges have been moved by reading *.usage_in_bytes or
 | 
				
			||||||
 | 
						memory.stat of both A and B.
 | 
				
			||||||
 | 
						See 8.2 of Documentation/cgroups/memory.txt to see what value should be
 | 
				
			||||||
 | 
						written to move_charge_at_immigrate.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					 9.10 Memory thresholds
 | 
				
			||||||
 | 
						Memory controller implements memory thresholds using cgroups notification
 | 
				
			||||||
 | 
						API. You can use Documentation/cgroups/cgroup_event_listener.c to test
 | 
				
			||||||
 | 
						it.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						(Shell-A) Create cgroup and run event listener
 | 
				
			||||||
 | 
						# mkdir /cgroup/A
 | 
				
			||||||
 | 
						# ./cgroup_event_listener /cgroup/A/memory.usage_in_bytes 5M
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						(Shell-B) Add task to cgroup and try to allocate and free memory
 | 
				
			||||||
 | 
						# echo $$ >/cgroup/A/tasks
 | 
				
			||||||
 | 
						# a="$(dd if=/dev/zero bs=1M count=10)"
 | 
				
			||||||
 | 
						# a=
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						You will see message from cgroup_event_listener every time you cross
 | 
				
			||||||
 | 
						the thresholds.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						Use /cgroup/A/memory.memsw.usage_in_bytes to test memsw thresholds.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						It's a good idea to test the root cgroup as well.
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -182,6 +182,8 @@ list.
 | 
				
			||||||
NOTE: Reclaim does not work for the root cgroup, since we cannot set any
 | 
					NOTE: Reclaim does not work for the root cgroup, since we cannot set any
 | 
				
			||||||
limits on the root cgroup.
 | 
					limits on the root cgroup.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Note2: When panic_on_oom is set to "2", the whole system will panic.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
2. Locking
 | 
					2. Locking
 | 
				
			||||||
 | 
					
 | 
				
			||||||
The memory controller uses the following hierarchy
 | 
					The memory controller uses the following hierarchy
 | 
				
			||||||
| 
						 | 
					@ -262,10 +264,12 @@ some of the pages cached in the cgroup (page cache pages).
 | 
				
			||||||
4.2 Task migration
 | 
					4.2 Task migration
 | 
				
			||||||
 | 
					
 | 
				
			||||||
When a task migrates from one cgroup to another, it's charge is not
 | 
					When a task migrates from one cgroup to another, it's charge is not
 | 
				
			||||||
carried forward. The pages allocated from the original cgroup still
 | 
					carried forward by default. The pages allocated from the original cgroup still
 | 
				
			||||||
remain charged to it, the charge is dropped when the page is freed or
 | 
					remain charged to it, the charge is dropped when the page is freed or
 | 
				
			||||||
reclaimed.
 | 
					reclaimed.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Note: You can move charges of a task along with task migration. See 8.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
4.3 Removing a cgroup
 | 
					4.3 Removing a cgroup
 | 
				
			||||||
 | 
					
 | 
				
			||||||
A cgroup can be removed by rmdir, but as discussed in sections 4.1 and 4.2, a
 | 
					A cgroup can be removed by rmdir, but as discussed in sections 4.1 and 4.2, a
 | 
				
			||||||
| 
						 | 
					@ -336,7 +340,7 @@ Note:
 | 
				
			||||||
5.3 swappiness
 | 
					5.3 swappiness
 | 
				
			||||||
  Similar to /proc/sys/vm/swappiness, but affecting a hierarchy of groups only.
 | 
					  Similar to /proc/sys/vm/swappiness, but affecting a hierarchy of groups only.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  Following cgroups' swapiness can't be changed.
 | 
					  Following cgroups' swappiness can't be changed.
 | 
				
			||||||
  - root cgroup (uses /proc/sys/vm/swappiness).
 | 
					  - root cgroup (uses /proc/sys/vm/swappiness).
 | 
				
			||||||
  - a cgroup which uses hierarchy and it has child cgroup.
 | 
					  - a cgroup which uses hierarchy and it has child cgroup.
 | 
				
			||||||
  - a cgroup which uses hierarchy and not the root of hierarchy.
 | 
					  - a cgroup which uses hierarchy and not the root of hierarchy.
 | 
				
			||||||
| 
						 | 
					@ -377,7 +381,8 @@ The feature can be disabled by
 | 
				
			||||||
NOTE1: Enabling/disabling will fail if the cgroup already has other
 | 
					NOTE1: Enabling/disabling will fail if the cgroup already has other
 | 
				
			||||||
cgroups created below it.
 | 
					cgroups created below it.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
NOTE2: This feature can be enabled/disabled per subtree.
 | 
					NOTE2: When panic_on_oom is set to "2", the whole system will panic in
 | 
				
			||||||
 | 
					case of an oom event in any cgroup.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
7. Soft limits
 | 
					7. Soft limits
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					@ -414,7 +419,76 @@ NOTE1: Soft limits take effect over a long period of time, since they involve
 | 
				
			||||||
NOTE2: It is recommended to set the soft limit always below the hard limit,
 | 
					NOTE2: It is recommended to set the soft limit always below the hard limit,
 | 
				
			||||||
       otherwise the hard limit will take precedence.
 | 
					       otherwise the hard limit will take precedence.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
8. TODO
 | 
					8. Move charges at task migration
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Users can move charges associated with a task along with task migration, that
 | 
				
			||||||
 | 
					is, uncharge task's pages from the old cgroup and charge them to the new cgroup.
 | 
				
			||||||
 | 
					This feature is not supported in !CONFIG_MMU environments because of lack of
 | 
				
			||||||
 | 
					page tables.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					8.1 Interface
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					This feature is disabled by default. It can be enabled(and disabled again) by
 | 
				
			||||||
 | 
					writing to memory.move_charge_at_immigrate of the destination cgroup.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					If you want to enable it:
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					# echo (some positive value) > memory.move_charge_at_immigrate
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Note: Each bit of move_charge_at_immigrate has its own meaning about what type
 | 
				
			||||||
 | 
					      of charges should be moved. See 8.2 for details.
 | 
				
			||||||
 | 
					Note: Charges are moved only when you move mm->owner, IOW, a leader of a thread
 | 
				
			||||||
 | 
					      group.
 | 
				
			||||||
 | 
					Note: If we cannot find enough space for the task in the destination cgroup, we
 | 
				
			||||||
 | 
					      try to make space by reclaiming memory. Task migration may fail if we
 | 
				
			||||||
 | 
					      cannot make enough space.
 | 
				
			||||||
 | 
					Note: It can take several seconds if you move charges on the order of gigabytes.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					And if you want to disable it again:
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					# echo 0 > memory.move_charge_at_immigrate
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					8.2 Type of charges which can be moved
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Each bit of move_charge_at_immigrate has its own meaning about what type of
 | 
				
			||||||
 | 
					charges should be moved.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  bit | what type of charges would be moved ?
 | 
				
			||||||
 | 
					 -----+------------------------------------------------------------------------
 | 
				
			||||||
 | 
					   0  | A charge of an anonymous page(or swap of it) used by the target task.
 | 
				
			||||||
 | 
					      | Those pages and swaps must be used only by the target task. You must
 | 
				
			||||||
 | 
					      | enable Swap Extension(see 2.4) to enable move of swap charges.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Note: Those pages and swaps must be charged to the old cgroup.
 | 
				
			||||||
 | 
					Note: More types of pages (e.g. file cache, shmem) will be supported by other
 | 
				
			||||||
 | 
					      bits in future.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					8.3 TODO
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					- Add support for other types of pages(e.g. file cache, shmem, etc.).
 | 
				
			||||||
 | 
					- Implement madvise(2) to let users decide the vma to be moved or not to be
 | 
				
			||||||
 | 
					  moved.
 | 
				
			||||||
 | 
					- All of moving charge operations are done under cgroup_mutex. It's not good
 | 
				
			||||||
 | 
					  behavior to hold the mutex too long, so we may need some trick.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					9. Memory thresholds
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Memory controller implements memory thresholds using cgroups notification
 | 
				
			||||||
 | 
					API (see cgroups.txt). It allows to register multiple memory and memsw
 | 
				
			||||||
 | 
					thresholds and get notified when usage crosses them.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					To register a threshold, an application needs to:
 | 
				
			||||||
 | 
					 - create an eventfd using eventfd(2);
 | 
				
			||||||
 | 
					 - open memory.usage_in_bytes or memory.memsw.usage_in_bytes;
 | 
				
			||||||
 | 
					 - write string like "<event_fd> <memory.usage_in_bytes> <threshold>" to
 | 
				
			||||||
 | 
					   cgroup.event_control.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Application will be notified through eventfd when memory usage crosses
 | 
				
			||||||
 | 
					threshold in any direction.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					It's applicable for root and non-root cgroup.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					10. TODO
 | 
				
			||||||
 | 
					
 | 
				
			||||||
1. Add support for accounting huge pages (as a separate controller)
 | 
					1. Add support for accounting huge pages (as a separate controller)
 | 
				
			||||||
2. Make per-cgroup scanner reclaim not-shared pages first
 | 
					2. Make per-cgroup scanner reclaim not-shared pages first
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
							
								
								
									
										234
									
								
								Documentation/circular-buffers.txt
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										234
									
								
								Documentation/circular-buffers.txt
									
										
									
									
									
										Normal file
									
								
							| 
						 | 
					@ -0,0 +1,234 @@
 | 
				
			||||||
 | 
								       ================
 | 
				
			||||||
 | 
								       CIRCULAR BUFFERS
 | 
				
			||||||
 | 
								       ================
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					By: David Howells <dhowells@redhat.com>
 | 
				
			||||||
 | 
					    Paul E. McKenney <paulmck@linux.vnet.ibm.com>
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Linux provides a number of features that can be used to implement circular
 | 
				
			||||||
 | 
					buffering.  There are two sets of such features:
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					 (1) Convenience functions for determining information about power-of-2 sized
 | 
				
			||||||
 | 
					     buffers.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					 (2) Memory barriers for when the producer and the consumer of objects in the
 | 
				
			||||||
 | 
					     buffer don't want to share a lock.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					To use these facilities, as discussed below, there needs to be just one
 | 
				
			||||||
 | 
					producer and just one consumer.  It is possible to handle multiple producers by
 | 
				
			||||||
 | 
					serialising them, and to handle multiple consumers by serialising them.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Contents:
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					 (*) What is a circular buffer?
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					 (*) Measuring power-of-2 buffers.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					 (*) Using memory barriers with circular buffers.
 | 
				
			||||||
 | 
					     - The producer.
 | 
				
			||||||
 | 
					     - The consumer.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					==========================
 | 
				
			||||||
 | 
					WHAT IS A CIRCULAR BUFFER?
 | 
				
			||||||
 | 
					==========================
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					First of all, what is a circular buffer?  A circular buffer is a buffer of
 | 
				
			||||||
 | 
					fixed, finite size into which there are two indices:
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					 (1) A 'head' index - the point at which the producer inserts items into the
 | 
				
			||||||
 | 
					     buffer.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					 (2) A 'tail' index - the point at which the consumer finds the next item in
 | 
				
			||||||
 | 
					     the buffer.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Typically when the tail pointer is equal to the head pointer, the buffer is
 | 
				
			||||||
 | 
					empty; and the buffer is full when the head pointer is one less than the tail
 | 
				
			||||||
 | 
					pointer.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					The head index is incremented when items are added, and the tail index when
 | 
				
			||||||
 | 
					items are removed.  The tail index should never jump the head index, and both
 | 
				
			||||||
 | 
					indices should be wrapped to 0 when they reach the end of the buffer, thus
 | 
				
			||||||
 | 
					allowing an infinite amount of data to flow through the buffer.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Typically, items will all be of the same unit size, but this isn't strictly
 | 
				
			||||||
 | 
					required to use the techniques below.  The indices can be increased by more
 | 
				
			||||||
 | 
					than 1 if multiple items or variable-sized items are to be included in the
 | 
				
			||||||
 | 
					buffer, provided that neither index overtakes the other.  The implementer must
 | 
				
			||||||
 | 
					be careful, however, as a region more than one unit in size may wrap the end of
 | 
				
			||||||
 | 
					the buffer and be broken into two segments.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					============================
 | 
				
			||||||
 | 
					MEASURING POWER-OF-2 BUFFERS
 | 
				
			||||||
 | 
					============================
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Calculation of the occupancy or the remaining capacity of an arbitrarily sized
 | 
				
			||||||
 | 
					circular buffer would normally be a slow operation, requiring the use of a
 | 
				
			||||||
 | 
					modulus (divide) instruction.  However, if the buffer is of a power-of-2 size,
 | 
				
			||||||
 | 
					then a much quicker bitwise-AND instruction can be used instead.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Linux provides a set of macros for handling power-of-2 circular buffers.  These
 | 
				
			||||||
 | 
					can be made use of by:
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						#include <linux/circ_buf.h>
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					The macros are:
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					 (*) Measure the remaining capacity of a buffer:
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						CIRC_SPACE(head_index, tail_index, buffer_size);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					     This returns the amount of space left in the buffer[1] into which items
 | 
				
			||||||
 | 
					     can be inserted.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					 (*) Measure the maximum consecutive immediate space in a buffer:
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						CIRC_SPACE_TO_END(head_index, tail_index, buffer_size);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					     This returns the amount of consecutive space left in the buffer[1] into
 | 
				
			||||||
 | 
					     which items can be immediately inserted without having to wrap back to the
 | 
				
			||||||
 | 
					     beginning of the buffer.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					 (*) Measure the occupancy of a buffer:
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						CIRC_CNT(head_index, tail_index, buffer_size);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					     This returns the number of items currently occupying a buffer[2].
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					 (*) Measure the non-wrapping occupancy of a buffer:
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						CIRC_CNT_TO_END(head_index, tail_index, buffer_size);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					     This returns the number of consecutive items[2] that can be extracted from
 | 
				
			||||||
 | 
					     the buffer without having to wrap back to the beginning of the buffer.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Each of these macros will nominally return a value between 0 and buffer_size-1,
 | 
				
			||||||
 | 
					however:
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					 [1] CIRC_SPACE*() are intended to be used in the producer.  To the producer
 | 
				
			||||||
 | 
					     they will return a lower bound as the producer controls the head index,
 | 
				
			||||||
 | 
					     but the consumer may still be depleting the buffer on another CPU and
 | 
				
			||||||
 | 
					     moving the tail index.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					     To the consumer it will show an upper bound as the producer may be busy
 | 
				
			||||||
 | 
					     depleting the space.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					 [2] CIRC_CNT*() are intended to be used in the consumer.  To the consumer they
 | 
				
			||||||
 | 
					     will return a lower bound as the consumer controls the tail index, but the
 | 
				
			||||||
 | 
					     producer may still be filling the buffer on another CPU and moving the
 | 
				
			||||||
 | 
					     head index.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					     To the producer it will show an upper bound as the consumer may be busy
 | 
				
			||||||
 | 
					     emptying the buffer.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					 [3] To a third party, the order in which the writes to the indices by the
 | 
				
			||||||
 | 
					     producer and consumer become visible cannot be guaranteed as they are
 | 
				
			||||||
 | 
					     independent and may be made on different CPUs - so the result in such a
 | 
				
			||||||
 | 
					     situation will merely be a guess, and may even be negative.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					===========================================
 | 
				
			||||||
 | 
					USING MEMORY BARRIERS WITH CIRCULAR BUFFERS
 | 
				
			||||||
 | 
					===========================================
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					By using memory barriers in conjunction with circular buffers, you can avoid
 | 
				
			||||||
 | 
					the need to:
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					 (1) use a single lock to govern access to both ends of the buffer, thus
 | 
				
			||||||
 | 
					     allowing the buffer to be filled and emptied at the same time; and
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					 (2) use atomic counter operations.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					There are two sides to this: the producer that fills the buffer, and the
 | 
				
			||||||
 | 
					consumer that empties it.  Only one thing should be filling a buffer at any one
 | 
				
			||||||
 | 
					time, and only one thing should be emptying a buffer at any one time, but the
 | 
				
			||||||
 | 
					two sides can operate simultaneously.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					THE PRODUCER
 | 
				
			||||||
 | 
					------------
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					The producer will look something like this:
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						spin_lock(&producer_lock);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						unsigned long head = buffer->head;
 | 
				
			||||||
 | 
						unsigned long tail = ACCESS_ONCE(buffer->tail);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						if (CIRC_SPACE(head, tail, buffer->size) >= 1) {
 | 
				
			||||||
 | 
							/* insert one item into the buffer */
 | 
				
			||||||
 | 
							struct item *item = buffer[head];
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
							produce_item(item);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
							smp_wmb(); /* commit the item before incrementing the head */
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
							buffer->head = (head + 1) & (buffer->size - 1);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
							/* wake_up() will make sure that the head is committed before
 | 
				
			||||||
 | 
							 * waking anyone up */
 | 
				
			||||||
 | 
							wake_up(consumer);
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						spin_unlock(&producer_lock);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					This will instruct the CPU that the contents of the new item must be written
 | 
				
			||||||
 | 
					before the head index makes it available to the consumer and then instructs the
 | 
				
			||||||
 | 
					CPU that the revised head index must be written before the consumer is woken.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Note that wake_up() doesn't have to be the exact mechanism used, but whatever
 | 
				
			||||||
 | 
					is used must guarantee a (write) memory barrier between the update of the head
 | 
				
			||||||
 | 
					index and the change of state of the consumer, if a change of state occurs.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					THE CONSUMER
 | 
				
			||||||
 | 
					------------
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					The consumer will look something like this:
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						spin_lock(&consumer_lock);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						unsigned long head = ACCESS_ONCE(buffer->head);
 | 
				
			||||||
 | 
						unsigned long tail = buffer->tail;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						if (CIRC_CNT(head, tail, buffer->size) >= 1) {
 | 
				
			||||||
 | 
							/* read index before reading contents at that index */
 | 
				
			||||||
 | 
							smp_read_barrier_depends();
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
							/* extract one item from the buffer */
 | 
				
			||||||
 | 
							struct item *item = buffer[tail];
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
							consume_item(item);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
							smp_mb(); /* finish reading descriptor before incrementing tail */
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
							buffer->tail = (tail + 1) & (buffer->size - 1);
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						spin_unlock(&consumer_lock);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					This will instruct the CPU to make sure the index is up to date before reading
 | 
				
			||||||
 | 
					the new item, and then it shall make sure the CPU has finished reading the item
 | 
				
			||||||
 | 
					before it writes the new tail pointer, which will erase the item.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Note the use of ACCESS_ONCE() in both algorithms to read the opposition index.
 | 
				
			||||||
 | 
					This prevents the compiler from discarding and reloading its cached value -
 | 
				
			||||||
 | 
					which some compilers will do across smp_read_barrier_depends().  This isn't
 | 
				
			||||||
 | 
					strictly needed if you can be sure that the opposition index will _only_ be
 | 
				
			||||||
 | 
					used the once.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					===============
 | 
				
			||||||
 | 
					FURTHER READING
 | 
				
			||||||
 | 
					===============
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					See also Documentation/memory-barriers.txt for a description of Linux's memory
 | 
				
			||||||
 | 
					barrier facilities.
 | 
				
			||||||
| 
						 | 
					@ -74,7 +74,7 @@ driver takes over the consoles vacated by the driver. Binding, on the other
 | 
				
			||||||
hand, will bind the driver to the consoles that are currently occupied by a
 | 
					hand, will bind the driver to the consoles that are currently occupied by a
 | 
				
			||||||
system driver.
 | 
					system driver.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
NOTE1: Binding and binding must be selected in Kconfig. It's under:
 | 
					NOTE1: Binding and unbinding must be selected in Kconfig. It's under:
 | 
				
			||||||
 | 
					
 | 
				
			||||||
Device Drivers -> Character devices -> Support for binding and unbinding
 | 
					Device Drivers -> Character devices -> Support for binding and unbinding
 | 
				
			||||||
console drivers
 | 
					console drivers
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
							
								
								
									
										207
									
								
								Documentation/cpu-freq/pcc-cpufreq.txt
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										207
									
								
								Documentation/cpu-freq/pcc-cpufreq.txt
									
										
									
									
									
										Normal file
									
								
							| 
						 | 
					@ -0,0 +1,207 @@
 | 
				
			||||||
 | 
					/*
 | 
				
			||||||
 | 
					 *  pcc-cpufreq.txt - PCC interface documentation
 | 
				
			||||||
 | 
					 *
 | 
				
			||||||
 | 
					 *  Copyright (C) 2009 Red Hat, Matthew Garrett <mjg@redhat.com>
 | 
				
			||||||
 | 
					 *  Copyright (C) 2009 Hewlett-Packard Development Company, L.P.
 | 
				
			||||||
 | 
					 *      Nagananda Chumbalkar <nagananda.chumbalkar@hp.com>
 | 
				
			||||||
 | 
					 *
 | 
				
			||||||
 | 
					 * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 | 
				
			||||||
 | 
					 *
 | 
				
			||||||
 | 
					 *  This program is free software; you can redistribute it and/or modify
 | 
				
			||||||
 | 
					 *  it under the terms of the GNU General Public License as published by
 | 
				
			||||||
 | 
					 *  the Free Software Foundation; version 2 of the License.
 | 
				
			||||||
 | 
					 *
 | 
				
			||||||
 | 
					 *  This program is distributed in the hope that it will be useful, but
 | 
				
			||||||
 | 
					 *  WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
				
			||||||
 | 
					 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or NON
 | 
				
			||||||
 | 
					 *  INFRINGEMENT. See the GNU General Public License for more details.
 | 
				
			||||||
 | 
					 *
 | 
				
			||||||
 | 
					 *  You should have received a copy of the GNU General Public License along
 | 
				
			||||||
 | 
					 *  with this program; if not, write to the Free Software Foundation, Inc.,
 | 
				
			||||||
 | 
					 *  675 Mass Ave, Cambridge, MA 02139, USA.
 | 
				
			||||||
 | 
					 *
 | 
				
			||||||
 | 
					 * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 | 
				
			||||||
 | 
					 */
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
								Processor Clocking Control Driver
 | 
				
			||||||
 | 
								---------------------------------
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Contents:
 | 
				
			||||||
 | 
					---------
 | 
				
			||||||
 | 
					1.	Introduction
 | 
				
			||||||
 | 
					1.1	PCC interface
 | 
				
			||||||
 | 
					1.1.1   Get Average Frequency
 | 
				
			||||||
 | 
					1.1.2	Set Desired Frequency
 | 
				
			||||||
 | 
					1.2	Platforms affected
 | 
				
			||||||
 | 
					2.	Driver and /sys details
 | 
				
			||||||
 | 
					2.1	scaling_available_frequencies
 | 
				
			||||||
 | 
					2.2	cpuinfo_transition_latency
 | 
				
			||||||
 | 
					2.3	cpuinfo_cur_freq
 | 
				
			||||||
 | 
					2.4	related_cpus
 | 
				
			||||||
 | 
					3.	Caveats
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					1. Introduction:
 | 
				
			||||||
 | 
					----------------
 | 
				
			||||||
 | 
					Processor Clocking Control (PCC) is an interface between the platform
 | 
				
			||||||
 | 
					firmware and OSPM. It is a mechanism for coordinating processor
 | 
				
			||||||
 | 
					performance (ie: frequency) between the platform firmware and the OS.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					The PCC driver (pcc-cpufreq) allows OSPM to take advantage of the PCC
 | 
				
			||||||
 | 
					interface.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					OS utilizes the PCC interface to inform platform firmware what frequency the
 | 
				
			||||||
 | 
					OS wants for a logical processor. The platform firmware attempts to achieve
 | 
				
			||||||
 | 
					the requested frequency. If the request for the target frequency could not be
 | 
				
			||||||
 | 
					satisfied by platform firmware, then it usually means that power budget
 | 
				
			||||||
 | 
					conditions are in place, and "power capping" is taking place.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					1.1 PCC interface:
 | 
				
			||||||
 | 
					------------------
 | 
				
			||||||
 | 
					The complete PCC specification is available here:
 | 
				
			||||||
 | 
					http://www.acpica.org/download/Processor-Clocking-Control-v1p0.pdf
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					PCC relies on a shared memory region that provides a channel for communication
 | 
				
			||||||
 | 
					between the OS and platform firmware. PCC also implements a "doorbell" that
 | 
				
			||||||
 | 
					is used by the OS to inform the platform firmware that a command has been
 | 
				
			||||||
 | 
					sent.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					The ACPI PCCH() method is used to discover the location of the PCC shared
 | 
				
			||||||
 | 
					memory region. The shared memory region header contains the "command" and
 | 
				
			||||||
 | 
					"status" interface. PCCH() also contains details on how to access the platform
 | 
				
			||||||
 | 
					doorbell.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					The following commands are supported by the PCC interface:
 | 
				
			||||||
 | 
					* Get Average Frequency
 | 
				
			||||||
 | 
					* Set Desired Frequency
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					The ACPI PCCP() method is implemented for each logical processor and is
 | 
				
			||||||
 | 
					used to discover the offsets for the input and output buffers in the shared
 | 
				
			||||||
 | 
					memory region.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					When PCC mode is enabled, the platform will not expose processor performance
 | 
				
			||||||
 | 
					or throttle states (_PSS, _TSS and related ACPI objects) to OSPM. Therefore,
 | 
				
			||||||
 | 
					the native P-state driver (such as acpi-cpufreq for Intel, powernow-k8 for
 | 
				
			||||||
 | 
					AMD) will not load.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					However, OSPM remains in control of policy. The governor (eg: "ondemand")
 | 
				
			||||||
 | 
					computes the required performance for each processor based on server workload.
 | 
				
			||||||
 | 
					The PCC driver fills in the command interface, and the input buffer and
 | 
				
			||||||
 | 
					communicates the request to the platform firmware. The platform firmware is
 | 
				
			||||||
 | 
					responsible for delivering the requested performance.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Each PCC command is "global" in scope and can affect all the logical CPUs in
 | 
				
			||||||
 | 
					the system. Therefore, PCC is capable of performing "group" updates. With PCC
 | 
				
			||||||
 | 
					the OS is capable of getting/setting the frequency of all the logical CPUs in
 | 
				
			||||||
 | 
					the system with a single call to the BIOS.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					1.1.1 Get Average Frequency:
 | 
				
			||||||
 | 
					----------------------------
 | 
				
			||||||
 | 
					This command is used by the OSPM to query the running frequency of the
 | 
				
			||||||
 | 
					processor since the last time this command was completed. The output buffer
 | 
				
			||||||
 | 
					indicates the average unhalted frequency of the logical processor expressed as
 | 
				
			||||||
 | 
					a percentage of the nominal (ie: maximum) CPU frequency. The output buffer
 | 
				
			||||||
 | 
					also signifies if the CPU frequency is limited by a power budget condition.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					1.1.2 Set Desired Frequency:
 | 
				
			||||||
 | 
					----------------------------
 | 
				
			||||||
 | 
					This command is used by the OSPM to communicate to the platform firmware the
 | 
				
			||||||
 | 
					desired frequency for a logical processor. The output buffer is currently
 | 
				
			||||||
 | 
					ignored by OSPM. The next invocation of "Get Average Frequency" will inform
 | 
				
			||||||
 | 
					OSPM if the desired frequency was achieved or not.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					1.2 Platforms affected:
 | 
				
			||||||
 | 
					-----------------------
 | 
				
			||||||
 | 
					The PCC driver will load on any system where the platform firmware:
 | 
				
			||||||
 | 
					* supports the PCC interface, and the associated PCCH() and PCCP() methods
 | 
				
			||||||
 | 
					* assumes responsibility for managing the hardware clocking controls in order
 | 
				
			||||||
 | 
					to deliver the requested processor performance
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Currently, certain HP ProLiant platforms implement the PCC interface. On those
 | 
				
			||||||
 | 
					platforms PCC is the "default" choice.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					However, it is possible to disable this interface via a BIOS setting. In
 | 
				
			||||||
 | 
					such an instance, as is also the case on platforms where the PCC interface
 | 
				
			||||||
 | 
					is not implemented, the PCC driver will fail to load silently.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					2. Driver and /sys details:
 | 
				
			||||||
 | 
					---------------------------
 | 
				
			||||||
 | 
					When the driver loads, it merely prints the lowest and the highest CPU
 | 
				
			||||||
 | 
					frequencies supported by the platform firmware.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					The PCC driver loads with a message such as:
 | 
				
			||||||
 | 
					pcc-cpufreq: (v1.00.00) driver loaded with frequency limits: 1600 MHz, 2933
 | 
				
			||||||
 | 
					MHz
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					This means that the OSPM can request the CPU to run at any frequency in
 | 
				
			||||||
 | 
					between the limits (1600 MHz, and 2933 MHz) specified in the message.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Internally, there is no need for the driver to convert the "target" frequency
 | 
				
			||||||
 | 
					to a corresponding P-state.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					The VERSION number for the driver will be of the format v.xy.ab.
 | 
				
			||||||
 | 
					eg: 1.00.02
 | 
				
			||||||
 | 
					   ----- --
 | 
				
			||||||
 | 
					    |    |
 | 
				
			||||||
 | 
					    |    -- this will increase with bug fixes/enhancements to the driver
 | 
				
			||||||
 | 
					    |-- this is the version of the PCC specification the driver adheres to
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					The following is a brief discussion on some of the fields exported via the
 | 
				
			||||||
 | 
					/sys filesystem and how their values are affected by the PCC driver:
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					2.1 scaling_available_frequencies:
 | 
				
			||||||
 | 
					----------------------------------
 | 
				
			||||||
 | 
					scaling_available_frequencies is not created in /sys. No intermediate
 | 
				
			||||||
 | 
					frequencies need to be listed because the BIOS will try to achieve any
 | 
				
			||||||
 | 
					frequency, within limits, requested by the governor. A frequency does not have
 | 
				
			||||||
 | 
					to be strictly associated with a P-state.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					2.2 cpuinfo_transition_latency:
 | 
				
			||||||
 | 
					-------------------------------
 | 
				
			||||||
 | 
					The cpuinfo_transition_latency field is 0. The PCC specification does
 | 
				
			||||||
 | 
					not include a field to expose this value currently.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					2.3 cpuinfo_cur_freq:
 | 
				
			||||||
 | 
					---------------------
 | 
				
			||||||
 | 
					A) Often cpuinfo_cur_freq will show a value different than what is declared
 | 
				
			||||||
 | 
					in the scaling_available_frequencies or scaling_cur_freq, or scaling_max_freq.
 | 
				
			||||||
 | 
					This is due to "turbo boost" available on recent Intel processors. If certain
 | 
				
			||||||
 | 
					conditions are met the BIOS can achieve a slightly higher speed than requested
 | 
				
			||||||
 | 
					by OSPM. An example:
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					scaling_cur_freq	: 2933000
 | 
				
			||||||
 | 
					cpuinfo_cur_freq	: 3196000
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					B) There is a round-off error associated with the cpuinfo_cur_freq value.
 | 
				
			||||||
 | 
					Since the driver obtains the current frequency as a "percentage" (%) of the
 | 
				
			||||||
 | 
					nominal frequency from the BIOS, sometimes, the values displayed by
 | 
				
			||||||
 | 
					scaling_cur_freq and cpuinfo_cur_freq may not match. An example:
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					scaling_cur_freq	: 1600000
 | 
				
			||||||
 | 
					cpuinfo_cur_freq	: 1583000
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					In this example, the nominal frequency is 2933 MHz. The driver obtains the
 | 
				
			||||||
 | 
					current frequency, cpuinfo_cur_freq, as 54% of the nominal frequency:
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						54% of 2933 MHz = 1583 MHz
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Nominal frequency is the maximum frequency of the processor, and it usually
 | 
				
			||||||
 | 
					corresponds to the frequency of the P0 P-state.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					2.4 related_cpus:
 | 
				
			||||||
 | 
					-----------------
 | 
				
			||||||
 | 
					The related_cpus field is identical to affected_cpus.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					affected_cpus	: 4
 | 
				
			||||||
 | 
					related_cpus	: 4
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Currently, the PCC driver does not evaluate _PSD. The platforms that support
 | 
				
			||||||
 | 
					PCC do not implement SW_ALL. So OSPM doesn't need to perform any coordination
 | 
				
			||||||
 | 
					to ensure that the same frequency is requested of all dependent CPUs.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					3. Caveats:
 | 
				
			||||||
 | 
					-----------
 | 
				
			||||||
 | 
					The "cpufreq_stats" module in its present form cannot be loaded and
 | 
				
			||||||
 | 
					expected to work with the PCC driver. Since the "cpufreq_stats" module
 | 
				
			||||||
 | 
					provides information wrt each P-state, it is not applicable to the PCC driver.
 | 
				
			||||||
| 
						 | 
					@ -122,3 +122,47 @@ volumeGroup-base: 0 2097152 snapshot-merge 254:11 254:12 P 16
 | 
				
			||||||
brw-------  1 root root 254, 11 29 ago 18:15 /dev/mapper/volumeGroup-base-real
 | 
					brw-------  1 root root 254, 11 29 ago 18:15 /dev/mapper/volumeGroup-base-real
 | 
				
			||||||
brw-------  1 root root 254, 12 29 ago 18:16 /dev/mapper/volumeGroup-base-cow
 | 
					brw-------  1 root root 254, 12 29 ago 18:16 /dev/mapper/volumeGroup-base-cow
 | 
				
			||||||
brw-------  1 root root 254, 10 29 ago 18:16 /dev/mapper/volumeGroup-base
 | 
					brw-------  1 root root 254, 10 29 ago 18:16 /dev/mapper/volumeGroup-base
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					How to determine when a merging is complete
 | 
				
			||||||
 | 
					===========================================
 | 
				
			||||||
 | 
					The snapshot-merge and snapshot status lines end with:
 | 
				
			||||||
 | 
					  <sectors_allocated>/<total_sectors> <metadata_sectors>
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Both <sectors_allocated> and <total_sectors> include both data and metadata.
 | 
				
			||||||
 | 
					During merging, the number of sectors allocated gets smaller and
 | 
				
			||||||
 | 
					smaller.  Merging has finished when the number of sectors holding data
 | 
				
			||||||
 | 
					is zero, in other words <sectors_allocated> == <metadata_sectors>.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Here is a practical example (using a hybrid of lvm and dmsetup commands):
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					# lvs
 | 
				
			||||||
 | 
					  LV      VG          Attr   LSize Origin  Snap%  Move Log Copy%  Convert
 | 
				
			||||||
 | 
					  base    volumeGroup owi-a- 4.00g
 | 
				
			||||||
 | 
					  snap    volumeGroup swi-a- 1.00g base  18.97
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					# dmsetup status volumeGroup-snap
 | 
				
			||||||
 | 
					0 8388608 snapshot 397896/2097152 1560
 | 
				
			||||||
 | 
					                                  ^^^^ metadata sectors
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					# lvconvert --merge -b volumeGroup/snap
 | 
				
			||||||
 | 
					  Merging of volume snap started.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					# lvs volumeGroup/snap
 | 
				
			||||||
 | 
					  LV      VG          Attr   LSize Origin  Snap%  Move Log Copy%  Convert
 | 
				
			||||||
 | 
					  base    volumeGroup Owi-a- 4.00g          17.23
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					# dmsetup status volumeGroup-base
 | 
				
			||||||
 | 
					0 8388608 snapshot-merge 281688/2097152 1104
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					# dmsetup status volumeGroup-base
 | 
				
			||||||
 | 
					0 8388608 snapshot-merge 180480/2097152 712
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					# dmsetup status volumeGroup-base
 | 
				
			||||||
 | 
					0 8388608 snapshot-merge 16/2097152 16
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Merging has finished.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					# lvs
 | 
				
			||||||
 | 
					  LV      VG          Attr   LSize Origin  Snap%  Move Log Copy%  Convert
 | 
				
			||||||
 | 
					  base    volumeGroup owi-a- 4.00g
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -192,7 +192,7 @@ command line. This will execute all matching early_param() callbacks.
 | 
				
			||||||
User specified early platform devices will be registered at this point.
 | 
					User specified early platform devices will be registered at this point.
 | 
				
			||||||
For the early serial console case the user can specify port on the
 | 
					For the early serial console case the user can specify port on the
 | 
				
			||||||
kernel command line as "earlyprintk=serial.0" where "earlyprintk" is
 | 
					kernel command line as "earlyprintk=serial.0" where "earlyprintk" is
 | 
				
			||||||
the class string, "serial" is the name of the platfrom driver and
 | 
					the class string, "serial" is the name of the platform driver and
 | 
				
			||||||
0 is the platform device id. If the id is -1 then the dot and the
 | 
					0 is the platform device id. If the id is -1 then the dot and the
 | 
				
			||||||
id can be omitted.
 | 
					id can be omitted.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -171,7 +171,7 @@ device.
 | 
				
			||||||
virtual_root.force_probe :
 | 
					virtual_root.force_probe :
 | 
				
			||||||
 | 
					
 | 
				
			||||||
Force the probing code to probe EISA slots even when it cannot find an
 | 
					Force the probing code to probe EISA slots even when it cannot find an
 | 
				
			||||||
EISA compliant mainboard (nothing appears on slot 0). Defaultd to 0
 | 
					EISA compliant mainboard (nothing appears on slot 0). Defaults to 0
 | 
				
			||||||
(don't force), and set to 1 (force probing) when either
 | 
					(don't force), and set to 1 (force probing) when either
 | 
				
			||||||
CONFIG_ALPHA_JENSEN or CONFIG_EISA_VLB_PRIMING are set.
 | 
					CONFIG_ALPHA_JENSEN or CONFIG_EISA_VLB_PRIMING are set.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -216,26 +216,14 @@ Works.  Use "Insert file..." or external editor.
 | 
				
			||||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 | 
					~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 | 
				
			||||||
Gmail (Web GUI)
 | 
					Gmail (Web GUI)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
If you just have to use Gmail to send patches, it CAN be made to work.  It
 | 
					Does not work for sending patches.
 | 
				
			||||||
requires a bit of external help, though.
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
The first problem is that Gmail converts tabs to spaces.  This will
 | 
					Gmail web client converts tabs to spaces automatically.
 | 
				
			||||||
totally break your patches.  To prevent this, you have to use a different
 | 
					 | 
				
			||||||
editor.  There is a firefox extension called "ViewSourceWith"
 | 
					 | 
				
			||||||
(https://addons.mozilla.org/en-US/firefox/addon/394) which allows you to
 | 
					 | 
				
			||||||
edit any text box in the editor of your choice.  Configure it to launch
 | 
					 | 
				
			||||||
your favorite editor.  When you want to send a patch, use this technique.
 | 
					 | 
				
			||||||
Once you have crafted your messsage + patch, save and exit the editor,
 | 
					 | 
				
			||||||
which should reload the Gmail edit box.  GMAIL WILL PRESERVE THE TABS.
 | 
					 | 
				
			||||||
Hoorah.  Apparently you can cut-n-paste literal tabs, but Gmail will
 | 
					 | 
				
			||||||
convert those to spaces upon sending!
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
The second problem is that Gmail converts tabs to spaces on replies.  If
 | 
					At the same time it wraps lines every 78 chars with CRLF style line breaks
 | 
				
			||||||
you reply to a patch, don't expect to be able to apply it as a patch.
 | 
					although the tab2space problem can be solved with an external editor.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
The last problem is that Gmail will base64-encode any message that has a
 | 
					Another problem is that Gmail will base64-encode any message that has a
 | 
				
			||||||
non-ASCII character.  That includes things like European names.  Be aware.
 | 
					non-ASCII character. That includes things like European names.
 | 
				
			||||||
 | 
					 | 
				
			||||||
Gmail is not convenient for lkml patches, but CAN be made to work.
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
                                ###
 | 
					                                ###
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
							
								
								
									
										38
									
								
								Documentation/fault-injection/provoke-crashes.txt
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										38
									
								
								Documentation/fault-injection/provoke-crashes.txt
									
										
									
									
									
										Normal file
									
								
							| 
						 | 
					@ -0,0 +1,38 @@
 | 
				
			||||||
 | 
					The lkdtm module provides an interface to crash or injure the kernel at
 | 
				
			||||||
 | 
					predefined crashpoints to evaluate the reliability of crash dumps obtained
 | 
				
			||||||
 | 
					using different dumping solutions. The module uses KPROBEs to instrument
 | 
				
			||||||
 | 
					crashing points, but can also crash the kernel directly without KPROBE
 | 
				
			||||||
 | 
					support.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					You can select the crash point and action either through module arguments when inserting
 | 
				
			||||||
 | 
					the module, or through a debugfs interface.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Usage: insmod lkdtm.ko [recur_count={>0}] cpoint_name=<> cpoint_type=<>
 | 
				
			||||||
 | 
									[cpoint_count={>0}]
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  recur_count : Recursion level for the stack overflow test. Default is 10.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  cpoint_name : Crash point where the kernel is to be crashed. It can be
 | 
				
			||||||
 | 
						 one of INT_HARDWARE_ENTRY, INT_HW_IRQ_EN, INT_TASKLET_ENTRY,
 | 
				
			||||||
 | 
						 FS_DEVRW, MEM_SWAPOUT, TIMERADD, SCSI_DISPATCH_CMD,
 | 
				
			||||||
 | 
						 IDE_CORE_CP, DIRECT
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  cpoint_type : Indicates the action to be taken on hitting the crash point.
 | 
				
			||||||
 | 
					     It can be one of PANIC, BUG, EXCEPTION, LOOP, OVERFLOW,
 | 
				
			||||||
 | 
					     CORRUPT_STACK, UNALIGNED_LOAD_STORE_WRITE, OVERWRITE_ALLOCATION,
 | 
				
			||||||
 | 
					     WRITE_AFTER_FREE.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  cpoint_count : Indicates the number of times the crash point is to be hit
 | 
				
			||||||
 | 
					    to trigger an action. The default is 10.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					You can also induce failures by mounting debugfs and writing the type to
 | 
				
			||||||
 | 
					<mountpoint>/provoke-crash/<crashpoint>. E.g.,
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  mount -t debugfs debugfs /mnt
 | 
				
			||||||
 | 
					  echo EXCEPTION > /mnt/provoke-crash/INT_HARDWARE_ENTRY
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					A special file is `DIRECT' which will induce the crash directly without
 | 
				
			||||||
 | 
					KPROBE instrumentation. This mode is the only one available when the module
 | 
				
			||||||
 | 
					is built on a kernel without KPROBEs support.
 | 
				
			||||||
| 
						 | 
					@ -117,19 +117,25 @@ Who:	Mauro Carvalho Chehab <mchehab@infradead.org>
 | 
				
			||||||
---------------------------
 | 
					---------------------------
 | 
				
			||||||
 | 
					
 | 
				
			||||||
What:	PCMCIA control ioctl (needed for pcmcia-cs [cardmgr, cardctl])
 | 
					What:	PCMCIA control ioctl (needed for pcmcia-cs [cardmgr, cardctl])
 | 
				
			||||||
When:	November 2005
 | 
					When:	2.6.35/2.6.36
 | 
				
			||||||
Files:	drivers/pcmcia/: pcmcia_ioctl.c
 | 
					Files:	drivers/pcmcia/: pcmcia_ioctl.c
 | 
				
			||||||
Why:	With the 16-bit PCMCIA subsystem now behaving (almost) like a
 | 
					Why:	With the 16-bit PCMCIA subsystem now behaving (almost) like a
 | 
				
			||||||
	normal hotpluggable bus, and with it using the default kernel
 | 
						normal hotpluggable bus, and with it using the default kernel
 | 
				
			||||||
	infrastructure (hotplug, driver core, sysfs) keeping the PCMCIA
 | 
						infrastructure (hotplug, driver core, sysfs) keeping the PCMCIA
 | 
				
			||||||
	control ioctl needed by cardmgr and cardctl from pcmcia-cs is
 | 
						control ioctl needed by cardmgr and cardctl from pcmcia-cs is
 | 
				
			||||||
	unnecessary, and makes further cleanups and integration of the
 | 
						unnecessary and potentially harmful (it does not provide for
 | 
				
			||||||
 | 
						proper locking), and makes further cleanups and integration of the
 | 
				
			||||||
	PCMCIA subsystem into the Linux kernel device driver model more
 | 
						PCMCIA subsystem into the Linux kernel device driver model more
 | 
				
			||||||
	difficult. The features provided by cardmgr and cardctl are either
 | 
						difficult. The features provided by cardmgr and cardctl are either
 | 
				
			||||||
	handled by the kernel itself now or are available in the new
 | 
						handled by the kernel itself now or are available in the new
 | 
				
			||||||
	pcmciautils package available at
 | 
						pcmciautils package available at
 | 
				
			||||||
	http://kernel.org/pub/linux/utils/kernel/pcmcia/
 | 
						http://kernel.org/pub/linux/utils/kernel/pcmcia/
 | 
				
			||||||
Who:	Dominik Brodowski <linux@brodo.de>
 | 
					
 | 
				
			||||||
 | 
						For all architectures except ARM, the associated config symbol
 | 
				
			||||||
 | 
						has been removed from kernel 2.6.34; for ARM, it will likely
 | 
				
			||||||
 | 
						be removed from kernel 2.6.35. The actual code will then likely
 | 
				
			||||||
 | 
						be removed from kernel 2.6.36.
 | 
				
			||||||
 | 
					Who:	Dominik Brodowski <linux@dominikbrodowski.net>
 | 
				
			||||||
 | 
					
 | 
				
			||||||
---------------------------
 | 
					---------------------------
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					@ -443,12 +449,6 @@ Who:	Alok N Kataria <akataria@vmware.com>
 | 
				
			||||||
 | 
					
 | 
				
			||||||
----------------------------
 | 
					----------------------------
 | 
				
			||||||
 | 
					
 | 
				
			||||||
What:	adt7473 hardware monitoring driver
 | 
					 | 
				
			||||||
When:	February 2010
 | 
					 | 
				
			||||||
Why:	Obsoleted by the adt7475 driver.
 | 
					 | 
				
			||||||
Who:	Jean Delvare <khali@linux-fr.org>
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
---------------------------
 | 
					 | 
				
			||||||
What:	Support for lcd_switch and display_get in asus-laptop driver
 | 
					What:	Support for lcd_switch and display_get in asus-laptop driver
 | 
				
			||||||
When:	March 2010
 | 
					When:	March 2010
 | 
				
			||||||
Why:	These two features use non-standard interfaces. There are the
 | 
					Why:	These two features use non-standard interfaces. There are the
 | 
				
			||||||
| 
						 | 
					@ -550,3 +550,42 @@ Why:	udev fully replaces this special file system that only contains CAPI
 | 
				
			||||||
	NCCI TTY device nodes. User space (pppdcapiplugin) works without
 | 
						NCCI TTY device nodes. User space (pppdcapiplugin) works without
 | 
				
			||||||
	noticing the difference.
 | 
						noticing the difference.
 | 
				
			||||||
Who:	Jan Kiszka <jan.kiszka@web.de>
 | 
					Who:	Jan Kiszka <jan.kiszka@web.de>
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					----------------------------
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					What:	KVM memory aliases support
 | 
				
			||||||
 | 
					When:	July 2010
 | 
				
			||||||
 | 
					Why:	Memory aliasing support is used for speeding up guest vga access
 | 
				
			||||||
 | 
						through the vga windows.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						Modern userspace no longer uses this feature, so it's just bitrotted
 | 
				
			||||||
 | 
						code and can be removed with no impact.
 | 
				
			||||||
 | 
					Who:	Avi Kivity <avi@redhat.com>
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					----------------------------
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					What:	KVM kernel-allocated memory slots
 | 
				
			||||||
 | 
					When:	July 2010
 | 
				
			||||||
 | 
					Why:	Since 2.6.25, kvm supports user-allocated memory slots, which are
 | 
				
			||||||
 | 
						much more flexible than kernel-allocated slots.  All current userspace
 | 
				
			||||||
 | 
						supports the newer interface and this code can be removed with no
 | 
				
			||||||
 | 
						impact.
 | 
				
			||||||
 | 
					Who:	Avi Kivity <avi@redhat.com>
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					----------------------------
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					What:	KVM paravirt mmu host support
 | 
				
			||||||
 | 
					When:	January 2011
 | 
				
			||||||
 | 
					Why:	The paravirt mmu host support is slower than non-paravirt mmu, both
 | 
				
			||||||
 | 
						on newer and older hardware.  It is already not exposed to the guest,
 | 
				
			||||||
 | 
						and kept only for live migration purposes.
 | 
				
			||||||
 | 
					Who:	Avi Kivity <avi@redhat.com>
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					----------------------------
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					What: 	"acpi=ht" boot option
 | 
				
			||||||
 | 
					When:	2.6.35
 | 
				
			||||||
 | 
					Why:	Useful in 2003, implementation is a hack.
 | 
				
			||||||
 | 
						Generally invoked by accident today.
 | 
				
			||||||
 | 
						Seen as doing more harm than good.
 | 
				
			||||||
 | 
					Who:	Len Brown <len.brown@intel.com>
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -16,6 +16,8 @@ befs.txt
 | 
				
			||||||
	- information about the BeOS filesystem for Linux.
 | 
						- information about the BeOS filesystem for Linux.
 | 
				
			||||||
bfs.txt
 | 
					bfs.txt
 | 
				
			||||||
	- info for the SCO UnixWare Boot Filesystem (BFS).
 | 
						- info for the SCO UnixWare Boot Filesystem (BFS).
 | 
				
			||||||
 | 
					ceph.txt
 | 
				
			||||||
 | 
						- info for the Ceph Distributed File System
 | 
				
			||||||
cifs.txt
 | 
					cifs.txt
 | 
				
			||||||
	- description of the CIFS filesystem.
 | 
						- description of the CIFS filesystem.
 | 
				
			||||||
coda.txt
 | 
					coda.txt
 | 
				
			||||||
| 
						 | 
					@ -32,6 +34,8 @@ dlmfs.txt
 | 
				
			||||||
	- info on the userspace interface to the OCFS2 DLM.
 | 
						- info on the userspace interface to the OCFS2 DLM.
 | 
				
			||||||
dnotify.txt
 | 
					dnotify.txt
 | 
				
			||||||
	- info about directory notification in Linux.
 | 
						- info about directory notification in Linux.
 | 
				
			||||||
 | 
					dnotify_test.c
 | 
				
			||||||
 | 
						- example program for dnotify
 | 
				
			||||||
ecryptfs.txt
 | 
					ecryptfs.txt
 | 
				
			||||||
	- docs on eCryptfs: stacked cryptographic filesystem for Linux.
 | 
						- docs on eCryptfs: stacked cryptographic filesystem for Linux.
 | 
				
			||||||
exofs.txt
 | 
					exofs.txt
 | 
				
			||||||
| 
						 | 
					@ -62,6 +66,8 @@ jfs.txt
 | 
				
			||||||
	- info and mount options for the JFS filesystem.
 | 
						- info and mount options for the JFS filesystem.
 | 
				
			||||||
locks.txt
 | 
					locks.txt
 | 
				
			||||||
	- info on file locking implementations, flock() vs. fcntl(), etc.
 | 
						- info on file locking implementations, flock() vs. fcntl(), etc.
 | 
				
			||||||
 | 
					logfs.txt
 | 
				
			||||||
 | 
						- info on the LogFS flash filesystem.
 | 
				
			||||||
mandatory-locking.txt
 | 
					mandatory-locking.txt
 | 
				
			||||||
	- info on the Linux implementation of Sys V mandatory file locking.
 | 
						- info on the Linux implementation of Sys V mandatory file locking.
 | 
				
			||||||
ncpfs.txt
 | 
					ncpfs.txt
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -460,13 +460,6 @@ in sys_read() and friends.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
--------------------------- dquot_operations -------------------------------
 | 
					--------------------------- dquot_operations -------------------------------
 | 
				
			||||||
prototypes:
 | 
					prototypes:
 | 
				
			||||||
	int (*initialize) (struct inode *, int);
 | 
					 | 
				
			||||||
	int (*drop) (struct inode *);
 | 
					 | 
				
			||||||
	int (*alloc_space) (struct inode *, qsize_t, int);
 | 
					 | 
				
			||||||
	int (*alloc_inode) (const struct inode *, unsigned long);
 | 
					 | 
				
			||||||
	int (*free_space) (struct inode *, qsize_t);
 | 
					 | 
				
			||||||
	int (*free_inode) (const struct inode *, unsigned long);
 | 
					 | 
				
			||||||
	int (*transfer) (struct inode *, struct iattr *);
 | 
					 | 
				
			||||||
	int (*write_dquot) (struct dquot *);
 | 
						int (*write_dquot) (struct dquot *);
 | 
				
			||||||
	int (*acquire_dquot) (struct dquot *);
 | 
						int (*acquire_dquot) (struct dquot *);
 | 
				
			||||||
	int (*release_dquot) (struct dquot *);
 | 
						int (*release_dquot) (struct dquot *);
 | 
				
			||||||
| 
						 | 
					@ -479,13 +472,6 @@ a proper locking wrt the filesystem and call the generic quota operations.
 | 
				
			||||||
What filesystem should expect from the generic quota functions:
 | 
					What filesystem should expect from the generic quota functions:
 | 
				
			||||||
 | 
					
 | 
				
			||||||
		FS recursion	Held locks when called
 | 
							FS recursion	Held locks when called
 | 
				
			||||||
initialize:	yes		maybe dqonoff_sem
 | 
					 | 
				
			||||||
drop:		yes		-
 | 
					 | 
				
			||||||
alloc_space:	->mark_dirty()	-
 | 
					 | 
				
			||||||
alloc_inode:	->mark_dirty()	-
 | 
					 | 
				
			||||||
free_space:	->mark_dirty()	-
 | 
					 | 
				
			||||||
free_inode:	->mark_dirty()	-
 | 
					 | 
				
			||||||
transfer:	yes		-
 | 
					 | 
				
			||||||
write_dquot:	yes		dqonoff_sem or dqptr_sem
 | 
					write_dquot:	yes		dqonoff_sem or dqptr_sem
 | 
				
			||||||
acquire_dquot:	yes		dqonoff_sem or dqptr_sem
 | 
					acquire_dquot:	yes		dqonoff_sem or dqptr_sem
 | 
				
			||||||
release_dquot:	yes		dqonoff_sem or dqptr_sem
 | 
					release_dquot:	yes		dqonoff_sem or dqptr_sem
 | 
				
			||||||
| 
						 | 
					@ -495,10 +481,6 @@ write_info:	yes		dqonoff_sem
 | 
				
			||||||
FS recursion means calling ->quota_read() and ->quota_write() from superblock
 | 
					FS recursion means calling ->quota_read() and ->quota_write() from superblock
 | 
				
			||||||
operations.
 | 
					operations.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
->alloc_space(), ->alloc_inode(), ->free_space(), ->free_inode() are called
 | 
					 | 
				
			||||||
only directly by the filesystem and do not call any fs functions only
 | 
					 | 
				
			||||||
the ->mark_dirty() operation.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
More details about quota locking can be found in fs/dquot.c.
 | 
					More details about quota locking can be found in fs/dquot.c.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
--------------------------- vm_operations_struct -----------------------------
 | 
					--------------------------- vm_operations_struct -----------------------------
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
							
								
								
									
										8
									
								
								Documentation/filesystems/Makefile
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										8
									
								
								Documentation/filesystems/Makefile
									
										
									
									
									
										Normal file
									
								
							| 
						 | 
					@ -0,0 +1,8 @@
 | 
				
			||||||
 | 
					# kbuild trick to avoid linker error. Can be omitted if a module is built.
 | 
				
			||||||
 | 
					obj- := dummy.o
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					# List of programs to build
 | 
				
			||||||
 | 
					hostprogs-y := dnotify_test
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					# Tell kbuild to always build the programs
 | 
				
			||||||
 | 
					always := $(hostprogs-y)
 | 
				
			||||||
							
								
								
									
										140
									
								
								Documentation/filesystems/ceph.txt
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										140
									
								
								Documentation/filesystems/ceph.txt
									
										
									
									
									
										Normal file
									
								
							| 
						 | 
					@ -0,0 +1,140 @@
 | 
				
			||||||
 | 
					Ceph Distributed File System
 | 
				
			||||||
 | 
					============================
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Ceph is a distributed network file system designed to provide good
 | 
				
			||||||
 | 
					performance, reliability, and scalability.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Basic features include:
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					 * POSIX semantics
 | 
				
			||||||
 | 
					 * Seamless scaling from 1 to many thousands of nodes
 | 
				
			||||||
 | 
					 * High availability and reliability.  No single point of failure.
 | 
				
			||||||
 | 
					 * N-way replication of data across storage nodes
 | 
				
			||||||
 | 
					 * Fast recovery from node failures
 | 
				
			||||||
 | 
					 * Automatic rebalancing of data on node addition/removal
 | 
				
			||||||
 | 
					 * Easy deployment: most FS components are userspace daemons
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Also,
 | 
				
			||||||
 | 
					 * Flexible snapshots (on any directory)
 | 
				
			||||||
 | 
					 * Recursive accounting (nested files, directories, bytes)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					In contrast to cluster filesystems like GFS, OCFS2, and GPFS that rely
 | 
				
			||||||
 | 
					on symmetric access by all clients to shared block devices, Ceph
 | 
				
			||||||
 | 
					separates data and metadata management into independent server
 | 
				
			||||||
 | 
					clusters, similar to Lustre.  Unlike Lustre, however, metadata and
 | 
				
			||||||
 | 
					storage nodes run entirely as user space daemons.  Storage nodes
 | 
				
			||||||
 | 
					utilize btrfs to store data objects, leveraging its advanced features
 | 
				
			||||||
 | 
					(checksumming, metadata replication, etc.).  File data is striped
 | 
				
			||||||
 | 
					across storage nodes in large chunks to distribute workload and
 | 
				
			||||||
 | 
					facilitate high throughputs.  When storage nodes fail, data is
 | 
				
			||||||
 | 
					re-replicated in a distributed fashion by the storage nodes themselves
 | 
				
			||||||
 | 
					(with some minimal coordination from a cluster monitor), making the
 | 
				
			||||||
 | 
					system extremely efficient and scalable.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Metadata servers effectively form a large, consistent, distributed
 | 
				
			||||||
 | 
					in-memory cache above the file namespace that is extremely scalable,
 | 
				
			||||||
 | 
					dynamically redistributes metadata in response to workload changes,
 | 
				
			||||||
 | 
					and can tolerate arbitrary (well, non-Byzantine) node failures.  The
 | 
				
			||||||
 | 
					metadata server takes a somewhat unconventional approach to metadata
 | 
				
			||||||
 | 
					storage to significantly improve performance for common workloads.  In
 | 
				
			||||||
 | 
					particular, inodes with only a single link are embedded in
 | 
				
			||||||
 | 
					directories, allowing entire directories of dentries and inodes to be
 | 
				
			||||||
 | 
					loaded into its cache with a single I/O operation.  The contents of
 | 
				
			||||||
 | 
					extremely large directories can be fragmented and managed by
 | 
				
			||||||
 | 
					independent metadata servers, allowing scalable concurrent access.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					The system offers automatic data rebalancing/migration when scaling
 | 
				
			||||||
 | 
					from a small cluster of just a few nodes to many hundreds, without
 | 
				
			||||||
 | 
					requiring an administrator to carve the data set into static volumes or
 | 
				
			||||||
 | 
					go through the tedious process of migrating data between servers.
 | 
				
			||||||
 | 
					When the file system approaches full, new nodes can be easily added
 | 
				
			||||||
 | 
					and things will "just work."
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Ceph includes a flexible snapshot mechanism that allows a user to create
 | 
				
			||||||
 | 
					a snapshot on any subdirectory (and its nested contents) in the
 | 
				
			||||||
 | 
					system.  Snapshot creation and deletion are as simple as 'mkdir
 | 
				
			||||||
 | 
					.snap/foo' and 'rmdir .snap/foo'.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Ceph also provides some recursive accounting on directories for nested
 | 
				
			||||||
 | 
					files and bytes.  That is, a 'getfattr -d foo' on any directory in the
 | 
				
			||||||
 | 
					system will reveal the total number of nested regular files and
 | 
				
			||||||
 | 
					subdirectories, and a summation of all nested file sizes.  This makes
 | 
				
			||||||
 | 
					the identification of large disk space consumers relatively quick, as
 | 
				
			||||||
 | 
					no 'du' or similar recursive scan of the file system is required.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Mount Syntax
 | 
				
			||||||
 | 
					============
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					The basic mount syntax is:
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					 # mount -t ceph monip[:port][,monip2[:port]...]:/[subdir] mnt
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					You only need to specify a single monitor, as the client will get the
 | 
				
			||||||
 | 
					full list when it connects.  (However, if the monitor you specify
 | 
				
			||||||
 | 
					happens to be down, the mount won't succeed.)  The port can be left
 | 
				
			||||||
 | 
					off if the monitor is using the default.  So if the monitor is at
 | 
				
			||||||
 | 
					1.2.3.4,
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					 # mount -t ceph 1.2.3.4:/ /mnt/ceph
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					is sufficient.  If /sbin/mount.ceph is installed, a hostname can be
 | 
				
			||||||
 | 
					used instead of an IP address.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Mount Options
 | 
				
			||||||
 | 
					=============
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  ip=A.B.C.D[:N]
 | 
				
			||||||
 | 
						Specify the IP and/or port the client should bind to locally.
 | 
				
			||||||
 | 
						There is normally not much reason to do this.  If the IP is not
 | 
				
			||||||
 | 
						specified, the client's IP address is determined by looking at the
 | 
				
			||||||
 | 
						address its connection to the monitor originates from.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  wsize=X
 | 
				
			||||||
 | 
						Specify the maximum write size in bytes.  By default there is no
 | 
				
			||||||
 | 
						maximum.  Ceph will normally size writes based on the file stripe
 | 
				
			||||||
 | 
						size.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  rsize=X
 | 
				
			||||||
 | 
						Specify the maximum readahead.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  mount_timeout=X
 | 
				
			||||||
 | 
						Specify the timeout value for mount (in seconds), in the case
 | 
				
			||||||
 | 
						of a non-responsive Ceph file system.  The default is 30
 | 
				
			||||||
 | 
						seconds.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  rbytes
 | 
				
			||||||
 | 
						When stat() is called on a directory, set st_size to 'rbytes',
 | 
				
			||||||
 | 
						the summation of file sizes over all files nested beneath that
 | 
				
			||||||
 | 
						directory.  This is the default.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  norbytes
 | 
				
			||||||
 | 
						When stat() is called on a directory, set st_size to the
 | 
				
			||||||
 | 
						number of entries in that directory.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  nocrc
 | 
				
			||||||
 | 
						Disable CRC32C calculation for data writes.  If set, the storage node
 | 
				
			||||||
 | 
						must rely on TCP's error correction to detect data corruption
 | 
				
			||||||
 | 
						in the data payload.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  noasyncreaddir
 | 
				
			||||||
 | 
						Disable the client's use of its local cache to satisfy readdir
 | 
				
			||||||
 | 
						requests.  (This does not change correctness; the client uses
 | 
				
			||||||
 | 
						cached metadata only when a lease or capability ensures it is
 | 
				
			||||||
 | 
						valid.)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					More Information
 | 
				
			||||||
 | 
					================
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					For more information on Ceph, see the home page at
 | 
				
			||||||
 | 
						http://ceph.newdream.net/
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					The Linux kernel client source tree is available at
 | 
				
			||||||
 | 
						git://ceph.newdream.net/git/ceph-client.git
 | 
				
			||||||
 | 
						git://git.kernel.org/pub/scm/linux/kernel/git/sage/ceph-client.git
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					and the source for the full system is at
 | 
				
			||||||
 | 
						git://ceph.newdream.net/git/ceph.git
 | 
				
			||||||
| 
						 | 
					@ -62,38 +62,9 @@ disabled, fcntl(fd, F_NOTIFY, ...) will return -EINVAL.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
Example
 | 
					Example
 | 
				
			||||||
-------
 | 
					-------
 | 
				
			||||||
 | 
					See Documentation/filesystems/dnotify_test.c for an example.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	#define _GNU_SOURCE	/* needed to get the defines */
 | 
					NOTE
 | 
				
			||||||
	#include <fcntl.h>	/* in glibc 2.2 this has the needed
 | 
					----
 | 
				
			||||||
					   values defined */
 | 
					Beginning with Linux 2.6.13, dnotify has been replaced by inotify.
 | 
				
			||||||
	#include <signal.h>
 | 
					See Documentation/filesystems/inotify.txt for more information on it.
 | 
				
			||||||
	#include <stdio.h>
 | 
					 | 
				
			||||||
	#include <unistd.h>
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
	static volatile int event_fd;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
	static void handler(int sig, siginfo_t *si, void *data)
 | 
					 | 
				
			||||||
	{
 | 
					 | 
				
			||||||
		event_fd = si->si_fd;
 | 
					 | 
				
			||||||
	}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
	int main(void)
 | 
					 | 
				
			||||||
	{
 | 
					 | 
				
			||||||
		struct sigaction act;
 | 
					 | 
				
			||||||
		int fd;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
		act.sa_sigaction = handler;
 | 
					 | 
				
			||||||
		sigemptyset(&act.sa_mask);
 | 
					 | 
				
			||||||
		act.sa_flags = SA_SIGINFO;
 | 
					 | 
				
			||||||
		sigaction(SIGRTMIN + 1, &act, NULL);
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
		fd = open(".", O_RDONLY);
 | 
					 | 
				
			||||||
		fcntl(fd, F_SETSIG, SIGRTMIN + 1);
 | 
					 | 
				
			||||||
		fcntl(fd, F_NOTIFY, DN_MODIFY|DN_CREATE|DN_MULTISHOT);
 | 
					 | 
				
			||||||
		/* we will now be notified if any of the files
 | 
					 | 
				
			||||||
		   in "." is modified or new files are created */
 | 
					 | 
				
			||||||
		while (1) {
 | 
					 | 
				
			||||||
			pause();
 | 
					 | 
				
			||||||
			printf("Got event on fd=%d\n", event_fd);
 | 
					 | 
				
			||||||
		}
 | 
					 | 
				
			||||||
	}
 | 
					 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
							
								
								
									
										34
									
								
								Documentation/filesystems/dnotify_test.c
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										34
									
								
								Documentation/filesystems/dnotify_test.c
									
										
									
									
									
										Normal file
									
								
							| 
						 | 
					@ -0,0 +1,34 @@
 | 
				
			||||||
 | 
					#define _GNU_SOURCE	/* needed to get the defines */
 | 
				
			||||||
 | 
					#include <fcntl.h>	/* in glibc 2.2 this has the needed
 | 
				
			||||||
 | 
									   values defined */
 | 
				
			||||||
 | 
					#include <signal.h>
 | 
				
			||||||
 | 
					#include <stdio.h>
 | 
				
			||||||
 | 
					#include <unistd.h>
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					static volatile int event_fd;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					static void handler(int sig, siginfo_t *si, void *data)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
						event_fd = si->si_fd;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					int main(void)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
						struct sigaction act;
 | 
				
			||||||
 | 
						int fd;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						act.sa_sigaction = handler;
 | 
				
			||||||
 | 
						sigemptyset(&act.sa_mask);
 | 
				
			||||||
 | 
						act.sa_flags = SA_SIGINFO;
 | 
				
			||||||
 | 
						sigaction(SIGRTMIN + 1, &act, NULL);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						fd = open(".", O_RDONLY);
 | 
				
			||||||
 | 
						fcntl(fd, F_SETSIG, SIGRTMIN + 1);
 | 
				
			||||||
 | 
						fcntl(fd, F_NOTIFY, DN_MODIFY|DN_CREATE|DN_MULTISHOT);
 | 
				
			||||||
 | 
						/* we will now be notified if any of the files
 | 
				
			||||||
 | 
						   in "." is modified or new files are created */
 | 
				
			||||||
 | 
						while (1) {
 | 
				
			||||||
 | 
							pause();
 | 
				
			||||||
 | 
							printf("Got event on fd=%d\n", event_fd);
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
							
								
								
									
										241
									
								
								Documentation/filesystems/logfs.txt
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										241
									
								
								Documentation/filesystems/logfs.txt
									
										
									
									
									
										Normal file
									
								
							| 
						 | 
					@ -0,0 +1,241 @@
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					The LogFS Flash Filesystem
 | 
				
			||||||
 | 
					==========================
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Specification
 | 
				
			||||||
 | 
					=============
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Superblocks
 | 
				
			||||||
 | 
					-----------
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Two superblocks exist at the beginning and end of the filesystem.
 | 
				
			||||||
 | 
					Each superblock is 256 Bytes large, with another 3840 Bytes reserved
 | 
				
			||||||
 | 
					for future purposes, making a total of 4096 Bytes.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Superblock locations may differ for MTD and block devices.  On MTD the
 | 
				
			||||||
 | 
					first non-bad block contains a superblock in the first 4096 Bytes and
 | 
				
			||||||
 | 
					the last non-bad block contains a superblock in the last 4096 Bytes.
 | 
				
			||||||
 | 
					On block devices, the first 4096 Bytes of the device contain the first
 | 
				
			||||||
 | 
					superblock and the last aligned 4096 Byte-block contains the second
 | 
				
			||||||
 | 
					superblock.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					For the most part, the superblocks can be considered read-only.  They
 | 
				
			||||||
 | 
					are written only to correct errors detected within the superblocks,
 | 
				
			||||||
 | 
					move the journal and change the filesystem parameters through tunefs.
 | 
				
			||||||
 | 
					As a result, the superblock does not contain any fields that require
 | 
				
			||||||
 | 
					constant updates, like the amount of free space, etc.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Segments
 | 
				
			||||||
 | 
					--------
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					The space in the device is split up into equal-sized segments.
 | 
				
			||||||
 | 
					Segments are the primary write unit of LogFS.  Within each segment,
 | 
				
			||||||
 | 
					writes happen from front (low addresses) to back (high addresses).  If
 | 
				
			||||||
 | 
					only a partial segment has been written, the segment number, the
 | 
				
			||||||
 | 
					current position within and optionally a write buffer are stored in
 | 
				
			||||||
 | 
					the journal.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Segments are erased as a whole.  Therefore Garbage Collection may be
 | 
				
			||||||
 | 
					required to completely free a segment before doing so.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Journal
 | 
				
			||||||
 | 
					--------
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					The journal contains all global information about the filesystem that
 | 
				
			||||||
 | 
					is subject to frequent change.  At mount time, it has to be scanned
 | 
				
			||||||
 | 
					for the most recent commit entry, which contains a list of pointers to
 | 
				
			||||||
 | 
					all currently valid entries.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Object Store
 | 
				
			||||||
 | 
					------------
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					All space except for the superblocks and journal is part of the object
 | 
				
			||||||
 | 
					store.  Each segment contains a segment header and a number of
 | 
				
			||||||
 | 
					objects, each consisting of the object header and the payload.
 | 
				
			||||||
 | 
					Objects are either inodes, directory entries (dentries), file data
 | 
				
			||||||
 | 
					blocks or indirect blocks.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Levels
 | 
				
			||||||
 | 
					------
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Garbage collection (GC) may fail if all data is written
 | 
				
			||||||
 | 
					indiscriminately.  One requirement of GC is that data is separated
 | 
				
			||||||
 | 
					roughly according to the distance between the tree root and the data.
 | 
				
			||||||
 | 
					Effectively that means all file data is on level 0, indirect blocks
 | 
				
			||||||
 | 
					are on levels 1, 2, 3, 4 or 5 for 1x, 2x, 3x, 4x or 5x indirect blocks,
 | 
				
			||||||
 | 
					respectively.  Inode file data is on level 6 for the inodes and 7-11
 | 
				
			||||||
 | 
					for indirect blocks.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Each segment contains objects of a single level only.  As a result,
 | 
				
			||||||
 | 
					each level requires its own separate segment to be open for writing.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Inode File
 | 
				
			||||||
 | 
					----------
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					All inodes are stored in a special file, the inode file.  The sole
 | 
				
			||||||
 | 
					exception is the inode file's inode (master inode) which for obvious
 | 
				
			||||||
 | 
					reasons is stored in the journal instead.  Instead of data blocks, the
 | 
				
			||||||
 | 
					leaf nodes of the inode files are inodes.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Aliases
 | 
				
			||||||
 | 
					-------
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Writes in LogFS are done by means of a wandering tree.  A naïve
 | 
				
			||||||
 | 
					implementation would require that for each write of a block, all
 | 
				
			||||||
 | 
					parent blocks are written as well, since the block pointers have
 | 
				
			||||||
 | 
					changed.  Such an implementation would not be very efficient.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					In LogFS, the block pointer changes are cached in the journal by means
 | 
				
			||||||
 | 
					of alias entries.  Each alias consists of its logical address - inode
 | 
				
			||||||
 | 
					number, block index, level and child number (index into block) - and
 | 
				
			||||||
 | 
					the changed data.  Any 8-byte word can be changed in this manner.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Currently aliases are used for block pointers, file size, file used
 | 
				
			||||||
 | 
					bytes and the height of an inode's indirect tree.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Segment Aliases
 | 
				
			||||||
 | 
					---------------
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Related to regular aliases, these are used to handle bad blocks.
 | 
				
			||||||
 | 
					Initially, bad blocks are handled by moving the affected segment
 | 
				
			||||||
 | 
					content to a spare segment and noting this move in the journal with a
 | 
				
			||||||
 | 
					segment alias, a simple (to, from) tuple.  GC will later empty this
 | 
				
			||||||
 | 
					segment and the alias can be removed again.  This is used on MTD only.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Vim
 | 
				
			||||||
 | 
					---
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					By cleverly predicting the life time of data, it is possible to
 | 
				
			||||||
 | 
					separate long-living data from short-living data and thereby reduce
 | 
				
			||||||
 | 
					the GC overhead later.  Each type of distinct life expectancy (vim) can
 | 
				
			||||||
 | 
					have a separate segment open for writing.  Each (level, vim) tuple can
 | 
				
			||||||
 | 
					be open just once.  If an open segment with unknown vim is encountered
 | 
				
			||||||
 | 
					at mount time, it is closed and ignored henceforth.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Indirect Tree
 | 
				
			||||||
 | 
					-------------
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Inodes in LogFS are similar to FFS-style filesystems with direct and
 | 
				
			||||||
 | 
					indirect block pointers.  One difference is that LogFS uses a single
 | 
				
			||||||
 | 
					indirect pointer that can be either a 1x, 2x, etc. indirect pointer.
 | 
				
			||||||
 | 
					A height field in the inode defines the height of the indirect tree
 | 
				
			||||||
 | 
					and thereby the indirection of the pointer.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Another difference is the addressing of indirect blocks.  In LogFS,
 | 
				
			||||||
 | 
					the first 16 pointers in the first indirect block are left empty,
 | 
				
			||||||
 | 
					corresponding to the 16 direct pointers in the inode.  In ext2 (maybe
 | 
				
			||||||
 | 
					others as well) the first pointer in the first indirect block
 | 
				
			||||||
 | 
					corresponds to logical block 12, skipping the 12 direct pointers.
 | 
				
			||||||
 | 
					So where ext2 is using arithmetic to better utilize space, LogFS keeps
 | 
				
			||||||
 | 
					arithmetic simple and uses compression to save space.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Compression
 | 
				
			||||||
 | 
					-----------
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Both file data and metadata can be compressed.  Compression for file
 | 
				
			||||||
 | 
					data can be enabled with chattr +c and disabled with chattr -c.  Doing
 | 
				
			||||||
 | 
					so has no effect on existing data, but new data will be stored
 | 
				
			||||||
 | 
					accordingly.  New inodes will inherit the compression flag of the
 | 
				
			||||||
 | 
					parent directory.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Metadata is always compressed.  However, the space accounting ignores
 | 
				
			||||||
 | 
					this and charges for the uncompressed size.  Failing to do so could
 | 
				
			||||||
 | 
					result in GC failures when, after moving some data, indirect blocks
 | 
				
			||||||
 | 
					compress worse than previously.  Even on a 100% full medium, GC may
 | 
				
			||||||
 | 
					not consume any extra space, so the compression gains are lost space
 | 
				
			||||||
 | 
					to the user.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					However, they are not lost space to the filesystem internals.  By
 | 
				
			||||||
 | 
					cheating the user for those bytes, the filesystem gained some slack
 | 
				
			||||||
 | 
					space and GC will run less often and faster.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Garbage Collection and Wear Leveling
 | 
				
			||||||
 | 
					------------------------------------
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Garbage collection is invoked whenever the number of free segments
 | 
				
			||||||
 | 
					falls below a threshold.  The best (known) candidate is picked based
 | 
				
			||||||
 | 
					on the least amount of valid data contained in the segment.  All
 | 
				
			||||||
 | 
					remaining valid data is copied elsewhere, thereby invalidating it.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					The GC code also checks for aliases and writes them back if their
 | 
				
			||||||
 | 
					number gets too large.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Wear leveling is done by occasionally picking a suboptimal segment for
 | 
				
			||||||
 | 
					garbage collection.  If a stale segment's erase count is significantly
 | 
				
			||||||
 | 
					lower than the active segments' erase counts, it will be picked.  Wear
 | 
				
			||||||
 | 
					leveling is rate limited, so it will never monopolize the device for
 | 
				
			||||||
 | 
					more than one segment worth at a time.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Values for "occasionally", "significantly lower" are compile time
 | 
				
			||||||
 | 
					constants.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Hashed directories
 | 
				
			||||||
 | 
					------------------
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					To satisfy efficient lookup(), directory entries are hashed and
 | 
				
			||||||
 | 
					located based on the hash.  In order to both support large directories
 | 
				
			||||||
 | 
					and not be overly inefficient for small directories, several hash
 | 
				
			||||||
 | 
					tables of increasing size are used.  For each table, the hash value
 | 
				
			||||||
 | 
					modulo the table size gives the table index.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Table sizes are chosen to limit the number of indirect blocks with a
 | 
				
			||||||
 | 
					fully populated table to 0, 1, 2 or 3 respectively.  So the first
 | 
				
			||||||
 | 
					table contains 16 entries, the second 512-16, etc.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					The last table is special in several ways.  First its size depends on
 | 
				
			||||||
 | 
					the effective 32bit limit on telldir/seekdir cookies.  Since logfs
 | 
				
			||||||
 | 
					uses the upper half of the address space for indirect blocks, the size
 | 
				
			||||||
 | 
					is limited to 2^31.  Secondly the table contains hash buckets with 16
 | 
				
			||||||
 | 
					entries each.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Using single-entry buckets would result in birthday "attacks".  At
 | 
				
			||||||
 | 
					just 2^16 used entries, hash collisions would be likely (P >= 0.5).
 | 
				
			||||||
 | 
					My math skills are insufficient to do the combinatorics for the 17x
 | 
				
			||||||
 | 
					collisions necessary to overflow a bucket, but testing showed that in
 | 
				
			||||||
 | 
					10,000 runs the lowest directory fill before a bucket overflow was
 | 
				
			||||||
 | 
					188,057,130 entries with an average of 315,149,915 entries.  So for
 | 
				
			||||||
 | 
					directory sizes of up to a million, bucket overflows should be
 | 
				
			||||||
 | 
					virtually impossible under normal circumstances.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					With carefully chosen filenames, it is obviously possible to cause an
 | 
				
			||||||
 | 
					overflow with just 21 entries (4 higher tables + 16 entries + 1).  So
 | 
				
			||||||
 | 
					there may be a security concern if a malicious user has write access
 | 
				
			||||||
 | 
					to a directory.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Open For Discussion
 | 
				
			||||||
 | 
					===================
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Device Address Space
 | 
				
			||||||
 | 
					--------------------
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					A device address space is used for caching.  Both block devices and
 | 
				
			||||||
 | 
					MTD provide functions to either read a single page or write a segment.
 | 
				
			||||||
 | 
					Partial segments may be written for data integrity, but where possible
 | 
				
			||||||
 | 
					complete segments are written for performance on simple block device
 | 
				
			||||||
 | 
					flash media.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Meta Inodes
 | 
				
			||||||
 | 
					-----------
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Inodes are stored in the inode file, which is just a regular file for
 | 
				
			||||||
 | 
					most purposes.  At umount time, however, the inode file needs to
 | 
				
			||||||
 | 
					remain open until all dirty inodes are written.  So
 | 
				
			||||||
 | 
					generic_shutdown_super() may not close this inode, but shouldn't
 | 
				
			||||||
 | 
					complain about remaining inodes due to the inode file either.  Same
 | 
				
			||||||
 | 
					goes for mapping inode of the device address space.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Currently logfs uses a hack that essentially copies part of fs/inode.c
 | 
				
			||||||
 | 
					code over.  A general solution would be preferred.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Indirect block mapping
 | 
				
			||||||
 | 
					----------------------
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					With compression, the block device (or mapping inode) cannot be used
 | 
				
			||||||
 | 
					to cache indirect blocks.  Some other place is required.  Currently
 | 
				
			||||||
 | 
					logfs uses the top half of each inode's address space.  The low 8TB
 | 
				
			||||||
 | 
					(on 32bit) are filled with file data, the high 8TB are used for
 | 
				
			||||||
 | 
					indirect blocks.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					One problem is that 16TB files created on 64bit systems actually have
 | 
				
			||||||
 | 
					data in the top 8TB.  But files >16TB would cause problems anyway, so
 | 
				
			||||||
 | 
					only the limit has changed.
 | 
				
			||||||
| 
						 | 
					@ -17,8 +17,7 @@ kernels must turn 4.1 on or off *before* turning support for version 4
 | 
				
			||||||
on or off; rpc.nfsd does this correctly.)
 | 
					on or off; rpc.nfsd does this correctly.)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
The NFSv4 minorversion 1 (NFSv4.1) implementation in nfsd is based
 | 
					The NFSv4 minorversion 1 (NFSv4.1) implementation in nfsd is based
 | 
				
			||||||
on the latest NFSv4.1 Internet Draft:
 | 
					on RFC 5661.
 | 
				
			||||||
http://tools.ietf.org/html/draft-ietf-nfsv4-minorversion1-29
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
From the many new features in NFSv4.1 the current implementation
 | 
					From the many new features in NFSv4.1 the current implementation
 | 
				
			||||||
focuses on the mandatory-to-implement NFSv4.1 Sessions, providing
 | 
					focuses on the mandatory-to-implement NFSv4.1 Sessions, providing
 | 
				
			||||||
| 
						 | 
					@ -44,7 +43,7 @@ interoperability problems with future clients.  Known issues:
 | 
				
			||||||
	  trunking, but this is a mandatory feature, and its use is
 | 
						  trunking, but this is a mandatory feature, and its use is
 | 
				
			||||||
	  recommended to clients in a number of places.  (E.g. to ensure
 | 
						  recommended to clients in a number of places.  (E.g. to ensure
 | 
				
			||||||
	  timely renewal in case an existing connection's retry timeouts
 | 
						  timely renewal in case an existing connection's retry timeouts
 | 
				
			||||||
	  have gotten too long; see section 8.3 of the draft.)
 | 
						  have gotten too long; see section 8.3 of the RFC.)
 | 
				
			||||||
	  Therefore, lack of this feature may cause future clients to
 | 
						  Therefore, lack of this feature may cause future clients to
 | 
				
			||||||
	  fail.
 | 
						  fail.
 | 
				
			||||||
	- Incomplete backchannel support: incomplete backchannel gss
 | 
						- Incomplete backchannel support: incomplete backchannel gss
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -164,6 +164,7 @@ read the file /proc/PID/status:
 | 
				
			||||||
  VmExe:        68 kB
 | 
					  VmExe:        68 kB
 | 
				
			||||||
  VmLib:      1412 kB
 | 
					  VmLib:      1412 kB
 | 
				
			||||||
  VmPTE:        20 kb
 | 
					  VmPTE:        20 kb
 | 
				
			||||||
 | 
					  VmSwap:        0 kB
 | 
				
			||||||
  Threads:        1
 | 
					  Threads:        1
 | 
				
			||||||
  SigQ:   0/28578
 | 
					  SigQ:   0/28578
 | 
				
			||||||
  SigPnd: 0000000000000000
 | 
					  SigPnd: 0000000000000000
 | 
				
			||||||
| 
						 | 
					@ -188,7 +189,13 @@ memory usage. Its seven fields are explained in Table 1-3.  The stat file
 | 
				
			||||||
contains details information about the process itself.  Its fields are
 | 
					contains details information about the process itself.  Its fields are
 | 
				
			||||||
explained in Table 1-4.
 | 
					explained in Table 1-4.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
Table 1-2: Contents of the statm files (as of 2.6.30-rc7)
 | 
					(for SMP CONFIG users)
 | 
				
			||||||
 | 
					For making accounting scalable, RSS related information are handled in
 | 
				
			||||||
 | 
					asynchronous manner and the value may not be very precise. To see a precise
 | 
				
			||||||
 | 
					snapshot of a moment, you can see /proc/<pid>/smaps file and scan page table.
 | 
				
			||||||
 | 
					It's slow but very precise.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Table 1-2: Contents of the status files (as of 2.6.30-rc7)
 | 
				
			||||||
..............................................................................
 | 
					..............................................................................
 | 
				
			||||||
 Field                       Content
 | 
					 Field                       Content
 | 
				
			||||||
 Name                        filename of the executable
 | 
					 Name                        filename of the executable
 | 
				
			||||||
| 
						 | 
					@ -213,6 +220,7 @@ Table 1-2: Contents of the statm files (as of 2.6.30-rc7)
 | 
				
			||||||
 VmExe                       size of text segment
 | 
					 VmExe                       size of text segment
 | 
				
			||||||
 VmLib                       size of shared library code
 | 
					 VmLib                       size of shared library code
 | 
				
			||||||
 VmPTE                       size of page table entries
 | 
					 VmPTE                       size of page table entries
 | 
				
			||||||
 | 
					 VmSwap                      size of swap usage (the number of referred swapents)
 | 
				
			||||||
 Threads                     number of threads
 | 
					 Threads                     number of threads
 | 
				
			||||||
 SigQ                        number of signals queued/max. number for queue
 | 
					 SigQ                        number of signals queued/max. number for queue
 | 
				
			||||||
 SigPnd                      bitmap of pending signals for the thread
 | 
					 SigPnd                      bitmap of pending signals for the thread
 | 
				
			||||||
| 
						 | 
					@ -430,6 +438,7 @@ Table 1-5: Kernel info in /proc
 | 
				
			||||||
 modules     List of loaded modules                            
 | 
					 modules     List of loaded modules                            
 | 
				
			||||||
 mounts      Mounted filesystems                               
 | 
					 mounts      Mounted filesystems                               
 | 
				
			||||||
 net         Networking info (see text)                        
 | 
					 net         Networking info (see text)                        
 | 
				
			||||||
 | 
					 pagetypeinfo Additional page allocator information (see text)  (2.5)
 | 
				
			||||||
 partitions  Table of partitions known to the system           
 | 
					 partitions  Table of partitions known to the system           
 | 
				
			||||||
 pci	     Deprecated info of PCI bus (new way -> /proc/bus/pci/,
 | 
					 pci	     Deprecated info of PCI bus (new way -> /proc/bus/pci/,
 | 
				
			||||||
             decoupled by lspci					(2.4)
 | 
					             decoupled by lspci					(2.4)
 | 
				
			||||||
| 
						 | 
					@ -584,7 +593,7 @@ Node 0, zone      DMA      0      4      5      4      4      3 ...
 | 
				
			||||||
Node 0, zone   Normal      1      0      0      1    101      8 ...
 | 
					Node 0, zone   Normal      1      0      0      1    101      8 ...
 | 
				
			||||||
Node 0, zone  HighMem      2      0      0      1      1      0 ...
 | 
					Node 0, zone  HighMem      2      0      0      1      1      0 ...
 | 
				
			||||||
 | 
					
 | 
				
			||||||
Memory fragmentation is a problem under some workloads, and buddyinfo is a 
 | 
					External fragmentation is a problem under some workloads, and buddyinfo is a
 | 
				
			||||||
useful tool for helping diagnose these problems.  Buddyinfo will give you a 
 | 
					useful tool for helping diagnose these problems.  Buddyinfo will give you a 
 | 
				
			||||||
clue as to how big an area you can safely allocate, or why a previous
 | 
					clue as to how big an area you can safely allocate, or why a previous
 | 
				
			||||||
allocation failed.
 | 
					allocation failed.
 | 
				
			||||||
| 
						 | 
					@ -594,6 +603,48 @@ available.  In this case, there are 0 chunks of 2^0*PAGE_SIZE available in
 | 
				
			||||||
ZONE_DMA, 4 chunks of 2^1*PAGE_SIZE in ZONE_DMA, 101 chunks of 2^4*PAGE_SIZE 
 | 
					ZONE_DMA, 4 chunks of 2^1*PAGE_SIZE in ZONE_DMA, 101 chunks of 2^4*PAGE_SIZE 
 | 
				
			||||||
available in ZONE_NORMAL, etc... 
 | 
					available in ZONE_NORMAL, etc... 
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					More information relevant to external fragmentation can be found in
 | 
				
			||||||
 | 
					pagetypeinfo.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					> cat /proc/pagetypeinfo
 | 
				
			||||||
 | 
					Page block order: 9
 | 
				
			||||||
 | 
					Pages per block:  512
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Free pages count per migrate type at order       0      1      2      3      4      5      6      7      8      9     10
 | 
				
			||||||
 | 
					Node    0, zone      DMA, type    Unmovable      0      0      0      1      1      1      1      1      1      1      0
 | 
				
			||||||
 | 
					Node    0, zone      DMA, type  Reclaimable      0      0      0      0      0      0      0      0      0      0      0
 | 
				
			||||||
 | 
					Node    0, zone      DMA, type      Movable      1      1      2      1      2      1      1      0      1      0      2
 | 
				
			||||||
 | 
					Node    0, zone      DMA, type      Reserve      0      0      0      0      0      0      0      0      0      1      0
 | 
				
			||||||
 | 
					Node    0, zone      DMA, type      Isolate      0      0      0      0      0      0      0      0      0      0      0
 | 
				
			||||||
 | 
					Node    0, zone    DMA32, type    Unmovable    103     54     77      1      1      1     11      8      7      1      9
 | 
				
			||||||
 | 
					Node    0, zone    DMA32, type  Reclaimable      0      0      2      1      0      0      0      0      1      0      0
 | 
				
			||||||
 | 
					Node    0, zone    DMA32, type      Movable    169    152    113     91     77     54     39     13      6      1    452
 | 
				
			||||||
 | 
					Node    0, zone    DMA32, type      Reserve      1      2      2      2      2      0      1      1      1      1      0
 | 
				
			||||||
 | 
					Node    0, zone    DMA32, type      Isolate      0      0      0      0      0      0      0      0      0      0      0
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Number of blocks type     Unmovable  Reclaimable      Movable      Reserve      Isolate
 | 
				
			||||||
 | 
					Node 0, zone      DMA            2            0            5            1            0
 | 
				
			||||||
 | 
					Node 0, zone    DMA32           41            6          967            2            0
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Fragmentation avoidance in the kernel works by grouping pages of different
 | 
				
			||||||
 | 
					migrate types into the same contiguous regions of memory called page blocks.
 | 
				
			||||||
 | 
					A page block is typically the size of the default hugepage size e.g. 2MB on
 | 
				
			||||||
 | 
					X86-64. By keeping pages grouped based on their ability to move, the kernel
 | 
				
			||||||
 | 
					can reclaim pages within a page block to satisfy a high-order allocation.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					The pagetypeinfo begins with information on the size of a page block. It
 | 
				
			||||||
 | 
					then gives the same type of information as buddyinfo except broken down
 | 
				
			||||||
 | 
					by migrate-type and finishes with details on how many page blocks of each
 | 
				
			||||||
 | 
					type exist.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					If min_free_kbytes has been tuned correctly (recommendations made by hugeadm
 | 
				
			||||||
 | 
					from libhugetlbfs http://sourceforge.net/projects/libhugetlbfs/), one can
 | 
				
			||||||
 | 
					make an estimate of the likely number of huge pages that can be allocated
 | 
				
			||||||
 | 
					at a given point in time. All the "Movable" blocks should be allocatable
 | 
				
			||||||
 | 
					unless memory has been mlock()'d. Some of the Reclaimable blocks should
 | 
				
			||||||
 | 
					also be allocatable although a lot of filesystem metadata may have to be
 | 
				
			||||||
 | 
					reclaimed to achieve this.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
..............................................................................
 | 
					..............................................................................
 | 
				
			||||||
 | 
					
 | 
				
			||||||
meminfo:
 | 
					meminfo:
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -837,6 +837,9 @@ replicas continue to be exactly same.
 | 
				
			||||||
	 individual lists does not affect propagation or the way propagation
 | 
						 individual lists does not affect propagation or the way propagation
 | 
				
			||||||
	 tree is modified by operations.
 | 
						 tree is modified by operations.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						All vfsmounts in a peer group have the same ->mnt_master.  If it is
 | 
				
			||||||
 | 
						non-NULL, they form a contiguous (ordered) segment of slave list.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	An example propagation tree looks as shown in the figure below.
 | 
						An example propagation tree looks as shown in the figure below.
 | 
				
			||||||
	[ NOTE: Though it looks like a forest, if we consider all the shared
 | 
						[ NOTE: Though it looks like a forest, if we consider all the shared
 | 
				
			||||||
	mounts as a conceptual entity called 'pnode', it becomes a tree]
 | 
						mounts as a conceptual entity called 'pnode', it becomes a tree]
 | 
				
			||||||
| 
						 | 
					@ -874,8 +877,19 @@ replicas continue to be exactly same.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	NOTE: The propagation tree is orthogonal to the mount tree.
 | 
						NOTE: The propagation tree is orthogonal to the mount tree.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					8B Locking:
 | 
				
			||||||
 | 
					
 | 
				
			||||||
8B Algorithm:
 | 
						->mnt_share, ->mnt_slave, ->mnt_slave_list, ->mnt_master are protected
 | 
				
			||||||
 | 
						by namespace_sem (exclusive for modifications, shared for reading).
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						Normally we have ->mnt_flags modifications serialized by vfsmount_lock.
 | 
				
			||||||
 | 
						There are two exceptions: do_add_mount() and clone_mnt().
 | 
				
			||||||
 | 
						The former modifies a vfsmount that has not been visible in any shared
 | 
				
			||||||
 | 
						data structures yet.
 | 
				
			||||||
 | 
						The latter holds namespace_sem and the only references to vfsmount
 | 
				
			||||||
 | 
						are in lists that can't be traversed without namespace_sem.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					8C Algorithm:
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	The crux of the implementation resides in rbind/move operation.
 | 
						The crux of the implementation resides in rbind/move operation.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -82,11 +82,13 @@ tmpfs has a mount option to set the NUMA memory allocation policy for
 | 
				
			||||||
all files in that instance (if CONFIG_NUMA is enabled) - which can be
 | 
					all files in that instance (if CONFIG_NUMA is enabled) - which can be
 | 
				
			||||||
adjusted on the fly via 'mount -o remount ...'
 | 
					adjusted on the fly via 'mount -o remount ...'
 | 
				
			||||||
 | 
					
 | 
				
			||||||
mpol=default             prefers to allocate memory from the local node
 | 
					mpol=default             use the process allocation policy
 | 
				
			||||||
 | 
					                         (see set_mempolicy(2))
 | 
				
			||||||
mpol=prefer:Node         prefers to allocate memory from the given Node
 | 
					mpol=prefer:Node         prefers to allocate memory from the given Node
 | 
				
			||||||
mpol=bind:NodeList       allocates memory only from nodes in NodeList
 | 
					mpol=bind:NodeList       allocates memory only from nodes in NodeList
 | 
				
			||||||
mpol=interleave          prefers to allocate from each node in turn
 | 
					mpol=interleave          prefers to allocate from each node in turn
 | 
				
			||||||
mpol=interleave:NodeList allocates from each node of NodeList in turn
 | 
					mpol=interleave:NodeList allocates from each node of NodeList in turn
 | 
				
			||||||
 | 
					mpol=local		 prefers to allocate memory from the local node
 | 
				
			||||||
 | 
					
 | 
				
			||||||
NodeList format is a comma-separated list of decimal numbers and ranges,
 | 
					NodeList format is a comma-separated list of decimal numbers and ranges,
 | 
				
			||||||
a range being two hyphen-separated decimal numbers, the smallest and
 | 
					a range being two hyphen-separated decimal numbers, the smallest and
 | 
				
			||||||
| 
						 | 
					@ -134,3 +136,5 @@ Author:
 | 
				
			||||||
   Christoph Rohland <cr@sap.com>, 1.12.01
 | 
					   Christoph Rohland <cr@sap.com>, 1.12.01
 | 
				
			||||||
Updated:
 | 
					Updated:
 | 
				
			||||||
   Hugh Dickins, 4 June 2007
 | 
					   Hugh Dickins, 4 June 2007
 | 
				
			||||||
 | 
					Updated:
 | 
				
			||||||
 | 
					   KOSAKI Motohiro, 16 Mar 2010
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -253,6 +253,70 @@ pin setup (e.g. controlling which pin the GPIO uses, pullup/pulldown).
 | 
				
			||||||
Also note that it's your responsibility to have stopped using a GPIO
 | 
					Also note that it's your responsibility to have stopped using a GPIO
 | 
				
			||||||
before you free it.
 | 
					before you free it.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Considering in most cases GPIOs are actually configured right after they
 | 
				
			||||||
 | 
					are claimed, three additional calls are defined:
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						/* request a single GPIO, with initial configuration specified by
 | 
				
			||||||
 | 
						 * 'flags', identical to gpio_request() wrt other arguments and
 | 
				
			||||||
 | 
						 * return value
 | 
				
			||||||
 | 
						 */
 | 
				
			||||||
 | 
						int gpio_request_one(unsigned gpio, unsigned long flags, const char *label);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						/* request multiple GPIOs in a single call
 | 
				
			||||||
 | 
						 */
 | 
				
			||||||
 | 
						int gpio_request_array(struct gpio *array, size_t num);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						/* release multiple GPIOs in a single call
 | 
				
			||||||
 | 
						 */
 | 
				
			||||||
 | 
						void gpio_free_array(struct gpio *array, size_t num);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					where 'flags' is currently defined to specify the following properties:
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						* GPIOF_DIR_IN		- to configure direction as input
 | 
				
			||||||
 | 
						* GPIOF_DIR_OUT		- to configure direction as output
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						* GPIOF_INIT_LOW	- as output, set initial level to LOW
 | 
				
			||||||
 | 
						* GPIOF_INIT_HIGH	- as output, set initial level to HIGH
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Since GPIOF_INIT_* are only valid when configured as output, group valid
 | 
				
			||||||
 | 
					combinations as:
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						* GPIOF_IN		- configure as input
 | 
				
			||||||
 | 
						* GPIOF_OUT_INIT_LOW	- configured as output, initial level LOW
 | 
				
			||||||
 | 
						* GPIOF_OUT_INIT_HIGH	- configured as output, initial level HIGH
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					In the future, these flags can be extended to support more properties such
 | 
				
			||||||
 | 
					as open-drain status.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Furthermore, to ease the claim/release of multiple GPIOs, 'struct gpio' is
 | 
				
			||||||
 | 
					introduced to encapsulate all three fields as:
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						struct gpio {
 | 
				
			||||||
 | 
							unsigned	gpio;
 | 
				
			||||||
 | 
							unsigned long	flags;
 | 
				
			||||||
 | 
							const char	*label;
 | 
				
			||||||
 | 
						};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					A typical example of usage:
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						static struct gpio leds_gpios[] = {
 | 
				
			||||||
 | 
							{ 32, GPIOF_OUT_INIT_HIGH, "Power LED" }, /* default to ON */
 | 
				
			||||||
 | 
							{ 33, GPIOF_OUT_INIT_LOW,  "Green LED" }, /* default to OFF */
 | 
				
			||||||
 | 
							{ 34, GPIOF_OUT_INIT_LOW,  "Red LED"   }, /* default to OFF */
 | 
				
			||||||
 | 
							{ 35, GPIOF_OUT_INIT_LOW,  "Blue LED"  }, /* default to OFF */
 | 
				
			||||||
 | 
							{ ... },
 | 
				
			||||||
 | 
						};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						err = gpio_request_one(31, GPIOF_IN, "Reset Button");
 | 
				
			||||||
 | 
						if (err)
 | 
				
			||||||
 | 
							...
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						err = gpio_request_array(leds_gpios, ARRAY_SIZE(leds_gpios));
 | 
				
			||||||
 | 
						if (err)
 | 
				
			||||||
 | 
							...
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						gpio_free_array(leds_gpios, ARRAY_SIZE(leds_gpios));
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
GPIOs mapped to IRQs
 | 
					GPIOs mapped to IRQs
 | 
				
			||||||
--------------------
 | 
					--------------------
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -30,7 +30,7 @@ Supported chips:
 | 
				
			||||||
	   bank1_types=1,1,0,0,0,0,0,2,0,0,0,0,2,0,0,1
 | 
						   bank1_types=1,1,0,0,0,0,0,2,0,0,0,0,2,0,0,1
 | 
				
			||||||
	   You may also need to specify the fan_sensors option for these boards
 | 
						   You may also need to specify the fan_sensors option for these boards
 | 
				
			||||||
	   fan_sensors=5
 | 
						   fan_sensors=5
 | 
				
			||||||
	2) There is a seperate abituguru3 driver for these motherboards,
 | 
						2) There is a separate abituguru3 driver for these motherboards,
 | 
				
			||||||
	   the abituguru (without the 3 !) driver will not work on these
 | 
						   the abituguru (without the 3 !) driver will not work on these
 | 
				
			||||||
	   motherboards (and vice versa)!
 | 
						   motherboards (and vice versa)!
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
							
								
								
									
										42
									
								
								Documentation/hwmon/adt7411
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										42
									
								
								Documentation/hwmon/adt7411
									
										
									
									
									
										Normal file
									
								
							| 
						 | 
					@ -0,0 +1,42 @@
 | 
				
			||||||
 | 
					Kernel driver adt7411
 | 
				
			||||||
 | 
					=====================
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Supported chips:
 | 
				
			||||||
 | 
					  * Analog Devices ADT7411
 | 
				
			||||||
 | 
					    Prefix: 'adt7411'
 | 
				
			||||||
 | 
					    Addresses scanned: 0x48, 0x4a, 0x4b
 | 
				
			||||||
 | 
					    Datasheet: Publicly available at the Analog Devices website
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Author: Wolfram Sang (based on adt7470 by Darrick J. Wong)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Description
 | 
				
			||||||
 | 
					-----------
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					This driver implements support for the Analog Devices ADT7411 chip. There may
 | 
				
			||||||
 | 
					be other chips that implement this interface.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					The ADT7411 can use an I2C/SMBus compatible 2-wire interface or an
 | 
				
			||||||
 | 
					SPI-compatible 4-wire interface. It provides a 10-bit analog to digital
 | 
				
			||||||
 | 
					converter which measures 1 temperature, vdd and 8 input voltages. It has an
 | 
				
			||||||
 | 
					internal temperature sensor, but an external one can also be connected (one
 | 
				
			||||||
 | 
					loses 2 inputs then). There are high- and low-limit registers for all inputs.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Check the datasheet for details.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					sysfs-Interface
 | 
				
			||||||
 | 
					---------------
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					in0_input	- vdd voltage input
 | 
				
			||||||
 | 
					in[1-8]_input	- analog 1-8 input
 | 
				
			||||||
 | 
					temp1_input	- temperature input
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Besides standard interfaces, this driver adds (0 = off, 1 = on):
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  adc_ref_vdd	- Use vdd as reference instead of 2.25 V
 | 
				
			||||||
 | 
					  fast_sampling	- Sample at 22.5 kHz instead of 1.4 kHz, but drop filters
 | 
				
			||||||
 | 
					  no_average	- Turn off averaging over 16 samples
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Notes
 | 
				
			||||||
 | 
					-----
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					SPI, external temperature sensor and limit registers are not supported yet.
 | 
				
			||||||
| 
						 | 
					@ -1,74 +0,0 @@
 | 
				
			||||||
Kernel driver adt7473
 | 
					 | 
				
			||||||
======================
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
Supported chips:
 | 
					 | 
				
			||||||
  * Analog Devices ADT7473
 | 
					 | 
				
			||||||
    Prefix: 'adt7473'
 | 
					 | 
				
			||||||
    Addresses scanned: I2C 0x2C, 0x2D, 0x2E
 | 
					 | 
				
			||||||
    Datasheet: Publicly available at the Analog Devices website
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
Author: Darrick J. Wong
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
This driver is depreacted, please use the adt7475 driver instead.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
Description
 | 
					 | 
				
			||||||
-----------
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
This driver implements support for the Analog Devices ADT7473 chip family.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
The ADT7473 uses the 2-wire interface compatible with the SMBUS 2.0
 | 
					 | 
				
			||||||
specification. Using an analog to digital converter it measures three (3)
 | 
					 | 
				
			||||||
temperatures and two (2) voltages. It has four (4) 16-bit counters for
 | 
					 | 
				
			||||||
measuring fan speed. There are three (3) PWM outputs that can be used
 | 
					 | 
				
			||||||
to control fan speed.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
A sophisticated control system for the PWM outputs is designed into the
 | 
					 | 
				
			||||||
ADT7473 that allows fan speed to be adjusted automatically based on any of the
 | 
					 | 
				
			||||||
three temperature sensors. Each PWM output is individually adjustable and
 | 
					 | 
				
			||||||
programmable. Once configured, the ADT7473 will adjust the PWM outputs in
 | 
					 | 
				
			||||||
response to the measured temperatures without further host intervention.
 | 
					 | 
				
			||||||
This feature can also be disabled for manual control of the PWM's.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
Each of the measured inputs (voltage, temperature, fan speed) has
 | 
					 | 
				
			||||||
corresponding high/low limit values. The ADT7473 will signal an ALARM if
 | 
					 | 
				
			||||||
any measured value exceeds either limit.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
The ADT7473 samples all inputs continuously. The driver will not read
 | 
					 | 
				
			||||||
the registers more often than once every other second. Further,
 | 
					 | 
				
			||||||
configuration data is only read once per minute.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
Special Features
 | 
					 | 
				
			||||||
----------------
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
The ADT7473 have a 10-bit ADC and can therefore measure temperatures
 | 
					 | 
				
			||||||
with 0.25 degC resolution. Temperature readings can be configured either
 | 
					 | 
				
			||||||
for twos complement format or "Offset 64" format, wherein 63 is subtracted
 | 
					 | 
				
			||||||
from the raw value to get the temperature value.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
The Analog Devices datasheet is very detailed and describes a procedure for
 | 
					 | 
				
			||||||
determining an optimal configuration for the automatic PWM control.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
Configuration Notes
 | 
					 | 
				
			||||||
-------------------
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
Besides standard interfaces driver adds the following:
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
* PWM Control
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
* pwm#_auto_point1_pwm and temp#_auto_point1_temp and
 | 
					 | 
				
			||||||
* pwm#_auto_point2_pwm and temp#_auto_point2_temp -
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
point1: Set the pwm speed at a lower temperature bound.
 | 
					 | 
				
			||||||
point2: Set the pwm speed at a higher temperature bound.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
The ADT7473 will scale the pwm between the lower and higher pwm speed when
 | 
					 | 
				
			||||||
the temperature is between the two temperature boundaries.  PWM values range
 | 
					 | 
				
			||||||
from 0 (off) to 255 (full speed).  Fan speed will be set to maximum when the
 | 
					 | 
				
			||||||
temperature sensor associated with the PWM control exceeds temp#_max.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
Notes
 | 
					 | 
				
			||||||
-----
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
The NVIDIA binary driver presents an ADT7473 chip via an on-card i2c bus.
 | 
					 | 
				
			||||||
Unfortunately, they fail to set the i2c adapter class, so this driver may
 | 
					 | 
				
			||||||
fail to find the chip until the nvidia driver is patched.
 | 
					 | 
				
			||||||
							
								
								
									
										296
									
								
								Documentation/hwmon/asc7621
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										296
									
								
								Documentation/hwmon/asc7621
									
										
									
									
									
										Normal file
									
								
							| 
						 | 
					@ -0,0 +1,296 @@
 | 
				
			||||||
 | 
					Kernel driver asc7621
 | 
				
			||||||
 | 
					==================
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Supported chips:
 | 
				
			||||||
 | 
					    Andigilog aSC7621 and aSC7621a
 | 
				
			||||||
 | 
					    Prefix: 'asc7621'
 | 
				
			||||||
 | 
					    Addresses scanned: I2C 0x2c, 0x2d, 0x2e
 | 
				
			||||||
 | 
					    Datasheet: http://www.fairview5.com/linux/asc7621/asc7621.pdf
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Author:
 | 
				
			||||||
 | 
							George Joseph
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Description provided by Dave Pivin @ Andigilog:
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Andigilog has both the PECI and pre-PECI versions of the Heceta-6, as
 | 
				
			||||||
 | 
					Intel calls them. Heceta-6e has high frequency PWM and Heceta-6p has
 | 
				
			||||||
 | 
					added PECI and a 4th thermal zone. The Andigilog aSC7611 is the
 | 
				
			||||||
 | 
					Heceta-6e part and aSC7621 is the Heceta-6p part. They are both in
 | 
				
			||||||
 | 
					volume production, shipping to Intel and their subs.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					We have enhanced both parts relative to the governing Intel
 | 
				
			||||||
 | 
					specification. First enhancement is temperature reading resolution. We
 | 
				
			||||||
 | 
					have used registers below 20h for vendor-specific functions in addition
 | 
				
			||||||
 | 
					to those in the Intel-specified vendor range.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Our conversion process produces a result that is reported as two bytes.
 | 
				
			||||||
 | 
					The fan speed control uses this finer value to produce a "step-less" fan
 | 
				
			||||||
 | 
					PWM output. These two bytes are "read-locked" to guarantee that once a
 | 
				
			||||||
 | 
					high or low byte is read, the other byte is locked-in until after the
 | 
				
			||||||
 | 
					next read of any register. So to get an atomic reading, read high or low
 | 
				
			||||||
 | 
					byte, then the very next read should be the opposite byte. Our data
 | 
				
			||||||
 | 
					sheet says 10-bits of resolution, although you may find the lower bits
 | 
				
			||||||
 | 
					are active, they are not necessarily reliable or useful externally. We
 | 
				
			||||||
 | 
					chose not to mask them.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					We employ significant filtering that is user tunable as described in the
 | 
				
			||||||
 | 
					data sheet. Our temperature reports and fan PWM outputs are very smooth
 | 
				
			||||||
 | 
					when compared to the competition, in addition to the higher resolution
 | 
				
			||||||
 | 
					temperature reports. The smoother PWM output does not require user
 | 
				
			||||||
 | 
					intervention.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					We offer GPIO features on the former VID pins. These are open-drain
 | 
				
			||||||
 | 
					outputs or inputs and may be used as general purpose I/O or as alarm
 | 
				
			||||||
 | 
					outputs that are based on temperature limits. These are in 19h and 1Ah.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					We offer flexible mapping of temperature readings to thermal zones. Any
 | 
				
			||||||
 | 
					temperature may be mapped to any zone, which has a default assignment
 | 
				
			||||||
 | 
					that follows Intel's specs.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Since there is a fan to zone assignment that allows for the "hotter" of
 | 
				
			||||||
 | 
					a set of zones to control the PWM of an individual fan, but there is no
 | 
				
			||||||
 | 
					indication to the user, we have added an indicator that shows which zone
 | 
				
			||||||
 | 
					is currently controlling the PWM for a given fan. This is in register
 | 
				
			||||||
 | 
					00h.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Both remote diode temperature readings may be given an offset value such
 | 
				
			||||||
 | 
					that the reported reading as well as the temperature used to determine
 | 
				
			||||||
 | 
					PWM may be offset for system calibration purposes.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					PECI Extended configuration allows for having more than two domains per
 | 
				
			||||||
 | 
					PECI address and also provides an enabling function for each PECI
 | 
				
			||||||
 | 
					address. One could use our flexible zone assignment to have a zone
 | 
				
			||||||
 | 
					assigned to up to 4 PECI addresses. This is not possible in the default
 | 
				
			||||||
 | 
					Intel configuration. This would be useful in multi-CPU systems with
 | 
				
			||||||
 | 
					individual fans on each that would benefit from individual fan control.
 | 
				
			||||||
 | 
					This is in register 0Eh.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					The tachometer measurement system is flexible and able to adapt to many
 | 
				
			||||||
 | 
					fan types. We can also support pulse-stretched PWM so that 3-wire fans
 | 
				
			||||||
 | 
					may be used. These characteristics are in registers 04h to 07h.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Finally, we have added a tach disable function that turns off the tach
 | 
				
			||||||
 | 
					measurement system for individual tachs in order to save power. That is
 | 
				
			||||||
 | 
					in register 75h.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					--
 | 
				
			||||||
 | 
					aSC7621 Product Description
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					The aSC7621 has a two wire digital interface compatible with SMBus 2.0.
 | 
				
			||||||
 | 
					Using a 10-bit ADC, the aSC7621 measures the temperature of two remote diode
 | 
				
			||||||
 | 
					connected transistors as well as its own die. Support for Platform
 | 
				
			||||||
 | 
					Environmental Control Interface (PECI) is included.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Using temperature information from these four zones, an automatic fan speed
 | 
				
			||||||
 | 
					control algorithm is employed to minimize acoustic impact while achieving
 | 
				
			||||||
 | 
					recommended CPU temperature under varying operational loads.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					To set fan speed, the aSC7621 has three independent pulse width modulation
 | 
				
			||||||
 | 
					(PWM) outputs that are controlled by one, or a combination of three,
 | 
				
			||||||
 | 
					temperature zones. Both high- and low-frequency PWM ranges are supported.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					The aSC7621 also includes a digital filter that can be invoked to smooth
 | 
				
			||||||
 | 
					temperature readings for better control of fan speed and minimum acoustic
 | 
				
			||||||
 | 
					impact.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					The aSC7621 has tachometer inputs to measure fan speed on up to four fans.
 | 
				
			||||||
 | 
					Limit and status registers for all measured values are included to alert
 | 
				
			||||||
 | 
					the system host that any measurements are outside of programmed limits
 | 
				
			||||||
 | 
					via status registers.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					System voltages of VCCP, 2.5V, 3.3V, 5.0V, and 12V motherboard power are
 | 
				
			||||||
 | 
					monitored efficiently with internal scaling resistors.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Features
 | 
				
			||||||
 | 
					- Supports PECI interface and monitors internal and remote thermal diodes
 | 
				
			||||||
 | 
					- 2-wire, SMBus 2.0 compliant, serial interface
 | 
				
			||||||
 | 
					- 10-bit ADC
 | 
				
			||||||
 | 
					- Monitors VCCP, 2.5V, 3.3V, 5.0V, and 12V motherboard/processor supplies
 | 
				
			||||||
 | 
					- Programmable autonomous fan control based on temperature readings
 | 
				
			||||||
 | 
					- Noise filtering of temperature reading for fan speed control
 | 
				
			||||||
 | 
					- 0.25C digital temperature sensor resolution
 | 
				
			||||||
 | 
					- 3 PWM fan speed control outputs for 2-, 3- or 4-wire fans and up to 4 fan
 | 
				
			||||||
 | 
						tachometer inputs
 | 
				
			||||||
 | 
					- Enhanced measured temperature to Temperature Zone assignment.
 | 
				
			||||||
 | 
					- Provides high and low PWM frequency ranges
 | 
				
			||||||
 | 
					- 3 GPIO pins for custom use
 | 
				
			||||||
 | 
					- 24-Lead QSOP package
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Configuration Notes
 | 
				
			||||||
 | 
					===================
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Except where noted below, the sysfs entries created by this driver follow
 | 
				
			||||||
 | 
					the standards defined in "sysfs-interface".
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					temp1_source
 | 
				
			||||||
 | 
						0 	(default) peci_legacy = 0, Remote 1 Temperature
 | 
				
			||||||
 | 
								peci_legacy = 1, PECI Processor Temperature 0
 | 
				
			||||||
 | 
						1 	Remote 1 Temperature
 | 
				
			||||||
 | 
						2 	Remote 2 Temperature
 | 
				
			||||||
 | 
						3 	Internal Temperature
 | 
				
			||||||
 | 
						4 	PECI Processor Temperature 0
 | 
				
			||||||
 | 
						5 	PECI Processor Temperature 1
 | 
				
			||||||
 | 
						6 	PECI Processor Temperature 2
 | 
				
			||||||
 | 
						7  PECI Processor Temperature 3
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					temp2_source
 | 
				
			||||||
 | 
						0 	(default) Internal Temperature
 | 
				
			||||||
 | 
						1 	Remote 1 Temperature
 | 
				
			||||||
 | 
						2 	Remote 2 Temperature
 | 
				
			||||||
 | 
						3 	Internal Temperature
 | 
				
			||||||
 | 
						4 	PECI Processor Temperature 0
 | 
				
			||||||
 | 
						5 	PECI Processor Temperature 1
 | 
				
			||||||
 | 
						6 	PECI Processor Temperature 2
 | 
				
			||||||
 | 
						7 	PECI Processor Temperature 3
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					temp3_source
 | 
				
			||||||
 | 
						0 	(default) Remote 2 Temperature
 | 
				
			||||||
 | 
						1 	Remote 1 Temperature
 | 
				
			||||||
 | 
						2 	Remote 2 Temperature
 | 
				
			||||||
 | 
						3 	Internal Temperature
 | 
				
			||||||
 | 
						4 	PECI Processor Temperature 0
 | 
				
			||||||
 | 
						5 	PECI Processor Temperature 1
 | 
				
			||||||
 | 
						6 	PECI Processor Temperature 2
 | 
				
			||||||
 | 
						7 	PECI Processor Temperature 3
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					temp4_source
 | 
				
			||||||
 | 
						0 	(default) peci_legacy = 0, PECI Processor Temperature 0
 | 
				
			||||||
 | 
								peci_legacy = 1, Remote 1 Temperature
 | 
				
			||||||
 | 
						1 	Remote 1 Temperature
 | 
				
			||||||
 | 
						2 	Remote 2 Temperature
 | 
				
			||||||
 | 
						3 	Internal Temperature
 | 
				
			||||||
 | 
						4 	PECI Processor Temperature 0
 | 
				
			||||||
 | 
						5 	PECI Processor Temperature 1
 | 
				
			||||||
 | 
						6 	PECI Processor Temperature 2
 | 
				
			||||||
 | 
						7 	PECI Processor Temperature 3
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					temp[1-4]_smoothing_enable
 | 
				
			||||||
 | 
					temp[1-4]_smoothing_time
 | 
				
			||||||
 | 
						Smooths spikes in temp readings caused by noise.
 | 
				
			||||||
 | 
						Valid values in milliseconds are:
 | 
				
			||||||
 | 
						35000
 | 
				
			||||||
 | 
						17600
 | 
				
			||||||
 | 
						11800
 | 
				
			||||||
 | 
						 7000
 | 
				
			||||||
 | 
						 4400
 | 
				
			||||||
 | 
						 3000
 | 
				
			||||||
 | 
						 1600
 | 
				
			||||||
 | 
						  800
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					temp[1-4]_crit
 | 
				
			||||||
 | 
						When the corresponding zone temperature reaches this value,
 | 
				
			||||||
 | 
						ALL pwm outputs will go to 100%.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					temp[5-8]_input
 | 
				
			||||||
 | 
					temp[5-8]_enable
 | 
				
			||||||
 | 
						The aSC7621 can also read temperatures provided by the processor
 | 
				
			||||||
 | 
						via the PECI bus.  Usually these are "core" temps and are relative
 | 
				
			||||||
 | 
						to the point where the automatic thermal control circuit starts
 | 
				
			||||||
 | 
						throttling.  This means that these are usually negative numbers.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					pwm[1-3]_enable
 | 
				
			||||||
 | 
						0		Fan off.
 | 
				
			||||||
 | 
						1		Fan on manual control.
 | 
				
			||||||
 | 
						2		Fan on automatic control and will run at the minimum pwm
 | 
				
			||||||
 | 
									if the temperature for the zone is below the minimum.
 | 
				
			||||||
 | 
						3		Fan on automatic control but will be off if the temperature
 | 
				
			||||||
 | 
									for the zone is below the minimum.
 | 
				
			||||||
 | 
						4-254	Ignored.
 | 
				
			||||||
 | 
						255		Fan on full.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					pwm[1-3]_auto_channels
 | 
				
			||||||
 | 
						Bitmap as described in sysfs-interface with the following
 | 
				
			||||||
 | 
						exceptions...
 | 
				
			||||||
 | 
						Only the following combination of zones (and their corresponding masks)
 | 
				
			||||||
 | 
						are valid:
 | 
				
			||||||
 | 
						1
 | 
				
			||||||
 | 
						2
 | 
				
			||||||
 | 
						3
 | 
				
			||||||
 | 
						2,3
 | 
				
			||||||
 | 
						1,2,3
 | 
				
			||||||
 | 
						4
 | 
				
			||||||
 | 
						1,2,3,4
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						Special values:
 | 
				
			||||||
 | 
						0			Disabled.
 | 
				
			||||||
 | 
						16		Fan on manual control.
 | 
				
			||||||
 | 
						31		Fan on full.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					pwm[1-3]_invert
 | 
				
			||||||
 | 
						When set, inverts the meaning of pwm[1-3].
 | 
				
			||||||
 | 
						i.e.  when pwm = 0, the fan will be on full and
 | 
				
			||||||
 | 
						when pwm = 255 the fan will be off.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					pwm[1-3]_freq
 | 
				
			||||||
 | 
						PWM frequency in Hz
 | 
				
			||||||
 | 
						Valid values in Hz are:
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						10
 | 
				
			||||||
 | 
						15
 | 
				
			||||||
 | 
						23
 | 
				
			||||||
 | 
						30  (default)
 | 
				
			||||||
 | 
						38
 | 
				
			||||||
 | 
						47
 | 
				
			||||||
 | 
						62
 | 
				
			||||||
 | 
						94
 | 
				
			||||||
 | 
						23000
 | 
				
			||||||
 | 
						24000
 | 
				
			||||||
 | 
						25000
 | 
				
			||||||
 | 
						26000
 | 
				
			||||||
 | 
						27000
 | 
				
			||||||
 | 
						28000
 | 
				
			||||||
 | 
						29000
 | 
				
			||||||
 | 
						30000
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						Setting any other value will be ignored.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					peci_enable
 | 
				
			||||||
 | 
						Enables or disables PECI
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					peci_avg
 | 
				
			||||||
 | 
						Input filter average time.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						0 	0 Sec. (no Smoothing) (default)
 | 
				
			||||||
 | 
						1 	0.25 Sec.
 | 
				
			||||||
 | 
						2 	0.5 Sec.
 | 
				
			||||||
 | 
						3 	1.0 Sec.
 | 
				
			||||||
 | 
						4 	2.0 Sec.
 | 
				
			||||||
 | 
						5 	4.0 Sec.
 | 
				
			||||||
 | 
						6 	8.0 Sec.
 | 
				
			||||||
 | 
						7 	0.0 Sec.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					peci_legacy
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						0	Standard Mode (default)
 | 
				
			||||||
 | 
							Remote Diode 1 reading is associated with
 | 
				
			||||||
 | 
							Temperature Zone 1, PECI is associated with
 | 
				
			||||||
 | 
							Zone 4
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						1	Legacy Mode
 | 
				
			||||||
 | 
							PECI is associated with Temperature Zone 1,
 | 
				
			||||||
 | 
							Remote Diode 1 is associated with Zone 4
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					peci_diode
 | 
				
			||||||
 | 
						Diode filter
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						0	0.25 Sec.
 | 
				
			||||||
 | 
						1 	1.1 Sec.
 | 
				
			||||||
 | 
						2 	2.4 Sec.  (default)
 | 
				
			||||||
 | 
						3 	3.4 Sec.
 | 
				
			||||||
 | 
						4 	5.0 Sec.
 | 
				
			||||||
 | 
						5 	6.8 Sec.
 | 
				
			||||||
 | 
						6 	10.2 Sec.
 | 
				
			||||||
 | 
						7 	16.4 Sec.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					peci_4domain
 | 
				
			||||||
 | 
						Four domain enable
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						0 	1 or 2 Domains for enabled processors (default)
 | 
				
			||||||
 | 
						1 	3 or 4 Domains for enabled processors
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					peci_domain
 | 
				
			||||||
 | 
						Domain
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						0 	Processor contains a single domain (0) 	 (default)
 | 
				
			||||||
 | 
						1 	Processor contains two domains (0,1)
 | 
				
			||||||
| 
						 | 
					@ -5,31 +5,23 @@ Supported chips:
 | 
				
			||||||
  * IT8705F
 | 
					  * IT8705F
 | 
				
			||||||
    Prefix: 'it87'
 | 
					    Prefix: 'it87'
 | 
				
			||||||
    Addresses scanned: from Super I/O config space (8 I/O ports)
 | 
					    Addresses scanned: from Super I/O config space (8 I/O ports)
 | 
				
			||||||
    Datasheet: Publicly available at the ITE website
 | 
					    Datasheet: Once publicly available at the ITE website, but no longer
 | 
				
			||||||
               http://www.ite.com.tw/product_info/file/pc/IT8705F_V.0.4.1.pdf
 | 
					 | 
				
			||||||
  * IT8712F
 | 
					  * IT8712F
 | 
				
			||||||
    Prefix: 'it8712'
 | 
					    Prefix: 'it8712'
 | 
				
			||||||
    Addresses scanned: from Super I/O config space (8 I/O ports)
 | 
					    Addresses scanned: from Super I/O config space (8 I/O ports)
 | 
				
			||||||
    Datasheet: Publicly available at the ITE website
 | 
					    Datasheet: Once publicly available at the ITE website, but no longer
 | 
				
			||||||
               http://www.ite.com.tw/product_info/file/pc/IT8712F_V0.9.1.pdf
 | 
					 | 
				
			||||||
               http://www.ite.com.tw/product_info/file/pc/Errata%20V0.1%20for%20IT8712F%20V0.9.1.pdf
 | 
					 | 
				
			||||||
               http://www.ite.com.tw/product_info/file/pc/IT8712F_V0.9.3.pdf
 | 
					 | 
				
			||||||
  * IT8716F/IT8726F
 | 
					  * IT8716F/IT8726F
 | 
				
			||||||
    Prefix: 'it8716'
 | 
					    Prefix: 'it8716'
 | 
				
			||||||
    Addresses scanned: from Super I/O config space (8 I/O ports)
 | 
					    Addresses scanned: from Super I/O config space (8 I/O ports)
 | 
				
			||||||
    Datasheet: Publicly available at the ITE website
 | 
					    Datasheet: Once publicly available at the ITE website, but no longer
 | 
				
			||||||
               http://www.ite.com.tw/product_info/file/pc/IT8716F_V0.3.ZIP
 | 
					 | 
				
			||||||
               http://www.ite.com.tw/product_info/file/pc/IT8726F_V0.3.pdf
 | 
					 | 
				
			||||||
  * IT8718F
 | 
					  * IT8718F
 | 
				
			||||||
    Prefix: 'it8718'
 | 
					    Prefix: 'it8718'
 | 
				
			||||||
    Addresses scanned: from Super I/O config space (8 I/O ports)
 | 
					    Addresses scanned: from Super I/O config space (8 I/O ports)
 | 
				
			||||||
    Datasheet: Publicly available at the ITE website
 | 
					    Datasheet: Once publicly available at the ITE website, but no longer
 | 
				
			||||||
               http://www.ite.com.tw/product_info/file/pc/IT8718F_V0.2.zip
 | 
					 | 
				
			||||||
               http://www.ite.com.tw/product_info/file/pc/IT8718F_V0%203_(for%20C%20version).zip
 | 
					 | 
				
			||||||
  * IT8720F
 | 
					  * IT8720F
 | 
				
			||||||
    Prefix: 'it8720'
 | 
					    Prefix: 'it8720'
 | 
				
			||||||
    Addresses scanned: from Super I/O config space (8 I/O ports)
 | 
					    Addresses scanned: from Super I/O config space (8 I/O ports)
 | 
				
			||||||
    Datasheet: Not yet publicly available.
 | 
					    Datasheet: Not publicly available
 | 
				
			||||||
  * SiS950   [clone of IT8705F]
 | 
					  * SiS950   [clone of IT8705F]
 | 
				
			||||||
    Prefix: 'it87'
 | 
					    Prefix: 'it87'
 | 
				
			||||||
    Addresses scanned: from Super I/O config space (8 I/O ports)
 | 
					    Addresses scanned: from Super I/O config space (8 I/O ports)
 | 
				
			||||||
| 
						 | 
					@ -136,6 +128,10 @@ registers are read whenever any data is read (unless it is less than 1.5
 | 
				
			||||||
seconds since the last update). This means that you can easily miss
 | 
					seconds since the last update). This means that you can easily miss
 | 
				
			||||||
once-only alarms.
 | 
					once-only alarms.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Out-of-limit readings can also result in beeping, if the chip is properly
 | 
				
			||||||
 | 
					wired and configured. Beeping can be enabled or disabled per sensor type
 | 
				
			||||||
 | 
					(temperatures, voltages and fans.)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
The IT87xx only updates its values each 1.5 seconds; reading it more often
 | 
					The IT87xx only updates its values each 1.5 seconds; reading it more often
 | 
				
			||||||
will do no harm, but will return 'old' values.
 | 
					will do no harm, but will return 'old' values.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					@ -150,11 +146,38 @@ Fan speed control
 | 
				
			||||||
-----------------
 | 
					-----------------
 | 
				
			||||||
 | 
					
 | 
				
			||||||
The fan speed control features are limited to manual PWM mode. Automatic
 | 
					The fan speed control features are limited to manual PWM mode. Automatic
 | 
				
			||||||
"Smart Guardian" mode control handling is not implemented. However
 | 
					"Smart Guardian" mode control handling is only implemented for older chips
 | 
				
			||||||
if you want to go for "manual mode" just write 1 to pwmN_enable.
 | 
					(see below.) However if you want to go for "manual mode" just write 1 to
 | 
				
			||||||
 | 
					pwmN_enable.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
If you are only able to control the fan speed with very small PWM values,
 | 
					If you are only able to control the fan speed with very small PWM values,
 | 
				
			||||||
try lowering the PWM base frequency (pwm1_freq). Depending on the fan,
 | 
					try lowering the PWM base frequency (pwm1_freq). Depending on the fan,
 | 
				
			||||||
it may give you a somewhat greater control range. The same frequency is
 | 
					it may give you a somewhat greater control range. The same frequency is
 | 
				
			||||||
used to drive all fan outputs, which is why pwm2_freq and pwm3_freq are
 | 
					used to drive all fan outputs, which is why pwm2_freq and pwm3_freq are
 | 
				
			||||||
read-only.
 | 
					read-only.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Automatic fan speed control (old interface)
 | 
				
			||||||
 | 
					-------------------------------------------
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					The driver supports the old interface to automatic fan speed control
 | 
				
			||||||
 | 
					which is implemented by IT8705F chips up to revision F and IT8712F
 | 
				
			||||||
 | 
					chips up to revision G.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					This interface implements 4 temperature vs. PWM output trip points.
 | 
				
			||||||
 | 
					The PWM output of trip point 4 is always the maximum value (fan running
 | 
				
			||||||
 | 
					at full speed) while the PWM output of the other 3 trip points can be
 | 
				
			||||||
 | 
					freely chosen. The temperature of all 4 trip points can be freely chosen.
 | 
				
			||||||
 | 
					Additionally, trip point 1 has a hysteresis temperature attached, to
 | 
				
			||||||
 | 
					prevent fast switching between fan on and off.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					The chip automatically computes the PWM output value based on the input
 | 
				
			||||||
 | 
					temperature, using this simple rule: if the temperature value is
 | 
				
			||||||
 | 
					between trip point N and trip point N+1 then the PWM output value is
 | 
				
			||||||
 | 
					the one of trip point N. The automatic control mode is less flexible
 | 
				
			||||||
 | 
					than the manual control mode, but it reacts faster, is more robust and
 | 
				
			||||||
 | 
					doesn't use CPU cycles.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Trip points must be set properly before switching to automatic fan speed
 | 
				
			||||||
 | 
					control mode. The driver will perform basic integrity checks before
 | 
				
			||||||
 | 
					actually switching to automatic control mode.
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -84,6 +84,10 @@ Supported chips:
 | 
				
			||||||
    Addresses scanned: I2C 0x4c
 | 
					    Addresses scanned: I2C 0x4c
 | 
				
			||||||
    Datasheet: Publicly available at the Maxim website
 | 
					    Datasheet: Publicly available at the Maxim website
 | 
				
			||||||
               http://www.maxim-ic.com/quick_view2.cfm/qv_pk/3500
 | 
					               http://www.maxim-ic.com/quick_view2.cfm/qv_pk/3500
 | 
				
			||||||
 | 
					  * Winbond/Nuvoton W83L771AWG/ASG
 | 
				
			||||||
 | 
					    Prefix: 'w83l771'
 | 
				
			||||||
 | 
					    Addresses scanned: I2C 0x4c
 | 
				
			||||||
 | 
					    Datasheet: Not publicly available, can be requested from Nuvoton
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
Author: Jean Delvare <khali@linux-fr.org>
 | 
					Author: Jean Delvare <khali@linux-fr.org>
 | 
				
			||||||
| 
						 | 
					@ -147,6 +151,12 @@ MAX6680 and MAX6681:
 | 
				
			||||||
  * Selectable address
 | 
					  * Selectable address
 | 
				
			||||||
  * Remote sensor type selection
 | 
					  * Remote sensor type selection
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					W83L771AWG/ASG
 | 
				
			||||||
 | 
					  * The AWG and ASG variants only differ in package format.
 | 
				
			||||||
 | 
					  * Filter and alert configuration register at 0xBF
 | 
				
			||||||
 | 
					  * Diode ideality factor configuration (remote sensor) at 0xE3
 | 
				
			||||||
 | 
					  * Moving average (depending on conversion rate)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
All temperature values are given in degrees Celsius. Resolution
 | 
					All temperature values are given in degrees Celsius. Resolution
 | 
				
			||||||
is 1.0 degree for the local temperature, 0.125 degree for the remote
 | 
					is 1.0 degree for the local temperature, 0.125 degree for the remote
 | 
				
			||||||
temperature, except for the MAX6657, MAX6658 and MAX6659 which have a
 | 
					temperature, except for the MAX6657, MAX6658 and MAX6659 which have a
 | 
				
			||||||
| 
						 | 
					@ -163,6 +173,18 @@ The lm90 driver will not update its values more frequently than every
 | 
				
			||||||
other second; reading them more often will do no harm, but will return
 | 
					other second; reading them more often will do no harm, but will return
 | 
				
			||||||
'old' values.
 | 
					'old' values.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					SMBus Alert Support
 | 
				
			||||||
 | 
					-------------------
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					This driver has basic support for SMBus alert. When an alert is received,
 | 
				
			||||||
 | 
					the status register is read and the faulty temperature channel is logged.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					The Analog Devices chips (ADM1032 and ADT7461) do not implement the SMBus
 | 
				
			||||||
 | 
					alert protocol properly so additional care is needed: the ALERT output is
 | 
				
			||||||
 | 
					disabled when an alert is received, and is re-enabled only when the alarm
 | 
				
			||||||
 | 
					is gone. Otherwise the chip would block alerts from other chips in the bus
 | 
				
			||||||
 | 
					as long as the alarm is active.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
PEC Support
 | 
					PEC Support
 | 
				
			||||||
-----------
 | 
					-----------
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
							
								
								
									
										49
									
								
								Documentation/init.txt
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										49
									
								
								Documentation/init.txt
									
										
									
									
									
										Normal file
									
								
							| 
						 | 
					@ -0,0 +1,49 @@
 | 
				
			||||||
 | 
					Explaining the dreaded "No init found." boot hang message
 | 
				
			||||||
 | 
					=========================================================
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					OK, so you've got this pretty unintuitive message (currently located
 | 
				
			||||||
 | 
					in init/main.c) and are wondering what the H*** went wrong.
 | 
				
			||||||
 | 
					Some high-level reasons for failure (listed roughly in order of execution)
 | 
				
			||||||
 | 
					to load the init binary are:
 | 
				
			||||||
 | 
					A) Unable to mount root FS
 | 
				
			||||||
 | 
					B) init binary doesn't exist on rootfs
 | 
				
			||||||
 | 
					C) broken console device
 | 
				
			||||||
 | 
					D) binary exists but dependencies not available
 | 
				
			||||||
 | 
					E) binary cannot be loaded
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Detailed explanations:
 | 
				
			||||||
 | 
					0) Set "debug" kernel parameter (in bootloader config file or CONFIG_CMDLINE)
 | 
				
			||||||
 | 
					   to get more detailed kernel messages.
 | 
				
			||||||
 | 
					A) make sure you have the correct root FS type
 | 
				
			||||||
 | 
					   (and root= kernel parameter points to the correct partition),
 | 
				
			||||||
 | 
					   required drivers such as storage hardware (such as SCSI or USB!)
 | 
				
			||||||
 | 
					   and filesystem (ext3, jffs2 etc.) are builtin (alternatively as modules,
 | 
				
			||||||
 | 
					   to be pre-loaded by an initrd)
 | 
				
			||||||
 | 
					C) Possibly a conflict in console= setup --> initial console unavailable.
 | 
				
			||||||
 | 
					   E.g. some serial consoles are unreliable due to serial IRQ issues (e.g.
 | 
				
			||||||
 | 
					   missing interrupt-based configuration).
 | 
				
			||||||
 | 
					   Try using a different console= device or e.g. netconsole= .
 | 
				
			||||||
 | 
					D) e.g. required library dependencies of the init binary such as
 | 
				
			||||||
 | 
					   /lib/ld-linux.so.2 missing or broken. Use readelf -d <INIT>|grep NEEDED
 | 
				
			||||||
 | 
					   to find out which libraries are required.
 | 
				
			||||||
 | 
					E) make sure the binary's architecture matches your hardware.
 | 
				
			||||||
 | 
					   E.g. i386 vs. x86_64 mismatch, or trying to load x86 on ARM hardware.
 | 
				
			||||||
 | 
					   In case you tried loading a non-binary file here (shell script?),
 | 
				
			||||||
 | 
					   you should make sure that the script specifies an interpreter in its shebang
 | 
				
			||||||
 | 
					   header line (#!/...) that is fully working (including its library
 | 
				
			||||||
 | 
					   dependencies). And before tackling scripts, better first test a simple
 | 
				
			||||||
 | 
					   non-script binary such as /bin/sh and confirm its successful execution.
 | 
				
			||||||
 | 
					   To find out more, add code to init/main.c to display kernel_execve()'s
 | 
				
			||||||
 | 
					   return values.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Please extend this explanation whenever you find new failure causes
 | 
				
			||||||
 | 
					(after all loading the init binary is a CRITICAL and hard transition step
 | 
				
			||||||
 | 
					which needs to be made as painless as possible), then submit a patch to LKML.
 | 
				
			||||||
 | 
					Further TODOs:
 | 
				
			||||||
 | 
					- Implement the various run_init_process() invocations via a struct array
 | 
				
			||||||
 | 
					  which can then store the kernel_execve() result value and on failure
 | 
				
			||||||
 | 
					  log it all by iterating over _all_ results (very important usability fix).
 | 
				
			||||||
 | 
					- try to make the implementation itself more helpful in general,
 | 
				
			||||||
 | 
					  e.g. by providing additional error messages at affected places.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Andreas Mohr <andi at lisas period de>
 | 
				
			||||||
| 
						 | 
					@ -75,7 +75,7 @@ and the number of steps or will clamp at the maximum and zero depending on
 | 
				
			||||||
the configuration.
 | 
					the configuration.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
Because GPIO to IRQ mapping is platform specific, this information must
 | 
					Because GPIO to IRQ mapping is platform specific, this information must
 | 
				
			||||||
be given in seperately to the driver. See the example below.
 | 
					be given separately to the driver. See the example below.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
---------<snip>---------
 | 
					---------<snip>---------
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -291,6 +291,7 @@ Code  Seq#(hex)	Include File		Comments
 | 
				
			||||||
0x92	00-0F	drivers/usb/mon/mon_bin.c
 | 
					0x92	00-0F	drivers/usb/mon/mon_bin.c
 | 
				
			||||||
0x93	60-7F	linux/auto_fs.h
 | 
					0x93	60-7F	linux/auto_fs.h
 | 
				
			||||||
0x94	all	fs/btrfs/ioctl.h
 | 
					0x94	all	fs/btrfs/ioctl.h
 | 
				
			||||||
 | 
					0x97	00-7F	fs/ceph/ioctl.h		Ceph file system
 | 
				
			||||||
0x99	00-0F				537-Addinboard driver
 | 
					0x99	00-0F				537-Addinboard driver
 | 
				
			||||||
					<mailto:buk@buks.ipn.de>
 | 
										<mailto:buk@buks.ipn.de>
 | 
				
			||||||
0xA0	all	linux/sdp/sdp.h		Industrial Device Project
 | 
					0xA0	all	linux/sdp/sdp.h		Industrial Device Project
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -201,10 +201,6 @@ and is between 256 and 4096 characters. It is defined in the file
 | 
				
			||||||
			acpi_display_output=video
 | 
								acpi_display_output=video
 | 
				
			||||||
			See above.
 | 
								See above.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	acpi_early_pdc_eval	[HW,ACPI] Evaluate processor _PDC methods
 | 
					 | 
				
			||||||
				early. Needed on some platforms to properly
 | 
					 | 
				
			||||||
				initialize the EC.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
	acpi_irq_balance [HW,ACPI]
 | 
						acpi_irq_balance [HW,ACPI]
 | 
				
			||||||
			ACPI will balance active IRQs
 | 
								ACPI will balance active IRQs
 | 
				
			||||||
			default in APIC mode
 | 
								default in APIC mode
 | 
				
			||||||
| 
						 | 
					@ -2844,6 +2840,12 @@ and is between 256 and 4096 characters. It is defined in the file
 | 
				
			||||||
			default x2apic cluster mode on platforms
 | 
								default x2apic cluster mode on platforms
 | 
				
			||||||
			supporting x2apic.
 | 
								supporting x2apic.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						x86_mrst_timer= [X86-32,APBT]
 | 
				
			||||||
 | 
								Choose timer option for x86 Moorestown MID platform.
 | 
				
			||||||
 | 
								Two valid options are apbt timer only and lapic timer
 | 
				
			||||||
 | 
								plus one apbt timer for broadcast timer.
 | 
				
			||||||
 | 
								x86_mrst_timer=apbt_only | lapic_and_apbt
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	xd=		[HW,XT] Original XT pre-IDE (RLL encoded) disks.
 | 
						xd=		[HW,XT] Original XT pre-IDE (RLL encoded) disks.
 | 
				
			||||||
	xd_geo=		See header of drivers/block/xd.c.
 | 
						xd_geo=		See header of drivers/block/xd.c.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -59,18 +59,20 @@ nice to have in other objects.  The C language does not allow for the
 | 
				
			||||||
direct expression of inheritance, so other techniques - such as structure
 | 
					direct expression of inheritance, so other techniques - such as structure
 | 
				
			||||||
embedding - must be used.
 | 
					embedding - must be used.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
So, for example, the UIO code has a structure that defines the memory
 | 
					(As an aside, for those familiar with the kernel linked list implementation,
 | 
				
			||||||
region associated with a uio device:
 | 
					this is analogous to how "list_head" structs are rarely useful on
 | 
				
			||||||
 | 
					their own, but are invariably found embedded in the larger objects of
 | 
				
			||||||
 | 
					interest.)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
struct uio_mem {
 | 
					So, for example, the UIO code in drivers/uio/uio.c has a structure that
 | 
				
			||||||
 | 
					defines the memory region associated with a uio device:
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    struct uio_map {
 | 
				
			||||||
	struct kobject kobj;
 | 
						struct kobject kobj;
 | 
				
			||||||
	unsigned long addr;
 | 
						struct uio_mem *mem;
 | 
				
			||||||
	unsigned long size;
 | 
					    };
 | 
				
			||||||
	int memtype;
 | 
					 | 
				
			||||||
	void __iomem *internal_addr;
 | 
					 | 
				
			||||||
};
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
If you have a struct uio_mem structure, finding its embedded kobject is
 | 
					If you have a struct uio_map structure, finding its embedded kobject is
 | 
				
			||||||
just a matter of using the kobj member.  Code that works with kobjects will
 | 
					just a matter of using the kobj member.  Code that works with kobjects will
 | 
				
			||||||
often have the opposite problem, however: given a struct kobject pointer,
 | 
					often have the opposite problem, however: given a struct kobject pointer,
 | 
				
			||||||
what is the pointer to the containing structure?  You must avoid tricks
 | 
					what is the pointer to the containing structure?  You must avoid tricks
 | 
				
			||||||
| 
						 | 
					@ -79,17 +81,34 @@ and, instead, use the container_of() macro, found in <linux/kernel.h>:
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    container_of(pointer, type, member)
 | 
					    container_of(pointer, type, member)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
where pointer is the pointer to the embedded kobject, type is the type of
 | 
					where:
 | 
				
			||||||
the containing structure, and member is the name of the structure field to
 | 
					 | 
				
			||||||
which pointer points.  The return value from container_of() is a pointer to
 | 
					 | 
				
			||||||
the given type. So, for example, a pointer "kp" to a struct kobject
 | 
					 | 
				
			||||||
embedded within a struct uio_mem could be converted to a pointer to the
 | 
					 | 
				
			||||||
containing uio_mem structure with:
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
    struct uio_mem *u_mem = container_of(kp, struct uio_mem, kobj);
 | 
					  * "pointer" is the pointer to the embedded kobject,
 | 
				
			||||||
 | 
					  * "type" is the type of the containing structure, and
 | 
				
			||||||
 | 
					  * "member" is the name of the structure field to which "pointer" points.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
Programmers often define a simple macro for "back-casting" kobject pointers
 | 
					The return value from container_of() is a pointer to the corresponding
 | 
				
			||||||
to the containing type.
 | 
					container type. So, for example, a pointer "kp" to a struct kobject
 | 
				
			||||||
 | 
					embedded *within* a struct uio_map could be converted to a pointer to the
 | 
				
			||||||
 | 
					*containing* uio_map structure with:
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    struct uio_map *u_map = container_of(kp, struct uio_map, kobj);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					For convenience, programmers often define a simple macro for "back-casting"
 | 
				
			||||||
 | 
					kobject pointers to the containing type.  Exactly this happens in the
 | 
				
			||||||
 | 
					earlier drivers/uio/uio.c, as you can see here:
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    struct uio_map {
 | 
				
			||||||
 | 
					        struct kobject kobj;
 | 
				
			||||||
 | 
					        struct uio_mem *mem;
 | 
				
			||||||
 | 
					    };
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    #define to_map(map) container_of(map, struct uio_map, kobj)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					where the macro argument "map" is a pointer to the struct kobject in
 | 
				
			||||||
 | 
					question.  That macro is subsequently invoked with:
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    struct uio_map *map = to_map(kobj);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
Initialization of kobjects
 | 
					Initialization of kobjects
 | 
				
			||||||
| 
						 | 
					@ -266,7 +285,7 @@ kobj_type:
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    struct kobj_type {
 | 
					    struct kobj_type {
 | 
				
			||||||
	    void (*release)(struct kobject *);
 | 
						    void (*release)(struct kobject *);
 | 
				
			||||||
	    struct sysfs_ops	*sysfs_ops;
 | 
						    const struct sysfs_ops *sysfs_ops;
 | 
				
			||||||
	    struct attribute	**default_attrs;
 | 
						    struct attribute	**default_attrs;
 | 
				
			||||||
    };
 | 
					    };
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					@ -387,4 +406,5 @@ called, and the objects in the former circle release each other.
 | 
				
			||||||
Example code to copy from
 | 
					Example code to copy from
 | 
				
			||||||
 | 
					
 | 
				
			||||||
For a more complete example of using ksets and kobjects properly, see the
 | 
					For a more complete example of using ksets and kobjects properly, see the
 | 
				
			||||||
sample/kobject/kset-example.c code.
 | 
					example programs samples/kobject/{kobject-example.c,kset-example.c},
 | 
				
			||||||
 | 
					which will be built as loadable modules if you select CONFIG_SAMPLE_KOBJECT.
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -1,6 +1,7 @@
 | 
				
			||||||
Title	: Kernel Probes (Kprobes)
 | 
					Title	: Kernel Probes (Kprobes)
 | 
				
			||||||
Authors	: Jim Keniston <jkenisto@us.ibm.com>
 | 
					Authors	: Jim Keniston <jkenisto@us.ibm.com>
 | 
				
			||||||
	: Prasanna S Panchamukhi <prasanna@in.ibm.com>
 | 
						: Prasanna S Panchamukhi <prasanna.panchamukhi@gmail.com>
 | 
				
			||||||
 | 
						: Masami Hiramatsu <mhiramat@redhat.com>
 | 
				
			||||||
 | 
					
 | 
				
			||||||
CONTENTS
 | 
					CONTENTS
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					@ -15,6 +16,7 @@ CONTENTS
 | 
				
			||||||
9. Jprobes Example
 | 
					9. Jprobes Example
 | 
				
			||||||
10. Kretprobes Example
 | 
					10. Kretprobes Example
 | 
				
			||||||
Appendix A: The kprobes debugfs interface
 | 
					Appendix A: The kprobes debugfs interface
 | 
				
			||||||
 | 
					Appendix B: The kprobes sysctl interface
 | 
				
			||||||
 | 
					
 | 
				
			||||||
1. Concepts: Kprobes, Jprobes, Return Probes
 | 
					1. Concepts: Kprobes, Jprobes, Return Probes
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					@ -42,13 +44,13 @@ registration/unregistration of a group of *probes. These functions
 | 
				
			||||||
can speed up unregistration process when you have to unregister
 | 
					can speed up unregistration process when you have to unregister
 | 
				
			||||||
a lot of probes at once.
 | 
					a lot of probes at once.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
The next three subsections explain how the different types of
 | 
					The next four subsections explain how the different types of
 | 
				
			||||||
probes work.  They explain certain things that you'll need to
 | 
					probes work and how jump optimization works.  They explain certain
 | 
				
			||||||
know in order to make the best use of Kprobes -- e.g., the
 | 
					things that you'll need to know in order to make the best use of
 | 
				
			||||||
difference between a pre_handler and a post_handler, and how
 | 
					Kprobes -- e.g., the difference between a pre_handler and
 | 
				
			||||||
to use the maxactive and nmissed fields of a kretprobe.  But
 | 
					a post_handler, and how to use the maxactive and nmissed fields of
 | 
				
			||||||
if you're in a hurry to start using Kprobes, you can skip ahead
 | 
					a kretprobe.  But if you're in a hurry to start using Kprobes, you
 | 
				
			||||||
to section 2.
 | 
					can skip ahead to section 2.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
1.1 How Does a Kprobe Work?
 | 
					1.1 How Does a Kprobe Work?
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					@ -161,13 +163,125 @@ In case probed function is entered but there is no kretprobe_instance
 | 
				
			||||||
object available, then in addition to incrementing the nmissed count,
 | 
					object available, then in addition to incrementing the nmissed count,
 | 
				
			||||||
the user entry_handler invocation is also skipped.
 | 
					the user entry_handler invocation is also skipped.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					1.4 How Does Jump Optimization Work?
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					If you configured your kernel with CONFIG_OPTPROBES=y (currently
 | 
				
			||||||
 | 
					this option is supported on x86/x86-64, non-preemptive kernel) and
 | 
				
			||||||
 | 
					the "debug.kprobes_optimization" kernel parameter is set to 1 (see
 | 
				
			||||||
 | 
					sysctl(8)), Kprobes tries to reduce probe-hit overhead by using a jump
 | 
				
			||||||
 | 
					instruction instead of a breakpoint instruction at each probepoint.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					1.4.1 Init a Kprobe
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					When a probe is registered, before attempting this optimization,
 | 
				
			||||||
 | 
					Kprobes inserts an ordinary, breakpoint-based kprobe at the specified
 | 
				
			||||||
 | 
					address. So, even if it's not possible to optimize this particular
 | 
				
			||||||
 | 
					probepoint, there'll be a probe there.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					1.4.2 Safety Check
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Before optimizing a probe, Kprobes performs the following safety checks:
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					- Kprobes verifies that the region that will be replaced by the jump
 | 
				
			||||||
 | 
					instruction (the "optimized region") lies entirely within one function.
 | 
				
			||||||
 | 
					(A jump instruction is multiple bytes, and so may overlay multiple
 | 
				
			||||||
 | 
					instructions.)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					- Kprobes analyzes the entire function and verifies that there is no
 | 
				
			||||||
 | 
					jump into the optimized region.  Specifically:
 | 
				
			||||||
 | 
					  - the function contains no indirect jump;
 | 
				
			||||||
 | 
					  - the function contains no instruction that causes an exception (since
 | 
				
			||||||
 | 
					  the fixup code triggered by the exception could jump back into the
 | 
				
			||||||
 | 
					  optimized region -- Kprobes checks the exception tables to verify this);
 | 
				
			||||||
 | 
					  and
 | 
				
			||||||
 | 
					  - there is no near jump to the optimized region (other than to the first
 | 
				
			||||||
 | 
					  byte).
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					- For each instruction in the optimized region, Kprobes verifies that
 | 
				
			||||||
 | 
					the instruction can be executed out of line.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					1.4.3 Preparing Detour Buffer
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Next, Kprobes prepares a "detour" buffer, which contains the following
 | 
				
			||||||
 | 
					instruction sequence:
 | 
				
			||||||
 | 
					- code to push the CPU's registers (emulating a breakpoint trap)
 | 
				
			||||||
 | 
					- a call to the trampoline code which calls user's probe handlers.
 | 
				
			||||||
 | 
					- code to restore registers
 | 
				
			||||||
 | 
					- the instructions from the optimized region
 | 
				
			||||||
 | 
					- a jump back to the original execution path.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					1.4.4 Pre-optimization
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					After preparing the detour buffer, Kprobes verifies that none of the
 | 
				
			||||||
 | 
					following situations exist:
 | 
				
			||||||
 | 
					- The probe has either a break_handler (i.e., it's a jprobe) or a
 | 
				
			||||||
 | 
					post_handler.
 | 
				
			||||||
 | 
					- Other instructions in the optimized region are probed.
 | 
				
			||||||
 | 
					- The probe is disabled.
 | 
				
			||||||
 | 
					In any of the above cases, Kprobes won't start optimizing the probe.
 | 
				
			||||||
 | 
					Since these are temporary situations, Kprobes tries to start
 | 
				
			||||||
 | 
					optimizing it again if the situation is changed.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					If the kprobe can be optimized, Kprobes enqueues the kprobe to an
 | 
				
			||||||
 | 
					optimizing list, and kicks the kprobe-optimizer workqueue to optimize
 | 
				
			||||||
 | 
					it.  If the to-be-optimized probepoint is hit before being optimized,
 | 
				
			||||||
 | 
					Kprobes returns control to the original instruction path by setting
 | 
				
			||||||
 | 
					the CPU's instruction pointer to the copied code in the detour buffer
 | 
				
			||||||
 | 
					-- thus at least avoiding the single-step.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					1.4.5 Optimization
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					The Kprobe-optimizer doesn't insert the jump instruction immediately;
 | 
				
			||||||
 | 
					rather, it calls synchronize_sched() for safety first, because it's
 | 
				
			||||||
 | 
					possible for a CPU to be interrupted in the middle of executing the
 | 
				
			||||||
 | 
					optimized region(*).  As you know, synchronize_sched() can ensure
 | 
				
			||||||
 | 
					that all interruptions that were active when synchronize_sched()
 | 
				
			||||||
 | 
					was called are done, but only if CONFIG_PREEMPT=n.  So, this version
 | 
				
			||||||
 | 
					of kprobe optimization supports only kernels with CONFIG_PREEMPT=n.(**)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					After that, the Kprobe-optimizer calls stop_machine() to replace
 | 
				
			||||||
 | 
					the optimized region with a jump instruction to the detour buffer,
 | 
				
			||||||
 | 
					using text_poke_smp().
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					1.4.6 Unoptimization
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					When an optimized kprobe is unregistered, disabled, or blocked by
 | 
				
			||||||
 | 
					another kprobe, it will be unoptimized.  If this happens before
 | 
				
			||||||
 | 
					the optimization is complete, the kprobe is just dequeued from the
 | 
				
			||||||
 | 
					optimized list.  If the optimization has been done, the jump is
 | 
				
			||||||
 | 
					replaced with the original code (except for an int3 breakpoint in
 | 
				
			||||||
 | 
					the first byte) by using text_poke_smp().
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					(*)Please imagine that the 2nd instruction is interrupted and then
 | 
				
			||||||
 | 
					the optimizer replaces the 2nd instruction with the jump *address*
 | 
				
			||||||
 | 
					while the interrupt handler is running. When the interrupt
 | 
				
			||||||
 | 
					returns to original address, there is no valid instruction,
 | 
				
			||||||
 | 
					and it causes an unexpected result.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					(**)This optimization-safety checking may be replaced with the
 | 
				
			||||||
 | 
					stop-machine method that ksplice uses for supporting a CONFIG_PREEMPT=y
 | 
				
			||||||
 | 
					kernel.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					NOTE for geeks:
 | 
				
			||||||
 | 
					The jump optimization changes the kprobe's pre_handler behavior.
 | 
				
			||||||
 | 
					Without optimization, the pre_handler can change the kernel's execution
 | 
				
			||||||
 | 
					path by changing regs->ip and returning 1.  However, when the probe
 | 
				
			||||||
 | 
					is optimized, that modification is ignored.  Thus, if you want to
 | 
				
			||||||
 | 
					tweak the kernel's execution path, you need to suppress optimization,
 | 
				
			||||||
 | 
					using one of the following techniques:
 | 
				
			||||||
 | 
					- Specify an empty function for the kprobe's post_handler or break_handler.
 | 
				
			||||||
 | 
					 or
 | 
				
			||||||
 | 
					- Config CONFIG_OPTPROBES=n.
 | 
				
			||||||
 | 
					 or
 | 
				
			||||||
 | 
					- Execute 'sysctl -w debug.kprobes_optimization=0'
 | 
				
			||||||
 | 
					
 | 
				
			||||||
2. Architectures Supported
 | 
					2. Architectures Supported
 | 
				
			||||||
 | 
					
 | 
				
			||||||
Kprobes, jprobes, and return probes are implemented on the following
 | 
					Kprobes, jprobes, and return probes are implemented on the following
 | 
				
			||||||
architectures:
 | 
					architectures:
 | 
				
			||||||
 | 
					
 | 
				
			||||||
- i386
 | 
					- i386 (Supports jump optimization)
 | 
				
			||||||
- x86_64 (AMD-64, EM64T)
 | 
					- x86_64 (AMD-64, EM64T) (Supports jump optimization)
 | 
				
			||||||
- ppc64
 | 
					- ppc64
 | 
				
			||||||
- ia64 (Does not support probes on instruction slot1.)
 | 
					- ia64 (Does not support probes on instruction slot1.)
 | 
				
			||||||
- sparc64 (Return probes not yet implemented.)
 | 
					- sparc64 (Return probes not yet implemented.)
 | 
				
			||||||
| 
						 | 
					@ -193,6 +307,10 @@ it useful to "Compile the kernel with debug info" (CONFIG_DEBUG_INFO),
 | 
				
			||||||
so you can use "objdump -d -l vmlinux" to see the source-to-object
 | 
					so you can use "objdump -d -l vmlinux" to see the source-to-object
 | 
				
			||||||
code mapping.
 | 
					code mapping.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					If you want to reduce probing overhead, set "Kprobes jump optimization
 | 
				
			||||||
 | 
					support" (CONFIG_OPTPROBES) to "y". You can find this option under the
 | 
				
			||||||
 | 
					"Kprobes" line.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
4. API Reference
 | 
					4. API Reference
 | 
				
			||||||
 | 
					
 | 
				
			||||||
The Kprobes API includes a "register" function and an "unregister"
 | 
					The Kprobes API includes a "register" function and an "unregister"
 | 
				
			||||||
| 
						 | 
					@ -389,7 +507,10 @@ the probe which has been registered.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
Kprobes allows multiple probes at the same address.  Currently,
 | 
					Kprobes allows multiple probes at the same address.  Currently,
 | 
				
			||||||
however, there cannot be multiple jprobes on the same function at
 | 
					however, there cannot be multiple jprobes on the same function at
 | 
				
			||||||
the same time.
 | 
					the same time.  Also, a probepoint for which there is a jprobe or
 | 
				
			||||||
 | 
					a post_handler cannot be optimized.  So if you install a jprobe,
 | 
				
			||||||
 | 
					or a kprobe with a post_handler, at an optimized probepoint, the
 | 
				
			||||||
 | 
					probepoint will be unoptimized automatically.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
In general, you can install a probe anywhere in the kernel.
 | 
					In general, you can install a probe anywhere in the kernel.
 | 
				
			||||||
In particular, you can probe interrupt handlers.  Known exceptions
 | 
					In particular, you can probe interrupt handlers.  Known exceptions
 | 
				
			||||||
| 
						 | 
					@ -453,6 +574,38 @@ reason, Kprobes doesn't support return probes (or kprobes or jprobes)
 | 
				
			||||||
on the x86_64 version of __switch_to(); the registration functions
 | 
					on the x86_64 version of __switch_to(); the registration functions
 | 
				
			||||||
return -EINVAL.
 | 
					return -EINVAL.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					On x86/x86-64, since the Jump Optimization of Kprobes modifies
 | 
				
			||||||
 | 
					instructions widely, there are some limitations to optimization. To
 | 
				
			||||||
 | 
					explain it, we introduce some terminology. Imagine a 3-instruction
 | 
				
			||||||
 | 
					sequence consisting of two 2-byte instructions and one 3-byte
 | 
				
			||||||
 | 
					instruction.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        IA
 | 
				
			||||||
 | 
					         |
 | 
				
			||||||
 | 
					[-2][-1][0][1][2][3][4][5][6][7]
 | 
				
			||||||
 | 
					        [ins1][ins2][  ins3 ]
 | 
				
			||||||
 | 
						[<-     DCR       ->]
 | 
				
			||||||
 | 
						   [<- JTPR ->]
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					ins1: 1st Instruction
 | 
				
			||||||
 | 
					ins2: 2nd Instruction
 | 
				
			||||||
 | 
					ins3: 3rd Instruction
 | 
				
			||||||
 | 
					IA:  Insertion Address
 | 
				
			||||||
 | 
					JTPR: Jump Target Prohibition Region
 | 
				
			||||||
 | 
					DCR: Detoured Code Region
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					The instructions in DCR are copied to the out-of-line buffer
 | 
				
			||||||
 | 
					of the kprobe, because the bytes in DCR are replaced by
 | 
				
			||||||
 | 
					a 5-byte jump instruction. So there are several limitations.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					a) The instructions in DCR must be relocatable.
 | 
				
			||||||
 | 
					b) The instructions in DCR must not include a call instruction.
 | 
				
			||||||
 | 
					c) JTPR must not be targeted by any jump or call instruction.
 | 
				
			||||||
 | 
					d) DCR must not straddle the border between functions.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Anyway, these limitations are checked by the in-kernel instruction
 | 
				
			||||||
 | 
					decoder, so you don't need to worry about that.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
6. Probe Overhead
 | 
					6. Probe Overhead
 | 
				
			||||||
 | 
					
 | 
				
			||||||
On a typical CPU in use in 2005, a kprobe hit takes 0.5 to 1.0
 | 
					On a typical CPU in use in 2005, a kprobe hit takes 0.5 to 1.0
 | 
				
			||||||
| 
						 | 
					@ -476,6 +629,19 @@ k = 0.49 usec; j = 0.76; r = 0.80; kr = 0.82; jr = 1.07
 | 
				
			||||||
ppc64: POWER5 (gr), 1656 MHz (SMT disabled, 1 virtual CPU per physical CPU)
 | 
					ppc64: POWER5 (gr), 1656 MHz (SMT disabled, 1 virtual CPU per physical CPU)
 | 
				
			||||||
k = 0.77 usec; j = 1.31; r = 1.26; kr = 1.45; jr = 1.99
 | 
					k = 0.77 usec; j = 1.31; r = 1.26; kr = 1.45; jr = 1.99
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					6.1 Optimized Probe Overhead
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Typically, an optimized kprobe hit takes 0.07 to 0.1 microseconds to
 | 
				
			||||||
 | 
					process. Here are sample overhead figures (in usec) for x86 architectures.
 | 
				
			||||||
 | 
					k = unoptimized kprobe, b = boosted (single-step skipped), o = optimized kprobe,
 | 
				
			||||||
 | 
					r = unoptimized kretprobe, rb = boosted kretprobe, ro = optimized kretprobe.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					i386: Intel(R) Xeon(R) E5410, 2.33GHz, 4656.90 bogomips
 | 
				
			||||||
 | 
					k = 0.80 usec; b = 0.33; o = 0.05; r = 1.10; rb = 0.61; ro = 0.33
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					x86-64: Intel(R) Xeon(R) E5410, 2.33GHz, 4656.90 bogomips
 | 
				
			||||||
 | 
					k = 0.99 usec; b = 0.43; o = 0.06; r = 1.24; rb = 0.68; ro = 0.30
 | 
				
			||||||
 | 
					
 | 
				
			||||||
7. TODO
 | 
					7. TODO
 | 
				
			||||||
 | 
					
 | 
				
			||||||
a. SystemTap (http://sourceware.org/systemtap): Provides a simplified
 | 
					a. SystemTap (http://sourceware.org/systemtap): Provides a simplified
 | 
				
			||||||
| 
						 | 
					@ -523,7 +689,8 @@ is also specified. Following columns show probe status. If the probe is on
 | 
				
			||||||
a virtual address that is no longer valid (module init sections, module
 | 
					a virtual address that is no longer valid (module init sections, module
 | 
				
			||||||
virtual addresses that correspond to modules that've been unloaded),
 | 
					virtual addresses that correspond to modules that've been unloaded),
 | 
				
			||||||
such probes are marked with [GONE]. If the probe is temporarily disabled,
 | 
					such probes are marked with [GONE]. If the probe is temporarily disabled,
 | 
				
			||||||
such probes are marked with [DISABLED].
 | 
					such probes are marked with [DISABLED]. If the probe is optimized, it is
 | 
				
			||||||
 | 
					marked with [OPTIMIZED].
 | 
				
			||||||
 | 
					
 | 
				
			||||||
/sys/kernel/debug/kprobes/enabled: Turn kprobes ON/OFF forcibly.
 | 
					/sys/kernel/debug/kprobes/enabled: Turn kprobes ON/OFF forcibly.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					@ -533,3 +700,19 @@ registered probes will be disarmed, till such time a "1" is echoed to this
 | 
				
			||||||
file. Note that this knob just disarms and arms all kprobes and doesn't
 | 
					file. Note that this knob just disarms and arms all kprobes and doesn't
 | 
				
			||||||
change each probe's disabling state. This means that disabled kprobes (marked
 | 
					change each probe's disabling state. This means that disabled kprobes (marked
 | 
				
			||||||
[DISABLED]) will be not enabled if you turn ON all kprobes by this knob.
 | 
					[DISABLED]) will be not enabled if you turn ON all kprobes by this knob.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Appendix B: The kprobes sysctl interface
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/proc/sys/debug/kprobes-optimization: Turn kprobes optimization ON/OFF.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					When CONFIG_OPTPROBES=y, this sysctl interface appears and it provides
 | 
				
			||||||
 | 
					a knob to globally and forcibly turn jump optimization (see section
 | 
				
			||||||
 | 
					1.4) ON or OFF. By default, jump optimization is allowed (ON).
 | 
				
			||||||
 | 
					If you echo "0" to this file or set "debug.kprobes_optimization" to
 | 
				
			||||||
 | 
					0 via sysctl, all optimized probes will be unoptimized, and any new
 | 
				
			||||||
 | 
					probes registered after that will not be optimized.  Note that this
 | 
				
			||||||
 | 
					knob *changes* the optimized state. This means that optimized probes
 | 
				
			||||||
 | 
					(marked [OPTIMIZED]) will be unoptimized ([OPTIMIZED] tag will be
 | 
				
			||||||
 | 
					removed). If the knob is turned on, they will be optimized again.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -23,12 +23,12 @@ of a virtual machine.  The ioctls belong to three classes
 | 
				
			||||||
   Only run vcpu ioctls from the same thread that was used to create the
 | 
					   Only run vcpu ioctls from the same thread that was used to create the
 | 
				
			||||||
   vcpu.
 | 
					   vcpu.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
2. File descritpors
 | 
					2. File descriptors
 | 
				
			||||||
 | 
					
 | 
				
			||||||
The kvm API is centered around file descriptors.  An initial
 | 
					The kvm API is centered around file descriptors.  An initial
 | 
				
			||||||
open("/dev/kvm") obtains a handle to the kvm subsystem; this handle
 | 
					open("/dev/kvm") obtains a handle to the kvm subsystem; this handle
 | 
				
			||||||
can be used to issue system ioctls.  A KVM_CREATE_VM ioctl on this
 | 
					can be used to issue system ioctls.  A KVM_CREATE_VM ioctl on this
 | 
				
			||||||
handle will create a VM file descripror which can be used to issue VM
 | 
					handle will create a VM file descriptor which can be used to issue VM
 | 
				
			||||||
ioctls.  A KVM_CREATE_VCPU ioctl on a VM fd will create a virtual cpu
 | 
					ioctls.  A KVM_CREATE_VCPU ioctl on a VM fd will create a virtual cpu
 | 
				
			||||||
and return a file descriptor pointing to it.  Finally, ioctls on a vcpu
 | 
					and return a file descriptor pointing to it.  Finally, ioctls on a vcpu
 | 
				
			||||||
fd can be used to control the vcpu, including the important task of
 | 
					fd can be used to control the vcpu, including the important task of
 | 
				
			||||||
| 
						 | 
					@ -643,7 +643,7 @@ Type: vm ioctl
 | 
				
			||||||
Parameters: struct kvm_clock_data (in)
 | 
					Parameters: struct kvm_clock_data (in)
 | 
				
			||||||
Returns: 0 on success, -1 on error
 | 
					Returns: 0 on success, -1 on error
 | 
				
			||||||
 | 
					
 | 
				
			||||||
Sets the current timestamp of kvmclock to the valued specific in its parameter.
 | 
					Sets the current timestamp of kvmclock to the value specified in its parameter.
 | 
				
			||||||
In conjunction with KVM_GET_CLOCK, it is used to ensure monotonicity on scenarios
 | 
					In conjunction with KVM_GET_CLOCK, it is used to ensure monotonicity on scenarios
 | 
				
			||||||
such as migration.
 | 
					such as migration.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					@ -795,11 +795,11 @@ Unused.
 | 
				
			||||||
			__u64 data_offset; /* relative to kvm_run start */
 | 
								__u64 data_offset; /* relative to kvm_run start */
 | 
				
			||||||
		} io;
 | 
							} io;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
If exit_reason is KVM_EXIT_IO_IN or KVM_EXIT_IO_OUT, then the vcpu has
 | 
					If exit_reason is KVM_EXIT_IO, then the vcpu has
 | 
				
			||||||
executed a port I/O instruction which could not be satisfied by kvm.
 | 
					executed a port I/O instruction which could not be satisfied by kvm.
 | 
				
			||||||
data_offset describes where the data is located (KVM_EXIT_IO_OUT) or
 | 
					data_offset describes where the data is located (KVM_EXIT_IO_OUT) or
 | 
				
			||||||
where kvm expects application code to place the data for the next
 | 
					where kvm expects application code to place the data for the next
 | 
				
			||||||
KVM_RUN invocation (KVM_EXIT_IO_IN).  Data format is a patcked array.
 | 
					KVM_RUN invocation (KVM_EXIT_IO_IN).  Data format is a packed array.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
		struct {
 | 
							struct {
 | 
				
			||||||
			struct kvm_debug_exit_arch arch;
 | 
								struct kvm_debug_exit_arch arch;
 | 
				
			||||||
| 
						 | 
					@ -815,7 +815,7 @@ Unused.
 | 
				
			||||||
			__u8  is_write;
 | 
								__u8  is_write;
 | 
				
			||||||
		} mmio;
 | 
							} mmio;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
If exit_reason is KVM_EXIT_MMIO or KVM_EXIT_IO_OUT, then the vcpu has
 | 
					If exit_reason is KVM_EXIT_MMIO, then the vcpu has
 | 
				
			||||||
executed a memory-mapped I/O instruction which could not be satisfied
 | 
					executed a memory-mapped I/O instruction which could not be satisfied
 | 
				
			||||||
by kvm.  The 'data' member contains the written data if 'is_write' is
 | 
					by kvm.  The 'data' member contains the written data if 'is_write' is
 | 
				
			||||||
true, and should be filled by application code otherwise.
 | 
					true, and should be filled by application code otherwise.
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -2,6 +2,12 @@
 | 
				
			||||||
	- This file
 | 
						- This file
 | 
				
			||||||
acer-wmi.txt
 | 
					acer-wmi.txt
 | 
				
			||||||
	- information on the Acer Laptop WMI Extras driver.
 | 
						- information on the Acer Laptop WMI Extras driver.
 | 
				
			||||||
 | 
					asus-laptop.txt
 | 
				
			||||||
 | 
						- information on the Asus Laptop Extras driver.
 | 
				
			||||||
 | 
					disk-shock-protection.txt
 | 
				
			||||||
 | 
						- information on hard disk shock protection.
 | 
				
			||||||
 | 
					dslm.c
 | 
				
			||||||
 | 
						- Simple Disk Sleep Monitor program
 | 
				
			||||||
laptop-mode.txt
 | 
					laptop-mode.txt
 | 
				
			||||||
	- how to conserve battery power using laptop-mode.
 | 
						- how to conserve battery power using laptop-mode.
 | 
				
			||||||
sony-laptop.txt
 | 
					sony-laptop.txt
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
							
								
								
									
										8
									
								
								Documentation/laptops/Makefile
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										8
									
								
								Documentation/laptops/Makefile
									
										
									
									
									
										Normal file
									
								
							| 
						 | 
					@ -0,0 +1,8 @@
 | 
				
			||||||
 | 
					# kbuild trick to avoid linker error. Can be omitted if a module is built.
 | 
				
			||||||
 | 
					obj- := dummy.o
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					# List of programs to build
 | 
				
			||||||
 | 
					hostprogs-y := dslm
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					# Tell kbuild to always build the programs
 | 
				
			||||||
 | 
					always := $(hostprogs-y)
 | 
				
			||||||
							
								
								
									
										166
									
								
								Documentation/laptops/dslm.c
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										166
									
								
								Documentation/laptops/dslm.c
									
										
									
									
									
										Normal file
									
								
							| 
						 | 
					@ -0,0 +1,166 @@
 | 
				
			||||||
 | 
					/*
 | 
				
			||||||
 | 
					 * dslm.c
 | 
				
			||||||
 | 
					 * Simple Disk Sleep Monitor
 | 
				
			||||||
 | 
					 *  by Bartek Kania
 | 
				
			||||||
 | 
					 * Licenced under the GPL
 | 
				
			||||||
 | 
					 */
 | 
				
			||||||
 | 
					#include <unistd.h>
 | 
				
			||||||
 | 
					#include <stdlib.h>
 | 
				
			||||||
 | 
					#include <stdio.h>
 | 
				
			||||||
 | 
					#include <fcntl.h>
 | 
				
			||||||
 | 
					#include <errno.h>
 | 
				
			||||||
 | 
					#include <time.h>
 | 
				
			||||||
 | 
					#include <string.h>
 | 
				
			||||||
 | 
					#include <signal.h>
 | 
				
			||||||
 | 
					#include <sys/ioctl.h>
 | 
				
			||||||
 | 
					#include <linux/hdreg.h>
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/* D(x): compile x only when DEBUG is defined; otherwise it expands to nothing. */
					#ifdef DEBUG
					#define D(x) x
					#else
					#define D(x)
					#endif

					/*
					 * Termination flag: set to 1 from the SIGINT handler and polled by the
					 * measurement loop.  It must be volatile sig_atomic_t because it is
					 * written asynchronously from a signal handler.
					 */
					volatile sig_atomic_t endit = 0;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/*
					 * Ask the drive for its power mode through HDIO_DRIVE_CMD.
					 * Most of the code is stolen from hdparm.
					 *
					 * Returns 1 = active, 0 = standby/sleep, -1 = unknown.
					 */
					static int check_powermode(int fd)
					{
						unsigned char cmd[4] = {WIN_CHECKPOWERMODE1, 0, 0, 0};
						int failed;
						int result;

						failed = ioctl(fd, HDIO_DRIVE_CMD, &cmd) != 0;
						if (failed) {
							/* try again with 0x98 */
							cmd[0] = WIN_CHECKPOWERMODE2;
							failed = cmd[0] && ioctl(fd, HDIO_DRIVE_CMD, &cmd) != 0;
						}

						if (!failed)
							result = (cmd[2] == 255) ? 1 : 0;
						else if (errno == EIO && cmd[0] == 0 && cmd[1] == 0)
							result = 0; /* "sleeping"; */
						else
							result = -1; /* "unknown"; */

						D(printf(" drive state is:  %d\n", result));

						return result;
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/*
					 * Map a power-state code to a printable label:
					 * -1 -> "unknown", 0 -> "sleeping", 1 -> "active".
					 * Any other value yields "internal error".
					 */
					static char *state_name(int i)
					{
						switch (i) {
						case -1:
							return "unknown";
						case 0:
							return "sleeping";
						case 1:
							return "active";
						default:
							return "internal error";
						}
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/*
					 * Format a timestamp with ctime() and strip the trailing newline.
					 *
					 * Returns a pointer to ctime()'s static buffer (overwritten by the next
					 * call), or to a fixed placeholder string if ctime() cannot convert
					 * the value and returns a null pointer.
					 */
					static char *myctime(time_t time)
					{
						static char invalid[] = "(invalid time)";
						char *ts = ctime(&time);
						size_t len;

						if (!ts)
							return invalid;

						/* Only chop the last character when it really is the newline. */
						len = strlen(ts);
						if (len > 0 && ts[len - 1] == '\n')
							ts[len - 1] = '\0';

						return ts;
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					static void measure(int fd)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
					    time_t start_time;
 | 
				
			||||||
 | 
					    int last_state;
 | 
				
			||||||
 | 
					    time_t last_time;
 | 
				
			||||||
 | 
					    int curr_state;
 | 
				
			||||||
 | 
					    time_t curr_time = 0;
 | 
				
			||||||
 | 
					    time_t time_diff;
 | 
				
			||||||
 | 
					    time_t active_time = 0;
 | 
				
			||||||
 | 
					    time_t sleep_time = 0;
 | 
				
			||||||
 | 
					    time_t unknown_time = 0;
 | 
				
			||||||
 | 
					    time_t total_time = 0;
 | 
				
			||||||
 | 
					    int changes = 0;
 | 
				
			||||||
 | 
					    float tmp;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    printf("Starting measurements\n");
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    last_state = check_powermode(fd);
 | 
				
			||||||
 | 
					    start_time = last_time = time(0);
 | 
				
			||||||
 | 
					    printf("  System is in state %s\n\n", state_name(last_state));
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    while(!endit) {
 | 
				
			||||||
 | 
						sleep(1);
 | 
				
			||||||
 | 
						curr_state = check_powermode(fd);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						if (curr_state != last_state || endit) {
 | 
				
			||||||
 | 
						    changes++;
 | 
				
			||||||
 | 
						    curr_time = time(0);
 | 
				
			||||||
 | 
						    time_diff = curr_time - last_time;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						    if (last_state == 1) active_time += time_diff;
 | 
				
			||||||
 | 
						    else if (last_state == 0) sleep_time += time_diff;
 | 
				
			||||||
 | 
						    else unknown_time += time_diff;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						    last_state = curr_state;
 | 
				
			||||||
 | 
						    last_time = curr_time;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						    printf("%s: State-change to %s\n", myctime(curr_time),
 | 
				
			||||||
 | 
							   state_name(curr_state));
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					    changes--; /* Compensate for SIGINT */
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    total_time = time(0) - start_time;
 | 
				
			||||||
 | 
					    printf("\nTotal running time:  %lus\n", curr_time - start_time);
 | 
				
			||||||
 | 
					    printf(" State changed %d times\n", changes);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    tmp = (float)sleep_time / (float)total_time * 100;
 | 
				
			||||||
 | 
					    printf(" Time in sleep state:   %lus (%.2f%%)\n", sleep_time, tmp);
 | 
				
			||||||
 | 
					    tmp = (float)active_time / (float)total_time * 100;
 | 
				
			||||||
 | 
					    printf(" Time in active state:  %lus (%.2f%%)\n", active_time, tmp);
 | 
				
			||||||
 | 
					    tmp = (float)unknown_time / (float)total_time * 100;
 | 
				
			||||||
 | 
					    printf(" Time in unknown state: %lus (%.2f%%)\n", unknown_time, tmp);
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					static void ender(int s)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
					    endit = 1;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/*
					 * Print the command-line synopsis and terminate.
					 *
					 * This is only reached when the arguments are invalid, so the text goes
					 * to stderr and the process exits with a failure status.
					 */
					static void usage(void)
					{
						fputs("usage: dslm [-w <time>] <disk>\n", stderr);
						exit(EXIT_FAILURE);
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					int main(int argc, char **argv)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
					    int fd;
 | 
				
			||||||
 | 
					    char *disk = 0;
 | 
				
			||||||
 | 
					    int settle_time = 60;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    /* Parse the simple command-line */
 | 
				
			||||||
 | 
					    if (argc == 2)
 | 
				
			||||||
 | 
						disk = argv[1];
 | 
				
			||||||
 | 
					    else if (argc == 4) {
 | 
				
			||||||
 | 
						settle_time = atoi(argv[2]);
 | 
				
			||||||
 | 
						disk = argv[3];
 | 
				
			||||||
 | 
					    } else
 | 
				
			||||||
 | 
						usage();
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    if (!(fd = open(disk, O_RDONLY|O_NONBLOCK))) {
 | 
				
			||||||
 | 
						printf("Can't open %s, because: %s\n", disk, strerror(errno));
 | 
				
			||||||
 | 
						exit(-1);
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    if (settle_time) {
 | 
				
			||||||
 | 
						printf("Waiting %d seconds for the system to settle down to "
 | 
				
			||||||
 | 
						       "'normal'\n", settle_time);
 | 
				
			||||||
 | 
						sleep(settle_time);
 | 
				
			||||||
 | 
					    } else
 | 
				
			||||||
 | 
						puts("Not waiting for system to settle down");
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    signal(SIGINT, ender);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    measure(fd);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    close(fd);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    return 0;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
| 
						 | 
					@ -779,172 +779,4 @@ Monitoring tool
 | 
				
			||||||
---------------
 | 
					---------------
 | 
				
			||||||
 | 
					
 | 
				
			||||||
Bartek Kania submitted this, it can be used to measure how much time your disk
 | 
					Bartek Kania submitted this, it can be used to measure how much time your disk
 | 
				
			||||||
spends spun up/down.
 | 
					spends spun up/down.  See Documentation/laptops/dslm.c
 | 
				
			||||||
 | 
					 | 
				
			||||||
---------------------------dslm.c BEGIN-----------------------------------------
 | 
					 | 
				
			||||||
/*
 | 
					 | 
				
			||||||
 * Simple Disk Sleep Monitor
 | 
					 | 
				
			||||||
 *  by Bartek Kania
 | 
					 | 
				
			||||||
 * Licenced under the GPL
 | 
					 | 
				
			||||||
 */
 | 
					 | 
				
			||||||
#include <unistd.h>
 | 
					 | 
				
			||||||
#include <stdlib.h>
 | 
					 | 
				
			||||||
#include <stdio.h>
 | 
					 | 
				
			||||||
#include <fcntl.h>
 | 
					 | 
				
			||||||
#include <errno.h>
 | 
					 | 
				
			||||||
#include <time.h>
 | 
					 | 
				
			||||||
#include <string.h>
 | 
					 | 
				
			||||||
#include <signal.h>
 | 
					 | 
				
			||||||
#include <sys/ioctl.h>
 | 
					 | 
				
			||||||
#include <linux/hdreg.h>
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
#ifdef DEBUG
 | 
					 | 
				
			||||||
#define D(x) x
 | 
					 | 
				
			||||||
#else
 | 
					 | 
				
			||||||
#define D(x)
 | 
					 | 
				
			||||||
#endif
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
int endit = 0;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
/* Check if the disk is in powersave-mode
 | 
					 | 
				
			||||||
 * Most of the code is stolen from hdparm.
 | 
					 | 
				
			||||||
 * 1 = active, 0 = standby/sleep, -1 = unknown */
 | 
					 | 
				
			||||||
int check_powermode(int fd)
 | 
					 | 
				
			||||||
{
 | 
					 | 
				
			||||||
    unsigned char args[4] = {WIN_CHECKPOWERMODE1,0,0,0};
 | 
					 | 
				
			||||||
    int state;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    if (ioctl(fd, HDIO_DRIVE_CMD, &args)
 | 
					 | 
				
			||||||
	&& (args[0] = WIN_CHECKPOWERMODE2) /* try again with 0x98 */
 | 
					 | 
				
			||||||
	&& ioctl(fd, HDIO_DRIVE_CMD, &args)) {
 | 
					 | 
				
			||||||
	if (errno != EIO || args[0] != 0 || args[1] != 0) {
 | 
					 | 
				
			||||||
	    state = -1; /* "unknown"; */
 | 
					 | 
				
			||||||
	} else
 | 
					 | 
				
			||||||
	    state = 0; /* "sleeping"; */
 | 
					 | 
				
			||||||
    } else {
 | 
					 | 
				
			||||||
	state = (args[2] == 255) ? 1 : 0;
 | 
					 | 
				
			||||||
    }
 | 
					 | 
				
			||||||
    D(printf(" drive state is:  %d\n", state));
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    return state;
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
char *state_name(int i)
 | 
					 | 
				
			||||||
{
 | 
					 | 
				
			||||||
    if (i == -1) return "unknown";
 | 
					 | 
				
			||||||
    if (i == 0) return "sleeping";
 | 
					 | 
				
			||||||
    if (i == 1) return "active";
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    return "internal error";
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
char *myctime(time_t time)
 | 
					 | 
				
			||||||
{
 | 
					 | 
				
			||||||
    char *ts = ctime(&time);
 | 
					 | 
				
			||||||
    ts[strlen(ts) - 1] = 0;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    return ts;
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
void measure(int fd)
 | 
					 | 
				
			||||||
{
 | 
					 | 
				
			||||||
    time_t start_time;
 | 
					 | 
				
			||||||
    int last_state;
 | 
					 | 
				
			||||||
    time_t last_time;
 | 
					 | 
				
			||||||
    int curr_state;
 | 
					 | 
				
			||||||
    time_t curr_time = 0;
 | 
					 | 
				
			||||||
    time_t time_diff;
 | 
					 | 
				
			||||||
    time_t active_time = 0;
 | 
					 | 
				
			||||||
    time_t sleep_time = 0;
 | 
					 | 
				
			||||||
    time_t unknown_time = 0;
 | 
					 | 
				
			||||||
    time_t total_time = 0;
 | 
					 | 
				
			||||||
    int changes = 0;
 | 
					 | 
				
			||||||
    float tmp;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    printf("Starting measurements\n");
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    last_state = check_powermode(fd);
 | 
					 | 
				
			||||||
    start_time = last_time = time(0);
 | 
					 | 
				
			||||||
    printf("  System is in state %s\n\n", state_name(last_state));
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    while(!endit) {
 | 
					 | 
				
			||||||
	sleep(1);
 | 
					 | 
				
			||||||
	curr_state = check_powermode(fd);
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
	if (curr_state != last_state || endit) {
 | 
					 | 
				
			||||||
	    changes++;
 | 
					 | 
				
			||||||
	    curr_time = time(0);
 | 
					 | 
				
			||||||
	    time_diff = curr_time - last_time;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
	    if (last_state == 1) active_time += time_diff;
 | 
					 | 
				
			||||||
	    else if (last_state == 0) sleep_time += time_diff;
 | 
					 | 
				
			||||||
	    else unknown_time += time_diff;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
	    last_state = curr_state;
 | 
					 | 
				
			||||||
	    last_time = curr_time;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
	    printf("%s: State-change to %s\n", myctime(curr_time),
 | 
					 | 
				
			||||||
		   state_name(curr_state));
 | 
					 | 
				
			||||||
	}
 | 
					 | 
				
			||||||
    }
 | 
					 | 
				
			||||||
    changes--; /* Compensate for SIGINT */
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    total_time = time(0) - start_time;
 | 
					 | 
				
			||||||
    printf("\nTotal running time:  %lus\n", curr_time - start_time);
 | 
					 | 
				
			||||||
    printf(" State changed %d times\n", changes);
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    tmp = (float)sleep_time / (float)total_time * 100;
 | 
					 | 
				
			||||||
    printf(" Time in sleep state:   %lus (%.2f%%)\n", sleep_time, tmp);
 | 
					 | 
				
			||||||
    tmp = (float)active_time / (float)total_time * 100;
 | 
					 | 
				
			||||||
    printf(" Time in active state:  %lus (%.2f%%)\n", active_time, tmp);
 | 
					 | 
				
			||||||
    tmp = (float)unknown_time / (float)total_time * 100;
 | 
					 | 
				
			||||||
    printf(" Time in unknown state: %lus (%.2f%%)\n", unknown_time, tmp);
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
void ender(int s)
 | 
					 | 
				
			||||||
{
 | 
					 | 
				
			||||||
    endit = 1;
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
void usage()
 | 
					 | 
				
			||||||
{
 | 
					 | 
				
			||||||
    puts("usage: dslm [-w <time>] <disk>");
 | 
					 | 
				
			||||||
    exit(0);
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
int main(int argc, char **argv)
 | 
					 | 
				
			||||||
{
 | 
					 | 
				
			||||||
    int fd;
 | 
					 | 
				
			||||||
    char *disk = 0;
 | 
					 | 
				
			||||||
    int settle_time = 60;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    /* Parse the simple command-line */
 | 
					 | 
				
			||||||
    if (argc == 2)
 | 
					 | 
				
			||||||
	disk = argv[1];
 | 
					 | 
				
			||||||
    else if (argc == 4) {
 | 
					 | 
				
			||||||
	settle_time = atoi(argv[2]);
 | 
					 | 
				
			||||||
	disk = argv[3];
 | 
					 | 
				
			||||||
    } else
 | 
					 | 
				
			||||||
	usage();
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    if (!(fd = open(disk, O_RDONLY|O_NONBLOCK))) {
 | 
					 | 
				
			||||||
	printf("Can't open %s, because: %s\n", disk, strerror(errno));
 | 
					 | 
				
			||||||
	exit(-1);
 | 
					 | 
				
			||||||
    }
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    if (settle_time) {
 | 
					 | 
				
			||||||
	printf("Waiting %d seconds for the system to settle down to "
 | 
					 | 
				
			||||||
	       "'normal'\n", settle_time);
 | 
					 | 
				
			||||||
	sleep(settle_time);
 | 
					 | 
				
			||||||
    } else
 | 
					 | 
				
			||||||
	puts("Not waiting for system to settle down");
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    signal(SIGINT, ender);
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    measure(fd);
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    close(fd);
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    return 0;
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
---------------------------dslm.c END-------------------------------------------
 | 
					 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -3,6 +3,7 @@
 | 
				
			||||||
			 ============================
 | 
								 ============================
 | 
				
			||||||
 | 
					
 | 
				
			||||||
By: David Howells <dhowells@redhat.com>
 | 
					By: David Howells <dhowells@redhat.com>
 | 
				
			||||||
 | 
					    Paul E. McKenney <paulmck@linux.vnet.ibm.com>
 | 
				
			||||||
 | 
					
 | 
				
			||||||
Contents:
 | 
					Contents:
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					@ -60,6 +61,10 @@ Contents:
 | 
				
			||||||
 | 
					
 | 
				
			||||||
     - And then there's the Alpha.
 | 
					     - And then there's the Alpha.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					 (*) Example uses.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					     - Circular buffers.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 (*) References.
 | 
					 (*) References.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					@ -2226,6 +2231,21 @@ The Alpha defines the Linux kernel's memory barrier model.
 | 
				
			||||||
See the subsection on "Cache Coherency" above.
 | 
					See the subsection on "Cache Coherency" above.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					============
 | 
				
			||||||
 | 
					EXAMPLE USES
 | 
				
			||||||
 | 
					============
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					CIRCULAR BUFFERS
 | 
				
			||||||
 | 
					----------------
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Memory barriers can be used to implement circular buffering without the need
 | 
				
			||||||
 | 
					of a lock to serialise the producer with the consumer.  See:
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						Documentation/circular-buffers.txt
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					for details.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
==========
 | 
					==========
 | 
				
			||||||
REFERENCES
 | 
					REFERENCES
 | 
				
			||||||
==========
 | 
					==========
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -6,3 +6,5 @@ hostprogs-y := ifenslave
 | 
				
			||||||
 | 
					
 | 
				
			||||||
# Tell kbuild to always build the programs
 | 
					# Tell kbuild to always build the programs
 | 
				
			||||||
always := $(hostprogs-y)
 | 
					always := $(hostprogs-y)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					obj-m := timestamping/
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -68,7 +68,7 @@ Compaq adapters (not tested):
 | 
				
			||||||
=======================
 | 
					=======================
 | 
				
			||||||
 | 
					
 | 
				
			||||||
From v2.01 on, the driver is integrated in the linux kernel sources.
 | 
					From v2.01 on, the driver is integrated in the linux kernel sources.
 | 
				
			||||||
Therefor, the installation is the same as for any other adapter
 | 
					Therefore, the installation is the same as for any other adapter
 | 
				
			||||||
supported by the kernel.
 | 
					supported by the kernel.
 | 
				
			||||||
Refer to the manual of your distribution about the installation
 | 
					Refer to the manual of your distribution about the installation
 | 
				
			||||||
of network adapters.
 | 
					of network adapters.
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -1,6 +1,13 @@
 | 
				
			||||||
CPPFLAGS = -I../../../include
 | 
					# kbuild trick to avoid linker error. Can be omitted if a module is built.
 | 
				
			||||||
 | 
					obj- := dummy.o
 | 
				
			||||||
 | 
					
 | 
				
			||||||
timestamping: timestamping.c
 | 
					# List of programs to build
 | 
				
			||||||
 | 
					hostprogs-y := timestamping
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					# Tell kbuild to always build the programs
 | 
				
			||||||
 | 
					always := $(hostprogs-y)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					HOSTCFLAGS_timestamping.o += -I$(objtree)/usr/include
 | 
				
			||||||
 | 
					
 | 
				
			||||||
clean:
 | 
					clean:
 | 
				
			||||||
	rm -f timestamping
 | 
						rm -f timestamping
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -41,9 +41,9 @@
 | 
				
			||||||
#include <arpa/inet.h>
 | 
					#include <arpa/inet.h>
 | 
				
			||||||
#include <net/if.h>
 | 
					#include <net/if.h>
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#include "asm/types.h"
 | 
					#include <asm/types.h>
 | 
				
			||||||
#include "linux/net_tstamp.h"
 | 
					#include <linux/net_tstamp.h>
 | 
				
			||||||
#include "linux/errqueue.h"
 | 
					#include <linux/errqueue.h>
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#ifndef SO_TIMESTAMPING
 | 
					#ifndef SO_TIMESTAMPING
 | 
				
			||||||
# define SO_TIMESTAMPING         37
 | 
					# define SO_TIMESTAMPING         37
 | 
				
			||||||
| 
						 | 
					@ -164,7 +164,7 @@ static void printpacket(struct msghdr *msg, int res,
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	gettimeofday(&now, 0);
 | 
						gettimeofday(&now, 0);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	printf("%ld.%06ld: received %s data, %d bytes from %s, %d bytes control messages\n",
 | 
						printf("%ld.%06ld: received %s data, %d bytes from %s, %zu bytes control messages\n",
 | 
				
			||||||
	       (long)now.tv_sec, (long)now.tv_usec,
 | 
						       (long)now.tv_sec, (long)now.tv_usec,
 | 
				
			||||||
	       (recvmsg_flags & MSG_ERRQUEUE) ? "error" : "regular",
 | 
						       (recvmsg_flags & MSG_ERRQUEUE) ? "error" : "regular",
 | 
				
			||||||
	       res,
 | 
						       res,
 | 
				
			||||||
| 
						 | 
					@ -173,7 +173,7 @@ static void printpacket(struct msghdr *msg, int res,
 | 
				
			||||||
	for (cmsg = CMSG_FIRSTHDR(msg);
 | 
						for (cmsg = CMSG_FIRSTHDR(msg);
 | 
				
			||||||
	     cmsg;
 | 
						     cmsg;
 | 
				
			||||||
	     cmsg = CMSG_NXTHDR(msg, cmsg)) {
 | 
						     cmsg = CMSG_NXTHDR(msg, cmsg)) {
 | 
				
			||||||
		printf("   cmsg len %d: ", cmsg->cmsg_len);
 | 
							printf("   cmsg len %zu: ", cmsg->cmsg_len);
 | 
				
			||||||
		switch (cmsg->cmsg_level) {
 | 
							switch (cmsg->cmsg_level) {
 | 
				
			||||||
		case SOL_SOCKET:
 | 
							case SOL_SOCKET:
 | 
				
			||||||
			printf("SOL_SOCKET ");
 | 
								printf("SOL_SOCKET ");
 | 
				
			||||||
| 
						 | 
					@ -370,7 +370,7 @@ int main(int argc, char **argv)
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	sock = socket(PF_INET, SOCK_DGRAM, IPPROTO_UDP);
 | 
						sock = socket(PF_INET, SOCK_DGRAM, IPPROTO_UDP);
 | 
				
			||||||
	if (socket < 0)
 | 
						if (sock < 0)
 | 
				
			||||||
		bail("socket");
 | 
							bail("socket");
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	memset(&device, 0, sizeof(device));
 | 
						memset(&device, 0, sizeof(device));
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -57,7 +57,7 @@ PC standard floppy disk controller
 | 
				
			||||||
# cat resources
 | 
					# cat resources
 | 
				
			||||||
DISABLED
 | 
					DISABLED
 | 
				
			||||||
 | 
					
 | 
				
			||||||
- Notice the string "DISABLED".  THis means the device is not active.
 | 
					- Notice the string "DISABLED".  This means the device is not active.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
3.) check the device's possible configurations (optional)
 | 
					3.) check the device's possible configurations (optional)
 | 
				
			||||||
# cat options
 | 
					# cat options
 | 
				
			||||||
| 
						 | 
					@ -139,7 +139,7 @@ Plug and Play but it is planned to be in the near future.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
Requirements for a Linux PnP protocol:
 | 
					Requirements for a Linux PnP protocol:
 | 
				
			||||||
1.) the protocol must use EISA IDs
 | 
					1.) the protocol must use EISA IDs
 | 
				
			||||||
2.) the protocol must inform the PnP Layer of a devices current configuration
 | 
					2.) the protocol must inform the PnP Layer of a device's current configuration
 | 
				
			||||||
- the ability to set resources is optional but preferred.
 | 
					- the ability to set resources is optional but preferred.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
The following are PnP protocol related functions:
 | 
					The following are PnP protocol related functions:
 | 
				
			||||||
| 
						 | 
					@ -158,7 +158,7 @@ pnp_remove_device
 | 
				
			||||||
- automatically will free mem used by the device and related structures
 | 
					- automatically will free mem used by the device and related structures
 | 
				
			||||||
 | 
					
 | 
				
			||||||
pnp_add_id
 | 
					pnp_add_id
 | 
				
			||||||
- adds a EISA ID to the list of supported IDs for the specified device
 | 
					- adds an EISA ID to the list of supported IDs for the specified device
 | 
				
			||||||
 | 
					
 | 
				
			||||||
For more information consult the source of a protocol such as
 | 
					For more information consult the source of a protocol such as
 | 
				
			||||||
/drivers/pnp/pnpbios/core.c.
 | 
					/drivers/pnp/pnpbios/core.c.
 | 
				
			||||||
| 
						 | 
					@ -167,7 +167,7 @@ For more information consult the source of a protocol such as
 | 
				
			||||||
 | 
					
 | 
				
			||||||
Linux Plug and Play Drivers
 | 
					Linux Plug and Play Drivers
 | 
				
			||||||
---------------------------
 | 
					---------------------------
 | 
				
			||||||
	This section contains information for linux PnP driver developers.
 | 
						This section contains information for Linux PnP driver developers.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
The New Way
 | 
					The New Way
 | 
				
			||||||
...........
 | 
					...........
 | 
				
			||||||
| 
						 | 
					@ -235,11 +235,10 @@ static int __init serial8250_pnp_init(void)
 | 
				
			||||||
The Old Way
 | 
					The Old Way
 | 
				
			||||||
...........
 | 
					...........
 | 
				
			||||||
 | 
					
 | 
				
			||||||
a series of compatibility functions have been created to make it easy to convert 
 | 
					A series of compatibility functions have been created to make it easy to convert
 | 
				
			||||||
 | 
					 | 
				
			||||||
ISAPNP drivers.  They should serve as a temporary solution only.
 | 
					ISAPNP drivers.  They should serve as a temporary solution only.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
they are as follows:
 | 
					They are as follows:
 | 
				
			||||||
 | 
					
 | 
				
			||||||
struct pnp_card *pnp_find_card(unsigned short vendor,
 | 
					struct pnp_card *pnp_find_card(unsigned short vendor,
 | 
				
			||||||
				 unsigned short device,
 | 
									 unsigned short device,
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -224,6 +224,12 @@ defined in include/linux/pm.h:
 | 
				
			||||||
      RPM_SUSPENDED, which means that each device is initially regarded by the
 | 
					      RPM_SUSPENDED, which means that each device is initially regarded by the
 | 
				
			||||||
      PM core as 'suspended', regardless of its real hardware status
 | 
					      PM core as 'suspended', regardless of its real hardware status
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  unsigned int runtime_auto;
 | 
				
			||||||
 | 
					    - if set, indicates that the user space has allowed the device driver to
 | 
				
			||||||
 | 
					      power manage the device at run time via the /sys/devices/.../power/control
 | 
				
			||||||
 | 
					      interface; it may only be modified with the help of the pm_runtime_allow()
 | 
				
			||||||
 | 
					      and pm_runtime_forbid() helper functions
 | 
				
			||||||
 | 
					
 | 
				
			||||||
All of the above fields are members of the 'power' member of 'struct device'.
 | 
					All of the above fields are members of the 'power' member of 'struct device'.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
4. Run-time PM Device Helper Functions
 | 
					4. Run-time PM Device Helper Functions
 | 
				
			||||||
| 
						 | 
					@ -250,7 +256,7 @@ drivers/base/power/runtime.c and include/linux/pm_runtime.h:
 | 
				
			||||||
      to suspend the device again in future
 | 
					      to suspend the device again in future
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  int pm_runtime_resume(struct device *dev);
 | 
					  int pm_runtime_resume(struct device *dev);
 | 
				
			||||||
    - execute the subsystem-leve resume callback for the device; returns 0 on
 | 
					    - execute the subsystem-level resume callback for the device; returns 0 on
 | 
				
			||||||
      success, 1 if the device's run-time PM status was already 'active' or
 | 
					      success, 1 if the device's run-time PM status was already 'active' or
 | 
				
			||||||
      error code on failure, where -EAGAIN means it may be safe to attempt to
 | 
					      error code on failure, where -EAGAIN means it may be safe to attempt to
 | 
				
			||||||
      resume the device again in future, but 'power.runtime_error' should be
 | 
					      resume the device again in future, but 'power.runtime_error' should be
 | 
				
			||||||
| 
						 | 
					@ -329,6 +335,20 @@ drivers/base/power/runtime.c and include/linux/pm_runtime.h:
 | 
				
			||||||
      'power.runtime_error' is set or 'power.disable_depth' is greater than
 | 
					      'power.runtime_error' is set or 'power.disable_depth' is greater than
 | 
				
			||||||
      zero)
 | 
					      zero)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  bool pm_runtime_suspended(struct device *dev);
 | 
				
			||||||
 | 
					    - return true if the device's runtime PM status is 'suspended', or false
 | 
				
			||||||
 | 
					      otherwise
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  void pm_runtime_allow(struct device *dev);
 | 
				
			||||||
 | 
					    - set the power.runtime_auto flag for the device and decrease its usage
 | 
				
			||||||
 | 
					      counter (used by the /sys/devices/.../power/control interface to
 | 
				
			||||||
 | 
					      effectively allow the device to be power managed at run time)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  void pm_runtime_forbid(struct device *dev);
 | 
				
			||||||
 | 
					    - unset the power.runtime_auto flag for the device and increase its usage
 | 
				
			||||||
 | 
					      counter (used by the /sys/devices/.../power/control interface to
 | 
				
			||||||
 | 
					      effectively prevent the device from being power managed at run time)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
It is safe to execute the following helper functions from interrupt context:
 | 
					It is safe to execute the following helper functions from interrupt context:
 | 
				
			||||||
 | 
					
 | 
				
			||||||
pm_request_idle()
 | 
					pm_request_idle()
 | 
				
			||||||
| 
						 | 
					@ -382,6 +402,18 @@ may be desirable to suspend the device as soon as ->probe() or ->remove() has
 | 
				
			||||||
finished, so the PM core uses pm_runtime_idle_sync() to invoke the
 | 
					finished, so the PM core uses pm_runtime_idle_sync() to invoke the
 | 
				
			||||||
subsystem-level idle callback for the device at that time.
 | 
					subsystem-level idle callback for the device at that time.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					The user space can effectively disallow the driver of the device to power manage
 | 
				
			||||||
 | 
					it at run time by changing the value of its /sys/devices/.../power/control
 | 
				
			||||||
 | 
					attribute to "on", which causes pm_runtime_forbid() to be called.  In principle,
 | 
				
			||||||
 | 
					this mechanism may also be used by the driver to effectively turn off the
 | 
				
			||||||
 | 
					run-time power management of the device until the user space turns it on.
 | 
				
			||||||
 | 
					Namely, during the initialization the driver can make sure that the run-time PM
 | 
				
			||||||
 | 
					status of the device is 'active' and call pm_runtime_forbid().  It should be
 | 
				
			||||||
 | 
					noted, however, that if the user space has already intentionally changed the
 | 
				
			||||||
 | 
					value of /sys/devices/.../power/control to "auto" to allow the driver to power
 | 
				
			||||||
 | 
					manage the device at run time, the driver may confuse it by using
 | 
				
			||||||
 | 
					pm_runtime_forbid() this way.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
6. Run-time PM and System Sleep
 | 
					6. Run-time PM and System Sleep
 | 
				
			||||||
 | 
					
 | 
				
			||||||
Run-time PM and system sleep (i.e., system suspend and hibernation, also known
 | 
					Run-time PM and system sleep (i.e., system suspend and hibernation, also known
 | 
				
			||||||
| 
						 | 
					@ -431,3 +463,64 @@ The PM core always increments the run-time usage counter before calling the
 | 
				
			||||||
->prepare() callback and decrements it after calling the ->complete() callback.
 | 
					->prepare() callback and decrements it after calling the ->complete() callback.
 | 
				
			||||||
Hence disabling run-time PM temporarily like this will not cause any run-time
 | 
					Hence disabling run-time PM temporarily like this will not cause any run-time
 | 
				
			||||||
suspend callbacks to be lost.
 | 
					suspend callbacks to be lost.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					7. Generic subsystem callbacks
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Subsystems may wish to conserve code space by using the set of generic power
 | 
				
			||||||
 | 
					management callbacks provided by the PM core, defined in
 | 
				
			||||||
 | 
					drivers/base/power/generic_ops.c:
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  int pm_generic_runtime_idle(struct device *dev);
 | 
				
			||||||
 | 
					    - invoke the ->runtime_idle() callback provided by the driver of this
 | 
				
			||||||
 | 
					      device, if defined, and call pm_runtime_suspend() for this device if the
 | 
				
			||||||
 | 
					      return value is 0 or the callback is not defined
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  int pm_generic_runtime_suspend(struct device *dev);
 | 
				
			||||||
 | 
					    - invoke the ->runtime_suspend() callback provided by the driver of this
 | 
				
			||||||
 | 
					      device and return its result, or return -EINVAL if not defined
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  int pm_generic_runtime_resume(struct device *dev);
 | 
				
			||||||
 | 
					    - invoke the ->runtime_resume() callback provided by the driver of this
 | 
				
			||||||
 | 
					      device and return its result, or return -EINVAL if not defined
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  int pm_generic_suspend(struct device *dev);
 | 
				
			||||||
 | 
					    - if the device has not been suspended at run time, invoke the ->suspend()
 | 
				
			||||||
 | 
					      callback provided by its driver and return its result, or return 0 if not
 | 
				
			||||||
 | 
					      defined
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  int pm_generic_resume(struct device *dev);
 | 
				
			||||||
 | 
					    - invoke the ->resume() callback provided by the driver of this device and,
 | 
				
			||||||
 | 
					      if successful, change the device's runtime PM status to 'active'
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  int pm_generic_freeze(struct device *dev);
 | 
				
			||||||
 | 
					    - if the device has not been suspended at run time, invoke the ->freeze()
 | 
				
			||||||
 | 
					      callback provided by its driver and return its result, or return 0 if not
 | 
				
			||||||
 | 
					      defined
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  int pm_generic_thaw(struct device *dev);
 | 
				
			||||||
 | 
					    - if the device has not been suspended at run time, invoke the ->thaw()
 | 
				
			||||||
 | 
					      callback provided by its driver and return its result, or return 0 if not
 | 
				
			||||||
 | 
					      defined
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  int pm_generic_poweroff(struct device *dev);
 | 
				
			||||||
 | 
					    - if the device has not been suspended at run time, invoke the ->poweroff()
 | 
				
			||||||
 | 
					      callback provided by its driver and return its result, or return 0 if not
 | 
				
			||||||
 | 
					      defined
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  int pm_generic_restore(struct device *dev);
 | 
				
			||||||
 | 
					    - invoke the ->restore() callback provided by the driver of this device and,
 | 
				
			||||||
 | 
					      if successful, change the device's runtime PM status to 'active'
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					These functions can be assigned to the ->runtime_idle(), ->runtime_suspend(),
 | 
				
			||||||
 | 
					->runtime_resume(), ->suspend(), ->resume(), ->freeze(), ->thaw(), ->poweroff(),
 | 
				
			||||||
 | 
					or ->restore() callback pointers in the subsystem-level dev_pm_ops structures.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					If a subsystem wishes to use all of them at the same time, it can simply assign
 | 
				
			||||||
 | 
					the GENERIC_SUBSYS_PM_OPS macro, defined in include/linux/pm.h, to its
 | 
				
			||||||
 | 
					dev_pm_ops structure pointer.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Device drivers that wish to use the same function as a system suspend, freeze,
 | 
				
			||||||
 | 
					poweroff and run-time suspend callback, and similarly for system resume, thaw,
 | 
				
			||||||
 | 
					restore, and run-time resume, can achieve this with the help of the
 | 
				
			||||||
 | 
					UNIVERSAL_DEV_PM_OPS macro defined in include/linux/pm.h (possibly setting its
 | 
				
			||||||
 | 
					last argument to NULL).
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -44,21 +44,29 @@ Example:
 | 
				
			||||||
			compatible = "fsl,mpc8349-dma-channel", "fsl,elo-dma-channel";
 | 
								compatible = "fsl,mpc8349-dma-channel", "fsl,elo-dma-channel";
 | 
				
			||||||
			cell-index = <0>;
 | 
								cell-index = <0>;
 | 
				
			||||||
			reg = <0 0x80>;
 | 
								reg = <0 0x80>;
 | 
				
			||||||
 | 
								interrupt-parent = <&ipic>;
 | 
				
			||||||
 | 
								interrupts = <71 8>;
 | 
				
			||||||
		};
 | 
							};
 | 
				
			||||||
		dma-channel@80 {
 | 
							dma-channel@80 {
 | 
				
			||||||
			compatible = "fsl,mpc8349-dma-channel", "fsl,elo-dma-channel";
 | 
								compatible = "fsl,mpc8349-dma-channel", "fsl,elo-dma-channel";
 | 
				
			||||||
			cell-index = <1>;
 | 
								cell-index = <1>;
 | 
				
			||||||
			reg = <0x80 0x80>;
 | 
								reg = <0x80 0x80>;
 | 
				
			||||||
 | 
								interrupt-parent = <&ipic>;
 | 
				
			||||||
 | 
								interrupts = <71 8>;
 | 
				
			||||||
		};
 | 
							};
 | 
				
			||||||
		dma-channel@100 {
 | 
							dma-channel@100 {
 | 
				
			||||||
			compatible = "fsl,mpc8349-dma-channel", "fsl,elo-dma-channel";
 | 
								compatible = "fsl,mpc8349-dma-channel", "fsl,elo-dma-channel";
 | 
				
			||||||
			cell-index = <2>;
 | 
								cell-index = <2>;
 | 
				
			||||||
			reg = <0x100 0x80>;
 | 
								reg = <0x100 0x80>;
 | 
				
			||||||
 | 
								interrupt-parent = <&ipic>;
 | 
				
			||||||
 | 
								interrupts = <71 8>;
 | 
				
			||||||
		};
 | 
							};
 | 
				
			||||||
		dma-channel@180 {
 | 
							dma-channel@180 {
 | 
				
			||||||
			compatible = "fsl,mpc8349-dma-channel", "fsl,elo-dma-channel";
 | 
								compatible = "fsl,mpc8349-dma-channel", "fsl,elo-dma-channel";
 | 
				
			||||||
			cell-index = <3>;
 | 
								cell-index = <3>;
 | 
				
			||||||
			reg = <0x180 0x80>;
 | 
								reg = <0x180 0x80>;
 | 
				
			||||||
 | 
								interrupt-parent = <&ipic>;
 | 
				
			||||||
 | 
								interrupts = <71 8>;
 | 
				
			||||||
		};
 | 
							};
 | 
				
			||||||
	};
 | 
						};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -2,15 +2,14 @@
 | 
				
			||||||
 | 
					
 | 
				
			||||||
Required properties :
 | 
					Required properties :
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 - device_type : Should be "i2c"
 | 
					 | 
				
			||||||
 - reg : Offset and length of the register set for the device
 | 
					 - reg : Offset and length of the register set for the device
 | 
				
			||||||
 | 
					 - compatible : should be "fsl,CHIP-i2c" where CHIP is the name of a
 | 
				
			||||||
 | 
					   compatible processor, e.g. mpc8313, mpc8543, mpc8544, mpc5121,
 | 
				
			||||||
 | 
					   mpc5200 or mpc5200b. For the mpc5121, an additional node
 | 
				
			||||||
 | 
					   "fsl,mpc5121-i2c-ctrl" is required as shown in the example below.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
Recommended properties :
 | 
					Recommended properties :
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 - compatible : compatibility list with 2 entries, the first should
 | 
					 | 
				
			||||||
   be "fsl,CHIP-i2c" where CHIP is the name of a compatible processor,
 | 
					 | 
				
			||||||
   e.g. mpc8313, mpc8543, mpc8544, mpc5200 or mpc5200b. The second one
 | 
					 | 
				
			||||||
   should be "fsl-i2c".
 | 
					 | 
				
			||||||
 - interrupts : <a b> where a is the interrupt number and b is a
 | 
					 - interrupts : <a b> where a is the interrupt number and b is a
 | 
				
			||||||
   field that represents an encoding of the sense and level
 | 
					   field that represents an encoding of the sense and level
 | 
				
			||||||
   information for the interrupt.  This should be encoded based on
 | 
					   information for the interrupt.  This should be encoded based on
 | 
				
			||||||
| 
						 | 
					@ -24,25 +23,40 @@ Recommended properties :
 | 
				
			||||||
 | 
					
 | 
				
			||||||
Examples :
 | 
					Examples :
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						/* MPC5121 based board */
 | 
				
			||||||
 | 
						i2c@1740 {
 | 
				
			||||||
 | 
							#address-cells = <1>;
 | 
				
			||||||
 | 
							#size-cells = <0>;
 | 
				
			||||||
 | 
							compatible = "fsl,mpc5121-i2c", "fsl-i2c";
 | 
				
			||||||
 | 
							reg = <0x1740 0x20>;
 | 
				
			||||||
 | 
							interrupts = <11 0x8>;
 | 
				
			||||||
 | 
							interrupt-parent = <&ipic>;
 | 
				
			||||||
 | 
							clock-frequency = <100000>;
 | 
				
			||||||
 | 
						};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						i2ccontrol@1760 {
 | 
				
			||||||
 | 
							compatible = "fsl,mpc5121-i2c-ctrl";
 | 
				
			||||||
 | 
							reg = <0x1760 0x8>;
 | 
				
			||||||
 | 
						};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						/* MPC5200B based board */
 | 
				
			||||||
	i2c@3d00 {
 | 
						i2c@3d00 {
 | 
				
			||||||
		#address-cells = <1>;
 | 
							#address-cells = <1>;
 | 
				
			||||||
		#size-cells = <0>;
 | 
							#size-cells = <0>;
 | 
				
			||||||
		compatible = "fsl,mpc5200b-i2c","fsl,mpc5200-i2c","fsl-i2c";
 | 
							compatible = "fsl,mpc5200b-i2c","fsl,mpc5200-i2c","fsl-i2c";
 | 
				
			||||||
		cell-index = <0>;
 | 
					 | 
				
			||||||
		reg = <0x3d00 0x40>;
 | 
							reg = <0x3d00 0x40>;
 | 
				
			||||||
		interrupts = <2 15 0>;
 | 
							interrupts = <2 15 0>;
 | 
				
			||||||
		interrupt-parent = <&mpc5200_pic>;
 | 
							interrupt-parent = <&mpc5200_pic>;
 | 
				
			||||||
		fsl,preserve-clocking;
 | 
							fsl,preserve-clocking;
 | 
				
			||||||
	};
 | 
						};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						/* MPC8544 based board */
 | 
				
			||||||
	i2c@3100 {
 | 
						i2c@3100 {
 | 
				
			||||||
		#address-cells = <1>;
 | 
							#address-cells = <1>;
 | 
				
			||||||
		#size-cells = <0>;
 | 
							#size-cells = <0>;
 | 
				
			||||||
		cell-index = <1>;
 | 
					 | 
				
			||||||
		compatible = "fsl,mpc8544-i2c", "fsl-i2c";
 | 
							compatible = "fsl,mpc8544-i2c", "fsl-i2c";
 | 
				
			||||||
		reg = <0x3100 0x100>;
 | 
							reg = <0x3100 0x100>;
 | 
				
			||||||
		interrupts = <43 2>;
 | 
							interrupts = <43 2>;
 | 
				
			||||||
		interrupt-parent = <&mpic>;
 | 
							interrupt-parent = <&mpic>;
 | 
				
			||||||
		clock-frequency = <400000>;
 | 
							clock-frequency = <400000>;
 | 
				
			||||||
	};
 | 
						};
 | 
				
			||||||
 | 
					 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -102,7 +102,7 @@ args:		unsigned long
 | 
				
			||||||
see also:	include/linux/kvm.h
 | 
					see also:	include/linux/kvm.h
 | 
				
			||||||
This ioctl stores the state of the cpu at the guest real address given as
 | 
					This ioctl stores the state of the cpu at the guest real address given as
 | 
				
			||||||
argument, unless one of the following values defined in include/linux/kvm.h
 | 
					argument, unless one of the following values defined in include/linux/kvm.h
 | 
				
			||||||
is given as arguement:
 | 
					is given as argument:
 | 
				
			||||||
KVM_S390_STORE_STATUS_NOADDR - the CPU stores its status to the save area in
 | 
					KVM_S390_STORE_STATUS_NOADDR - the CPU stores its status to the save area in
 | 
				
			||||||
absolute lowcore as defined by the principles of operation
 | 
					absolute lowcore as defined by the principles of operation
 | 
				
			||||||
KVM_S390_STORE_STATUS_PREFIXED - the CPU stores its status to the save area in
 | 
					KVM_S390_STORE_STATUS_PREFIXED - the CPU stores its status to the save area in
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -989,8 +989,8 @@ Changes from 20040709 to 20040716
 | 
				
			||||||
	* Remove redundant port_cmp != 2 check in if
 | 
						* Remove redundant port_cmp != 2 check in if
 | 
				
			||||||
	  (!port_cmp) { .... if (port_cmp != 2).... }
 | 
						  (!port_cmp) { .... if (port_cmp != 2).... }
 | 
				
			||||||
	* Clock changes: removed struct clk_data and timerList.
 | 
						* Clock changes: removed struct clk_data and timerList.
 | 
				
			||||||
	* Clock changes: seperate nodev_tmo and els_retry_delay into 2
 | 
						* Clock changes: separate nodev_tmo and els_retry_delay into 2
 | 
				
			||||||
	  seperate timers and convert to 1 argument changed
 | 
						  separate timers and convert to 1 argument changed
 | 
				
			||||||
	  LPFC_NODE_FARP_PEND_t to struct lpfc_node_farp_pend convert
 | 
						  LPFC_NODE_FARP_PEND_t to struct lpfc_node_farp_pend convert
 | 
				
			||||||
	  ipfarp_tmo to 1 argument convert target struct tmofunc and
 | 
						  ipfarp_tmo to 1 argument convert target struct tmofunc and
 | 
				
			||||||
	  rtplunfunc to 1 argument * cr_count, cr_delay and
 | 
						  rtplunfunc to 1 argument * cr_count, cr_delay and
 | 
				
			||||||
| 
						 | 
					@ -1514,7 +1514,7 @@ Changes from 20040402 to 20040409
 | 
				
			||||||
	* Remove unused elxclock declaration in elx_sli.h.
 | 
						* Remove unused elxclock declaration in elx_sli.h.
 | 
				
			||||||
	* Since everywhere IOCB_ENTRY is used, the return value is cast,
 | 
						* Since everywhere IOCB_ENTRY is used, the return value is cast,
 | 
				
			||||||
	  move the cast into the macro.
 | 
						  move the cast into the macro.
 | 
				
			||||||
	* Split ioctls out into seperate files
 | 
						* Split ioctls out into separate files
 | 
				
			||||||
 | 
					
 | 
				
			||||||
Changes from 20040326 to 20040402
 | 
					Changes from 20040326 to 20040402
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					@ -1534,7 +1534,7 @@ Changes from 20040326 to 20040402
 | 
				
			||||||
	* Unused variable cleanup
 | 
						* Unused variable cleanup
 | 
				
			||||||
	* Use Linux list macros for DMABUF_t
 | 
						* Use Linux list macros for DMABUF_t
 | 
				
			||||||
	* Break up ioctls into 3 sections, dfc, util, hbaapi
 | 
						* Break up ioctls into 3 sections, dfc, util, hbaapi
 | 
				
			||||||
	  rearranged code so this could be easily seperated into a
 | 
						  rearranged code so this could be easily separated into a
 | 
				
			||||||
	  differnet module later All 3 are currently turned on by
 | 
						  differnet module later All 3 are currently turned on by
 | 
				
			||||||
	  defines in lpfc_ioctl.c LPFC_DFC_IOCTL, LPFC_UTIL_IOCTL,
 | 
						  defines in lpfc_ioctl.c LPFC_DFC_IOCTL, LPFC_UTIL_IOCTL,
 | 
				
			||||||
	  LPFC_HBAAPI_IOCTL
 | 
						  LPFC_HBAAPI_IOCTL
 | 
				
			||||||
| 
						 | 
					@ -1551,7 +1551,7 @@ Changes from 20040326 to 20040402
 | 
				
			||||||
	  started by lpfc_online().  lpfc_offline() only stopped
 | 
						  started by lpfc_online().  lpfc_offline() only stopped
 | 
				
			||||||
	  els_timeout routine.  It now stops all timeout routines
 | 
						  els_timeout routine.  It now stops all timeout routines
 | 
				
			||||||
	  associated with that hba.
 | 
						  associated with that hba.
 | 
				
			||||||
	* Replace seperate next and prev pointers in struct
 | 
						* Replace separate next and prev pointers in struct
 | 
				
			||||||
	  lpfc_bindlist with list_head type.  In elxHBA_t, replace
 | 
						  lpfc_bindlist with list_head type.  In elxHBA_t, replace
 | 
				
			||||||
	  fc_nlpbind_start and _end with fc_nlpbind_list and use
 | 
						  fc_nlpbind_start and _end with fc_nlpbind_list and use
 | 
				
			||||||
	  list_head macros to access it.
 | 
						  list_head macros to access it.
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -105,6 +105,10 @@ write_wakeup()	-	May be called at any point between open and close.
 | 
				
			||||||
			is permitted to call the driver write method from
 | 
								is permitted to call the driver write method from
 | 
				
			||||||
			this function. In such a situation defer it.
 | 
								this function. In such a situation defer it.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					dcd_change()	-	Report to the tty line the current DCD pin status
 | 
				
			||||||
 | 
								changes and the relative timestamp. The timestamp
 | 
				
			||||||
 | 
								can be NULL.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
Driver Access
 | 
					Driver Access
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -1812,7 +1812,7 @@ Prior to version 0.9.0rc4 options had a 'snd_' prefix. This was removed.
 | 
				
			||||||
  Module snd-ua101
 | 
					  Module snd-ua101
 | 
				
			||||||
  ----------------
 | 
					  ----------------
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    Module for the Edirol UA-101 audio/MIDI interface.
 | 
					    Module for the Edirol UA-101/UA-1000 audio/MIDI interfaces.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    This module supports multiple devices, autoprobe and hotplugging.
 | 
					    This module supports multiple devices, autoprobe and hotplugging.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -573,11 +573,14 @@ Because other nodes' memory may be free. This means system total status
 | 
				
			||||||
may be not fatal yet.
 | 
					may be not fatal yet.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
If this is set to 2, the kernel panics compulsorily even on the
 | 
					If this is set to 2, the kernel panics compulsorily even on the
 | 
				
			||||||
above-mentioned.
 | 
					above-mentioned. Even oom happens under memory cgroup, the whole
 | 
				
			||||||
 | 
					system panics.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
The default value is 0.
 | 
					The default value is 0.
 | 
				
			||||||
1 and 2 are for failover of clustering. Please select either
 | 
					1 and 2 are for failover of clustering. Please select either
 | 
				
			||||||
according to your policy of failover.
 | 
					according to your policy of failover.
 | 
				
			||||||
 | 
					panic_on_oom=2+kdump gives you very strong tool to investigate
 | 
				
			||||||
 | 
					why oom happens. You can get snapshot.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
=============================================================
 | 
					=============================================================
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -4,6 +4,8 @@ highres.txt
 | 
				
			||||||
	- High resolution timers and dynamic ticks design notes
 | 
						- High resolution timers and dynamic ticks design notes
 | 
				
			||||||
hpet.txt
 | 
					hpet.txt
 | 
				
			||||||
	- High Precision Event Timer Driver for Linux
 | 
						- High Precision Event Timer Driver for Linux
 | 
				
			||||||
 | 
					hpet_example.c
 | 
				
			||||||
 | 
						- sample hpet timer test program
 | 
				
			||||||
hrtimers.txt
 | 
					hrtimers.txt
 | 
				
			||||||
	- subsystem for high-resolution kernel timers
 | 
						- subsystem for high-resolution kernel timers
 | 
				
			||||||
timer_stats.txt
 | 
					timer_stats.txt
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
							
								
								
									
										8
									
								
								Documentation/timers/Makefile
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										8
									
								
								Documentation/timers/Makefile
									
										
									
									
									
										Normal file
									
								
							| 
						 | 
					@ -0,0 +1,8 @@
 | 
				
			||||||
 | 
					# kbuild trick to avoid linker error. Can be omitted if a module is built.
 | 
				
			||||||
 | 
					obj- := dummy.o
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					# List of programs to build
 | 
				
			||||||
 | 
					hostprogs-y := hpet_example
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					# Tell kbuild to always build the programs
 | 
				
			||||||
 | 
					always := $(hostprogs-y)
 | 
				
			||||||
| 
						 | 
					@ -26,274 +26,5 @@ initialization.  An example of this initialization can be found in
 | 
				
			||||||
arch/x86/kernel/hpet.c.
 | 
					arch/x86/kernel/hpet.c.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
The driver provides a userspace API which resembles the API found in the
 | 
					The driver provides a userspace API which resembles the API found in the
 | 
				
			||||||
RTC driver framework.  An example user space program is provided below.
 | 
					RTC driver framework.  An example user space program is provided in
 | 
				
			||||||
 | 
					file:Documentation/timers/hpet_example.c
 | 
				
			||||||
#include <stdio.h>
 | 
					 | 
				
			||||||
#include <stdlib.h>
 | 
					 | 
				
			||||||
#include <unistd.h>
 | 
					 | 
				
			||||||
#include <fcntl.h>
 | 
					 | 
				
			||||||
#include <string.h>
 | 
					 | 
				
			||||||
#include <memory.h>
 | 
					 | 
				
			||||||
#include <malloc.h>
 | 
					 | 
				
			||||||
#include <time.h>
 | 
					 | 
				
			||||||
#include <ctype.h>
 | 
					 | 
				
			||||||
#include <sys/types.h>
 | 
					 | 
				
			||||||
#include <sys/wait.h>
 | 
					 | 
				
			||||||
#include <signal.h>
 | 
					 | 
				
			||||||
#include <fcntl.h>
 | 
					 | 
				
			||||||
#include <errno.h>
 | 
					 | 
				
			||||||
#include <sys/time.h>
 | 
					 | 
				
			||||||
#include <linux/hpet.h>
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
extern void hpet_open_close(int, const char **);
 | 
					 | 
				
			||||||
extern void hpet_info(int, const char **);
 | 
					 | 
				
			||||||
extern void hpet_poll(int, const char **);
 | 
					 | 
				
			||||||
extern void hpet_fasync(int, const char **);
 | 
					 | 
				
			||||||
extern void hpet_read(int, const char **);
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
#include <sys/poll.h>
 | 
					 | 
				
			||||||
#include <sys/ioctl.h>
 | 
					 | 
				
			||||||
#include <signal.h>
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
struct hpet_command {
 | 
					 | 
				
			||||||
	char		*command;
 | 
					 | 
				
			||||||
	void		(*func)(int argc, const char ** argv);
 | 
					 | 
				
			||||||
} hpet_command[] = {
 | 
					 | 
				
			||||||
	{
 | 
					 | 
				
			||||||
		"open-close",
 | 
					 | 
				
			||||||
		hpet_open_close
 | 
					 | 
				
			||||||
	},
 | 
					 | 
				
			||||||
	{
 | 
					 | 
				
			||||||
		"info",
 | 
					 | 
				
			||||||
		hpet_info
 | 
					 | 
				
			||||||
	},
 | 
					 | 
				
			||||||
	{
 | 
					 | 
				
			||||||
		"poll",
 | 
					 | 
				
			||||||
		hpet_poll
 | 
					 | 
				
			||||||
	},
 | 
					 | 
				
			||||||
	{
 | 
					 | 
				
			||||||
		"fasync",
 | 
					 | 
				
			||||||
		hpet_fasync
 | 
					 | 
				
			||||||
	},
 | 
					 | 
				
			||||||
};
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
int
 | 
					 | 
				
			||||||
main(int argc, const char ** argv)
 | 
					 | 
				
			||||||
{
 | 
					 | 
				
			||||||
	int	i;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
	argc--;
 | 
					 | 
				
			||||||
	argv++;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
	if (!argc) {
 | 
					 | 
				
			||||||
		fprintf(stderr, "-hpet: requires command\n");
 | 
					 | 
				
			||||||
		return -1;
 | 
					 | 
				
			||||||
	}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
	for (i = 0; i < (sizeof (hpet_command) / sizeof (hpet_command[0])); i++)
 | 
					 | 
				
			||||||
		if (!strcmp(argv[0], hpet_command[i].command)) {
 | 
					 | 
				
			||||||
			argc--;
 | 
					 | 
				
			||||||
			argv++;
 | 
					 | 
				
			||||||
			fprintf(stderr, "-hpet: executing %s\n",
 | 
					 | 
				
			||||||
				hpet_command[i].command);
 | 
					 | 
				
			||||||
			hpet_command[i].func(argc, argv);
 | 
					 | 
				
			||||||
			return 0;
 | 
					 | 
				
			||||||
		}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
	fprintf(stderr, "do_hpet: command %s not implemented\n", argv[0]);
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
	return -1;
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
void
 | 
					 | 
				
			||||||
hpet_open_close(int argc, const char **argv)
 | 
					 | 
				
			||||||
{
 | 
					 | 
				
			||||||
	int	fd;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
	if (argc != 1) {
 | 
					 | 
				
			||||||
		fprintf(stderr, "hpet_open_close: device-name\n");
 | 
					 | 
				
			||||||
		return;
 | 
					 | 
				
			||||||
	}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
	fd = open(argv[0], O_RDONLY);
 | 
					 | 
				
			||||||
	if (fd < 0)
 | 
					 | 
				
			||||||
		fprintf(stderr, "hpet_open_close: open failed\n");
 | 
					 | 
				
			||||||
	else
 | 
					 | 
				
			||||||
		close(fd);
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
	return;
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
void
 | 
					 | 
				
			||||||
hpet_info(int argc, const char **argv)
 | 
					 | 
				
			||||||
{
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
void
 | 
					 | 
				
			||||||
hpet_poll(int argc, const char **argv)
 | 
					 | 
				
			||||||
{
 | 
					 | 
				
			||||||
	unsigned long		freq;
 | 
					 | 
				
			||||||
	int			iterations, i, fd;
 | 
					 | 
				
			||||||
	struct pollfd		pfd;
 | 
					 | 
				
			||||||
	struct hpet_info	info;
 | 
					 | 
				
			||||||
	struct timeval		stv, etv;
 | 
					 | 
				
			||||||
	struct timezone		tz;
 | 
					 | 
				
			||||||
	long			usec;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
	if (argc != 3) {
 | 
					 | 
				
			||||||
		fprintf(stderr, "hpet_poll: device-name freq iterations\n");
 | 
					 | 
				
			||||||
		return;
 | 
					 | 
				
			||||||
	}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
	freq = atoi(argv[1]);
 | 
					 | 
				
			||||||
	iterations = atoi(argv[2]);
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
	fd = open(argv[0], O_RDONLY);
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
	if (fd < 0) {
 | 
					 | 
				
			||||||
		fprintf(stderr, "hpet_poll: open of %s failed\n", argv[0]);
 | 
					 | 
				
			||||||
		return;
 | 
					 | 
				
			||||||
	}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
	if (ioctl(fd, HPET_IRQFREQ, freq) < 0) {
 | 
					 | 
				
			||||||
		fprintf(stderr, "hpet_poll: HPET_IRQFREQ failed\n");
 | 
					 | 
				
			||||||
		goto out;
 | 
					 | 
				
			||||||
	}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
	if (ioctl(fd, HPET_INFO, &info) < 0) {
 | 
					 | 
				
			||||||
		fprintf(stderr, "hpet_poll: failed to get info\n");
 | 
					 | 
				
			||||||
		goto out;
 | 
					 | 
				
			||||||
	}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
	fprintf(stderr, "hpet_poll: info.hi_flags 0x%lx\n", info.hi_flags);
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
	if (info.hi_flags && (ioctl(fd, HPET_EPI, 0) < 0)) {
 | 
					 | 
				
			||||||
		fprintf(stderr, "hpet_poll: HPET_EPI failed\n");
 | 
					 | 
				
			||||||
		goto out;
 | 
					 | 
				
			||||||
	}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
	if (ioctl(fd, HPET_IE_ON, 0) < 0) {
 | 
					 | 
				
			||||||
		fprintf(stderr, "hpet_poll, HPET_IE_ON failed\n");
 | 
					 | 
				
			||||||
		goto out;
 | 
					 | 
				
			||||||
	}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
	pfd.fd = fd;
 | 
					 | 
				
			||||||
	pfd.events = POLLIN;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
	for (i = 0; i < iterations; i++) {
 | 
					 | 
				
			||||||
		pfd.revents = 0;
 | 
					 | 
				
			||||||
		gettimeofday(&stv, &tz);
 | 
					 | 
				
			||||||
		if (poll(&pfd, 1, -1) < 0)
 | 
					 | 
				
			||||||
			fprintf(stderr, "hpet_poll: poll failed\n");
 | 
					 | 
				
			||||||
		else {
 | 
					 | 
				
			||||||
			long 	data;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
			gettimeofday(&etv, &tz);
 | 
					 | 
				
			||||||
			usec = stv.tv_sec * 1000000 + stv.tv_usec;
 | 
					 | 
				
			||||||
			usec = (etv.tv_sec * 1000000 + etv.tv_usec) - usec;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
			fprintf(stderr,
 | 
					 | 
				
			||||||
				"hpet_poll: expired time = 0x%lx\n", usec);
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
			fprintf(stderr, "hpet_poll: revents = 0x%x\n",
 | 
					 | 
				
			||||||
				pfd.revents);
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
			if (read(fd, &data, sizeof(data)) != sizeof(data)) {
 | 
					 | 
				
			||||||
				fprintf(stderr, "hpet_poll: read failed\n");
 | 
					 | 
				
			||||||
			}
 | 
					 | 
				
			||||||
			else
 | 
					 | 
				
			||||||
				fprintf(stderr, "hpet_poll: data 0x%lx\n",
 | 
					 | 
				
			||||||
					data);
 | 
					 | 
				
			||||||
		}
 | 
					 | 
				
			||||||
	}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
out:
 | 
					 | 
				
			||||||
	close(fd);
 | 
					 | 
				
			||||||
	return;
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
static int hpet_sigio_count;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
static void
 | 
					 | 
				
			||||||
hpet_sigio(int val)
 | 
					 | 
				
			||||||
{
 | 
					 | 
				
			||||||
	fprintf(stderr, "hpet_sigio: called\n");
 | 
					 | 
				
			||||||
	hpet_sigio_count++;
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
void
 | 
					 | 
				
			||||||
hpet_fasync(int argc, const char **argv)
 | 
					 | 
				
			||||||
{
 | 
					 | 
				
			||||||
	unsigned long		freq;
 | 
					 | 
				
			||||||
	int			iterations, i, fd, value;
 | 
					 | 
				
			||||||
	sig_t			oldsig;
 | 
					 | 
				
			||||||
	struct hpet_info	info;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
	hpet_sigio_count = 0;
 | 
					 | 
				
			||||||
	fd = -1;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
	if ((oldsig = signal(SIGIO, hpet_sigio)) == SIG_ERR) {
 | 
					 | 
				
			||||||
		fprintf(stderr, "hpet_fasync: failed to set signal handler\n");
 | 
					 | 
				
			||||||
		return;
 | 
					 | 
				
			||||||
	}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
	if (argc != 3) {
 | 
					 | 
				
			||||||
		fprintf(stderr, "hpet_fasync: device-name freq iterations\n");
 | 
					 | 
				
			||||||
		goto out;
 | 
					 | 
				
			||||||
	}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
	fd = open(argv[0], O_RDONLY);
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
	if (fd < 0) {
 | 
					 | 
				
			||||||
		fprintf(stderr, "hpet_fasync: failed to open %s\n", argv[0]);
 | 
					 | 
				
			||||||
		return;
 | 
					 | 
				
			||||||
	}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
	if ((fcntl(fd, F_SETOWN, getpid()) == 1) ||
 | 
					 | 
				
			||||||
		((value = fcntl(fd, F_GETFL)) == 1) ||
 | 
					 | 
				
			||||||
		(fcntl(fd, F_SETFL, value | O_ASYNC) == 1)) {
 | 
					 | 
				
			||||||
		fprintf(stderr, "hpet_fasync: fcntl failed\n");
 | 
					 | 
				
			||||||
		goto out;
 | 
					 | 
				
			||||||
	}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
	freq = atoi(argv[1]);
 | 
					 | 
				
			||||||
	iterations = atoi(argv[2]);
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
	if (ioctl(fd, HPET_IRQFREQ, freq) < 0) {
 | 
					 | 
				
			||||||
		fprintf(stderr, "hpet_fasync: HPET_IRQFREQ failed\n");
 | 
					 | 
				
			||||||
		goto out;
 | 
					 | 
				
			||||||
	}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
	if (ioctl(fd, HPET_INFO, &info) < 0) {
 | 
					 | 
				
			||||||
		fprintf(stderr, "hpet_fasync: failed to get info\n");
 | 
					 | 
				
			||||||
		goto out;
 | 
					 | 
				
			||||||
	}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
	fprintf(stderr, "hpet_fasync: info.hi_flags 0x%lx\n", info.hi_flags);
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
	if (info.hi_flags && (ioctl(fd, HPET_EPI, 0) < 0)) {
 | 
					 | 
				
			||||||
		fprintf(stderr, "hpet_fasync: HPET_EPI failed\n");
 | 
					 | 
				
			||||||
		goto out;
 | 
					 | 
				
			||||||
	}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
	if (ioctl(fd, HPET_IE_ON, 0) < 0) {
 | 
					 | 
				
			||||||
		fprintf(stderr, "hpet_fasync, HPET_IE_ON failed\n");
 | 
					 | 
				
			||||||
		goto out;
 | 
					 | 
				
			||||||
	}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
	for (i = 0; i < iterations; i++) {
 | 
					 | 
				
			||||||
		(void) pause();
 | 
					 | 
				
			||||||
		fprintf(stderr, "hpet_fasync: count = %d\n", hpet_sigio_count);
 | 
					 | 
				
			||||||
	}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
out:
 | 
					 | 
				
			||||||
	signal(SIGIO, oldsig);
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
	if (fd >= 0)
 | 
					 | 
				
			||||||
		close(fd);
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
	return;
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
							
								
								
									
										269
									
								
								Documentation/timers/hpet_example.c
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										269
									
								
								Documentation/timers/hpet_example.c
									
										
									
									
									
										Normal file
									
								
							| 
						 | 
					@ -0,0 +1,269 @@
 | 
				
			||||||
 | 
					#include <stdio.h>
 | 
				
			||||||
 | 
					#include <stdlib.h>
 | 
				
			||||||
 | 
					#include <unistd.h>
 | 
				
			||||||
 | 
					#include <fcntl.h>
 | 
				
			||||||
 | 
					#include <string.h>
 | 
				
			||||||
 | 
					#include <memory.h>
 | 
				
			||||||
 | 
					#include <malloc.h>
 | 
				
			||||||
 | 
					#include <time.h>
 | 
				
			||||||
 | 
					#include <ctype.h>
 | 
				
			||||||
 | 
					#include <sys/types.h>
 | 
				
			||||||
 | 
					#include <sys/wait.h>
 | 
				
			||||||
 | 
					#include <signal.h>
 | 
				
			||||||
 | 
					#include <fcntl.h>
 | 
				
			||||||
 | 
					#include <errno.h>
 | 
				
			||||||
 | 
					#include <sys/time.h>
 | 
				
			||||||
 | 
					#include <linux/hpet.h>
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					extern void hpet_open_close(int, const char **);
 | 
				
			||||||
 | 
					extern void hpet_info(int, const char **);
 | 
				
			||||||
 | 
					extern void hpet_poll(int, const char **);
 | 
				
			||||||
 | 
					extern void hpet_fasync(int, const char **);
 | 
				
			||||||
 | 
					extern void hpet_read(int, const char **);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#include <sys/poll.h>
 | 
				
			||||||
 | 
					#include <sys/ioctl.h>
 | 
				
			||||||
 | 
					#include <signal.h>
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					struct hpet_command {
 | 
				
			||||||
 | 
						char		*command;
 | 
				
			||||||
 | 
						void		(*func)(int argc, const char ** argv);
 | 
				
			||||||
 | 
					} hpet_command[] = {
 | 
				
			||||||
 | 
						{
 | 
				
			||||||
 | 
							"open-close",
 | 
				
			||||||
 | 
							hpet_open_close
 | 
				
			||||||
 | 
						},
 | 
				
			||||||
 | 
						{
 | 
				
			||||||
 | 
							"info",
 | 
				
			||||||
 | 
							hpet_info
 | 
				
			||||||
 | 
						},
 | 
				
			||||||
 | 
						{
 | 
				
			||||||
 | 
							"poll",
 | 
				
			||||||
 | 
							hpet_poll
 | 
				
			||||||
 | 
						},
 | 
				
			||||||
 | 
						{
 | 
				
			||||||
 | 
							"fasync",
 | 
				
			||||||
 | 
							hpet_fasync
 | 
				
			||||||
 | 
						},
 | 
				
			||||||
 | 
					};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					int
 | 
				
			||||||
 | 
					main(int argc, const char ** argv)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
						int	i;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						argc--;
 | 
				
			||||||
 | 
						argv++;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						if (!argc) {
 | 
				
			||||||
 | 
							fprintf(stderr, "-hpet: requires command\n");
 | 
				
			||||||
 | 
							return -1;
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						for (i = 0; i < (sizeof (hpet_command) / sizeof (hpet_command[0])); i++)
 | 
				
			||||||
 | 
							if (!strcmp(argv[0], hpet_command[i].command)) {
 | 
				
			||||||
 | 
								argc--;
 | 
				
			||||||
 | 
								argv++;
 | 
				
			||||||
 | 
								fprintf(stderr, "-hpet: executing %s\n",
 | 
				
			||||||
 | 
									hpet_command[i].command);
 | 
				
			||||||
 | 
								hpet_command[i].func(argc, argv);
 | 
				
			||||||
 | 
								return 0;
 | 
				
			||||||
 | 
							}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						fprintf(stderr, "do_hpet: command %s not implemented\n", argv[0]);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						return -1;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					void
 | 
				
			||||||
 | 
					hpet_open_close(int argc, const char **argv)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
						int	fd;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						if (argc != 1) {
 | 
				
			||||||
 | 
							fprintf(stderr, "hpet_open_close: device-name\n");
 | 
				
			||||||
 | 
							return;
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						fd = open(argv[0], O_RDONLY);
 | 
				
			||||||
 | 
						if (fd < 0)
 | 
				
			||||||
 | 
							fprintf(stderr, "hpet_open_close: open failed\n");
 | 
				
			||||||
 | 
						else
 | 
				
			||||||
 | 
							close(fd);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						return;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					void
 | 
				
			||||||
 | 
					hpet_info(int argc, const char **argv)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					void
 | 
				
			||||||
 | 
					hpet_poll(int argc, const char **argv)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
						unsigned long		freq;
 | 
				
			||||||
 | 
						int			iterations, i, fd;
 | 
				
			||||||
 | 
						struct pollfd		pfd;
 | 
				
			||||||
 | 
						struct hpet_info	info;
 | 
				
			||||||
 | 
						struct timeval		stv, etv;
 | 
				
			||||||
 | 
						struct timezone		tz;
 | 
				
			||||||
 | 
						long			usec;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						if (argc != 3) {
 | 
				
			||||||
 | 
							fprintf(stderr, "hpet_poll: device-name freq iterations\n");
 | 
				
			||||||
 | 
							return;
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						freq = atoi(argv[1]);
 | 
				
			||||||
 | 
						iterations = atoi(argv[2]);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						fd = open(argv[0], O_RDONLY);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						if (fd < 0) {
 | 
				
			||||||
 | 
							fprintf(stderr, "hpet_poll: open of %s failed\n", argv[0]);
 | 
				
			||||||
 | 
							return;
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						if (ioctl(fd, HPET_IRQFREQ, freq) < 0) {
 | 
				
			||||||
 | 
							fprintf(stderr, "hpet_poll: HPET_IRQFREQ failed\n");
 | 
				
			||||||
 | 
							goto out;
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						if (ioctl(fd, HPET_INFO, &info) < 0) {
 | 
				
			||||||
 | 
							fprintf(stderr, "hpet_poll: failed to get info\n");
 | 
				
			||||||
 | 
							goto out;
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						fprintf(stderr, "hpet_poll: info.hi_flags 0x%lx\n", info.hi_flags);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						if (info.hi_flags && (ioctl(fd, HPET_EPI, 0) < 0)) {
 | 
				
			||||||
 | 
							fprintf(stderr, "hpet_poll: HPET_EPI failed\n");
 | 
				
			||||||
 | 
							goto out;
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						if (ioctl(fd, HPET_IE_ON, 0) < 0) {
 | 
				
			||||||
 | 
							fprintf(stderr, "hpet_poll, HPET_IE_ON failed\n");
 | 
				
			||||||
 | 
							goto out;
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						pfd.fd = fd;
 | 
				
			||||||
 | 
						pfd.events = POLLIN;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						for (i = 0; i < iterations; i++) {
 | 
				
			||||||
 | 
							pfd.revents = 0;
 | 
				
			||||||
 | 
							gettimeofday(&stv, &tz);
 | 
				
			||||||
 | 
							if (poll(&pfd, 1, -1) < 0)
 | 
				
			||||||
 | 
								fprintf(stderr, "hpet_poll: poll failed\n");
 | 
				
			||||||
 | 
							else {
 | 
				
			||||||
 | 
								long 	data;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
								gettimeofday(&etv, &tz);
 | 
				
			||||||
 | 
								usec = stv.tv_sec * 1000000 + stv.tv_usec;
 | 
				
			||||||
 | 
								usec = (etv.tv_sec * 1000000 + etv.tv_usec) - usec;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
								fprintf(stderr,
 | 
				
			||||||
 | 
									"hpet_poll: expired time = 0x%lx\n", usec);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
								fprintf(stderr, "hpet_poll: revents = 0x%x\n",
 | 
				
			||||||
 | 
									pfd.revents);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
								if (read(fd, &data, sizeof(data)) != sizeof(data)) {
 | 
				
			||||||
 | 
									fprintf(stderr, "hpet_poll: read failed\n");
 | 
				
			||||||
 | 
								}
 | 
				
			||||||
 | 
								else
 | 
				
			||||||
 | 
									fprintf(stderr, "hpet_poll: data 0x%lx\n",
 | 
				
			||||||
 | 
										data);
 | 
				
			||||||
 | 
							}
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					out:
 | 
				
			||||||
 | 
						close(fd);
 | 
				
			||||||
 | 
						return;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/*
					 * Number of SIGIO deliveries counted by hpet_sigio() since hpet_fasync()
					 * last reset it.  It is written from a signal handler and read from the
					 * main flow, hence volatile sig_atomic_t rather than a plain int.
					 */
					static volatile sig_atomic_t hpet_sigio_count;

					/*
					 * SIGIO handler installed by hpet_fasync(): reports the delivery on stderr
					 * and bumps hpet_sigio_count.
					 *
					 * write(2) is used instead of fprintf() because stdio is not
					 * async-signal-safe and the interrupted code may itself be inside
					 * fprintf(stderr, ...).
					 */
					static void
					hpet_sigio(int val)
					{
						static const char	msg[] = "hpet_sigio: called\n";

						(void) val;

						if (write(STDERR_FILENO, msg, sizeof(msg) - 1) < 0) {
							/* nothing sensible can be done about it from a handler */
						}

						hpet_sigio_count++;
					}

					/*
					 * hpet_fasync - wait for SIGIO notifications from an HPET device.
					 *
					 * argc must be 3 and argv holds:
					 *	argv[0]  device name passed to open()
					 *	argv[1]  frequency handed to the HPET_IRQFREQ ioctl
					 *	argv[2]  number of pause() iterations to perform
					 *
					 * The function installs hpet_sigio() for SIGIO, makes this process the
					 * owner of the descriptor and sets O_ASYNC on it, issues the HPET_IRQFREQ,
					 * HPET_INFO, HPET_EPI (only when info.hi_flags is non-zero) and HPET_IE_ON
					 * ioctls, then calls pause() `iterations` times, printing the running
					 * signal count after each wake-up.
					 *
					 * Once the handler has been installed, every exit path goes through "out"
					 * so the previous SIGIO disposition is restored and the descriptor, if
					 * any, is closed.  Errors are reported on stderr; nothing is returned.
					 */
					void
					hpet_fasync(int argc, const char **argv)
					{
						unsigned long		freq;
						int			iterations, i, fd, value;
						void			(*oldsig)(int);
						struct hpet_info	info;

						hpet_sigio_count = 0;
						fd = -1;

						oldsig = signal(SIGIO, hpet_sigio);
						if (oldsig == SIG_ERR) {
							fprintf(stderr, "hpet_fasync: failed to set signal handler\n");
							return;
						}

						if (argc != 3) {
							fprintf(stderr, "hpet_fasync: device-name freq iterations\n");
							goto out;
						}

						fd = open(argv[0], O_RDONLY);

						if (fd < 0) {
							fprintf(stderr, "hpet_fasync: failed to open %s\n", argv[0]);
							/* the handler is already installed: restore it on the way out */
							goto out;
						}

						/* fcntl() signals failure with -1, not 1 */
						if ((fcntl(fd, F_SETOWN, getpid()) == -1) ||
							((value = fcntl(fd, F_GETFL)) == -1) ||
							(fcntl(fd, F_SETFL, value | O_ASYNC) == -1)) {
							fprintf(stderr, "hpet_fasync: fcntl failed\n");
							goto out;
						}

						freq = atoi(argv[1]);
						iterations = atoi(argv[2]);

						if (ioctl(fd, HPET_IRQFREQ, freq) < 0) {
							fprintf(stderr, "hpet_fasync: HPET_IRQFREQ failed\n");
							goto out;
						}

						if (ioctl(fd, HPET_INFO, &info) < 0) {
							fprintf(stderr, "hpet_fasync: failed to get info\n");
							goto out;
						}

						fprintf(stderr, "hpet_fasync: info.hi_flags 0x%lx\n", info.hi_flags);

						if (info.hi_flags && (ioctl(fd, HPET_EPI, 0) < 0)) {
							fprintf(stderr, "hpet_fasync: HPET_EPI failed\n");
							goto out;
						}

						if (ioctl(fd, HPET_IE_ON, 0) < 0) {
							fprintf(stderr, "hpet_fasync, HPET_IE_ON failed\n");
							goto out;
						}

						for (i = 0; i < iterations; i++) {
							/* sleep until a signal (normally SIGIO) is delivered */
							(void) pause();
							fprintf(stderr, "hpet_fasync: count = %d\n",
								(int) hpet_sigio_count);
						}

					out:
						signal(SIGIO, oldsig);

						if (fd >= 0)
							close(fd);

						return;
					}
 | 
				
			||||||
| 
						 | 
					@ -1588,7 +1588,7 @@ module author does not need to worry about it.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
When tracing is enabled, kstop_machine is called to prevent
 | 
					When tracing is enabled, kstop_machine is called to prevent
 | 
				
			||||||
races with the CPUS executing code being modified (which can
 | 
					races with the CPUS executing code being modified (which can
 | 
				
			||||||
cause the CPU to do undesireable things), and the nops are
 | 
					cause the CPU to do undesirable things), and the nops are
 | 
				
			||||||
patched back to calls. But this time, they do not call mcount
 | 
					patched back to calls. But this time, they do not call mcount
 | 
				
			||||||
(which is just a function stub). They now call into the ftrace
 | 
					(which is just a function stub). They now call into the ftrace
 | 
				
			||||||
infrastructure.
 | 
					infrastructure.
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -4,23 +4,35 @@ active_mm.txt
 | 
				
			||||||
	- An explanation from Linus about tsk->active_mm vs tsk->mm.
 | 
						- An explanation from Linus about tsk->active_mm vs tsk->mm.
 | 
				
			||||||
balance
 | 
					balance
 | 
				
			||||||
	- various information on memory balancing.
 | 
						- various information on memory balancing.
 | 
				
			||||||
 | 
					hugepage-mmap.c
 | 
				
			||||||
 | 
						- Example app using huge page memory with the mmap system call.
 | 
				
			||||||
 | 
					hugepage-shm.c
 | 
				
			||||||
 | 
						- Example app using huge page memory with Sys V shared memory system calls.
 | 
				
			||||||
hugetlbpage.txt
 | 
					hugetlbpage.txt
 | 
				
			||||||
	- a brief summary of hugetlbpage support in the Linux kernel.
 | 
						- a brief summary of hugetlbpage support in the Linux kernel.
 | 
				
			||||||
 | 
					hwpoison.txt
 | 
				
			||||||
 | 
						- explains what hwpoison is
 | 
				
			||||||
ksm.txt
 | 
					ksm.txt
 | 
				
			||||||
	- how to use the Kernel Samepage Merging feature.
 | 
						- how to use the Kernel Samepage Merging feature.
 | 
				
			||||||
locking
 | 
					locking
 | 
				
			||||||
	- info on how locking and synchronization is done in the Linux vm code.
 | 
						- info on how locking and synchronization is done in the Linux vm code.
 | 
				
			||||||
 | 
					map_hugetlb.c
 | 
				
			||||||
 | 
						- an example program that uses the MAP_HUGETLB mmap flag.
 | 
				
			||||||
numa
 | 
					numa
 | 
				
			||||||
	- information about NUMA specific code in the Linux vm.
 | 
						- information about NUMA specific code in the Linux vm.
 | 
				
			||||||
numa_memory_policy.txt
 | 
					numa_memory_policy.txt
 | 
				
			||||||
	- documentation of concepts and APIs of the 2.6 memory policy support.
 | 
						- documentation of concepts and APIs of the 2.6 memory policy support.
 | 
				
			||||||
overcommit-accounting
 | 
					overcommit-accounting
 | 
				
			||||||
	- description of the Linux kernel's overcommit handling modes.
 | 
						- description of the Linux kernel's overcommit handling modes.
 | 
				
			||||||
 | 
					page-types.c
 | 
				
			||||||
 | 
						- Tool for querying page flags
 | 
				
			||||||
page_migration
 | 
					page_migration
 | 
				
			||||||
	- description of page migration in NUMA systems.
 | 
						- description of page migration in NUMA systems.
 | 
				
			||||||
 | 
					pagemap.txt
 | 
				
			||||||
 | 
						- pagemap, from the userspace perspective
 | 
				
			||||||
slabinfo.c
 | 
					slabinfo.c
 | 
				
			||||||
	- source code for a tool to get reports about slabs.
 | 
						- source code for a tool to get reports about slabs.
 | 
				
			||||||
slub.txt
 | 
					slub.txt
 | 
				
			||||||
	- a short user's guide for SLUB.
 | 
						- a short user's guide for SLUB.
 | 
				
			||||||
map_hugetlb.c
 | 
					unevictable-lru.txt
 | 
				
			||||||
	- an example program that uses the MAP_HUGETLB mmap flag.
 | 
						- Unevictable LRU infrastructure
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -2,7 +2,7 @@
 | 
				
			||||||
obj- := dummy.o
 | 
					obj- := dummy.o
 | 
				
			||||||
 | 
					
 | 
				
			||||||
# List of programs to build
 | 
					# List of programs to build
 | 
				
			||||||
hostprogs-y := slabinfo page-types
 | 
					hostprogs-y := slabinfo page-types hugepage-mmap hugepage-shm map_hugetlb
 | 
				
			||||||
 | 
					
 | 
				
			||||||
# Tell kbuild to always build the programs
 | 
					# Tell kbuild to always build the programs
 | 
				
			||||||
always := $(hostprogs-y)
 | 
					always := $(hostprogs-y)
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
							
								
								
									
										91
									
								
								Documentation/vm/hugepage-mmap.c
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										91
									
								
								Documentation/vm/hugepage-mmap.c
									
										
									
									
									
										Normal file
									
								
							| 
						 | 
					@ -0,0 +1,91 @@
 | 
				
			||||||
 | 
					/*
 | 
				
			||||||
 | 
					 * hugepage-mmap:
 | 
				
			||||||
 | 
					 *
 | 
				
			||||||
 | 
					 * Example of using huge page memory in a user application using the mmap
 | 
				
			||||||
 | 
					 * system call.  Before running this application, make sure that the
 | 
				
			||||||
 | 
					 * administrator has mounted the hugetlbfs filesystem (on some directory
 | 
				
			||||||
 | 
					 * like /mnt) using the command mount -t hugetlbfs nodev /mnt. In this
 | 
				
			||||||
 | 
					 * example, the app is requesting memory of size 256MB that is backed by
 | 
				
			||||||
 | 
					 * huge pages.
 | 
				
			||||||
 | 
					 *
 | 
				
			||||||
 | 
					 * For the ia64 architecture, the Linux kernel reserves Region number 4 for
 | 
				
			||||||
 | 
					 * huge pages.  That means that if one requires a fixed address, a huge page
 | 
				
			||||||
 | 
					 * aligned address starting with 0x800000... will be required.  If a fixed
 | 
				
			||||||
 | 
					 * address is not required, the kernel will select an address in the proper
 | 
				
			||||||
 | 
					 * range.
 | 
				
			||||||
 | 
					 * Other architectures, such as ppc64, i386 or x86_64 are not so constrained.
 | 
				
			||||||
 | 
					 */
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#include <stdlib.h>
 | 
				
			||||||
 | 
					#include <stdio.h>
 | 
				
			||||||
 | 
					#include <unistd.h>
 | 
				
			||||||
 | 
					#include <sys/mman.h>
 | 
				
			||||||
 | 
					#include <fcntl.h>
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#define FILE_NAME "/mnt/hugepagefile"
 | 
				
			||||||
 | 
					#define LENGTH (256UL*1024*1024)
 | 
				
			||||||
 | 
					#define PROTECTION (PROT_READ | PROT_WRITE)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/* Only ia64 requires this */
 | 
				
			||||||
 | 
					#ifdef __ia64__
 | 
				
			||||||
 | 
					#define ADDR (void *)(0x8000000000000000UL)
 | 
				
			||||||
 | 
					#define FLAGS (MAP_SHARED | MAP_FIXED)
 | 
				
			||||||
 | 
					#else
 | 
				
			||||||
 | 
					#define ADDR (void *)(0x0UL)
 | 
				
			||||||
 | 
					#define FLAGS (MAP_SHARED)
 | 
				
			||||||
 | 
					#endif
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/* Print, in hex, the first unsigned int found at addr. */
					static void check_bytes(char *addr)
					{
						const unsigned int *first_word = (const unsigned int *)addr;

						printf("First hex is %x\n", first_word[0]);
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					static void write_bytes(char *addr)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
						unsigned long i;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						for (i = 0; i < LENGTH; i++)
 | 
				
			||||||
 | 
							*(addr + i) = (char)i;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					static void read_bytes(char *addr)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
						unsigned long i;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						check_bytes(addr);
 | 
				
			||||||
 | 
						for (i = 0; i < LENGTH; i++)
 | 
				
			||||||
 | 
							if (*(addr + i) != (char)i) {
 | 
				
			||||||
 | 
								printf("Mismatch at %lu\n", i);
 | 
				
			||||||
 | 
								break;
 | 
				
			||||||
 | 
							}
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					int main(void)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
						void *addr;
 | 
				
			||||||
 | 
						int fd;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						fd = open(FILE_NAME, O_CREAT | O_RDWR, 0755);
 | 
				
			||||||
 | 
						if (fd < 0) {
 | 
				
			||||||
 | 
							perror("Open failed");
 | 
				
			||||||
 | 
							exit(1);
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						addr = mmap(ADDR, LENGTH, PROTECTION, FLAGS, fd, 0);
 | 
				
			||||||
 | 
						if (addr == MAP_FAILED) {
 | 
				
			||||||
 | 
							perror("mmap");
 | 
				
			||||||
 | 
							unlink(FILE_NAME);
 | 
				
			||||||
 | 
							exit(1);
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						printf("Returned address is %p\n", addr);
 | 
				
			||||||
 | 
						check_bytes(addr);
 | 
				
			||||||
 | 
						write_bytes(addr);
 | 
				
			||||||
 | 
						read_bytes(addr);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						munmap(addr, LENGTH);
 | 
				
			||||||
 | 
						close(fd);
 | 
				
			||||||
 | 
						unlink(FILE_NAME);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						return 0;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
							
								
								
									
										98
									
								
								Documentation/vm/hugepage-shm.c
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										98
									
								
								Documentation/vm/hugepage-shm.c
									
										
									
									
									
										Normal file
									
								
							| 
						 | 
					@ -0,0 +1,98 @@
 | 
				
			||||||
 | 
					/*
 | 
				
			||||||
 | 
					 * hugepage-shm:
 | 
				
			||||||
 | 
					 *
 | 
				
			||||||
 | 
					 * Example of using huge page memory in a user application using Sys V shared
 | 
				
			||||||
 | 
					 * memory system calls.  In this example the app is requesting 256MB of
 | 
				
			||||||
 | 
					 * memory that is backed by huge pages.  The application uses the flag
 | 
				
			||||||
 | 
					 * SHM_HUGETLB in the shmget system call to inform the kernel that it is
 | 
				
			||||||
 | 
					 * requesting huge pages.
 | 
				
			||||||
 | 
					 *
 | 
				
			||||||
 | 
					 * For the ia64 architecture, the Linux kernel reserves Region number 4 for
 | 
				
			||||||
 | 
					 * huge pages.  That means that if one requires a fixed address, a huge page
 | 
				
			||||||
 | 
					 * aligned address starting with 0x800000... will be required.  If a fixed
 | 
				
			||||||
 | 
					 * address is not required, the kernel will select an address in the proper
 | 
				
			||||||
 | 
					 * range.
 | 
				
			||||||
 | 
					 * Other architectures, such as ppc64, i386 or x86_64 are not so constrained.
 | 
				
			||||||
 | 
					 *
 | 
				
			||||||
 | 
					 * Note: The default shared memory limit is quite low on many kernels,
 | 
				
			||||||
 | 
					 * you may need to increase it via:
 | 
				
			||||||
 | 
					 *
 | 
				
			||||||
 | 
					 * echo 268435456 > /proc/sys/kernel/shmmax
 | 
				
			||||||
 | 
					 *
 | 
				
			||||||
 | 
					 * This will increase the maximum size per shared memory segment to 256MB.
 | 
				
			||||||
 | 
					 * The other limit that you will hit eventually is shmall which is the
 | 
				
			||||||
 | 
					 * total amount of shared memory in pages. To set it to 16GB on a system
 | 
				
			||||||
 | 
					 * with a 4kB pagesize do:
 | 
				
			||||||
 | 
					 *
 | 
				
			||||||
 | 
					 * echo 4194304 > /proc/sys/kernel/shmall
 | 
				
			||||||
 | 
					 */
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#include <stdlib.h>
 | 
				
			||||||
 | 
					#include <stdio.h>
 | 
				
			||||||
 | 
					#include <sys/types.h>
 | 
				
			||||||
 | 
					#include <sys/ipc.h>
 | 
				
			||||||
 | 
					#include <sys/shm.h>
 | 
				
			||||||
 | 
					#include <sys/mman.h>
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#ifndef SHM_HUGETLB
 | 
				
			||||||
 | 
					#define SHM_HUGETLB 04000
 | 
				
			||||||
 | 
					#endif
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#define LENGTH (256UL*1024*1024)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#define dprintf(x)  printf(x)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/* Only ia64 requires this */
 | 
				
			||||||
 | 
					#ifdef __ia64__
 | 
				
			||||||
 | 
					#define ADDR (void *)(0x8000000000000000UL)
 | 
				
			||||||
 | 
					#define SHMAT_FLAGS (SHM_RND)
 | 
				
			||||||
 | 
					#else
 | 
				
			||||||
 | 
					#define ADDR (void *)(0x0UL)
 | 
				
			||||||
 | 
					#define SHMAT_FLAGS (0)
 | 
				
			||||||
 | 
					#endif
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					int main(void)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
						int shmid;
 | 
				
			||||||
 | 
						unsigned long i;
 | 
				
			||||||
 | 
						char *shmaddr;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						if ((shmid = shmget(2, LENGTH,
 | 
				
			||||||
 | 
								    SHM_HUGETLB | IPC_CREAT | SHM_R | SHM_W)) < 0) {
 | 
				
			||||||
 | 
							perror("shmget");
 | 
				
			||||||
 | 
							exit(1);
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
						printf("shmid: 0x%x\n", shmid);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						shmaddr = shmat(shmid, ADDR, SHMAT_FLAGS);
 | 
				
			||||||
 | 
						if (shmaddr == (char *)-1) {
 | 
				
			||||||
 | 
							perror("Shared memory attach failure");
 | 
				
			||||||
 | 
							shmctl(shmid, IPC_RMID, NULL);
 | 
				
			||||||
 | 
							exit(2);
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
						printf("shmaddr: %p\n", shmaddr);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						dprintf("Starting the writes:\n");
 | 
				
			||||||
 | 
						for (i = 0; i < LENGTH; i++) {
 | 
				
			||||||
 | 
							shmaddr[i] = (char)(i);
 | 
				
			||||||
 | 
							if (!(i % (1024 * 1024)))
 | 
				
			||||||
 | 
								dprintf(".");
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
						dprintf("\n");
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						dprintf("Starting the Check...");
 | 
				
			||||||
 | 
						for (i = 0; i < LENGTH; i++)
 | 
				
			||||||
 | 
							if (shmaddr[i] != (char)i)
 | 
				
			||||||
 | 
								printf("\nIndex %lu mismatched\n", i);
 | 
				
			||||||
 | 
						dprintf("Done.\n");
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						if (shmdt((const void *)shmaddr) != 0) {
 | 
				
			||||||
 | 
							perror("Detach failure");
 | 
				
			||||||
 | 
							shmctl(shmid, IPC_RMID, NULL);
 | 
				
			||||||
 | 
							exit(3);
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						shmctl(shmid, IPC_RMID, NULL);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						return 0;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
| 
						 | 
					@ -299,176 +299,11 @@ map_hugetlb.c.
 | 
				
			||||||
*******************************************************************
 | 
					*******************************************************************
 | 
				
			||||||
 | 
					
 | 
				
			||||||
/*
 | 
					/*
 | 
				
			||||||
 * Example of using huge page memory in a user application using Sys V shared
 | 
					 * hugepage-shm:  see Documentation/vm/hugepage-shm.c
 | 
				
			||||||
 * memory system calls.  In this example the app is requesting 256MB of
 | 
					 | 
				
			||||||
 * memory that is backed by huge pages.  The application uses the flag
 | 
					 | 
				
			||||||
 * SHM_HUGETLB in the shmget system call to inform the kernel that it is
 | 
					 | 
				
			||||||
 * requesting huge pages.
 | 
					 | 
				
			||||||
 *
 | 
					 | 
				
			||||||
 * For the ia64 architecture, the Linux kernel reserves Region number 4 for
 | 
					 | 
				
			||||||
 * huge pages.  That means that if one requires a fixed address, a huge page
 | 
					 | 
				
			||||||
 * aligned address starting with 0x800000... will be required.  If a fixed
 | 
					 | 
				
			||||||
 * address is not required, the kernel will select an address in the proper
 | 
					 | 
				
			||||||
 * range.
 | 
					 | 
				
			||||||
 * Other architectures, such as ppc64, i386 or x86_64 are not so constrained.
 | 
					 | 
				
			||||||
 *
 | 
					 | 
				
			||||||
 * Note: The default shared memory limit is quite low on many kernels,
 | 
					 | 
				
			||||||
 * you may need to increase it via:
 | 
					 | 
				
			||||||
 *
 | 
					 | 
				
			||||||
 * echo 268435456 > /proc/sys/kernel/shmmax
 | 
					 | 
				
			||||||
 *
 | 
					 | 
				
			||||||
 * This will increase the maximum size per shared memory segment to 256MB.
 | 
					 | 
				
			||||||
 * The other limit that you will hit eventually is shmall which is the
 | 
					 | 
				
			||||||
 * total amount of shared memory in pages. To set it to 16GB on a system
 | 
					 | 
				
			||||||
 * with a 4kB pagesize do:
 | 
					 | 
				
			||||||
 *
 | 
					 | 
				
			||||||
 * echo 4194304 > /proc/sys/kernel/shmall
 | 
					 | 
				
			||||||
 */
 | 
					 */
 | 
				
			||||||
#include <stdlib.h>
 | 
					 | 
				
			||||||
#include <stdio.h>
 | 
					 | 
				
			||||||
#include <sys/types.h>
 | 
					 | 
				
			||||||
#include <sys/ipc.h>
 | 
					 | 
				
			||||||
#include <sys/shm.h>
 | 
					 | 
				
			||||||
#include <sys/mman.h>
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
#ifndef SHM_HUGETLB
 | 
					 | 
				
			||||||
#define SHM_HUGETLB 04000
 | 
					 | 
				
			||||||
#endif
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
#define LENGTH (256UL*1024*1024)
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
#define dprintf(x)  printf(x)
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
#define ADDR (void *)(0x0UL)	/* let kernel choose address */
 | 
					 | 
				
			||||||
#define SHMAT_FLAGS (0)
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
int main(void)
 | 
					 | 
				
			||||||
{
 | 
					 | 
				
			||||||
	int shmid;
 | 
					 | 
				
			||||||
	unsigned long i;
 | 
					 | 
				
			||||||
	char *shmaddr;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
	if ((shmid = shmget(2, LENGTH,
 | 
					 | 
				
			||||||
			    SHM_HUGETLB | IPC_CREAT | SHM_R | SHM_W)) < 0) {
 | 
					 | 
				
			||||||
		perror("shmget");
 | 
					 | 
				
			||||||
		exit(1);
 | 
					 | 
				
			||||||
	}
 | 
					 | 
				
			||||||
	printf("shmid: 0x%x\n", shmid);
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
	shmaddr = shmat(shmid, ADDR, SHMAT_FLAGS);
 | 
					 | 
				
			||||||
	if (shmaddr == (char *)-1) {
 | 
					 | 
				
			||||||
		perror("Shared memory attach failure");
 | 
					 | 
				
			||||||
		shmctl(shmid, IPC_RMID, NULL);
 | 
					 | 
				
			||||||
		exit(2);
 | 
					 | 
				
			||||||
	}
 | 
					 | 
				
			||||||
	printf("shmaddr: %p\n", shmaddr);
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
	dprintf("Starting the writes:\n");
 | 
					 | 
				
			||||||
	for (i = 0; i < LENGTH; i++) {
 | 
					 | 
				
			||||||
		shmaddr[i] = (char)(i);
 | 
					 | 
				
			||||||
		if (!(i % (1024 * 1024)))
 | 
					 | 
				
			||||||
			dprintf(".");
 | 
					 | 
				
			||||||
	}
 | 
					 | 
				
			||||||
	dprintf("\n");
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
	dprintf("Starting the Check...");
 | 
					 | 
				
			||||||
	for (i = 0; i < LENGTH; i++)
 | 
					 | 
				
			||||||
		if (shmaddr[i] != (char)i)
 | 
					 | 
				
			||||||
			printf("\nIndex %lu mismatched\n", i);
 | 
					 | 
				
			||||||
	dprintf("Done.\n");
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
	if (shmdt((const void *)shmaddr) != 0) {
 | 
					 | 
				
			||||||
		perror("Detach failure");
 | 
					 | 
				
			||||||
		shmctl(shmid, IPC_RMID, NULL);
 | 
					 | 
				
			||||||
		exit(3);
 | 
					 | 
				
			||||||
	}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
	shmctl(shmid, IPC_RMID, NULL);
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
	return 0;
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
*******************************************************************
 | 
					*******************************************************************
 | 
				
			||||||
 | 
					
 | 
				
			||||||
/*
 | 
					/*
 | 
				
			||||||
 * Example of using huge page memory in a user application using the mmap
 | 
					 * hugepage-mmap:  see Documentation/vm/hugepage-mmap.c
 | 
				
			||||||
 * system call.  Before running this application, make sure that the
 | 
					 | 
				
			||||||
 * administrator has mounted the hugetlbfs filesystem (on some directory
 | 
					 | 
				
			||||||
 * like /mnt) using the command mount -t hugetlbfs nodev /mnt. In this
 | 
					 | 
				
			||||||
 * example, the app is requesting memory of size 256MB that is backed by
 | 
					 | 
				
			||||||
 * huge pages.
 | 
					 | 
				
			||||||
 *
 | 
					 | 
				
			||||||
 * For the ia64 architecture, the Linux kernel reserves Region number 4 for
 | 
					 | 
				
			||||||
 * huge pages.  That means that if one requires a fixed address, a huge page
 | 
					 | 
				
			||||||
 * aligned address starting with 0x800000... will be required.  If a fixed
 | 
					 | 
				
			||||||
 * address is not required, the kernel will select an address in the proper
 | 
					 | 
				
			||||||
 * range.
 | 
					 | 
				
			||||||
 * Other architectures, such as ppc64, i386 or x86_64 are not so constrained.
 | 
					 | 
				
			||||||
 */
 | 
					 */
 | 
				
			||||||
#include <stdlib.h>
 | 
					 | 
				
			||||||
#include <stdio.h>
 | 
					 | 
				
			||||||
#include <unistd.h>
 | 
					 | 
				
			||||||
#include <sys/mman.h>
 | 
					 | 
				
			||||||
#include <fcntl.h>
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
#define FILE_NAME "/mnt/hugepagefile"
 | 
					 | 
				
			||||||
#define LENGTH (256UL*1024*1024)
 | 
					 | 
				
			||||||
#define PROTECTION (PROT_READ | PROT_WRITE)
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
#define ADDR (void *)(0x0UL)	/* let kernel choose address */
 | 
					 | 
				
			||||||
#define FLAGS (MAP_SHARED)
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
void check_bytes(char *addr)
 | 
					 | 
				
			||||||
{
 | 
					 | 
				
			||||||
	printf("First hex is %x\n", *((unsigned int *)addr));
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
void write_bytes(char *addr)
 | 
					 | 
				
			||||||
{
 | 
					 | 
				
			||||||
	unsigned long i;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
	for (i = 0; i < LENGTH; i++)
 | 
					 | 
				
			||||||
		*(addr + i) = (char)i;
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
void read_bytes(char *addr)
 | 
					 | 
				
			||||||
{
 | 
					 | 
				
			||||||
	unsigned long i;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
	check_bytes(addr);
 | 
					 | 
				
			||||||
	for (i = 0; i < LENGTH; i++)
 | 
					 | 
				
			||||||
		if (*(addr + i) != (char)i) {
 | 
					 | 
				
			||||||
			printf("Mismatch at %lu\n", i);
 | 
					 | 
				
			||||||
			break;
 | 
					 | 
				
			||||||
		}
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
int main(void)
 | 
					 | 
				
			||||||
{
 | 
					 | 
				
			||||||
	void *addr;
 | 
					 | 
				
			||||||
	int fd;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
	fd = open(FILE_NAME, O_CREAT | O_RDWR, 0755);
 | 
					 | 
				
			||||||
	if (fd < 0) {
 | 
					 | 
				
			||||||
		perror("Open failed");
 | 
					 | 
				
			||||||
		exit(1);
 | 
					 | 
				
			||||||
	}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
	addr = mmap(ADDR, LENGTH, PROTECTION, FLAGS, fd, 0);
 | 
					 | 
				
			||||||
	if (addr == MAP_FAILED) {
 | 
					 | 
				
			||||||
		perror("mmap");
 | 
					 | 
				
			||||||
		unlink(FILE_NAME);
 | 
					 | 
				
			||||||
		exit(1);
 | 
					 | 
				
			||||||
	}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
	printf("Returned address is %p\n", addr);
 | 
					 | 
				
			||||||
	check_bytes(addr);
 | 
					 | 
				
			||||||
	write_bytes(addr);
 | 
					 | 
				
			||||||
	read_bytes(addr);
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
	munmap(addr, LENGTH);
 | 
					 | 
				
			||||||
	close(fd);
 | 
					 | 
				
			||||||
	unlink(FILE_NAME);
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
	return 0;
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -31,12 +31,12 @@
 | 
				
			||||||
#define FLAGS (MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB)
 | 
					#define FLAGS (MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB)
 | 
				
			||||||
#endif
 | 
					#endif
 | 
				
			||||||
 | 
					
 | 
				
			||||||
void check_bytes(char *addr)
 | 
					static void check_bytes(char *addr)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
	printf("First hex is %x\n", *((unsigned int *)addr));
 | 
						printf("First hex is %x\n", *((unsigned int *)addr));
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
void write_bytes(char *addr)
 | 
					static void write_bytes(char *addr)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
	unsigned long i;
 | 
						unsigned long i;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					@ -44,7 +44,7 @@ void write_bytes(char *addr)
 | 
				
			||||||
		*(addr + i) = (char)i;
 | 
							*(addr + i) = (char)i;
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
void read_bytes(char *addr)
 | 
					static void read_bytes(char *addr)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
	unsigned long i;
 | 
						unsigned long i;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -41,6 +41,7 @@ Possible debug options are
 | 
				
			||||||
	P		Poisoning (object and padding)
 | 
						P		Poisoning (object and padding)
 | 
				
			||||||
	U		User tracking (free and alloc)
 | 
						U		User tracking (free and alloc)
 | 
				
			||||||
	T		Trace (please only use on single slabs)
 | 
						T		Trace (please only use on single slabs)
 | 
				
			||||||
 | 
						A		Toggle failslab filter mark for the cache
 | 
				
			||||||
	O		Switch debugging off for caches that would have
 | 
						O		Switch debugging off for caches that would have
 | 
				
			||||||
			caused higher minimum slab orders
 | 
								caused higher minimum slab orders
 | 
				
			||||||
	-		Switch all debugging off (useful if the kernel is
 | 
						-		Switch all debugging off (useful if the kernel is
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -63,9 +63,9 @@ way to perform a busy wait is:
 | 
				
			||||||
        cpu_relax();
 | 
					        cpu_relax();
 | 
				
			||||||
 | 
					
 | 
				
			||||||
The cpu_relax() call can lower CPU power consumption or yield to a
 | 
					The cpu_relax() call can lower CPU power consumption or yield to a
 | 
				
			||||||
hyperthreaded twin processor; it also happens to serve as a memory barrier,
 | 
					hyperthreaded twin processor; it also happens to serve as a compiler
 | 
				
			||||||
so, once again, volatile is unnecessary.  Of course, busy-waiting is
 | 
					barrier, so, once again, volatile is unnecessary.  Of course, busy-
 | 
				
			||||||
generally an anti-social act to begin with.
 | 
					waiting is generally an anti-social act to begin with.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
There are still a few rare situations where volatile makes sense in the
 | 
					There are still a few rare situations where volatile makes sense in the
 | 
				
			||||||
kernel:
 | 
					kernel:
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -1,95 +0,0 @@
 | 
				
			||||||
Running Linux on the Voyager Architecture
 | 
					 | 
				
			||||||
=========================================
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
For full details and current project status, see
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
http://www.hansenpartnership.com/voyager
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
The voyager architecture was designed by NCR in the mid 80s to be a
 | 
					 | 
				
			||||||
fully SMP capable RAS computing architecture built around intel's 486
 | 
					 | 
				
			||||||
chip set.  The voyager came in three levels of architectural
 | 
					 | 
				
			||||||
sophistication: 3,4 and 5 --- 1 and 2 never made it out of prototype.
 | 
					 | 
				
			||||||
The linux patches support only the Level 5 voyager architecture (any
 | 
					 | 
				
			||||||
machine class 3435 and above).
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
The Voyager Architecture
 | 
					 | 
				
			||||||
------------------------
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
Voyager machines consist of a Baseboard with a 386 diagnostic
 | 
					 | 
				
			||||||
processor, a Power Supply Interface (PSI) a Primary and possibly
 | 
					 | 
				
			||||||
Secondary Microchannel bus and between 2 and 20 voyager slots.  The
 | 
					 | 
				
			||||||
voyager slots can be populated with memory and cpu cards (up to 4GB
 | 
					 | 
				
			||||||
memory and from 1 486 to 32 Pentium Pro processors).  Internally, the
 | 
					 | 
				
			||||||
voyager has a dual arbitrated system bus and a configuration and test
 | 
					 | 
				
			||||||
bus (CAT).  The voyager bus speed is 40MHz.  Therefore (since all
 | 
					 | 
				
			||||||
voyager cards are dual ported for each system bus) the maximum
 | 
					 | 
				
			||||||
transfer rate is 320Mb/s but only if you have your slot configuration
 | 
					 | 
				
			||||||
tuned (only memory cards can communicate with both busses at once, CPU
 | 
					 | 
				
			||||||
cards utilise them one at a time).
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
Voyager SMP
 | 
					 | 
				
			||||||
-----------
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
Since voyager was the first intel based SMP system, it is slightly
 | 
					 | 
				
			||||||
more primitive than the Intel IO-APIC approach to SMP.  Voyager allows
 | 
					 | 
				
			||||||
arbitrary interrupt routing (including processor affinity routing) of
 | 
					 | 
				
			||||||
all 16 PC type interrupts.  However it does this by using a modified
 | 
					 | 
				
			||||||
5259 master/slave chip set instead of an APIC bus.  Additionally,
 | 
					 | 
				
			||||||
voyager supports Cross Processor Interrupts (CPI) equivalent to the
 | 
					 | 
				
			||||||
APIC IPIs.  There are two routed voyager interrupt lines provided to
 | 
					 | 
				
			||||||
each slot.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
Processor Cards
 | 
					 | 
				
			||||||
---------------
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
These come in single, dyadic and quad configurations (the quads are
 | 
					 | 
				
			||||||
problematic--see later).  The maximum configuration is 8 quad cards
 | 
					 | 
				
			||||||
for 32 way SMP.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
Quad Processors
 | 
					 | 
				
			||||||
---------------
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
Because voyager only supplies two interrupt lines to each Processor
 | 
					 | 
				
			||||||
card, the Quad processors have to be configured (and Bootstrapped) in
 | 
					 | 
				
			||||||
as a pair of Master/Slave processors.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
In fact, most Quad cards only accept one VIC interrupt line, so they
 | 
					 | 
				
			||||||
have one interrupt handling processor (called the VIC extended
 | 
					 | 
				
			||||||
processor) and three non-interrupt handling processors.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
Current Status
 | 
					 | 
				
			||||||
--------------
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
The System will boot on Mono, Dyad and Quad cards.  There was
 | 
					 | 
				
			||||||
originally a Quad boot problem which has been fixed by proper gdt
 | 
					 | 
				
			||||||
alignment in the initial boot loader.  If you still cannot get your
 | 
					 | 
				
			||||||
voyager system to boot, email me at:
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
<J.E.J.Bottomley@HansenPartnership.com>
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
The Quad cards now support using the separate Quad CPI vectors instead
 | 
					 | 
				
			||||||
of going through the VIC mailbox system.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
The Level 4 architecture (3430 and 3360 Machines) should also work
 | 
					 | 
				
			||||||
fine.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
Dump Switch
 | 
					 | 
				
			||||||
-----------
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
The voyager dump switch sends out a broadcast NMI which the voyager
 | 
					 | 
				
			||||||
code intercepts and does a task dump.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
Power Switch
 | 
					 | 
				
			||||||
------------
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
The front panel power switch is intercepted by the kernel and should
 | 
					 | 
				
			||||||
cause a system shutdown and power off.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
A Note About Mixed CPU Systems
 | 
					 | 
				
			||||||
------------------------------
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
Linux isn't designed to handle mixed CPU systems very well.  In order
 | 
					 | 
				
			||||||
to get everything going you *must* make sure that your lowest
 | 
					 | 
				
			||||||
capability CPU is used for booting.  Also, mixing CPU classes
 | 
					 | 
				
			||||||
(e.g. 486 and 586) is really not going to work very well at all.
 | 
					 | 
				
			||||||
							
								
								
									
										157
									
								
								MAINTAINERS
									
										
									
									
									
								
							
							
						
						
									
										157
									
								
								MAINTAINERS
									
										
									
									
									
								
							| 
						 | 
					@ -71,6 +71,7 @@ Descriptions of section entries:
 | 
				
			||||||
	M: Mail patches to: FullName <address@domain>
 | 
						M: Mail patches to: FullName <address@domain>
 | 
				
			||||||
	L: Mailing list that is relevant to this area
 | 
						L: Mailing list that is relevant to this area
 | 
				
			||||||
	W: Web-page with status/info
 | 
						W: Web-page with status/info
 | 
				
			||||||
 | 
						Q: Patchwork web based patch tracking system site
 | 
				
			||||||
	T: SCM tree type and location.  Type is one of: git, hg, quilt, stgit.
 | 
						T: SCM tree type and location.  Type is one of: git, hg, quilt, stgit.
 | 
				
			||||||
	S: Status, one of the following:
 | 
						S: Status, one of the following:
 | 
				
			||||||
	   Supported:	Someone is actually paid to look after this.
 | 
						   Supported:	Someone is actually paid to look after this.
 | 
				
			||||||
| 
						 | 
					@ -182,6 +183,7 @@ M:	Ron Minnich <rminnich@sandia.gov>
 | 
				
			||||||
M:	Latchesar Ionkov <lucho@ionkov.net>
 | 
					M:	Latchesar Ionkov <lucho@ionkov.net>
 | 
				
			||||||
L:	v9fs-developer@lists.sourceforge.net
 | 
					L:	v9fs-developer@lists.sourceforge.net
 | 
				
			||||||
W:	http://swik.net/v9fs
 | 
					W:	http://swik.net/v9fs
 | 
				
			||||||
 | 
					Q:	http://patchwork.kernel.org/project/v9fs-devel/list/
 | 
				
			||||||
T:	git git://git.kernel.org/pub/scm/linux/kernel/git/ericvh/v9fs.git
 | 
					T:	git git://git.kernel.org/pub/scm/linux/kernel/git/ericvh/v9fs.git
 | 
				
			||||||
S:	Maintained
 | 
					S:	Maintained
 | 
				
			||||||
F:	Documentation/filesystems/9p.txt
 | 
					F:	Documentation/filesystems/9p.txt
 | 
				
			||||||
| 
						 | 
					@ -238,6 +240,7 @@ ACPI
 | 
				
			||||||
M:	Len Brown <lenb@kernel.org>
 | 
					M:	Len Brown <lenb@kernel.org>
 | 
				
			||||||
L:	linux-acpi@vger.kernel.org
 | 
					L:	linux-acpi@vger.kernel.org
 | 
				
			||||||
W:	http://www.lesswatts.org/projects/acpi/
 | 
					W:	http://www.lesswatts.org/projects/acpi/
 | 
				
			||||||
 | 
					Q:	http://patchwork.kernel.org/project/linux-acpi/list/
 | 
				
			||||||
T:	git git://git.kernel.org/pub/scm/linux/kernel/git/lenb/linux-acpi-2.6.git
 | 
					T:	git git://git.kernel.org/pub/scm/linux/kernel/git/lenb/linux-acpi-2.6.git
 | 
				
			||||||
S:	Supported
 | 
					S:	Supported
 | 
				
			||||||
F:	drivers/acpi/
 | 
					F:	drivers/acpi/
 | 
				
			||||||
| 
						 | 
					@ -428,7 +431,6 @@ P:	Jordan Crouse
 | 
				
			||||||
L:	linux-geode@lists.infradead.org (moderated for non-subscribers)
 | 
					L:	linux-geode@lists.infradead.org (moderated for non-subscribers)
 | 
				
			||||||
W:	http://www.amd.com/us-en/ConnectivitySolutions/TechnicalResources/0,,50_2334_2452_11363,00.html
 | 
					W:	http://www.amd.com/us-en/ConnectivitySolutions/TechnicalResources/0,,50_2334_2452_11363,00.html
 | 
				
			||||||
S:	Supported
 | 
					S:	Supported
 | 
				
			||||||
F:	arch/x86/kernel/geode_32.c
 | 
					 | 
				
			||||||
F:	drivers/char/hw_random/geode-rng.c
 | 
					F:	drivers/char/hw_random/geode-rng.c
 | 
				
			||||||
F:	drivers/crypto/geode*
 | 
					F:	drivers/crypto/geode*
 | 
				
			||||||
F:	drivers/video/geode/
 | 
					F:	drivers/video/geode/
 | 
				
			||||||
| 
						 | 
					@ -664,6 +666,12 @@ T:	git://git.pengutronix.de/git/imx/linux-2.6.git
 | 
				
			||||||
F:	arch/arm/mach-mx*/
 | 
					F:	arch/arm/mach-mx*/
 | 
				
			||||||
F:	arch/arm/plat-mxc/
 | 
					F:	arch/arm/plat-mxc/
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					ARM/FREESCALE IMX51
 | 
				
			||||||
 | 
					M:	Amit Kucheria <amit.kucheria@canonical.com>
 | 
				
			||||||
 | 
					L:	linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 | 
				
			||||||
 | 
					S:	Maintained
 | 
				
			||||||
 | 
					F:	arch/arm/mach-mx5/
 | 
				
			||||||
 | 
					
 | 
				
			||||||
ARM/GLOMATION GESBC9312SX MACHINE SUPPORT
 | 
					ARM/GLOMATION GESBC9312SX MACHINE SUPPORT
 | 
				
			||||||
M:	Lennert Buytenhek <kernel@wantstofly.org>
 | 
					M:	Lennert Buytenhek <kernel@wantstofly.org>
 | 
				
			||||||
L:	linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 | 
					L:	linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 | 
				
			||||||
| 
						 | 
					@ -937,6 +945,16 @@ W:	http://www.fluff.org/ben/linux/
 | 
				
			||||||
S:	Maintained
 | 
					S:	Maintained
 | 
				
			||||||
F:	arch/arm/mach-s3c6410/
 | 
					F:	arch/arm/mach-s3c6410/
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					ARM/SHMOBILE ARM ARCHITECTURE
 | 
				
			||||||
 | 
					M:	Paul Mundt <lethal@linux-sh.org>
 | 
				
			||||||
 | 
					M:	Magnus Damm <magnus.damm@gmail.com>
 | 
				
			||||||
 | 
					L:	linux-sh@vger.kernel.org
 | 
				
			||||||
 | 
					T:	git git://git.kernel.org/pub/scm/linux/kernel/git/lethal/genesis-2.6.git
 | 
				
			||||||
 | 
					W:	http://oss.renesas.com
 | 
				
			||||||
 | 
					S:	Supported
 | 
				
			||||||
 | 
					F:	arch/arm/mach-shmobile/
 | 
				
			||||||
 | 
					F:	drivers/sh/
 | 
				
			||||||
 | 
					
 | 
				
			||||||
ARM/TECHNOLOGIC SYSTEMS TS7250 MACHINE SUPPORT
 | 
					ARM/TECHNOLOGIC SYSTEMS TS7250 MACHINE SUPPORT
 | 
				
			||||||
M:	Lennert Buytenhek <kernel@wantstofly.org>
 | 
					M:	Lennert Buytenhek <kernel@wantstofly.org>
 | 
				
			||||||
L:	linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 | 
					L:	linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 | 
				
			||||||
| 
						 | 
					@ -966,6 +984,13 @@ W:	http://www.arm.linux.org.uk/
 | 
				
			||||||
S:	Maintained
 | 
					S:	Maintained
 | 
				
			||||||
F:	arch/arm/vfp/
 | 
					F:	arch/arm/vfp/
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					ASC7621 HARDWARE MONITOR DRIVER
 | 
				
			||||||
 | 
					M:	George Joseph <george.joseph@fairview5.com>
 | 
				
			||||||
 | 
					L:	lm-sensors@lm-sensors.org
 | 
				
			||||||
 | 
					S:	Maintained
 | 
				
			||||||
 | 
					F:	Documentation/hwmon/asc7621
 | 
				
			||||||
 | 
					F:	drivers/hwmon/asc7621.c
 | 
				
			||||||
 | 
					
 | 
				
			||||||
ASUS ACPI EXTRAS DRIVER
 | 
					ASUS ACPI EXTRAS DRIVER
 | 
				
			||||||
M:	Corentin Chary <corentincj@iksaif.net>
 | 
					M:	Corentin Chary <corentincj@iksaif.net>
 | 
				
			||||||
M:	Karol Kozimor <sziwan@users.sourceforge.net>
 | 
					M:	Karol Kozimor <sziwan@users.sourceforge.net>
 | 
				
			||||||
| 
						 | 
					@ -1226,6 +1251,13 @@ W:	http://blackfin.uclinux.org
 | 
				
			||||||
S:	Supported
 | 
					S:	Supported
 | 
				
			||||||
F:	drivers/rtc/rtc-bfin.c
 | 
					F:	drivers/rtc/rtc-bfin.c
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					BLACKFIN SDH DRIVER
 | 
				
			||||||
 | 
					M:	Cliff Cai <cliff.cai@analog.com>
 | 
				
			||||||
 | 
					L:	uclinux-dist-devel@blackfin.uclinux.org
 | 
				
			||||||
 | 
					W:	http://blackfin.uclinux.org
 | 
				
			||||||
 | 
					S:	Supported
 | 
				
			||||||
 | 
					F:	drivers/mmc/host/bfin_sdh.c
 | 
				
			||||||
 | 
					
 | 
				
			||||||
BLACKFIN SERIAL DRIVER
 | 
					BLACKFIN SERIAL DRIVER
 | 
				
			||||||
M:	Sonic Zhang <sonic.zhang@analog.com>
 | 
					M:	Sonic Zhang <sonic.zhang@analog.com>
 | 
				
			||||||
L:	uclinux-dist-devel@blackfin.uclinux.org
 | 
					L:	uclinux-dist-devel@blackfin.uclinux.org
 | 
				
			||||||
| 
						 | 
					@ -1332,6 +1364,7 @@ BTRFS FILE SYSTEM
 | 
				
			||||||
M:	Chris Mason <chris.mason@oracle.com>
 | 
					M:	Chris Mason <chris.mason@oracle.com>
 | 
				
			||||||
L:	linux-btrfs@vger.kernel.org
 | 
					L:	linux-btrfs@vger.kernel.org
 | 
				
			||||||
W:	http://btrfs.wiki.kernel.org/
 | 
					W:	http://btrfs.wiki.kernel.org/
 | 
				
			||||||
 | 
					Q:	http://patchwork.kernel.org/project/linux-btrfs/list/
 | 
				
			||||||
T:	git git://git.kernel.org/pub/scm/linux/kernel/git/mason/btrfs-unstable.git
 | 
					T:	git git://git.kernel.org/pub/scm/linux/kernel/git/mason/btrfs-unstable.git
 | 
				
			||||||
S:	Maintained
 | 
					S:	Maintained
 | 
				
			||||||
F:	Documentation/filesystems/btrfs.txt
 | 
					F:	Documentation/filesystems/btrfs.txt
 | 
				
			||||||
| 
						 | 
					@ -1372,20 +1405,30 @@ F:	arch/x86/include/asm/calgary.h
 | 
				
			||||||
F:	arch/x86/include/asm/tce.h
 | 
					F:	arch/x86/include/asm/tce.h
 | 
				
			||||||
 | 
					
 | 
				
			||||||
CAN NETWORK LAYER
 | 
					CAN NETWORK LAYER
 | 
				
			||||||
M:	Urs Thuermann <urs.thuermann@volkswagen.de>
 | 
					M:	Oliver Hartkopp <socketcan@hartkopp.net>
 | 
				
			||||||
M:	Oliver Hartkopp <oliver.hartkopp@volkswagen.de>
 | 
					M:	Oliver Hartkopp <oliver.hartkopp@volkswagen.de>
 | 
				
			||||||
L:	socketcan-core@lists.berlios.de (subscribers-only)
 | 
					M:	Urs Thuermann <urs.thuermann@volkswagen.de>
 | 
				
			||||||
 | 
					L:	socketcan-core@lists.berlios.de
 | 
				
			||||||
 | 
					L:	netdev@vger.kernel.org
 | 
				
			||||||
W:	http://developer.berlios.de/projects/socketcan/
 | 
					W:	http://developer.berlios.de/projects/socketcan/
 | 
				
			||||||
S:	Maintained
 | 
					S:	Maintained
 | 
				
			||||||
F:	drivers/net/can/
 | 
					F:	net/can/
 | 
				
			||||||
F:	include/linux/can/
 | 
					 | 
				
			||||||
F:	include/linux/can.h
 | 
					F:	include/linux/can.h
 | 
				
			||||||
 | 
					F:	include/linux/can/core.h
 | 
				
			||||||
 | 
					F:	include/linux/can/bcm.h
 | 
				
			||||||
 | 
					F:	include/linux/can/raw.h
 | 
				
			||||||
 | 
					
 | 
				
			||||||
CAN NETWORK DRIVERS
 | 
					CAN NETWORK DRIVERS
 | 
				
			||||||
M:	Wolfgang Grandegger <wg@grandegger.com>
 | 
					M:	Wolfgang Grandegger <wg@grandegger.com>
 | 
				
			||||||
L:	socketcan-core@lists.berlios.de (subscribers-only)
 | 
					L:	socketcan-core@lists.berlios.de
 | 
				
			||||||
 | 
					L:	netdev@vger.kernel.org
 | 
				
			||||||
W:	http://developer.berlios.de/projects/socketcan/
 | 
					W:	http://developer.berlios.de/projects/socketcan/
 | 
				
			||||||
S:	Maintained
 | 
					S:	Maintained
 | 
				
			||||||
 | 
					F:	drivers/net/can/
 | 
				
			||||||
 | 
					F:	include/linux/can/dev.h
 | 
				
			||||||
 | 
					F:	include/linux/can/error.h
 | 
				
			||||||
 | 
					F:	include/linux/can/netlink.h
 | 
				
			||||||
 | 
					F:	include/linux/can/platform/
 | 
				
			||||||
 | 
					
 | 
				
			||||||
CELL BROADBAND ENGINE ARCHITECTURE
 | 
					CELL BROADBAND ENGINE ARCHITECTURE
 | 
				
			||||||
M:	Arnd Bergmann <arnd@arndb.de>
 | 
					M:	Arnd Bergmann <arnd@arndb.de>
 | 
				
			||||||
| 
						 | 
					@ -1398,6 +1441,15 @@ F:	arch/powerpc/include/asm/spu*.h
 | 
				
			||||||
F:	arch/powerpc/oprofile/*cell*
 | 
					F:	arch/powerpc/oprofile/*cell*
 | 
				
			||||||
F:	arch/powerpc/platforms/cell/
 | 
					F:	arch/powerpc/platforms/cell/
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					CEPH DISTRIBUTED FILE SYSTEM CLIENT
 | 
				
			||||||
 | 
					M:	Sage Weil <sage@newdream.net>
 | 
				
			||||||
 | 
					L:	ceph-devel@vger.kernel.org
 | 
				
			||||||
 | 
					W:	http://ceph.newdream.net/
 | 
				
			||||||
 | 
					T:	git git://git.kernel.org/pub/scm/linux/kernel/git/sage/ceph-client.git
 | 
				
			||||||
 | 
					S:	Supported
 | 
				
			||||||
 | 
					F:	Documentation/filesystems/ceph.txt
 | 
				
			||||||
 | 
					F:	fs/ceph
 | 
				
			||||||
 | 
					
 | 
				
			||||||
CERTIFIED WIRELESS USB (WUSB) SUBSYSTEM:
 | 
					CERTIFIED WIRELESS USB (WUSB) SUBSYSTEM:
 | 
				
			||||||
M:	David Vrabel <david.vrabel@csr.com>
 | 
					M:	David Vrabel <david.vrabel@csr.com>
 | 
				
			||||||
L:	linux-usb@vger.kernel.org
 | 
					L:	linux-usb@vger.kernel.org
 | 
				
			||||||
| 
						 | 
					@ -1496,6 +1548,7 @@ M:	Steve French <sfrench@samba.org>
 | 
				
			||||||
L:	linux-cifs-client@lists.samba.org (moderated for non-subscribers)
 | 
					L:	linux-cifs-client@lists.samba.org (moderated for non-subscribers)
 | 
				
			||||||
L:	samba-technical@lists.samba.org (moderated for non-subscribers)
 | 
					L:	samba-technical@lists.samba.org (moderated for non-subscribers)
 | 
				
			||||||
W:	http://linux-cifs.samba.org/
 | 
					W:	http://linux-cifs.samba.org/
 | 
				
			||||||
 | 
					Q:	http://patchwork.ozlabs.org/project/linux-cifs-client/list/
 | 
				
			||||||
T:	git git://git.kernel.org/pub/scm/linux/kernel/git/sfrench/cifs-2.6.git
 | 
					T:	git git://git.kernel.org/pub/scm/linux/kernel/git/sfrench/cifs-2.6.git
 | 
				
			||||||
S:	Supported
 | 
					S:	Supported
 | 
				
			||||||
F:	Documentation/filesystems/cifs.txt
 | 
					F:	Documentation/filesystems/cifs.txt
 | 
				
			||||||
| 
						 | 
					@ -1782,6 +1835,7 @@ DEVICE-MAPPER  (LVM)
 | 
				
			||||||
P:	Alasdair Kergon
 | 
					P:	Alasdair Kergon
 | 
				
			||||||
L:	dm-devel@redhat.com
 | 
					L:	dm-devel@redhat.com
 | 
				
			||||||
W:	http://sources.redhat.com/dm
 | 
					W:	http://sources.redhat.com/dm
 | 
				
			||||||
 | 
					Q:	http://patchwork.kernel.org/project/dm-devel/list/
 | 
				
			||||||
S:	Maintained
 | 
					S:	Maintained
 | 
				
			||||||
F:	Documentation/device-mapper/
 | 
					F:	Documentation/device-mapper/
 | 
				
			||||||
F:	drivers/md/dm*
 | 
					F:	drivers/md/dm*
 | 
				
			||||||
| 
						 | 
					@ -2095,6 +2149,7 @@ F:	drivers/net/eexpress.*
 | 
				
			||||||
ETHERNET BRIDGE
 | 
					ETHERNET BRIDGE
 | 
				
			||||||
M:	Stephen Hemminger <shemminger@linux-foundation.org>
 | 
					M:	Stephen Hemminger <shemminger@linux-foundation.org>
 | 
				
			||||||
L:	bridge@lists.linux-foundation.org
 | 
					L:	bridge@lists.linux-foundation.org
 | 
				
			||||||
 | 
					L:	netdev@vger.kernel.org
 | 
				
			||||||
W:	http://www.linux-foundation.org/en/Net:Bridge
 | 
					W:	http://www.linux-foundation.org/en/Net:Bridge
 | 
				
			||||||
S:	Maintained
 | 
					S:	Maintained
 | 
				
			||||||
F:	include/linux/netfilter_bridge/
 | 
					F:	include/linux/netfilter_bridge/
 | 
				
			||||||
| 
						 | 
					@ -2126,6 +2181,7 @@ M:	"Theodore Ts'o" <tytso@mit.edu>
 | 
				
			||||||
M:	Andreas Dilger <adilger@sun.com>
 | 
					M:	Andreas Dilger <adilger@sun.com>
 | 
				
			||||||
L:	linux-ext4@vger.kernel.org
 | 
					L:	linux-ext4@vger.kernel.org
 | 
				
			||||||
W:	http://ext4.wiki.kernel.org
 | 
					W:	http://ext4.wiki.kernel.org
 | 
				
			||||||
 | 
					Q:	http://patchwork.ozlabs.org/project/linux-ext4/list/
 | 
				
			||||||
S:	Maintained
 | 
					S:	Maintained
 | 
				
			||||||
F:	Documentation/filesystems/ext4.txt
 | 
					F:	Documentation/filesystems/ext4.txt
 | 
				
			||||||
F:	fs/ext4/
 | 
					F:	fs/ext4/
 | 
				
			||||||
| 
						 | 
					@ -2502,13 +2558,6 @@ L:	linux-parisc@vger.kernel.org
 | 
				
			||||||
S:	Maintained
 | 
					S:	Maintained
 | 
				
			||||||
F:	sound/parisc/harmony.*
 | 
					F:	sound/parisc/harmony.*
 | 
				
			||||||
 | 
					
 | 
				
			||||||
HAYES ESP SERIAL DRIVER
 | 
					 | 
				
			||||||
M:	"Andrew J. Robinson" <arobinso@nyx.net>
 | 
					 | 
				
			||||||
W:	http://www.nyx.net/~arobinso
 | 
					 | 
				
			||||||
S:	Maintained
 | 
					 | 
				
			||||||
F:	Documentation/serial/hayes-esp.txt
 | 
					 | 
				
			||||||
F:	drivers/char/esp.c
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
HEWLETT-PACKARD SMART2 RAID DRIVER
 | 
					HEWLETT-PACKARD SMART2 RAID DRIVER
 | 
				
			||||||
M:	Chirag Kantharia <chirag.kantharia@hp.com>
 | 
					M:	Chirag Kantharia <chirag.kantharia@hp.com>
 | 
				
			||||||
L:	iss_storagedev@hp.com
 | 
					L:	iss_storagedev@hp.com
 | 
				
			||||||
| 
						 | 
					@ -2717,6 +2766,7 @@ F:	drivers/scsi/ips.*
 | 
				
			||||||
IDE SUBSYSTEM
 | 
					IDE SUBSYSTEM
 | 
				
			||||||
M:	"David S. Miller" <davem@davemloft.net>
 | 
					M:	"David S. Miller" <davem@davemloft.net>
 | 
				
			||||||
L:	linux-ide@vger.kernel.org
 | 
					L:	linux-ide@vger.kernel.org
 | 
				
			||||||
 | 
					Q:	http://patchwork.ozlabs.org/project/linux-ide/list/
 | 
				
			||||||
T:	git git://git.kernel.org/pub/scm/linux/kernel/git/davem/ide-2.6.git
 | 
					T:	git git://git.kernel.org/pub/scm/linux/kernel/git/davem/ide-2.6.git
 | 
				
			||||||
S:	Maintained
 | 
					S:	Maintained
 | 
				
			||||||
F:	Documentation/ide/
 | 
					F:	Documentation/ide/
 | 
				
			||||||
| 
						 | 
					@ -2771,6 +2821,7 @@ M:	Sean Hefty <sean.hefty@intel.com>
 | 
				
			||||||
M:	Hal Rosenstock <hal.rosenstock@gmail.com>
 | 
					M:	Hal Rosenstock <hal.rosenstock@gmail.com>
 | 
				
			||||||
L:	linux-rdma@vger.kernel.org
 | 
					L:	linux-rdma@vger.kernel.org
 | 
				
			||||||
W:	http://www.openib.org/
 | 
					W:	http://www.openib.org/
 | 
				
			||||||
 | 
					Q:	http://patchwork.kernel.org/project/linux-rdma/list/
 | 
				
			||||||
T:	git git://git.kernel.org/pub/scm/linux/kernel/git/roland/infiniband.git
 | 
					T:	git git://git.kernel.org/pub/scm/linux/kernel/git/roland/infiniband.git
 | 
				
			||||||
S:	Supported
 | 
					S:	Supported
 | 
				
			||||||
F:	Documentation/infiniband/
 | 
					F:	Documentation/infiniband/
 | 
				
			||||||
| 
						 | 
					@ -2790,12 +2841,13 @@ INPUT (KEYBOARD, MOUSE, JOYSTICK, TOUCHSCREEN) DRIVERS
 | 
				
			||||||
M:	Dmitry Torokhov <dmitry.torokhov@gmail.com>
 | 
					M:	Dmitry Torokhov <dmitry.torokhov@gmail.com>
 | 
				
			||||||
M:	Dmitry Torokhov <dtor@mail.ru>
 | 
					M:	Dmitry Torokhov <dtor@mail.ru>
 | 
				
			||||||
L:	linux-input@vger.kernel.org
 | 
					L:	linux-input@vger.kernel.org
 | 
				
			||||||
 | 
					Q:	http://patchwork.kernel.org/project/linux-input/list/
 | 
				
			||||||
T:	git git://git.kernel.org/pub/scm/linux/kernel/git/dtor/input.git
 | 
					T:	git git://git.kernel.org/pub/scm/linux/kernel/git/dtor/input.git
 | 
				
			||||||
S:	Maintained
 | 
					S:	Maintained
 | 
				
			||||||
F:	drivers/input/
 | 
					F:	drivers/input/
 | 
				
			||||||
 | 
					
 | 
				
			||||||
INTEL FRAMEBUFFER DRIVER (excluding 810 and 815)
 | 
					INTEL FRAMEBUFFER DRIVER (excluding 810 and 815)
 | 
				
			||||||
M:	Sylvain Meyer <sylvain.meyer@worldonline.fr>
 | 
					M:	Maik Broemme <mbroemme@plusserver.de>
 | 
				
			||||||
L:	linux-fbdev@vger.kernel.org
 | 
					L:	linux-fbdev@vger.kernel.org
 | 
				
			||||||
S:	Maintained
 | 
					S:	Maintained
 | 
				
			||||||
F:	Documentation/fb/intelfb.txt
 | 
					F:	Documentation/fb/intelfb.txt
 | 
				
			||||||
| 
						 | 
					@ -3031,6 +3083,7 @@ F:	include/scsi/*iscsi*
 | 
				
			||||||
ISDN SUBSYSTEM
 | 
					ISDN SUBSYSTEM
 | 
				
			||||||
M:	Karsten Keil <isdn@linux-pingi.de>
 | 
					M:	Karsten Keil <isdn@linux-pingi.de>
 | 
				
			||||||
L:	isdn4linux@listserv.isdn4linux.de (subscribers-only)
 | 
					L:	isdn4linux@listserv.isdn4linux.de (subscribers-only)
 | 
				
			||||||
 | 
					L:	netdev@vger.kernel.org
 | 
				
			||||||
W:	http://www.isdn4linux.de
 | 
					W:	http://www.isdn4linux.de
 | 
				
			||||||
T:	git git://git.kernel.org/pub/scm/linux/kernel/git/kkeil/isdn-2.6.git
 | 
					T:	git git://git.kernel.org/pub/scm/linux/kernel/git/kkeil/isdn-2.6.git
 | 
				
			||||||
S:	Maintained
 | 
					S:	Maintained
 | 
				
			||||||
| 
						 | 
					@ -3046,6 +3099,13 @@ W:	http://www.melware.de
 | 
				
			||||||
S:	Maintained
 | 
					S:	Maintained
 | 
				
			||||||
F:	drivers/isdn/hardware/eicon/
 | 
					F:	drivers/isdn/hardware/eicon/
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					IT87 HARDWARE MONITORING DRIVER
 | 
				
			||||||
 | 
					M:	Jean Delvare <khali@linux-fr.org>
 | 
				
			||||||
 | 
					L:	lm-sensors@lm-sensors.org
 | 
				
			||||||
 | 
					S:	Maintained
 | 
				
			||||||
 | 
					F:	Documentation/hwmon/it87
 | 
				
			||||||
 | 
					F:	drivers/hwmon/it87.c
 | 
				
			||||||
 | 
					
 | 
				
			||||||
IVTV VIDEO4LINUX DRIVER
 | 
					IVTV VIDEO4LINUX DRIVER
 | 
				
			||||||
M:	Andy Walls <awalls@radix.net>
 | 
					M:	Andy Walls <awalls@radix.net>
 | 
				
			||||||
L:	ivtv-devel@ivtvdriver.org (moderated for non-subscribers)
 | 
					L:	ivtv-devel@ivtvdriver.org (moderated for non-subscribers)
 | 
				
			||||||
| 
						 | 
					@ -3099,6 +3159,7 @@ F:	drivers/hwmon/k8temp.c
 | 
				
			||||||
KCONFIG
 | 
					KCONFIG
 | 
				
			||||||
M:	Roman Zippel <zippel@linux-m68k.org>
 | 
					M:	Roman Zippel <zippel@linux-m68k.org>
 | 
				
			||||||
L:	linux-kbuild@vger.kernel.org
 | 
					L:	linux-kbuild@vger.kernel.org
 | 
				
			||||||
 | 
					Q:	http://patchwork.kernel.org/project/linux-kbuild/list/
 | 
				
			||||||
S:	Maintained
 | 
					S:	Maintained
 | 
				
			||||||
F:	Documentation/kbuild/kconfig-language.txt
 | 
					F:	Documentation/kbuild/kconfig-language.txt
 | 
				
			||||||
F:	scripts/kconfig/
 | 
					F:	scripts/kconfig/
 | 
				
			||||||
| 
						 | 
					@ -3173,7 +3234,7 @@ F:	arch/x86/include/asm/svm.h
 | 
				
			||||||
F:	arch/x86/kvm/svm.c
 | 
					F:	arch/x86/kvm/svm.c
 | 
				
			||||||
 | 
					
 | 
				
			||||||
KERNEL VIRTUAL MACHINE (KVM) FOR POWERPC
 | 
					KERNEL VIRTUAL MACHINE (KVM) FOR POWERPC
 | 
				
			||||||
M:	Hollis Blanchard <hollisb@us.ibm.com>
 | 
					M:	Alexander Graf <agraf@suse.de>
 | 
				
			||||||
L:	kvm-ppc@vger.kernel.org
 | 
					L:	kvm-ppc@vger.kernel.org
 | 
				
			||||||
W:	http://kvm.qumranet.com
 | 
					W:	http://kvm.qumranet.com
 | 
				
			||||||
S:	Supported
 | 
					S:	Supported
 | 
				
			||||||
| 
						 | 
					@ -3209,6 +3270,16 @@ S:	Maintained
 | 
				
			||||||
F:	include/linux/kexec.h
 | 
					F:	include/linux/kexec.h
 | 
				
			||||||
F:	kernel/kexec.c
 | 
					F:	kernel/kexec.c
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					KEYS/KEYRINGS:
 | 
				
			||||||
 | 
					M:	David Howells <dhowells@redhat.com>
 | 
				
			||||||
 | 
					L:	keyrings@linux-nfs.org
 | 
				
			||||||
 | 
					S:	Maintained
 | 
				
			||||||
 | 
					F:	Documentation/keys.txt
 | 
				
			||||||
 | 
					F:	include/linux/key.h
 | 
				
			||||||
 | 
					F:	include/linux/key-type.h
 | 
				
			||||||
 | 
					F:	include/keys/
 | 
				
			||||||
 | 
					F:	security/keys/
 | 
				
			||||||
 | 
					
 | 
				
			||||||
KGDB
 | 
					KGDB
 | 
				
			||||||
M:	Jason Wessel <jason.wessel@windriver.com>
 | 
					M:	Jason Wessel <jason.wessel@windriver.com>
 | 
				
			||||||
L:	kgdb-bugreport@lists.sourceforge.net
 | 
					L:	kgdb-bugreport@lists.sourceforge.net
 | 
				
			||||||
| 
						 | 
					@ -3312,6 +3383,7 @@ M:	Benjamin Herrenschmidt <benh@kernel.crashing.org>
 | 
				
			||||||
M:	Paul Mackerras <paulus@samba.org>
 | 
					M:	Paul Mackerras <paulus@samba.org>
 | 
				
			||||||
W:	http://www.penguinppc.org/
 | 
					W:	http://www.penguinppc.org/
 | 
				
			||||||
L:	linuxppc-dev@ozlabs.org
 | 
					L:	linuxppc-dev@ozlabs.org
 | 
				
			||||||
 | 
					Q:	http://patchwork.ozlabs.org/project/linuxppc-dev/list/
 | 
				
			||||||
T:	git git://git.kernel.org/pub/scm/linux/kernel/git/benh/powerpc.git
 | 
					T:	git git://git.kernel.org/pub/scm/linux/kernel/git/benh/powerpc.git
 | 
				
			||||||
S:	Supported
 | 
					S:	Supported
 | 
				
			||||||
F:	Documentation/powerpc/
 | 
					F:	Documentation/powerpc/
 | 
				
			||||||
| 
						 | 
					@ -3432,6 +3504,13 @@ S:	Maintained
 | 
				
			||||||
F:	Documentation/ldm.txt
 | 
					F:	Documentation/ldm.txt
 | 
				
			||||||
F:	fs/partitions/ldm.*
 | 
					F:	fs/partitions/ldm.*
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					LogFS
 | 
				
			||||||
 | 
					M:	Joern Engel <joern@logfs.org>
 | 
				
			||||||
 | 
					L:	logfs@logfs.org
 | 
				
			||||||
 | 
					W:	logfs.org
 | 
				
			||||||
 | 
					S:	Maintained
 | 
				
			||||||
 | 
					F:	fs/logfs/
 | 
				
			||||||
 | 
					
 | 
				
			||||||
LSILOGIC MPT FUSION DRIVERS (FC/SAS/SPI)
 | 
					LSILOGIC MPT FUSION DRIVERS (FC/SAS/SPI)
 | 
				
			||||||
M:	Eric Moore <Eric.Moore@lsi.com>
 | 
					M:	Eric Moore <Eric.Moore@lsi.com>
 | 
				
			||||||
M:	support@lsi.com
 | 
					M:	support@lsi.com
 | 
				
			||||||
| 
						 | 
					@ -3568,6 +3647,7 @@ M:	Mauro Carvalho Chehab <mchehab@infradead.org>
 | 
				
			||||||
P:	LinuxTV.org Project
 | 
					P:	LinuxTV.org Project
 | 
				
			||||||
L:	linux-media@vger.kernel.org
 | 
					L:	linux-media@vger.kernel.org
 | 
				
			||||||
W:	http://linuxtv.org
 | 
					W:	http://linuxtv.org
 | 
				
			||||||
 | 
					Q:	http://patchwork.kernel.org/project/linux-media/list/
 | 
				
			||||||
T:	git git://git.kernel.org/pub/scm/linux/kernel/git/mchehab/linux-2.6.git
 | 
					T:	git git://git.kernel.org/pub/scm/linux/kernel/git/mchehab/linux-2.6.git
 | 
				
			||||||
S:	Maintained
 | 
					S:	Maintained
 | 
				
			||||||
F:	Documentation/dvb/
 | 
					F:	Documentation/dvb/
 | 
				
			||||||
| 
						 | 
					@ -3595,7 +3675,7 @@ F:	mm/
 | 
				
			||||||
 | 
					
 | 
				
			||||||
MEMORY RESOURCE CONTROLLER
 | 
					MEMORY RESOURCE CONTROLLER
 | 
				
			||||||
M:	Balbir Singh <balbir@linux.vnet.ibm.com>
 | 
					M:	Balbir Singh <balbir@linux.vnet.ibm.com>
 | 
				
			||||||
M:	Pavel Emelyanov <xemul@openvz.org>
 | 
					M:	Daisuke Nishimura <nishimura@mxp.nes.nec.co.jp>
 | 
				
			||||||
M:	KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
 | 
					M:	KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
 | 
				
			||||||
L:	linux-mm@kvack.org
 | 
					L:	linux-mm@kvack.org
 | 
				
			||||||
S:	Maintained
 | 
					S:	Maintained
 | 
				
			||||||
| 
						 | 
					@ -3603,8 +3683,9 @@ F:	mm/memcontrol.c
 | 
				
			||||||
 | 
					
 | 
				
			||||||
MEMORY TECHNOLOGY DEVICES (MTD)
 | 
					MEMORY TECHNOLOGY DEVICES (MTD)
 | 
				
			||||||
M:	David Woodhouse <dwmw2@infradead.org>
 | 
					M:	David Woodhouse <dwmw2@infradead.org>
 | 
				
			||||||
W:	http://www.linux-mtd.infradead.org/
 | 
					 | 
				
			||||||
L:	linux-mtd@lists.infradead.org
 | 
					L:	linux-mtd@lists.infradead.org
 | 
				
			||||||
 | 
					W:	http://www.linux-mtd.infradead.org/
 | 
				
			||||||
 | 
					Q:	http://patchwork.ozlabs.org/project/linux-mtd/list/
 | 
				
			||||||
T:	git git://git.infradead.org/mtd-2.6.git
 | 
					T:	git git://git.infradead.org/mtd-2.6.git
 | 
				
			||||||
S:	Maintained
 | 
					S:	Maintained
 | 
				
			||||||
F:	drivers/mtd/
 | 
					F:	drivers/mtd/
 | 
				
			||||||
| 
						 | 
					@ -3864,6 +3945,7 @@ S:	Maintained
 | 
				
			||||||
NETWORKING [WIRELESS]
 | 
					NETWORKING [WIRELESS]
 | 
				
			||||||
M:	"John W. Linville" <linville@tuxdriver.com>
 | 
					M:	"John W. Linville" <linville@tuxdriver.com>
 | 
				
			||||||
L:	linux-wireless@vger.kernel.org
 | 
					L:	linux-wireless@vger.kernel.org
 | 
				
			||||||
 | 
					Q:	http://patchwork.kernel.org/project/linux-wireless/list/
 | 
				
			||||||
T:	git git://git.kernel.org/pub/scm/linux/kernel/git/linville/wireless-2.6.git
 | 
					T:	git git://git.kernel.org/pub/scm/linux/kernel/git/linville/wireless-2.6.git
 | 
				
			||||||
S:	Maintained
 | 
					S:	Maintained
 | 
				
			||||||
F:	net/mac80211/
 | 
					F:	net/mac80211/
 | 
				
			||||||
| 
						 | 
					@ -3956,6 +4038,7 @@ M:	Tony Lindgren <tony@atomide.com>
 | 
				
			||||||
L:	linux-omap@vger.kernel.org
 | 
					L:	linux-omap@vger.kernel.org
 | 
				
			||||||
W:	http://www.muru.com/linux/omap/
 | 
					W:	http://www.muru.com/linux/omap/
 | 
				
			||||||
W:	http://linux.omap.com/
 | 
					W:	http://linux.omap.com/
 | 
				
			||||||
 | 
					Q:	http://patchwork.kernel.org/project/linux-omap/list/
 | 
				
			||||||
T:	git git://git.kernel.org/pub/scm/linux/kernel/git/tmlind/linux-omap-2.6.git
 | 
					T:	git git://git.kernel.org/pub/scm/linux/kernel/git/tmlind/linux-omap-2.6.git
 | 
				
			||||||
S:	Maintained
 | 
					S:	Maintained
 | 
				
			||||||
F:	arch/arm/*omap*/
 | 
					F:	arch/arm/*omap*/
 | 
				
			||||||
| 
						 | 
					@ -4182,6 +4265,7 @@ M:	Helge Deller <deller@gmx.de>
 | 
				
			||||||
M:	"James E.J. Bottomley" <jejb@parisc-linux.org>
 | 
					M:	"James E.J. Bottomley" <jejb@parisc-linux.org>
 | 
				
			||||||
L:	linux-parisc@vger.kernel.org
 | 
					L:	linux-parisc@vger.kernel.org
 | 
				
			||||||
W:	http://www.parisc-linux.org/
 | 
					W:	http://www.parisc-linux.org/
 | 
				
			||||||
 | 
					Q:	http://patchwork.kernel.org/project/linux-parisc/list/
 | 
				
			||||||
T:	git git://git.kernel.org/pub/scm/linux/kernel/git/kyle/parisc-2.6.git
 | 
					T:	git git://git.kernel.org/pub/scm/linux/kernel/git/kyle/parisc-2.6.git
 | 
				
			||||||
S:	Maintained
 | 
					S:	Maintained
 | 
				
			||||||
F:	arch/parisc/
 | 
					F:	arch/parisc/
 | 
				
			||||||
| 
						 | 
					@ -4224,6 +4308,7 @@ F:	Documentation/powerpc/eeh-pci-error-recovery.txt
 | 
				
			||||||
PCI SUBSYSTEM
 | 
					PCI SUBSYSTEM
 | 
				
			||||||
M:	Jesse Barnes <jbarnes@virtuousgeek.org>
 | 
					M:	Jesse Barnes <jbarnes@virtuousgeek.org>
 | 
				
			||||||
L:	linux-pci@vger.kernel.org
 | 
					L:	linux-pci@vger.kernel.org
 | 
				
			||||||
 | 
					Q:	http://patchwork.kernel.org/project/linux-pci/list/
 | 
				
			||||||
T:	git git://git.kernel.org/pub/scm/linux/kernel/git/jbarnes/pci-2.6.git
 | 
					T:	git git://git.kernel.org/pub/scm/linux/kernel/git/jbarnes/pci-2.6.git
 | 
				
			||||||
S:	Supported
 | 
					S:	Supported
 | 
				
			||||||
F:	Documentation/PCI/
 | 
					F:	Documentation/PCI/
 | 
				
			||||||
| 
						 | 
					@ -4262,10 +4347,13 @@ PERFORMANCE EVENTS SUBSYSTEM
 | 
				
			||||||
M:	Peter Zijlstra <a.p.zijlstra@chello.nl>
 | 
					M:	Peter Zijlstra <a.p.zijlstra@chello.nl>
 | 
				
			||||||
M:	Paul Mackerras <paulus@samba.org>
 | 
					M:	Paul Mackerras <paulus@samba.org>
 | 
				
			||||||
M:	Ingo Molnar <mingo@elte.hu>
 | 
					M:	Ingo Molnar <mingo@elte.hu>
 | 
				
			||||||
 | 
					M:	Arnaldo Carvalho de Melo <acme@redhat.com>
 | 
				
			||||||
S:	Supported
 | 
					S:	Supported
 | 
				
			||||||
F:	kernel/perf_event.c
 | 
					F:	kernel/perf_event.c
 | 
				
			||||||
F:	include/linux/perf_event.h
 | 
					F:	include/linux/perf_event.h
 | 
				
			||||||
F:	arch/*/*/kernel/perf_event.c
 | 
					F:	arch/*/kernel/perf_event.c
 | 
				
			||||||
 | 
					F:	arch/*/kernel/*/perf_event.c
 | 
				
			||||||
 | 
					F:	arch/*/kernel/*/*/perf_event.c
 | 
				
			||||||
F:	arch/*/include/asm/perf_event.h
 | 
					F:	arch/*/include/asm/perf_event.h
 | 
				
			||||||
F:	arch/*/lib/perf_event.c
 | 
					F:	arch/*/lib/perf_event.c
 | 
				
			||||||
F:	arch/*/kernel/perf_callchain.c
 | 
					F:	arch/*/kernel/perf_callchain.c
 | 
				
			||||||
| 
						 | 
					@ -4462,6 +4550,13 @@ L:	linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 | 
				
			||||||
T:	git git://git.kernel.org/pub/scm/linux/kernel/git/ycmiao/pxa-linux-2.6.git
 | 
					T:	git git://git.kernel.org/pub/scm/linux/kernel/git/ycmiao/pxa-linux-2.6.git
 | 
				
			||||||
S:	Maintained
 | 
					S:	Maintained
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					MMP2 SUPPORT (aka ARMADA610)
 | 
				
			||||||
 | 
					M:	Haojian Zhuang <haojian.zhuang@marvell.com>
 | 
				
			||||||
 | 
					M:	Eric Miao <eric.y.miao@gmail.com>
 | 
				
			||||||
 | 
					L:	linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 | 
				
			||||||
 | 
					T:	git git://git.kernel.org/pub/scm/linux/kernel/git/ycmiao/pxa-linux-2.6.git
 | 
				
			||||||
 | 
					S:	Maintained
 | 
				
			||||||
 | 
					
 | 
				
			||||||
PXA MMCI DRIVER
 | 
					PXA MMCI DRIVER
 | 
				
			||||||
S:	Orphan
 | 
					S:	Orphan
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					@ -4599,6 +4694,7 @@ F:	include/linux/rtc.h
 | 
				
			||||||
REAL TIME CLOCK (RTC) SUBSYSTEM
 | 
					REAL TIME CLOCK (RTC) SUBSYSTEM
 | 
				
			||||||
M:	Alessandro Zummo <a.zummo@towertech.it>
 | 
					M:	Alessandro Zummo <a.zummo@towertech.it>
 | 
				
			||||||
L:	rtc-linux@googlegroups.com
 | 
					L:	rtc-linux@googlegroups.com
 | 
				
			||||||
 | 
					Q:	http://patchwork.ozlabs.org/project/rtc-linux/list/
 | 
				
			||||||
S:	Maintained
 | 
					S:	Maintained
 | 
				
			||||||
F:	Documentation/rtc.txt
 | 
					F:	Documentation/rtc.txt
 | 
				
			||||||
F:	drivers/rtc/
 | 
					F:	drivers/rtc/
 | 
				
			||||||
| 
						 | 
					@ -4966,6 +5062,7 @@ F:	drivers/*/*/*s3c2410*
 | 
				
			||||||
TI DAVINCI MACHINE SUPPORT
 | 
					TI DAVINCI MACHINE SUPPORT
 | 
				
			||||||
P:	Kevin Hilman
 | 
					P:	Kevin Hilman
 | 
				
			||||||
M:	davinci-linux-open-source@linux.davincidsp.com
 | 
					M:	davinci-linux-open-source@linux.davincidsp.com
 | 
				
			||||||
 | 
					Q:	http://patchwork.kernel.org/project/linux-davinci/list/
 | 
				
			||||||
S:	Supported
 | 
					S:	Supported
 | 
				
			||||||
F:	arch/arm/mach-davinci
 | 
					F:	arch/arm/mach-davinci
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					@ -5131,11 +5228,27 @@ F:	include/sound/soc*
 | 
				
			||||||
SPARC + UltraSPARC (sparc/sparc64)
 | 
					SPARC + UltraSPARC (sparc/sparc64)
 | 
				
			||||||
M:	"David S. Miller" <davem@davemloft.net>
 | 
					M:	"David S. Miller" <davem@davemloft.net>
 | 
				
			||||||
L:	sparclinux@vger.kernel.org
 | 
					L:	sparclinux@vger.kernel.org
 | 
				
			||||||
 | 
					Q:	http://patchwork.ozlabs.org/project/sparclinux/list/
 | 
				
			||||||
T:	git git://git.kernel.org/pub/scm/linux/kernel/git/davem/sparc-2.6.git
 | 
					T:	git git://git.kernel.org/pub/scm/linux/kernel/git/davem/sparc-2.6.git
 | 
				
			||||||
T:	git git://git.kernel.org/pub/scm/linux/kernel/git/davem/sparc-next-2.6.git
 | 
					T:	git git://git.kernel.org/pub/scm/linux/kernel/git/davem/sparc-next-2.6.git
 | 
				
			||||||
S:	Maintained
 | 
					S:	Maintained
 | 
				
			||||||
F:	arch/sparc/
 | 
					F:	arch/sparc/
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					SPARC SERIAL DRIVERS
 | 
				
			||||||
 | 
					M:	"David S. Miller" <davem@davemloft.net>
 | 
				
			||||||
 | 
					L:	sparclinux@vger.kernel.org
 | 
				
			||||||
 | 
					T:	git git://git.kernel.org/pub/scm/linux/kernel/git/davem/sparc-2.6.git
 | 
				
			||||||
 | 
					T:	git git://git.kernel.org/pub/scm/linux/kernel/git/davem/sparc-next-2.6.git
 | 
				
			||||||
 | 
					S:	Maintained
 | 
				
			||||||
 | 
					F:	drivers/serial/suncore.c
 | 
				
			||||||
 | 
					F:	drivers/serial/suncore.h
 | 
				
			||||||
 | 
					F:	drivers/serial/sunhv.c
 | 
				
			||||||
 | 
					F:	drivers/serial/sunsab.c
 | 
				
			||||||
 | 
					F:	drivers/serial/sunsab.h
 | 
				
			||||||
 | 
					F:	drivers/serial/sunsu.c
 | 
				
			||||||
 | 
					F:	drivers/serial/sunzilog.c
 | 
				
			||||||
 | 
					F:	drivers/serial/sunzilog.h
 | 
				
			||||||
 | 
					
 | 
				
			||||||
SPECIALIX IO8+ MULTIPORT SERIAL CARD DRIVER
 | 
					SPECIALIX IO8+ MULTIPORT SERIAL CARD DRIVER
 | 
				
			||||||
M:	Roger Wolff <R.E.Wolff@BitWizard.nl>
 | 
					M:	Roger Wolff <R.E.Wolff@BitWizard.nl>
 | 
				
			||||||
S:	Supported
 | 
					S:	Supported
 | 
				
			||||||
| 
						 | 
					@ -5146,6 +5259,7 @@ SPI SUBSYSTEM
 | 
				
			||||||
M:	David Brownell <dbrownell@users.sourceforge.net>
 | 
					M:	David Brownell <dbrownell@users.sourceforge.net>
 | 
				
			||||||
M:	Grant Likely <grant.likely@secretlab.ca>
 | 
					M:	Grant Likely <grant.likely@secretlab.ca>
 | 
				
			||||||
L:	spi-devel-general@lists.sourceforge.net
 | 
					L:	spi-devel-general@lists.sourceforge.net
 | 
				
			||||||
 | 
					Q:	http://patchwork.kernel.org/project/spi-devel-general/list/
 | 
				
			||||||
S:	Maintained
 | 
					S:	Maintained
 | 
				
			||||||
F:	Documentation/spi/
 | 
					F:	Documentation/spi/
 | 
				
			||||||
F:	drivers/spi/
 | 
					F:	drivers/spi/
 | 
				
			||||||
| 
						 | 
					@ -5201,7 +5315,7 @@ F:	drivers/net/starfire*
 | 
				
			||||||
 | 
					
 | 
				
			||||||
STARMODE RADIO IP (STRIP) PROTOCOL DRIVER
 | 
					STARMODE RADIO IP (STRIP) PROTOCOL DRIVER
 | 
				
			||||||
S:	Orphan
 | 
					S:	Orphan
 | 
				
			||||||
F:	drivers/net/wireless/strip.c
 | 
					F:	drivers/staging/strip/strip.c
 | 
				
			||||||
F:	include/linux/if_strip.h
 | 
					F:	include/linux/if_strip.h
 | 
				
			||||||
 | 
					
 | 
				
			||||||
STRADIS MPEG-2 DECODER DRIVER
 | 
					STRADIS MPEG-2 DECODER DRIVER
 | 
				
			||||||
| 
						 | 
					@ -5222,6 +5336,7 @@ SUPERH
 | 
				
			||||||
M:	Paul Mundt <lethal@linux-sh.org>
 | 
					M:	Paul Mundt <lethal@linux-sh.org>
 | 
				
			||||||
L:	linux-sh@vger.kernel.org
 | 
					L:	linux-sh@vger.kernel.org
 | 
				
			||||||
W:	http://www.linux-sh.org
 | 
					W:	http://www.linux-sh.org
 | 
				
			||||||
 | 
					Q:	http://patchwork.kernel.org/project/linux-sh/list/
 | 
				
			||||||
T:	git git://git.kernel.org/pub/scm/linux/kernel/git/lethal/sh-2.6.git
 | 
					T:	git git://git.kernel.org/pub/scm/linux/kernel/git/lethal/sh-2.6.git
 | 
				
			||||||
S:	Supported
 | 
					S:	Supported
 | 
				
			||||||
F:	Documentation/sh/
 | 
					F:	Documentation/sh/
 | 
				
			||||||
| 
						 | 
					@ -5319,7 +5434,6 @@ S:	Maintained
 | 
				
			||||||
F:	sound/soc/codecs/twl4030*
 | 
					F:	sound/soc/codecs/twl4030*
 | 
				
			||||||
 | 
					
 | 
				
			||||||
TIPC NETWORK LAYER
 | 
					TIPC NETWORK LAYER
 | 
				
			||||||
M:	Per Liden <per.liden@ericsson.com>
 | 
					 | 
				
			||||||
M:	Jon Maloy <jon.maloy@ericsson.com>
 | 
					M:	Jon Maloy <jon.maloy@ericsson.com>
 | 
				
			||||||
M:	Allan Stephens <allan.stephens@windriver.com>
 | 
					M:	Allan Stephens <allan.stephens@windriver.com>
 | 
				
			||||||
L:	tipc-discussion@lists.sourceforge.net
 | 
					L:	tipc-discussion@lists.sourceforge.net
 | 
				
			||||||
| 
						 | 
					@ -5989,7 +6103,7 @@ L:	linux-wireless@vger.kernel.org
 | 
				
			||||||
W:	http://www.hpl.hp.com/personal/Jean_Tourrilhes/Linux/
 | 
					W:	http://www.hpl.hp.com/personal/Jean_Tourrilhes/Linux/
 | 
				
			||||||
S:	Maintained
 | 
					S:	Maintained
 | 
				
			||||||
F:	Documentation/networking/wavelan.txt
 | 
					F:	Documentation/networking/wavelan.txt
 | 
				
			||||||
F:	drivers/net/wireless/wavelan*
 | 
					F:	drivers/staging/wavelan/
 | 
				
			||||||
 | 
					
 | 
				
			||||||
WD7000 SCSI DRIVER
 | 
					WD7000 SCSI DRIVER
 | 
				
			||||||
M:	Miroslav Zagorac <zaga@fly.cc.fer.hr>
 | 
					M:	Miroslav Zagorac <zaga@fly.cc.fer.hr>
 | 
				
			||||||
| 
						 | 
					@ -6185,6 +6299,7 @@ F:	drivers/serial/zs.*
 | 
				
			||||||
THE REST
 | 
					THE REST
 | 
				
			||||||
M:	Linus Torvalds <torvalds@linux-foundation.org>
 | 
					M:	Linus Torvalds <torvalds@linux-foundation.org>
 | 
				
			||||||
L:	linux-kernel@vger.kernel.org
 | 
					L:	linux-kernel@vger.kernel.org
 | 
				
			||||||
 | 
					Q:	http://patchwork.kernel.org/project/LKML/list/
 | 
				
			||||||
T:	git git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux-2.6.git
 | 
					T:	git git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux-2.6.git
 | 
				
			||||||
S:	Buried alive in reporters
 | 
					S:	Buried alive in reporters
 | 
				
			||||||
F:	*
 | 
					F:	*
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
							
								
								
									
										4
									
								
								Makefile
									
										
									
									
									
								
							
							
						
						
									
										4
									
								
								Makefile
									
										
									
									
									
								
							| 
						 | 
					@ -1,7 +1,7 @@
 | 
				
			||||||
VERSION = 2
 | 
					VERSION = 2
 | 
				
			||||||
PATCHLEVEL = 6
 | 
					PATCHLEVEL = 6
 | 
				
			||||||
SUBLEVEL = 33
 | 
					SUBLEVEL = 34
 | 
				
			||||||
EXTRAVERSION =
 | 
					EXTRAVERSION = -rc3
 | 
				
			||||||
NAME = Man-Eating Seals of Antiquity
 | 
					NAME = Man-Eating Seals of Antiquity
 | 
				
			||||||
 | 
					
 | 
				
			||||||
# *DOCUMENTATION*
 | 
					# *DOCUMENTATION*
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
							
								
								
									
										13
									
								
								arch/Kconfig
									
										
									
									
									
								
							
							
						
						
									
										13
									
								
								arch/Kconfig
									
										
									
									
									
								
							| 
						 | 
					@ -41,6 +41,17 @@ config KPROBES
 | 
				
			||||||
	  for kernel debugging, non-intrusive instrumentation and testing.
 | 
						  for kernel debugging, non-intrusive instrumentation and testing.
 | 
				
			||||||
	  If in doubt, say "N".
 | 
						  If in doubt, say "N".
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					config OPTPROBES
 | 
				
			||||||
 | 
						bool "Kprobes jump optimization support (EXPERIMENTAL)"
 | 
				
			||||||
 | 
						default y
 | 
				
			||||||
 | 
						depends on KPROBES
 | 
				
			||||||
 | 
						depends on !PREEMPT
 | 
				
			||||||
 | 
						depends on HAVE_OPTPROBES
 | 
				
			||||||
 | 
						select KALLSYMS_ALL
 | 
				
			||||||
 | 
						help
 | 
				
			||||||
 | 
						  This option will allow kprobes to optimize breakpoint to
 | 
				
			||||||
 | 
						  a jump for reducing its overhead.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
config HAVE_EFFICIENT_UNALIGNED_ACCESS
 | 
					config HAVE_EFFICIENT_UNALIGNED_ACCESS
 | 
				
			||||||
	bool
 | 
						bool
 | 
				
			||||||
	help
 | 
						help
 | 
				
			||||||
| 
						 | 
					@ -83,6 +94,8 @@ config HAVE_KPROBES
 | 
				
			||||||
config HAVE_KRETPROBES
 | 
					config HAVE_KRETPROBES
 | 
				
			||||||
	bool
 | 
						bool
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					config HAVE_OPTPROBES
 | 
				
			||||||
 | 
						bool
 | 
				
			||||||
#
 | 
					#
 | 
				
			||||||
# An arch should select this if it provides all these things:
 | 
					# An arch should select this if it provides all these things:
 | 
				
			||||||
#
 | 
					#
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -10,6 +10,7 @@ config ALPHA
 | 
				
			||||||
	select HAVE_OPROFILE
 | 
						select HAVE_OPROFILE
 | 
				
			||||||
	select HAVE_SYSCALL_WRAPPERS
 | 
						select HAVE_SYSCALL_WRAPPERS
 | 
				
			||||||
	select HAVE_PERF_EVENTS
 | 
						select HAVE_PERF_EVENTS
 | 
				
			||||||
 | 
						select HAVE_DMA_ATTRS
 | 
				
			||||||
	help
 | 
						help
 | 
				
			||||||
	  The Alpha is a 64-bit general-purpose processor designed and
 | 
						  The Alpha is a 64-bit general-purpose processor designed and
 | 
				
			||||||
	  marketed by the Digital Equipment Corporation of blessed memory,
 | 
						  marketed by the Digital Equipment Corporation of blessed memory,
 | 
				
			||||||
| 
						 | 
					@ -58,6 +59,9 @@ config ZONE_DMA
 | 
				
			||||||
	bool
 | 
						bool
 | 
				
			||||||
	default y
 | 
						default y
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					config NEED_DMA_MAP_STATE
 | 
				
			||||||
 | 
					       def_bool y
 | 
				
			||||||
 | 
					
 | 
				
			||||||
config GENERIC_ISA_DMA
 | 
					config GENERIC_ISA_DMA
 | 
				
			||||||
	bool
 | 
						bool
 | 
				
			||||||
	default y
 | 
						default y
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -12,7 +12,6 @@
 | 
				
			||||||
#define __ALPHA_MARVEL__H__
 | 
					#define __ALPHA_MARVEL__H__
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#include <linux/types.h>
 | 
					#include <linux/types.h>
 | 
				
			||||||
#include <linux/pci.h>
 | 
					 | 
				
			||||||
#include <linux/spinlock.h>
 | 
					#include <linux/spinlock.h>
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#include <asm/compiler.h>
 | 
					#include <asm/compiler.h>
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -6,7 +6,6 @@
 | 
				
			||||||
#define MCPCIA_ONE_HAE_WINDOW 1
 | 
					#define MCPCIA_ONE_HAE_WINDOW 1
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#include <linux/types.h>
 | 
					#include <linux/types.h>
 | 
				
			||||||
#include <linux/pci.h>
 | 
					 | 
				
			||||||
#include <asm/compiler.h>
 | 
					#include <asm/compiler.h>
 | 
				
			||||||
 | 
					
 | 
				
			||||||
/*
 | 
					/*
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -2,7 +2,6 @@
 | 
				
			||||||
#define __ALPHA_TITAN__H__
 | 
					#define __ALPHA_TITAN__H__
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#include <linux/types.h>
 | 
					#include <linux/types.h>
 | 
				
			||||||
#include <linux/pci.h>
 | 
					 | 
				
			||||||
#include <asm/compiler.h>
 | 
					#include <asm/compiler.h>
 | 
				
			||||||
 | 
					
 | 
				
			||||||
/*
 | 
					/*
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -2,7 +2,6 @@
 | 
				
			||||||
#define __ALPHA_TSUNAMI__H__
 | 
					#define __ALPHA_TSUNAMI__H__
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#include <linux/types.h>
 | 
					#include <linux/types.h>
 | 
				
			||||||
#include <linux/pci.h>
 | 
					 | 
				
			||||||
#include <asm/compiler.h>
 | 
					#include <asm/compiler.h>
 | 
				
			||||||
 | 
					
 | 
				
			||||||
/*
 | 
					/*
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -1,71 +1,49 @@
 | 
				
			||||||
#ifndef _ALPHA_DMA_MAPPING_H
 | 
					#ifndef _ALPHA_DMA_MAPPING_H
 | 
				
			||||||
#define _ALPHA_DMA_MAPPING_H
 | 
					#define _ALPHA_DMA_MAPPING_H
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#include <linux/dma-attrs.h>
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#ifdef CONFIG_PCI
 | 
					extern struct dma_map_ops *dma_ops;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#include <linux/pci.h>
 | 
					static inline struct dma_map_ops *get_dma_ops(struct device *dev)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
						return dma_ops;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#define dma_map_single(dev, va, size, dir)		\
 | 
					#include <asm-generic/dma-mapping-common.h>
 | 
				
			||||||
		pci_map_single(alpha_gendev_to_pci(dev), va, size, dir)
 | 
					 | 
				
			||||||
#define dma_unmap_single(dev, addr, size, dir)		\
 | 
					 | 
				
			||||||
		pci_unmap_single(alpha_gendev_to_pci(dev), addr, size, dir)
 | 
					 | 
				
			||||||
#define dma_alloc_coherent(dev, size, addr, gfp)	\
 | 
					 | 
				
			||||||
	      __pci_alloc_consistent(alpha_gendev_to_pci(dev), size, addr, gfp)
 | 
					 | 
				
			||||||
#define dma_free_coherent(dev, size, va, addr)		\
 | 
					 | 
				
			||||||
		pci_free_consistent(alpha_gendev_to_pci(dev), size, va, addr)
 | 
					 | 
				
			||||||
#define dma_map_page(dev, page, off, size, dir)		\
 | 
					 | 
				
			||||||
		pci_map_page(alpha_gendev_to_pci(dev), page, off, size, dir)
 | 
					 | 
				
			||||||
#define dma_unmap_page(dev, addr, size, dir)		\
 | 
					 | 
				
			||||||
		pci_unmap_page(alpha_gendev_to_pci(dev), addr, size, dir)
 | 
					 | 
				
			||||||
#define dma_map_sg(dev, sg, nents, dir)			\
 | 
					 | 
				
			||||||
		pci_map_sg(alpha_gendev_to_pci(dev), sg, nents, dir)
 | 
					 | 
				
			||||||
#define dma_unmap_sg(dev, sg, nents, dir)		\
 | 
					 | 
				
			||||||
		pci_unmap_sg(alpha_gendev_to_pci(dev), sg, nents, dir)
 | 
					 | 
				
			||||||
#define dma_supported(dev, mask)			\
 | 
					 | 
				
			||||||
		pci_dma_supported(alpha_gendev_to_pci(dev), mask)
 | 
					 | 
				
			||||||
#define dma_mapping_error(dev, addr)				\
 | 
					 | 
				
			||||||
		pci_dma_mapping_error(alpha_gendev_to_pci(dev), addr)
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
#else	/* no PCI - no IOMMU. */
 | 
					static inline void *dma_alloc_coherent(struct device *dev, size_t size,
 | 
				
			||||||
 | 
									       dma_addr_t *dma_handle, gfp_t gfp)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
						return get_dma_ops(dev)->alloc_coherent(dev, size, dma_handle, gfp);
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#include <asm/io.h>	/* for virt_to_phys() */
 | 
					static inline void dma_free_coherent(struct device *dev, size_t size,
 | 
				
			||||||
 | 
									     void *vaddr, dma_addr_t dma_handle)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
						get_dma_ops(dev)->free_coherent(dev, size, vaddr, dma_handle);
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
struct scatterlist;
 | 
					static inline int dma_mapping_error(struct device *dev, dma_addr_t dma_addr)
 | 
				
			||||||
void *dma_alloc_coherent(struct device *dev, size_t size,
 | 
					{
 | 
				
			||||||
			 dma_addr_t *dma_handle, gfp_t gfp);
 | 
						return get_dma_ops(dev)->mapping_error(dev, dma_addr);
 | 
				
			||||||
int dma_map_sg(struct device *dev, struct scatterlist *sg, int nents,
 | 
					}
 | 
				
			||||||
	       enum dma_data_direction direction);
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
#define dma_free_coherent(dev, size, va, addr)		\
 | 
					static inline int dma_supported(struct device *dev, u64 mask)
 | 
				
			||||||
		free_pages((unsigned long)va, get_order(size))
 | 
					{
 | 
				
			||||||
#define dma_supported(dev, mask)		(mask < 0x00ffffffUL ? 0 : 1)
 | 
						return get_dma_ops(dev)->dma_supported(dev, mask);
 | 
				
			||||||
#define dma_map_single(dev, va, size, dir)	virt_to_phys(va)
 | 
					}
 | 
				
			||||||
#define dma_map_page(dev, page, off, size, dir)	(page_to_pa(page) + off)
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
#define dma_unmap_single(dev, addr, size, dir)	((void)0)
 | 
					static inline int dma_set_mask(struct device *dev, u64 mask)
 | 
				
			||||||
#define dma_unmap_page(dev, addr, size, dir)	((void)0)
 | 
					{
 | 
				
			||||||
#define dma_unmap_sg(dev, sg, nents, dir)	((void)0)
 | 
						return get_dma_ops(dev)->set_dma_mask(dev, mask);
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
#define dma_mapping_error(dev, addr)  (0)
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
#endif	/* !CONFIG_PCI */
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
#define dma_alloc_noncoherent(d, s, h, f)	dma_alloc_coherent(d, s, h, f)
 | 
					#define dma_alloc_noncoherent(d, s, h, f)	dma_alloc_coherent(d, s, h, f)
 | 
				
			||||||
#define dma_free_noncoherent(d, s, v, h)	dma_free_coherent(d, s, v, h)
 | 
					#define dma_free_noncoherent(d, s, v, h)	dma_free_coherent(d, s, v, h)
 | 
				
			||||||
#define dma_is_consistent(d, h)			(1)
 | 
					#define dma_is_consistent(d, h)			(1)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
int dma_set_mask(struct device *dev, u64 mask);
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
#define dma_sync_single_for_cpu(dev, addr, size, dir)	  ((void)0)
 | 
					 | 
				
			||||||
#define dma_sync_single_for_device(dev, addr, size, dir)  ((void)0)
 | 
					 | 
				
			||||||
#define dma_sync_single_range(dev, addr, off, size, dir)  ((void)0)
 | 
					 | 
				
			||||||
#define dma_sync_sg_for_cpu(dev, sg, nents, dir)	  ((void)0)
 | 
					 | 
				
			||||||
#define dma_sync_sg_for_device(dev, sg, nents, dir)	  ((void)0)
 | 
					 | 
				
			||||||
#define dma_cache_sync(dev, va, size, dir)		  ((void)0)
 | 
					#define dma_cache_sync(dev, va, size, dir)		  ((void)0)
 | 
				
			||||||
#define dma_sync_single_range_for_cpu(dev, addr, offset, size, dir)	((void)0)
 | 
					 | 
				
			||||||
#define dma_sync_single_range_for_device(dev, addr, offset, size, dir)	((void)0)
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
#define dma_get_cache_alignment()			  L1_CACHE_BYTES
 | 
					#define dma_get_cache_alignment()			  L1_CACHE_BYTES
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#endif	/* _ALPHA_DMA_MAPPING_H */
 | 
					#endif	/* _ALPHA_DMA_MAPPING_H */
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -70,142 +70,11 @@ extern inline void pcibios_penalize_isa_irq(int irq, int active)
 | 
				
			||||||
   decisions.  */
 | 
					   decisions.  */
 | 
				
			||||||
#define PCI_DMA_BUS_IS_PHYS  0
 | 
					#define PCI_DMA_BUS_IS_PHYS  0
 | 
				
			||||||
 | 
					
 | 
				
			||||||
/* Allocate and map kernel buffer using consistent mode DMA for PCI
 | 
					 | 
				
			||||||
   device.  Returns non-NULL cpu-view pointer to the buffer if
 | 
					 | 
				
			||||||
   successful and sets *DMA_ADDRP to the pci side dma address as well,
 | 
					 | 
				
			||||||
   else DMA_ADDRP is undefined.  */
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
extern void *__pci_alloc_consistent(struct pci_dev *, size_t,
 | 
					 | 
				
			||||||
				    dma_addr_t *, gfp_t);
 | 
					 | 
				
			||||||
static inline void *
 | 
					 | 
				
			||||||
pci_alloc_consistent(struct pci_dev *dev, size_t size, dma_addr_t *dma)
 | 
					 | 
				
			||||||
{
 | 
					 | 
				
			||||||
	return __pci_alloc_consistent(dev, size, dma, GFP_ATOMIC);
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
/* Free and unmap a consistent DMA buffer.  CPU_ADDR and DMA_ADDR must
 | 
					 | 
				
			||||||
   be values that were returned from pci_alloc_consistent.  SIZE must
 | 
					 | 
				
			||||||
   be the same as what as passed into pci_alloc_consistent.
 | 
					 | 
				
			||||||
   References to the memory and mappings associated with CPU_ADDR or
 | 
					 | 
				
			||||||
   DMA_ADDR past this call are illegal.  */
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
extern void pci_free_consistent(struct pci_dev *, size_t, void *, dma_addr_t);
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
/* Map a single buffer of the indicate size for PCI DMA in streaming mode.
 | 
					 | 
				
			||||||
   The 32-bit PCI bus mastering address to use is returned.  Once the device
 | 
					 | 
				
			||||||
   is given the dma address, the device owns this memory until either
 | 
					 | 
				
			||||||
   pci_unmap_single or pci_dma_sync_single_for_cpu is performed.  */
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
extern dma_addr_t pci_map_single(struct pci_dev *, void *, size_t, int);
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
/* Likewise, but for a page instead of an address.  */
 | 
					 | 
				
			||||||
extern dma_addr_t pci_map_page(struct pci_dev *, struct page *,
 | 
					 | 
				
			||||||
			       unsigned long, size_t, int);
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
/* Test for pci_map_single or pci_map_page having generated an error.  */
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
static inline int
 | 
					 | 
				
			||||||
pci_dma_mapping_error(struct pci_dev *pdev, dma_addr_t dma_addr)
 | 
					 | 
				
			||||||
{
 | 
					 | 
				
			||||||
	return dma_addr == 0;
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
/* Unmap a single streaming mode DMA translation.  The DMA_ADDR and
 | 
					 | 
				
			||||||
   SIZE must match what was provided for in a previous pci_map_single
 | 
					 | 
				
			||||||
   call.  All other usages are undefined.  After this call, reads by
 | 
					 | 
				
			||||||
   the cpu to the buffer are guaranteed to see whatever the device
 | 
					 | 
				
			||||||
   wrote there.  */
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
extern void pci_unmap_single(struct pci_dev *, dma_addr_t, size_t, int);
 | 
					 | 
				
			||||||
extern void pci_unmap_page(struct pci_dev *, dma_addr_t, size_t, int);
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
/* pci_unmap_{single,page} is not a nop, thus... */
 | 
					 | 
				
			||||||
#define DECLARE_PCI_UNMAP_ADDR(ADDR_NAME)	\
 | 
					 | 
				
			||||||
	dma_addr_t ADDR_NAME;
 | 
					 | 
				
			||||||
#define DECLARE_PCI_UNMAP_LEN(LEN_NAME)		\
 | 
					 | 
				
			||||||
	__u32 LEN_NAME;
 | 
					 | 
				
			||||||
#define pci_unmap_addr(PTR, ADDR_NAME)			\
 | 
					 | 
				
			||||||
	((PTR)->ADDR_NAME)
 | 
					 | 
				
			||||||
#define pci_unmap_addr_set(PTR, ADDR_NAME, VAL)		\
 | 
					 | 
				
			||||||
	(((PTR)->ADDR_NAME) = (VAL))
 | 
					 | 
				
			||||||
#define pci_unmap_len(PTR, LEN_NAME)			\
 | 
					 | 
				
			||||||
	((PTR)->LEN_NAME)
 | 
					 | 
				
			||||||
#define pci_unmap_len_set(PTR, LEN_NAME, VAL)		\
 | 
					 | 
				
			||||||
	(((PTR)->LEN_NAME) = (VAL))
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
/* Map a set of buffers described by scatterlist in streaming mode for
 | 
					 | 
				
			||||||
   PCI DMA.  This is the scatter-gather version of the above
 | 
					 | 
				
			||||||
   pci_map_single interface.  Here the scatter gather list elements
 | 
					 | 
				
			||||||
   are each tagged with the appropriate PCI dma address and length.
 | 
					 | 
				
			||||||
   They are obtained via sg_dma_{address,length}(SG).
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
   NOTE: An implementation may be able to use a smaller number of DMA
 | 
					 | 
				
			||||||
   address/length pairs than there are SG table elements.  (for
 | 
					 | 
				
			||||||
   example via virtual mapping capabilities) The routine returns the
 | 
					 | 
				
			||||||
   number of addr/length pairs actually used, at most nents.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
   Device ownership issues as mentioned above for pci_map_single are
 | 
					 | 
				
			||||||
   the same here.  */
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
extern int pci_map_sg(struct pci_dev *, struct scatterlist *, int, int);
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
/* Unmap a set of streaming mode DMA translations.  Again, cpu read
 | 
					 | 
				
			||||||
   rules concerning calls here are the same as for pci_unmap_single()
 | 
					 | 
				
			||||||
   above.  */
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
extern void pci_unmap_sg(struct pci_dev *, struct scatterlist *, int, int);
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
/* Make physical memory consistent for a single streaming mode DMA
 | 
					 | 
				
			||||||
   translation after a transfer and device currently has ownership
 | 
					 | 
				
			||||||
   of the buffer.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
   If you perform a pci_map_single() but wish to interrogate the
 | 
					 | 
				
			||||||
   buffer using the cpu, yet do not wish to teardown the PCI dma
 | 
					 | 
				
			||||||
   mapping, you must call this function before doing so.  At the next
 | 
					 | 
				
			||||||
   point you give the PCI dma address back to the card, you must first
 | 
					 | 
				
			||||||
   perform a pci_dma_sync_for_device, and then the device again owns
 | 
					 | 
				
			||||||
   the buffer.  */
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
static inline void
 | 
					 | 
				
			||||||
pci_dma_sync_single_for_cpu(struct pci_dev *dev, dma_addr_t dma_addr,
 | 
					 | 
				
			||||||
			    long size, int direction)
 | 
					 | 
				
			||||||
{
 | 
					 | 
				
			||||||
	/* Nothing to do.  */
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
static inline void
 | 
					 | 
				
			||||||
pci_dma_sync_single_for_device(struct pci_dev *dev, dma_addr_t dma_addr,
 | 
					 | 
				
			||||||
			       size_t size, int direction)
 | 
					 | 
				
			||||||
{
 | 
					 | 
				
			||||||
	/* Nothing to do.  */
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
/* Make physical memory consistent for a set of streaming mode DMA
 | 
					 | 
				
			||||||
   translations after a transfer.  The same as pci_dma_sync_single_*
 | 
					 | 
				
			||||||
   but for a scatter-gather list, same rules and usage.  */
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
static inline void
 | 
					 | 
				
			||||||
pci_dma_sync_sg_for_cpu(struct pci_dev *dev, struct scatterlist *sg,
 | 
					 | 
				
			||||||
			int nents, int direction)
 | 
					 | 
				
			||||||
{
 | 
					 | 
				
			||||||
	/* Nothing to do.  */
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
static inline void
 | 
					 | 
				
			||||||
pci_dma_sync_sg_for_device(struct pci_dev *dev, struct scatterlist *sg,
 | 
					 | 
				
			||||||
			   int nents, int direction)
 | 
					 | 
				
			||||||
{
 | 
					 | 
				
			||||||
	/* Nothing to do.  */
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
/* Return whether the given PCI device DMA address mask can
 | 
					 | 
				
			||||||
   be supported properly.  For example, if your device can
 | 
					 | 
				
			||||||
   only drive the low 24-bits during PCI bus mastering, then
 | 
					 | 
				
			||||||
   you would pass 0x00ffffff as the mask to this function.  */
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
extern int pci_dma_supported(struct pci_dev *hwdev, u64 mask);
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
#ifdef CONFIG_PCI
 | 
					#ifdef CONFIG_PCI
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/* implement the pci_ DMA API in terms of the generic device dma_ one */
 | 
				
			||||||
 | 
					#include <asm-generic/pci-dma-compat.h>
 | 
				
			||||||
 | 
					
 | 
				
			||||||
static inline void pci_dma_burst_advice(struct pci_dev *pdev,
 | 
					static inline void pci_dma_burst_advice(struct pci_dev *pdev,
 | 
				
			||||||
					enum pci_dma_burst_strategy *strat,
 | 
										enum pci_dma_burst_strategy *strat,
 | 
				
			||||||
					unsigned long *strategy_parameter)
 | 
										unsigned long *strategy_parameter)
 | 
				
			||||||
| 
						 | 
					@ -244,8 +113,6 @@ static inline int pci_proc_domain(struct pci_bus *bus)
 | 
				
			||||||
	return hose->need_domain_info;
 | 
						return hose->need_domain_info;
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
struct pci_dev *alpha_gendev_to_pci(struct device *dev);
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
#endif /* __KERNEL__ */
 | 
					#endif /* __KERNEL__ */
 | 
				
			||||||
 | 
					
 | 
				
			||||||
/* Values for the `which' argument to sys_pciconfig_iobase.  */
 | 
					/* Values for the `which' argument to sys_pciconfig_iobase.  */
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -68,6 +68,7 @@ struct switch_stack {
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#ifdef __KERNEL__
 | 
					#ifdef __KERNEL__
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#define arch_has_single_step()		(1)
 | 
				
			||||||
#define user_mode(regs) (((regs)->ps & 8) != 0)
 | 
					#define user_mode(regs) (((regs)->ps & 8) != 0)
 | 
				
			||||||
#define instruction_pointer(regs) ((regs)->pc)
 | 
					#define instruction_pointer(regs) ((regs)->pc)
 | 
				
			||||||
#define profile_pc(regs) instruction_pointer(regs)
 | 
					#define profile_pc(regs) instruction_pointer(regs)
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -361,7 +361,7 @@ osf_procfs_mount(char *dirname, struct procfs_args __user *args, int flags)
 | 
				
			||||||
SYSCALL_DEFINE4(osf_mount, unsigned long, typenr, char __user *, path,
 | 
					SYSCALL_DEFINE4(osf_mount, unsigned long, typenr, char __user *, path,
 | 
				
			||||||
		int, flag, void __user *, data)
 | 
							int, flag, void __user *, data)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
	int retval = -EINVAL;
 | 
						int retval;
 | 
				
			||||||
	char *name;
 | 
						char *name;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	name = getname(path);
 | 
						name = getname(path);
 | 
				
			||||||
| 
						 | 
					@ -379,6 +379,7 @@ SYSCALL_DEFINE4(osf_mount, unsigned long, typenr, char __user *, path,
 | 
				
			||||||
		retval = osf_procfs_mount(name, data, flag);
 | 
							retval = osf_procfs_mount(name, data, flag);
 | 
				
			||||||
		break;
 | 
							break;
 | 
				
			||||||
	default:
 | 
						default:
 | 
				
			||||||
 | 
							retval = -EINVAL;
 | 
				
			||||||
		printk("osf_mount(%ld, %x)\n", typenr, flag);
 | 
							printk("osf_mount(%ld, %x)\n", typenr, flag);
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
	putname(name);
 | 
						putname(name);
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -106,57 +106,7 @@ sys_pciconfig_write(unsigned long bus, unsigned long dfn,
 | 
				
			||||||
		return -ENODEV;
 | 
							return -ENODEV;
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
/* Stubs for the routines in pci_iommu.c: */
 | 
					static void *alpha_noop_alloc_coherent(struct device *dev, size_t size,
 | 
				
			||||||
 | 
					 | 
				
			||||||
void *
 | 
					 | 
				
			||||||
__pci_alloc_consistent(struct pci_dev *pdev, size_t size,
 | 
					 | 
				
			||||||
		       dma_addr_t *dma_addrp, gfp_t gfp)
 | 
					 | 
				
			||||||
{
 | 
					 | 
				
			||||||
	return NULL;
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
void
 | 
					 | 
				
			||||||
pci_free_consistent(struct pci_dev *pdev, size_t size, void *cpu_addr,
 | 
					 | 
				
			||||||
		    dma_addr_t dma_addr)
 | 
					 | 
				
			||||||
{
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
dma_addr_t
 | 
					 | 
				
			||||||
pci_map_single(struct pci_dev *pdev, void *cpu_addr, size_t size,
 | 
					 | 
				
			||||||
	       int direction)
 | 
					 | 
				
			||||||
{
 | 
					 | 
				
			||||||
	return (dma_addr_t) 0;
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
void
 | 
					 | 
				
			||||||
pci_unmap_single(struct pci_dev *pdev, dma_addr_t dma_addr, size_t size,
 | 
					 | 
				
			||||||
		 int direction)
 | 
					 | 
				
			||||||
{
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
int
 | 
					 | 
				
			||||||
pci_map_sg(struct pci_dev *pdev, struct scatterlist *sg, int nents,
 | 
					 | 
				
			||||||
	   int direction)
 | 
					 | 
				
			||||||
{
 | 
					 | 
				
			||||||
	return 0;
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
void
 | 
					 | 
				
			||||||
pci_unmap_sg(struct pci_dev *pdev, struct scatterlist *sg, int nents,
 | 
					 | 
				
			||||||
	     int direction)
 | 
					 | 
				
			||||||
{
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
int
 | 
					 | 
				
			||||||
pci_dma_supported(struct pci_dev *hwdev, dma_addr_t mask)
 | 
					 | 
				
			||||||
{
 | 
					 | 
				
			||||||
	return 0;
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
/* Generic DMA mapping functions: */
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
void *
 | 
					 | 
				
			||||||
dma_alloc_coherent(struct device *dev, size_t size,
 | 
					 | 
				
			||||||
				       dma_addr_t *dma_handle, gfp_t gfp)
 | 
									       dma_addr_t *dma_handle, gfp_t gfp)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
	void *ret;
 | 
						void *ret;
 | 
				
			||||||
| 
						 | 
					@ -171,11 +121,22 @@ dma_alloc_coherent(struct device *dev, size_t size,
 | 
				
			||||||
	return ret;
 | 
						return ret;
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
EXPORT_SYMBOL(dma_alloc_coherent);
 | 
					static void alpha_noop_free_coherent(struct device *dev, size_t size,
 | 
				
			||||||
 | 
									     void *cpu_addr, dma_addr_t dma_addr)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
						free_pages((unsigned long)cpu_addr, get_order(size));
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
int
 | 
					static dma_addr_t alpha_noop_map_page(struct device *dev, struct page *page,
 | 
				
			||||||
dma_map_sg(struct device *dev, struct scatterlist *sgl, int nents,
 | 
									      unsigned long offset, size_t size,
 | 
				
			||||||
	   enum dma_data_direction direction)
 | 
									      enum dma_data_direction dir,
 | 
				
			||||||
 | 
									      struct dma_attrs *attrs)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
						return page_to_pa(page) + offset;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					static int alpha_noop_map_sg(struct device *dev, struct scatterlist *sgl, int nents,
 | 
				
			||||||
 | 
								     enum dma_data_direction dir, struct dma_attrs *attrs)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
	int i;
 | 
						int i;
 | 
				
			||||||
	struct scatterlist *sg;
 | 
						struct scatterlist *sg;
 | 
				
			||||||
| 
						 | 
					@ -192,19 +153,37 @@ dma_map_sg(struct device *dev, struct scatterlist *sgl, int nents,
 | 
				
			||||||
	return nents;
 | 
						return nents;
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
EXPORT_SYMBOL(dma_map_sg);
 | 
					static int alpha_noop_mapping_error(struct device *dev, dma_addr_t dma_addr)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
						return 0;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
int
 | 
					static int alpha_noop_supported(struct device *dev, u64 mask)
 | 
				
			||||||
dma_set_mask(struct device *dev, u64 mask)
 | 
					{
 | 
				
			||||||
 | 
						return mask < 0x00ffffffUL ? 0 : 1;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					static int alpha_noop_set_mask(struct device *dev, u64 mask)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
	if (!dev->dma_mask || !dma_supported(dev, mask))
 | 
						if (!dev->dma_mask || !dma_supported(dev, mask))
 | 
				
			||||||
		return -EIO;
 | 
							return -EIO;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	*dev->dma_mask = mask;
 | 
						*dev->dma_mask = mask;
 | 
				
			||||||
 | 
					 | 
				
			||||||
	return 0;
 | 
						return 0;
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
EXPORT_SYMBOL(dma_set_mask);
 | 
					
 | 
				
			||||||
 | 
					struct dma_map_ops alpha_noop_ops = {
 | 
				
			||||||
 | 
						.alloc_coherent		= alpha_noop_alloc_coherent,
 | 
				
			||||||
 | 
						.free_coherent		= alpha_noop_free_coherent,
 | 
				
			||||||
 | 
						.map_page		= alpha_noop_map_page,
 | 
				
			||||||
 | 
						.map_sg			= alpha_noop_map_sg,
 | 
				
			||||||
 | 
						.mapping_error		= alpha_noop_mapping_error,
 | 
				
			||||||
 | 
						.dma_supported		= alpha_noop_supported,
 | 
				
			||||||
 | 
						.set_dma_mask		= alpha_noop_set_mask,
 | 
				
			||||||
 | 
					};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					struct dma_map_ops *dma_ops = &alpha_noop_ops;
 | 
				
			||||||
 | 
					EXPORT_SYMBOL(dma_ops);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
void __iomem *pci_iomap(struct pci_dev *dev, int bar, unsigned long maxlen)
 | 
					void __iomem *pci_iomap(struct pci_dev *dev, int bar, unsigned long maxlen)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
Some files were not shown because too many files have changed in this diff Show more
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue