| 
									
										
										
										
											2013-06-20 12:13:04 +02:00
										 |  |  | #include <linux/device.h>
 | 
					
						
							|  |  |  | #include <linux/dma-mapping.h>
 | 
					
						
							|  |  |  | #include <linux/dmaengine.h>
 | 
					
						
							|  |  |  | #include <linux/sizes.h>
 | 
					
						
							|  |  |  | #include <linux/platform_device.h>
 | 
					
						
							|  |  |  | #include <linux/of.h>
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | #include "musb_core.h"
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | #define RNDIS_REG(x) (0x80 + ((x - 1) * 4))
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | #define EP_MODE_AUTOREG_NONE		0
 | 
					
						
							|  |  |  | #define EP_MODE_AUTOREG_ALL_NEOP	1
 | 
					
						
							|  |  |  | #define EP_MODE_AUTOREG_ALWAYS		3
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | #define EP_MODE_DMA_TRANSPARENT		0
 | 
					
						
							|  |  |  | #define EP_MODE_DMA_RNDIS		1
 | 
					
						
							|  |  |  | #define EP_MODE_DMA_GEN_RNDIS		3
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | #define USB_CTRL_TX_MODE	0x70
 | 
					
						
							|  |  |  | #define USB_CTRL_RX_MODE	0x74
 | 
					
						
							|  |  |  | #define USB_CTRL_AUTOREQ	0xd0
 | 
					
						
							|  |  |  | #define USB_TDOWN		0xd8
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | struct cppi41_dma_channel { | 
					
						
							|  |  |  | 	struct dma_channel channel; | 
					
						
							|  |  |  | 	struct cppi41_dma_controller *controller; | 
					
						
							|  |  |  | 	struct musb_hw_ep *hw_ep; | 
					
						
							|  |  |  | 	struct dma_chan *dc; | 
					
						
							|  |  |  | 	dma_cookie_t cookie; | 
					
						
							|  |  |  | 	u8 port_num; | 
					
						
							|  |  |  | 	u8 is_tx; | 
					
						
							|  |  |  | 	u8 is_allocated; | 
					
						
							|  |  |  | 	u8 usb_toggle; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	dma_addr_t buf_addr; | 
					
						
							|  |  |  | 	u32 total_len; | 
					
						
							|  |  |  | 	u32 prog_len; | 
					
						
							|  |  |  | 	u32 transferred; | 
					
						
							|  |  |  | 	u32 packet_sz; | 
					
						
							| 
									
										
											  
											
												usb: musb: musb_cppi41: handle pre-mature TX complete interrupt
The TX-complete interrupt of the CPPI41 on AM335x fires too early.
Adding a loop and counting how long it takes until the
MUSB_TXCSR_TXPKTRDY bit is cleared I see
FS:
|musb-hdrc musb-hdrc.0.auto: configure ep1/80 packet_sz=64, mode=0, dma_addr=0xadc54002, len=1514 is_tx=1
|cppi41_dma_callback() 74 loops
|musb-hdrc musb-hdrc.0.auto: configure ep1/80 packet_sz=64, mode=0, dma_addr=0xadcd8802, len=1514 is_tx=1
|cppi41_dma_callback() 66 loops
|musb-hdrc musb-hdrc.0.auto: configure ep1/80 packet_sz=64, mode=0, dma_addr=0xadcd8002, len=1514 is_tx=1
|cppi41_dma_callback() 136 loops
|musb-hdrc musb-hdrc.0.auto: configure ep1/80 packet_sz=64, mode=0, dma_addr=0xadf55802, len=1514 is_tx=1
|cppi41_dma_callback() 136 loops
avg: 110 - 150us
HS:
|musb-hdrc musb-hdrc.0.auto: configure ep1/80 packet_sz=512, mode=0, dma_addr=0xaca6f002, len=1514 is_tx=1
|cppi41_dma_callback() 0 loops
|musb-hdrc musb-hdrc.0.auto: configure ep1/80 packet_sz=512, mode=0, dma_addr=0xadd6f802, len=1514 is_tx=1
|cppi41_dma_callback() 2 loops
|musb-hdrc musb-hdrc.0.auto: configure ep1/80 packet_sz=512, mode=0, dma_addr=0xadd6f002, len=1514 is_tx=1
|cppi41_dma_callback() 13 loops
avg: 2us
for the same test case. One loop means a udelay(1). The delay seems to
depend on the packet size. On HS the bit is always cleared for small
packet sizes while on FS it is never the case, it mostly around 110us.
This testing has been performed with g_ether (musb as device) and using BULK
transfers.
INTR transfers are way more fun: during init the gadget sends a INT
packet to the host and cppi41 says "transfer done" shortly after. The
MUSB_TXCSR_TXPKTRDY bit is set even seconds later. The reason is that the host
did not try to receive it, it does so after the interface (on host side) has
been configured. Until this happens, that packet remains in musb's FIFO.
To fix this, two things are done:
- No DMA transfers for INT based endpoints. These transfer are usually
  very small and rare so it is likely better to skip the DMA engine and
  stuff the four bytes directly into the FIFO
- on HS we poll up to 25us and hope that bit goes away. If not we setup
  a hrtimer to poll for it. The 140us delay is a rule of thumb. In FS
  the command
  | ping 10.10.10.10 -c1 -s65130
  creates about 44 1514bytes transfers. About 19 of them need a second
  timer to complete.
Reported-by: Bin Liu <b-liu@ti.com>
Cc: stable@vger.kernel.org
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Signed-off-by: Felipe Balbi <balbi@ti.com>
											
										 
											2013-11-12 16:37:47 +01:00
										 |  |  | 	struct list_head tx_check; | 
					
						
							| 
									
										
										
										
											2014-08-12 14:18:43 -05:00
										 |  |  | 	int tx_zlp; | 
					
						
							| 
									
										
										
										
											2013-06-20 12:13:04 +02:00
										 |  |  | }; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | #define MUSB_DMA_NUM_CHANNELS 15
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | struct cppi41_dma_controller { | 
					
						
							|  |  |  | 	struct dma_controller controller; | 
					
						
							|  |  |  | 	struct cppi41_dma_channel rx_channel[MUSB_DMA_NUM_CHANNELS]; | 
					
						
							|  |  |  | 	struct cppi41_dma_channel tx_channel[MUSB_DMA_NUM_CHANNELS]; | 
					
						
							|  |  |  | 	struct musb *musb; | 
					
						
							| 
									
										
											  
											
												usb: musb: musb_cppi41: handle pre-mature TX complete interrupt
The TX-complete interrupt of the CPPI41 on AM335x fires too early.
Adding a loop and counting how long it takes until the
MUSB_TXCSR_TXPKTRDY bit is cleared I see
FS:
|musb-hdrc musb-hdrc.0.auto: configure ep1/80 packet_sz=64, mode=0, dma_addr=0xadc54002, len=1514 is_tx=1
|cppi41_dma_callback() 74 loops
|musb-hdrc musb-hdrc.0.auto: configure ep1/80 packet_sz=64, mode=0, dma_addr=0xadcd8802, len=1514 is_tx=1
|cppi41_dma_callback() 66 loops
|musb-hdrc musb-hdrc.0.auto: configure ep1/80 packet_sz=64, mode=0, dma_addr=0xadcd8002, len=1514 is_tx=1
|cppi41_dma_callback() 136 loops
|musb-hdrc musb-hdrc.0.auto: configure ep1/80 packet_sz=64, mode=0, dma_addr=0xadf55802, len=1514 is_tx=1
|cppi41_dma_callback() 136 loops
avg: 110 - 150us
HS:
|musb-hdrc musb-hdrc.0.auto: configure ep1/80 packet_sz=512, mode=0, dma_addr=0xaca6f002, len=1514 is_tx=1
|cppi41_dma_callback() 0 loops
|musb-hdrc musb-hdrc.0.auto: configure ep1/80 packet_sz=512, mode=0, dma_addr=0xadd6f802, len=1514 is_tx=1
|cppi41_dma_callback() 2 loops
|musb-hdrc musb-hdrc.0.auto: configure ep1/80 packet_sz=512, mode=0, dma_addr=0xadd6f002, len=1514 is_tx=1
|cppi41_dma_callback() 13 loops
avg: 2us
for the same test case. One loop means a udelay(1). The delay seems to
depend on the packet size. On HS the bit is always cleared for small
packet sizes while on FS it is never the case, it mostly around 110us.
This testing has been performed with g_ether (musb as device) and using BULK
transfers.
INTR transfers are way more fun: during init the gadget sends a INT
packet to the host and cppi41 says "transfer done" shortly after. The
MUSB_TXCSR_TXPKTRDY bit is set even seconds later. The reason is that the host
did not try to receive it, it does so after the interface (on host side) has
been configured. Until this happens, that packet remains in musb's FIFO.
To fix this, two things are done:
- No DMA transfers for INT based endpoints. These transfer are usually
  very small and rare so it is likely better to skip the DMA engine and
  stuff the four bytes directly into the FIFO
- on HS we poll up to 25us and hope that bit goes away. If not we setup
  a hrtimer to poll for it. The 140us delay is a rule of thumb. In FS
  the command
  | ping 10.10.10.10 -c1 -s65130
  creates about 44 1514bytes transfers. About 19 of them need a second
  timer to complete.
Reported-by: Bin Liu <b-liu@ti.com>
Cc: stable@vger.kernel.org
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Signed-off-by: Felipe Balbi <balbi@ti.com>
											
										 
											2013-11-12 16:37:47 +01:00
										 |  |  | 	struct hrtimer early_tx; | 
					
						
							|  |  |  | 	struct list_head early_tx_list; | 
					
						
							| 
									
										
										
										
											2013-06-20 12:13:04 +02:00
										 |  |  | 	u32 rx_mode; | 
					
						
							|  |  |  | 	u32 tx_mode; | 
					
						
							|  |  |  | 	u32 auto_req; | 
					
						
							|  |  |  | }; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | static void save_rx_toggle(struct cppi41_dma_channel *cppi41_channel) | 
					
						
							|  |  |  | { | 
					
						
							|  |  |  | 	u16 csr; | 
					
						
							|  |  |  | 	u8 toggle; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	if (cppi41_channel->is_tx) | 
					
						
							|  |  |  | 		return; | 
					
						
							|  |  |  | 	if (!is_host_active(cppi41_channel->controller->musb)) | 
					
						
							|  |  |  | 		return; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	csr = musb_readw(cppi41_channel->hw_ep->regs, MUSB_RXCSR); | 
					
						
							|  |  |  | 	toggle = csr & MUSB_RXCSR_H_DATATOGGLE ? 1 : 0; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	cppi41_channel->usb_toggle = toggle; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | static void update_rx_toggle(struct cppi41_dma_channel *cppi41_channel) | 
					
						
							|  |  |  | { | 
					
						
							| 
									
										
										
										
											2014-05-26 14:52:39 +02:00
										 |  |  | 	struct musb_hw_ep *hw_ep = cppi41_channel->hw_ep; | 
					
						
							|  |  |  | 	struct musb *musb = hw_ep->musb; | 
					
						
							| 
									
										
										
										
											2013-06-20 12:13:04 +02:00
										 |  |  | 	u16 csr; | 
					
						
							|  |  |  | 	u8 toggle; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	if (cppi41_channel->is_tx) | 
					
						
							|  |  |  | 		return; | 
					
						
							| 
									
										
										
										
											2014-05-26 14:52:39 +02:00
										 |  |  | 	if (!is_host_active(musb)) | 
					
						
							| 
									
										
										
										
											2013-06-20 12:13:04 +02:00
										 |  |  | 		return; | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2014-05-26 14:52:39 +02:00
										 |  |  | 	musb_ep_select(musb->mregs, hw_ep->epnum); | 
					
						
							|  |  |  | 	csr = musb_readw(hw_ep->regs, MUSB_RXCSR); | 
					
						
							| 
									
										
										
										
											2013-06-20 12:13:04 +02:00
										 |  |  | 	toggle = csr & MUSB_RXCSR_H_DATATOGGLE ? 1 : 0; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	/*
 | 
					
						
							|  |  |  | 	 * AM335x Advisory 1.0.13: Due to internal synchronisation error the | 
					
						
							|  |  |  | 	 * data toggle may reset from DATA1 to DATA0 during receiving data from | 
					
						
							|  |  |  | 	 * more than one endpoint. | 
					
						
							|  |  |  | 	 */ | 
					
						
							|  |  |  | 	if (!toggle && toggle == cppi41_channel->usb_toggle) { | 
					
						
							|  |  |  | 		csr |= MUSB_RXCSR_H_DATATOGGLE | MUSB_RXCSR_H_WR_DATATOGGLE; | 
					
						
							|  |  |  | 		musb_writew(cppi41_channel->hw_ep->regs, MUSB_RXCSR, csr); | 
					
						
							|  |  |  | 		dev_dbg(cppi41_channel->controller->musb->controller, | 
					
						
							|  |  |  | 				"Restoring DATA1 toggle.\n"); | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	cppi41_channel->usb_toggle = toggle; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
											  
											
												usb: musb: musb_cppi41: handle pre-mature TX complete interrupt
The TX-complete interrupt of the CPPI41 on AM335x fires too early.
Adding a loop and counting how long it takes until the
MUSB_TXCSR_TXPKTRDY bit is cleared I see
FS:
|musb-hdrc musb-hdrc.0.auto: configure ep1/80 packet_sz=64, mode=0, dma_addr=0xadc54002, len=1514 is_tx=1
|cppi41_dma_callback() 74 loops
|musb-hdrc musb-hdrc.0.auto: configure ep1/80 packet_sz=64, mode=0, dma_addr=0xadcd8802, len=1514 is_tx=1
|cppi41_dma_callback() 66 loops
|musb-hdrc musb-hdrc.0.auto: configure ep1/80 packet_sz=64, mode=0, dma_addr=0xadcd8002, len=1514 is_tx=1
|cppi41_dma_callback() 136 loops
|musb-hdrc musb-hdrc.0.auto: configure ep1/80 packet_sz=64, mode=0, dma_addr=0xadf55802, len=1514 is_tx=1
|cppi41_dma_callback() 136 loops
avg: 110 - 150us
HS:
|musb-hdrc musb-hdrc.0.auto: configure ep1/80 packet_sz=512, mode=0, dma_addr=0xaca6f002, len=1514 is_tx=1
|cppi41_dma_callback() 0 loops
|musb-hdrc musb-hdrc.0.auto: configure ep1/80 packet_sz=512, mode=0, dma_addr=0xadd6f802, len=1514 is_tx=1
|cppi41_dma_callback() 2 loops
|musb-hdrc musb-hdrc.0.auto: configure ep1/80 packet_sz=512, mode=0, dma_addr=0xadd6f002, len=1514 is_tx=1
|cppi41_dma_callback() 13 loops
avg: 2us
for the same test case. One loop means a udelay(1). The delay seems to
depend on the packet size. On HS the bit is always cleared for small
packet sizes while on FS it is never the case; it is mostly around 110us.
This testing has been performed with g_ether (musb as device) and using BULK
transfers.
INTR transfers are way more fun: during init the gadget sends an INT
packet to the host and cppi41 says "transfer done" shortly after. The
MUSB_TXCSR_TXPKTRDY bit is set even seconds later. The reason is that the host
did not try to receive it, it does so after the interface (on host side) has
been configured. Until this happens, that packet remains in musb's FIFO.
To fix this, two things are done:
- No DMA transfers for INT based endpoints. These transfers are usually
  very small and rare so it is likely better to skip the DMA engine and
  stuff the four bytes directly into the FIFO
- on HS we poll up to 25us and hope that bit goes away. If not we setup
  a hrtimer to poll for it. The 140us delay is a rule of thumb. In FS
  the command
  | ping 10.10.10.10 -c1 -s65130
  creates about 44 1514bytes transfers. About 19 of them need a second
  timer to complete.
Reported-by: Bin Liu <b-liu@ti.com>
Cc: stable@vger.kernel.org
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Signed-off-by: Felipe Balbi <balbi@ti.com>
											
										 
											2013-11-12 16:37:47 +01:00
										 |  |  | static bool musb_is_tx_fifo_empty(struct musb_hw_ep *hw_ep) | 
					
						
							|  |  |  | { | 
					
						
							|  |  |  | 	u8		epnum = hw_ep->epnum; | 
					
						
							|  |  |  | 	struct musb	*musb = hw_ep->musb; | 
					
						
							|  |  |  | 	void __iomem	*epio = musb->endpoints[epnum].regs; | 
					
						
							|  |  |  | 	u16		csr; | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2014-05-26 14:52:39 +02:00
										 |  |  | 	musb_ep_select(musb->mregs, hw_ep->epnum); | 
					
						
							| 
									
										
											  
											
												usb: musb: musb_cppi41: handle pre-mature TX complete interrupt
The TX-complete interrupt of the CPPI41 on AM335x fires too early.
Adding a loop and counting how long it takes until the
MUSB_TXCSR_TXPKTRDY bit is cleared I see
FS:
|musb-hdrc musb-hdrc.0.auto: configure ep1/80 packet_sz=64, mode=0, dma_addr=0xadc54002, len=1514 is_tx=1
|cppi41_dma_callback() 74 loops
|musb-hdrc musb-hdrc.0.auto: configure ep1/80 packet_sz=64, mode=0, dma_addr=0xadcd8802, len=1514 is_tx=1
|cppi41_dma_callback() 66 loops
|musb-hdrc musb-hdrc.0.auto: configure ep1/80 packet_sz=64, mode=0, dma_addr=0xadcd8002, len=1514 is_tx=1
|cppi41_dma_callback() 136 loops
|musb-hdrc musb-hdrc.0.auto: configure ep1/80 packet_sz=64, mode=0, dma_addr=0xadf55802, len=1514 is_tx=1
|cppi41_dma_callback() 136 loops
avg: 110 - 150us
HS:
|musb-hdrc musb-hdrc.0.auto: configure ep1/80 packet_sz=512, mode=0, dma_addr=0xaca6f002, len=1514 is_tx=1
|cppi41_dma_callback() 0 loops
|musb-hdrc musb-hdrc.0.auto: configure ep1/80 packet_sz=512, mode=0, dma_addr=0xadd6f802, len=1514 is_tx=1
|cppi41_dma_callback() 2 loops
|musb-hdrc musb-hdrc.0.auto: configure ep1/80 packet_sz=512, mode=0, dma_addr=0xadd6f002, len=1514 is_tx=1
|cppi41_dma_callback() 13 loops
avg: 2us
for the same test case. One loop means a udelay(1). The delay seems to
depend on the packet size. On HS the bit is always cleared for small
packet sizes while on FS it is never the case, it mostly around 110us.
This testing has been performed with g_ether (musb as device) and using BULK
transfers.
INTR transfers are way more fun: during init the gadget sends a INT
packet to the host and cppi41 says "transfer done" shortly after. The
MUSB_TXCSR_TXPKTRDY bit is set even seconds later. The reason is that the host
did not try to receive it, it does so after the interface (on host side) has
been configured. Until this happens, that packet remains in musb's FIFO.
To fix this, two things are done:
- No DMA transfers for INT based endpoints. These transfer are usually
  very small and rare so it is likely better to skip the DMA engine and
  stuff the four bytes directly into the FIFO
- on HS we poll up to 25us and hope that bit goes away. If not we setup
  a hrtimer to poll for it. The 140us delay is a rule of thumb. In FS
  the command
  | ping 10.10.10.10 -c1 -s65130
  creates about 44 1514bytes transfers. About 19 of them need a second
  timer to complete.
Reported-by: Bin Liu <b-liu@ti.com>
Cc: stable@vger.kernel.org
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Signed-off-by: Felipe Balbi <balbi@ti.com>
											
										 
											2013-11-12 16:37:47 +01:00
										 |  |  | 	csr = musb_readw(epio, MUSB_TXCSR); | 
					
						
							|  |  |  | 	if (csr & MUSB_TXCSR_TXPKTRDY) | 
					
						
							|  |  |  | 		return false; | 
					
						
							|  |  |  | 	return true; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2013-11-12 16:37:46 +01:00
										 |  |  | static void cppi41_dma_callback(void *private_data); | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
											  
											
												usb: musb: musb_cppi41: handle pre-mature TX complete interrupt
The TX-complete interrupt of the CPPI41 on AM335x fires too early.
Adding a loop and counting how long it takes until the
MUSB_TXCSR_TXPKTRDY bit is cleared I see
FS:
|musb-hdrc musb-hdrc.0.auto: configure ep1/80 packet_sz=64, mode=0, dma_addr=0xadc54002, len=1514 is_tx=1
|cppi41_dma_callback() 74 loops
|musb-hdrc musb-hdrc.0.auto: configure ep1/80 packet_sz=64, mode=0, dma_addr=0xadcd8802, len=1514 is_tx=1
|cppi41_dma_callback() 66 loops
|musb-hdrc musb-hdrc.0.auto: configure ep1/80 packet_sz=64, mode=0, dma_addr=0xadcd8002, len=1514 is_tx=1
|cppi41_dma_callback() 136 loops
|musb-hdrc musb-hdrc.0.auto: configure ep1/80 packet_sz=64, mode=0, dma_addr=0xadf55802, len=1514 is_tx=1
|cppi41_dma_callback() 136 loops
avg: 110 - 150us
HS:
|musb-hdrc musb-hdrc.0.auto: configure ep1/80 packet_sz=512, mode=0, dma_addr=0xaca6f002, len=1514 is_tx=1
|cppi41_dma_callback() 0 loops
|musb-hdrc musb-hdrc.0.auto: configure ep1/80 packet_sz=512, mode=0, dma_addr=0xadd6f802, len=1514 is_tx=1
|cppi41_dma_callback() 2 loops
|musb-hdrc musb-hdrc.0.auto: configure ep1/80 packet_sz=512, mode=0, dma_addr=0xadd6f002, len=1514 is_tx=1
|cppi41_dma_callback() 13 loops
avg: 2us
for the same test case. One loop means a udelay(1). The delay seems to
depend on the packet size. On HS the bit is always cleared for small
packet sizes while on FS it is never the case; it is mostly around 110us.
This testing has been performed with g_ether (musb as device) and using BULK
transfers.
INTR transfers are way more fun: during init the gadget sends an INT
packet to the host and cppi41 says "transfer done" shortly after. The
MUSB_TXCSR_TXPKTRDY bit is set even seconds later. The reason is that the host
did not try to receive it, it does so after the interface (on host side) has
been configured. Until this happens, that packet remains in musb's FIFO.
To fix this, two things are done:
- No DMA transfers for INT based endpoints. These transfers are usually
  very small and rare so it is likely better to skip the DMA engine and
  stuff the four bytes directly into the FIFO
- on HS we poll up to 25us and hope that bit goes away. If not we setup
  a hrtimer to poll for it. The 140us delay is a rule of thumb. In FS
  the command
  | ping 10.10.10.10 -c1 -s65130
  creates about 44 1514bytes transfers. About 19 of them need a second
  timer to complete.
Reported-by: Bin Liu <b-liu@ti.com>
Cc: stable@vger.kernel.org
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Signed-off-by: Felipe Balbi <balbi@ti.com>
											
										 
											2013-11-12 16:37:47 +01:00
										 |  |  | static void cppi41_trans_done(struct cppi41_dma_channel *cppi41_channel) | 
					
						
							| 
									
										
										
										
											2013-06-20 12:13:04 +02:00
										 |  |  | { | 
					
						
							|  |  |  | 	struct musb_hw_ep *hw_ep = cppi41_channel->hw_ep; | 
					
						
							|  |  |  | 	struct musb *musb = hw_ep->musb; | 
					
						
							| 
									
										
										
										
											2014-08-12 14:18:43 -05:00
										 |  |  | 	void __iomem *epio = hw_ep->regs; | 
					
						
							|  |  |  | 	u16 csr; | 
					
						
							| 
									
										
										
										
											2013-06-20 12:13:04 +02:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2014-02-27 10:44:41 +05:30
										 |  |  | 	if (!cppi41_channel->prog_len || | 
					
						
							|  |  |  | 	    (cppi41_channel->channel.status == MUSB_DMA_STATUS_FREE)) { | 
					
						
							| 
									
										
										
										
											2013-06-20 12:13:04 +02:00
										 |  |  | 
 | 
					
						
							|  |  |  | 		/* done, complete */ | 
					
						
							|  |  |  | 		cppi41_channel->channel.actual_len = | 
					
						
							|  |  |  | 			cppi41_channel->transferred; | 
					
						
							|  |  |  | 		cppi41_channel->channel.status = MUSB_DMA_STATUS_FREE; | 
					
						
							| 
									
										
										
										
											2014-05-26 14:52:38 +02:00
										 |  |  | 		cppi41_channel->channel.rx_packet_done = true; | 
					
						
							| 
									
										
										
										
											2014-08-12 14:18:43 -05:00
										 |  |  | 
 | 
					
						
							|  |  |  | 		/*
 | 
					
						
							|  |  |  | 		 * transmit ZLP using PIO mode for transfers which size is | 
					
						
							|  |  |  | 		 * multiple of EP packet size. | 
					
						
							|  |  |  | 		 */ | 
					
						
							|  |  |  | 		if (cppi41_channel->tx_zlp && (cppi41_channel->transferred % | 
					
						
							|  |  |  | 					cppi41_channel->packet_sz) == 0) { | 
					
						
							|  |  |  | 			musb_ep_select(musb->mregs, hw_ep->epnum); | 
					
						
							|  |  |  | 			csr = MUSB_TXCSR_MODE | MUSB_TXCSR_TXPKTRDY; | 
					
						
							|  |  |  | 			musb_writew(epio, MUSB_TXCSR, csr); | 
					
						
							|  |  |  | 		} | 
					
						
							| 
									
										
										
										
											2013-06-20 12:13:04 +02:00
										 |  |  | 		musb_dma_completion(musb, hw_ep->epnum, cppi41_channel->is_tx); | 
					
						
							|  |  |  | 	} else { | 
					
						
							|  |  |  | 		/* next iteration, reload */ | 
					
						
							|  |  |  | 		struct dma_chan *dc = cppi41_channel->dc; | 
					
						
							|  |  |  | 		struct dma_async_tx_descriptor *dma_desc; | 
					
						
							|  |  |  | 		enum dma_transfer_direction direction; | 
					
						
							|  |  |  | 		u32 remain_bytes; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 		cppi41_channel->buf_addr += cppi41_channel->packet_sz; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 		remain_bytes = cppi41_channel->total_len; | 
					
						
							|  |  |  | 		remain_bytes -= cppi41_channel->transferred; | 
					
						
							|  |  |  | 		remain_bytes = min(remain_bytes, cppi41_channel->packet_sz); | 
					
						
							|  |  |  | 		cppi41_channel->prog_len = remain_bytes; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 		direction = cppi41_channel->is_tx ? DMA_MEM_TO_DEV | 
					
						
							|  |  |  | 			: DMA_DEV_TO_MEM; | 
					
						
							|  |  |  | 		dma_desc = dmaengine_prep_slave_single(dc, | 
					
						
							|  |  |  | 				cppi41_channel->buf_addr, | 
					
						
							|  |  |  | 				remain_bytes, | 
					
						
							|  |  |  | 				direction, | 
					
						
							|  |  |  | 				DMA_PREP_INTERRUPT | DMA_CTRL_ACK); | 
					
						
							| 
									
										
										
										
											2013-11-12 16:37:46 +01:00
										 |  |  | 		if (WARN_ON(!dma_desc)) | 
					
						
							| 
									
										
										
										
											2013-06-20 12:13:04 +02:00
										 |  |  | 			return; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 		dma_desc->callback = cppi41_dma_callback; | 
					
						
							| 
									
										
											  
											
												usb: musb: musb_cppi41: handle pre-mature TX complete interrupt
The TX-complete interrupt of the CPPI41 on AM335x fires too early.
Adding a loop and counting how long it takes until the
MUSB_TXCSR_TXPKTRDY bit is cleared I see
FS:
|musb-hdrc musb-hdrc.0.auto: configure ep1/80 packet_sz=64, mode=0, dma_addr=0xadc54002, len=1514 is_tx=1
|cppi41_dma_callback() 74 loops
|musb-hdrc musb-hdrc.0.auto: configure ep1/80 packet_sz=64, mode=0, dma_addr=0xadcd8802, len=1514 is_tx=1
|cppi41_dma_callback() 66 loops
|musb-hdrc musb-hdrc.0.auto: configure ep1/80 packet_sz=64, mode=0, dma_addr=0xadcd8002, len=1514 is_tx=1
|cppi41_dma_callback() 136 loops
|musb-hdrc musb-hdrc.0.auto: configure ep1/80 packet_sz=64, mode=0, dma_addr=0xadf55802, len=1514 is_tx=1
|cppi41_dma_callback() 136 loops
avg: 110 - 150us
HS:
|musb-hdrc musb-hdrc.0.auto: configure ep1/80 packet_sz=512, mode=0, dma_addr=0xaca6f002, len=1514 is_tx=1
|cppi41_dma_callback() 0 loops
|musb-hdrc musb-hdrc.0.auto: configure ep1/80 packet_sz=512, mode=0, dma_addr=0xadd6f802, len=1514 is_tx=1
|cppi41_dma_callback() 2 loops
|musb-hdrc musb-hdrc.0.auto: configure ep1/80 packet_sz=512, mode=0, dma_addr=0xadd6f002, len=1514 is_tx=1
|cppi41_dma_callback() 13 loops
avg: 2us
for the same test case. One loop means a udelay(1). The delay seems to
depend on the packet size. On HS the bit is always cleared for small
packet sizes while on FS it is never the case, it mostly around 110us.
This testing has been performed with g_ether (musb as device) and using BULK
transfers.
INTR transfers are way more fun: during init the gadget sends a INT
packet to the host and cppi41 says "transfer done" shortly after. The
MUSB_TXCSR_TXPKTRDY bit is set even seconds later. The reason is that the host
did not try to receive it, it does so after the interface (on host side) has
been configured. Until this happens, that packet remains in musb's FIFO.
To fix this, two things are done:
- No DMA transfers for INT based endpoints. These transfer are usually
  very small and rare so it is likely better to skip the DMA engine and
  stuff the four bytes directly into the FIFO
- on HS we poll up to 25us and hope that bit goes away. If not we setup
  a hrtimer to poll for it. The 140us delay is a rule of thumb. In FS
  the command
  | ping 10.10.10.10 -c1 -s65130
  creates about 44 1514bytes transfers. About 19 of them need a second
  timer to complete.
Reported-by: Bin Liu <b-liu@ti.com>
Cc: stable@vger.kernel.org
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Signed-off-by: Felipe Balbi <balbi@ti.com>
											
										 
											2013-11-12 16:37:47 +01:00
										 |  |  | 		dma_desc->callback_param = &cppi41_channel->channel; | 
					
						
							| 
									
										
										
										
											2013-06-20 12:13:04 +02:00
										 |  |  | 		cppi41_channel->cookie = dma_desc->tx_submit(dma_desc); | 
					
						
							|  |  |  | 		dma_async_issue_pending(dc); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 		if (!cppi41_channel->is_tx) { | 
					
						
							| 
									
										
										
										
											2014-05-26 14:52:39 +02:00
										 |  |  | 			musb_ep_select(musb->mregs, hw_ep->epnum); | 
					
						
							| 
									
										
										
										
											2013-06-20 12:13:04 +02:00
										 |  |  | 			csr = musb_readw(epio, MUSB_RXCSR); | 
					
						
							|  |  |  | 			csr |= MUSB_RXCSR_H_REQPKT; | 
					
						
							|  |  |  | 			musb_writew(epio, MUSB_RXCSR, csr); | 
					
						
							|  |  |  | 		} | 
					
						
							|  |  |  | 	} | 
					
						
							| 
									
										
										
										
											2013-11-12 16:37:46 +01:00
										 |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
											  
											
												usb: musb: musb_cppi41: handle pre-mature TX complete interrupt
The TX-complete interrupt of the CPPI41 on AM335x fires too early.
Adding a loop and counting how long it takes until the
MUSB_TXCSR_TXPKTRDY bit is cleared I see
FS:
|musb-hdrc musb-hdrc.0.auto: configure ep1/80 packet_sz=64, mode=0, dma_addr=0xadc54002, len=1514 is_tx=1
|cppi41_dma_callback() 74 loops
|musb-hdrc musb-hdrc.0.auto: configure ep1/80 packet_sz=64, mode=0, dma_addr=0xadcd8802, len=1514 is_tx=1
|cppi41_dma_callback() 66 loops
|musb-hdrc musb-hdrc.0.auto: configure ep1/80 packet_sz=64, mode=0, dma_addr=0xadcd8002, len=1514 is_tx=1
|cppi41_dma_callback() 136 loops
|musb-hdrc musb-hdrc.0.auto: configure ep1/80 packet_sz=64, mode=0, dma_addr=0xadf55802, len=1514 is_tx=1
|cppi41_dma_callback() 136 loops
avg: 110 - 150us
HS:
|musb-hdrc musb-hdrc.0.auto: configure ep1/80 packet_sz=512, mode=0, dma_addr=0xaca6f002, len=1514 is_tx=1
|cppi41_dma_callback() 0 loops
|musb-hdrc musb-hdrc.0.auto: configure ep1/80 packet_sz=512, mode=0, dma_addr=0xadd6f802, len=1514 is_tx=1
|cppi41_dma_callback() 2 loops
|musb-hdrc musb-hdrc.0.auto: configure ep1/80 packet_sz=512, mode=0, dma_addr=0xadd6f002, len=1514 is_tx=1
|cppi41_dma_callback() 13 loops
avg: 2us
for the same test case. One loop means a udelay(1). The delay seems to
depend on the packet size. On HS the bit is always cleared for small
packet sizes while on FS it is never the case; it is mostly around 110us.
This testing has been performed with g_ether (musb as device) and using BULK
transfers.
INTR transfers are way more fun: during init the gadget sends an INT
packet to the host and cppi41 says "transfer done" shortly after. The
MUSB_TXCSR_TXPKTRDY bit is set even seconds later. The reason is that the host
did not try to receive it, it does so after the interface (on host side) has
been configured. Until this happens, that packet remains in musb's FIFO.
To fix this, two things are done:
- No DMA transfers for INT based endpoints. These transfers are usually
  very small and rare so it is likely better to skip the DMA engine and
  stuff the four bytes directly into the FIFO
- on HS we poll up to 25us and hope that the bit goes away. If not we set up
  a hrtimer to poll for it. The 140us delay is a rule of thumb. In FS
  the command
  | ping 10.10.10.10 -c1 -s65130
  creates about 44 1514bytes transfers. About 19 of them need a second
  timer to complete.
Reported-by: Bin Liu <b-liu@ti.com>
Cc: stable@vger.kernel.org
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Signed-off-by: Felipe Balbi <balbi@ti.com>
											
										 
											2013-11-12 16:37:47 +01:00
										 |  |  | static enum hrtimer_restart cppi41_recheck_tx_req(struct hrtimer *timer) | 
					
						
							|  |  |  | { | 
					
						
							|  |  |  | 	struct cppi41_dma_controller *controller; | 
					
						
							|  |  |  | 	struct cppi41_dma_channel *cppi41_channel, *n; | 
					
						
							|  |  |  | 	struct musb *musb; | 
					
						
							|  |  |  | 	unsigned long flags; | 
					
						
							|  |  |  | 	enum hrtimer_restart ret = HRTIMER_NORESTART; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	controller = container_of(timer, struct cppi41_dma_controller, | 
					
						
							|  |  |  | 			early_tx); | 
					
						
							|  |  |  | 	musb = controller->musb; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	spin_lock_irqsave(&musb->lock, flags); | 
					
						
							|  |  |  | 	list_for_each_entry_safe(cppi41_channel, n, &controller->early_tx_list, | 
					
						
							|  |  |  | 			tx_check) { | 
					
						
							|  |  |  | 		bool empty; | 
					
						
							|  |  |  | 		struct musb_hw_ep *hw_ep = cppi41_channel->hw_ep; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 		empty = musb_is_tx_fifo_empty(hw_ep); | 
					
						
							|  |  |  | 		if (empty) { | 
					
						
							|  |  |  | 			list_del_init(&cppi41_channel->tx_check); | 
					
						
							|  |  |  | 			cppi41_trans_done(cppi41_channel); | 
					
						
							|  |  |  | 		} | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2014-10-02 17:32:16 +02:00
										 |  |  | 	if (!list_empty(&controller->early_tx_list) && | 
					
						
							|  |  |  | 	    !hrtimer_is_queued(&controller->early_tx)) { | 
					
						
							| 
									
										
											  
											
												usb: musb: musb_cppi41: handle pre-mature TX complete interrupt
The TX-complete interrupt of the CPPI41 on AM335x fires too early.
Adding a loop and counting how long it takes until the
MUSB_TXCSR_TXPKTRDY bit is cleared I see
FS:
|musb-hdrc musb-hdrc.0.auto: configure ep1/80 packet_sz=64, mode=0, dma_addr=0xadc54002, len=1514 is_tx=1
|cppi41_dma_callback() 74 loops
|musb-hdrc musb-hdrc.0.auto: configure ep1/80 packet_sz=64, mode=0, dma_addr=0xadcd8802, len=1514 is_tx=1
|cppi41_dma_callback() 66 loops
|musb-hdrc musb-hdrc.0.auto: configure ep1/80 packet_sz=64, mode=0, dma_addr=0xadcd8002, len=1514 is_tx=1
|cppi41_dma_callback() 136 loops
|musb-hdrc musb-hdrc.0.auto: configure ep1/80 packet_sz=64, mode=0, dma_addr=0xadf55802, len=1514 is_tx=1
|cppi41_dma_callback() 136 loops
avg: 110 - 150us
HS:
|musb-hdrc musb-hdrc.0.auto: configure ep1/80 packet_sz=512, mode=0, dma_addr=0xaca6f002, len=1514 is_tx=1
|cppi41_dma_callback() 0 loops
|musb-hdrc musb-hdrc.0.auto: configure ep1/80 packet_sz=512, mode=0, dma_addr=0xadd6f802, len=1514 is_tx=1
|cppi41_dma_callback() 2 loops
|musb-hdrc musb-hdrc.0.auto: configure ep1/80 packet_sz=512, mode=0, dma_addr=0xadd6f002, len=1514 is_tx=1
|cppi41_dma_callback() 13 loops
avg: 2us
for the same test case. One loop means a udelay(1). The delay seems to
depend on the packet size. On HS the bit is always cleared for small
packet sizes while on FS it is never the case, it mostly around 110us.
This testing has been performed with g_ether (musb as device) and using BULK
transfers.
INTR transfers are way more fun: during init the gadget sends a INT
packet to the host and cppi41 says "transfer done" shortly after. The
MUSB_TXCSR_TXPKTRDY bit is set even seconds later. The reason is that the host
did not try to receive it, it does so after the interface (on host side) has
been configured. Until this happens, that packet remains in musb's FIFO.
To fix this, two things are done:
- No DMA transfers for INT based endpoints. These transfer are usually
  very small and rare so it is likely better to skip the DMA engine and
  stuff the four bytes directly into the FIFO
- on HS we poll up to 25us and hope that bit goes away. If not we setup
  a hrtimer to poll for it. The 140us delay is a rule of thumb. In FS
  the command
  | ping 10.10.10.10 -c1 -s65130
  creates about 44 1514bytes transfers. About 19 of them need a second
  timer to complete.
Reported-by: Bin Liu <b-liu@ti.com>
Cc: stable@vger.kernel.org
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Signed-off-by: Felipe Balbi <balbi@ti.com>
											
										 
											2013-11-12 16:37:47 +01:00
										 |  |  | 		ret = HRTIMER_RESTART; | 
					
						
							|  |  |  | 		hrtimer_forward_now(&controller->early_tx, | 
					
						
							| 
									
										
										
										
											2014-09-03 17:21:24 +02:00
										 |  |  | 				ktime_set(0, 20 * NSEC_PER_USEC)); | 
					
						
							| 
									
										
											  
											
												usb: musb: musb_cppi41: handle pre-mature TX complete interrupt
The TX-complete interrupt of the CPPI41 on AM335x fires too early.
Adding a loop and counting how long it takes until the
MUSB_TXCSR_TXPKTRDY bit is cleared I see
FS:
|musb-hdrc musb-hdrc.0.auto: configure ep1/80 packet_sz=64, mode=0, dma_addr=0xadc54002, len=1514 is_tx=1
|cppi41_dma_callback() 74 loops
|musb-hdrc musb-hdrc.0.auto: configure ep1/80 packet_sz=64, mode=0, dma_addr=0xadcd8802, len=1514 is_tx=1
|cppi41_dma_callback() 66 loops
|musb-hdrc musb-hdrc.0.auto: configure ep1/80 packet_sz=64, mode=0, dma_addr=0xadcd8002, len=1514 is_tx=1
|cppi41_dma_callback() 136 loops
|musb-hdrc musb-hdrc.0.auto: configure ep1/80 packet_sz=64, mode=0, dma_addr=0xadf55802, len=1514 is_tx=1
|cppi41_dma_callback() 136 loops
avg: 110 - 150us
HS:
|musb-hdrc musb-hdrc.0.auto: configure ep1/80 packet_sz=512, mode=0, dma_addr=0xaca6f002, len=1514 is_tx=1
|cppi41_dma_callback() 0 loops
|musb-hdrc musb-hdrc.0.auto: configure ep1/80 packet_sz=512, mode=0, dma_addr=0xadd6f802, len=1514 is_tx=1
|cppi41_dma_callback() 2 loops
|musb-hdrc musb-hdrc.0.auto: configure ep1/80 packet_sz=512, mode=0, dma_addr=0xadd6f002, len=1514 is_tx=1
|cppi41_dma_callback() 13 loops
avg: 2us
for the same test case. One loop means a udelay(1). The delay seems to
depend on the packet size. On HS the bit is always cleared for small
packet sizes while on FS it is never the case, it mostly around 110us.
This testing has been performed with g_ether (musb as device) and using BULK
transfers.
INTR transfers are way more fun: during init the gadget sends a INT
packet to the host and cppi41 says "transfer done" shortly after. The
MUSB_TXCSR_TXPKTRDY bit is set even seconds later. The reason is that the host
did not try to receive it, it does so after the interface (on host side) has
been configured. Until this happens, that packet remains in musb's FIFO.
To fix this, two things are done:
- No DMA transfers for INT based endpoints. These transfer are usually
  very small and rare so it is likely better to skip the DMA engine and
  stuff the four bytes directly into the FIFO
- on HS we poll up to 25us and hope that bit goes away. If not we setup
  a hrtimer to poll for it. The 140us delay is a rule of thumb. In FS
  the command
  | ping 10.10.10.10 -c1 -s65130
  creates about 44 1514bytes transfers. About 19 of them need a second
  timer to complete.
Reported-by: Bin Liu <b-liu@ti.com>
Cc: stable@vger.kernel.org
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Signed-off-by: Felipe Balbi <balbi@ti.com>
											
										 
											2013-11-12 16:37:47 +01:00
										 |  |  | 	} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	spin_unlock_irqrestore(&musb->lock, flags); | 
					
						
							|  |  |  | 	return ret; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2013-11-12 16:37:46 +01:00
										 |  |  | static void cppi41_dma_callback(void *private_data) | 
					
						
							|  |  |  | { | 
					
						
							|  |  |  | 	struct dma_channel *channel = private_data; | 
					
						
							|  |  |  | 	struct cppi41_dma_channel *cppi41_channel = channel->private_data; | 
					
						
							|  |  |  | 	struct musb_hw_ep *hw_ep = cppi41_channel->hw_ep; | 
					
						
							|  |  |  | 	struct musb *musb = hw_ep->musb; | 
					
						
							|  |  |  | 	unsigned long flags; | 
					
						
							|  |  |  | 	struct dma_tx_state txstate; | 
					
						
							|  |  |  | 	u32 transferred; | 
					
						
							| 
									
										
											  
											
												usb: musb: musb_cppi41: handle pre-mature TX complete interrupt
The TX-complete interrupt of the CPPI41 on AM335x fires too early.
Adding a loop and counting how long it takes until the
MUSB_TXCSR_TXPKTRDY bit is cleared I see
FS:
|musb-hdrc musb-hdrc.0.auto: configure ep1/80 packet_sz=64, mode=0, dma_addr=0xadc54002, len=1514 is_tx=1
|cppi41_dma_callback() 74 loops
|musb-hdrc musb-hdrc.0.auto: configure ep1/80 packet_sz=64, mode=0, dma_addr=0xadcd8802, len=1514 is_tx=1
|cppi41_dma_callback() 66 loops
|musb-hdrc musb-hdrc.0.auto: configure ep1/80 packet_sz=64, mode=0, dma_addr=0xadcd8002, len=1514 is_tx=1
|cppi41_dma_callback() 136 loops
|musb-hdrc musb-hdrc.0.auto: configure ep1/80 packet_sz=64, mode=0, dma_addr=0xadf55802, len=1514 is_tx=1
|cppi41_dma_callback() 136 loops
avg: 110 - 150us
HS:
|musb-hdrc musb-hdrc.0.auto: configure ep1/80 packet_sz=512, mode=0, dma_addr=0xaca6f002, len=1514 is_tx=1
|cppi41_dma_callback() 0 loops
|musb-hdrc musb-hdrc.0.auto: configure ep1/80 packet_sz=512, mode=0, dma_addr=0xadd6f802, len=1514 is_tx=1
|cppi41_dma_callback() 2 loops
|musb-hdrc musb-hdrc.0.auto: configure ep1/80 packet_sz=512, mode=0, dma_addr=0xadd6f002, len=1514 is_tx=1
|cppi41_dma_callback() 13 loops
avg: 2us
for the same test case. One loop means a udelay(1). The delay seems to
depend on the packet size. On HS the bit is always cleared for small
packet sizes while on FS it is never the case, it mostly around 110us.
This testing has been performed with g_ether (musb as device) and using BULK
transfers.
INTR transfers are way more fun: during init the gadget sends a INT
packet to the host and cppi41 says "transfer done" shortly after. The
MUSB_TXCSR_TXPKTRDY bit is set even seconds later. The reason is that the host
did not try to receive it, it does so after the interface (on host side) has
been configured. Until this happens, that packet remains in musb's FIFO.
To fix this, two things are done:
- No DMA transfers for INT based endpoints. These transfer are usually
  very small and rare so it is likely better to skip the DMA engine and
  stuff the four bytes directly into the FIFO
- on HS we poll up to 25us and hope that bit goes away. If not we setup
  a hrtimer to poll for it. The 140us delay is a rule of thumb. In FS
  the command
  | ping 10.10.10.10 -c1 -s65130
  creates about 44 1514bytes transfers. About 19 of them need a second
  timer to complete.
Reported-by: Bin Liu <b-liu@ti.com>
Cc: stable@vger.kernel.org
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Signed-off-by: Felipe Balbi <balbi@ti.com>
											
										 
											2013-11-12 16:37:47 +01:00
										 |  |  | 	bool empty; | 
					
						
							| 
									
										
										
										
											2013-11-12 16:37:46 +01:00
										 |  |  | 
 | 
					
						
							|  |  |  | 	spin_lock_irqsave(&musb->lock, flags); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	dmaengine_tx_status(cppi41_channel->dc, cppi41_channel->cookie, | 
					
						
							|  |  |  | 			&txstate); | 
					
						
							|  |  |  | 	transferred = cppi41_channel->prog_len - txstate.residue; | 
					
						
							|  |  |  | 	cppi41_channel->transferred += transferred; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	dev_dbg(musb->controller, "DMA transfer done on hw_ep=%d bytes=%d/%d\n", | 
					
						
							|  |  |  | 		hw_ep->epnum, cppi41_channel->transferred, | 
					
						
							|  |  |  | 		cppi41_channel->total_len); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	update_rx_toggle(cppi41_channel); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	if (cppi41_channel->transferred == cppi41_channel->total_len || | 
					
						
							|  |  |  | 			transferred < cppi41_channel->packet_sz) | 
					
						
							|  |  |  | 		cppi41_channel->prog_len = 0; | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
											  
											
												usb: musb: musb_cppi41: handle pre-mature TX complete interrupt
The TX-complete interrupt of the CPPI41 on AM335x fires too early.
Adding a loop and counting how long it takes until the
MUSB_TXCSR_TXPKTRDY bit is cleared I see
FS:
|musb-hdrc musb-hdrc.0.auto: configure ep1/80 packet_sz=64, mode=0, dma_addr=0xadc54002, len=1514 is_tx=1
|cppi41_dma_callback() 74 loops
|musb-hdrc musb-hdrc.0.auto: configure ep1/80 packet_sz=64, mode=0, dma_addr=0xadcd8802, len=1514 is_tx=1
|cppi41_dma_callback() 66 loops
|musb-hdrc musb-hdrc.0.auto: configure ep1/80 packet_sz=64, mode=0, dma_addr=0xadcd8002, len=1514 is_tx=1
|cppi41_dma_callback() 136 loops
|musb-hdrc musb-hdrc.0.auto: configure ep1/80 packet_sz=64, mode=0, dma_addr=0xadf55802, len=1514 is_tx=1
|cppi41_dma_callback() 136 loops
avg: 110 - 150us
HS:
|musb-hdrc musb-hdrc.0.auto: configure ep1/80 packet_sz=512, mode=0, dma_addr=0xaca6f002, len=1514 is_tx=1
|cppi41_dma_callback() 0 loops
|musb-hdrc musb-hdrc.0.auto: configure ep1/80 packet_sz=512, mode=0, dma_addr=0xadd6f802, len=1514 is_tx=1
|cppi41_dma_callback() 2 loops
|musb-hdrc musb-hdrc.0.auto: configure ep1/80 packet_sz=512, mode=0, dma_addr=0xadd6f002, len=1514 is_tx=1
|cppi41_dma_callback() 13 loops
avg: 2us
for the same test case. One loop means a udelay(1). The delay seems to
depend on the packet size. On HS the bit is always cleared for small
packet sizes while on FS it is never the case, it mostly around 110us.
This testing has been performed with g_ether (musb as device) and using BULK
transfers.
INTR transfers are way more fun: during init the gadget sends a INT
packet to the host and cppi41 says "transfer done" shortly after. The
MUSB_TXCSR_TXPKTRDY bit is set even seconds later. The reason is that the host
did not try to receive it, it does so after the interface (on host side) has
been configured. Until this happens, that packet remains in musb's FIFO.
To fix this, two things are done:
- No DMA transfers for INT based endpoints. These transfer are usually
  very small and rare so it is likely better to skip the DMA engine and
  stuff the four bytes directly into the FIFO
- on HS we poll up to 25us and hope that bit goes away. If not we setup
  a hrtimer to poll for it. The 140us delay is a rule of thumb. In FS
  the command
  | ping 10.10.10.10 -c1 -s65130
  creates about 44 1514bytes transfers. About 19 of them need a second
  timer to complete.
Reported-by: Bin Liu <b-liu@ti.com>
Cc: stable@vger.kernel.org
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Signed-off-by: Felipe Balbi <balbi@ti.com>
											
										 
											2013-11-12 16:37:47 +01:00
										 |  |  | 	empty = musb_is_tx_fifo_empty(hw_ep); | 
					
						
							|  |  |  | 	if (empty) { | 
					
						
							|  |  |  | 		cppi41_trans_done(cppi41_channel); | 
					
						
							|  |  |  | 	} else { | 
					
						
							|  |  |  | 		struct cppi41_dma_controller *controller; | 
					
						
							| 
									
										
										
										
											2014-11-13 18:28:47 +01:00
										 |  |  | 		int is_hs = 0; | 
					
						
							| 
									
										
											  
											
												usb: musb: musb_cppi41: handle pre-mature TX complete interrupt
The TX-complete interrupt of the CPPI41 on AM335x fires too early.
Adding a loop and counting how long it takes until the
MUSB_TXCSR_TXPKTRDY bit is cleared I see
FS:
|musb-hdrc musb-hdrc.0.auto: configure ep1/80 packet_sz=64, mode=0, dma_addr=0xadc54002, len=1514 is_tx=1
|cppi41_dma_callback() 74 loops
|musb-hdrc musb-hdrc.0.auto: configure ep1/80 packet_sz=64, mode=0, dma_addr=0xadcd8802, len=1514 is_tx=1
|cppi41_dma_callback() 66 loops
|musb-hdrc musb-hdrc.0.auto: configure ep1/80 packet_sz=64, mode=0, dma_addr=0xadcd8002, len=1514 is_tx=1
|cppi41_dma_callback() 136 loops
|musb-hdrc musb-hdrc.0.auto: configure ep1/80 packet_sz=64, mode=0, dma_addr=0xadf55802, len=1514 is_tx=1
|cppi41_dma_callback() 136 loops
avg: 110 - 150us
HS:
|musb-hdrc musb-hdrc.0.auto: configure ep1/80 packet_sz=512, mode=0, dma_addr=0xaca6f002, len=1514 is_tx=1
|cppi41_dma_callback() 0 loops
|musb-hdrc musb-hdrc.0.auto: configure ep1/80 packet_sz=512, mode=0, dma_addr=0xadd6f802, len=1514 is_tx=1
|cppi41_dma_callback() 2 loops
|musb-hdrc musb-hdrc.0.auto: configure ep1/80 packet_sz=512, mode=0, dma_addr=0xadd6f002, len=1514 is_tx=1
|cppi41_dma_callback() 13 loops
avg: 2us
for the same test case. One loop means a udelay(1). The delay seems to
depend on the packet size. On HS the bit is always cleared for small
packet sizes while on FS it is never the case, it mostly around 110us.
This testing has been performed with g_ether (musb as device) and using BULK
transfers.
INTR transfers are way more fun: during init the gadget sends a INT
packet to the host and cppi41 says "transfer done" shortly after. The
MUSB_TXCSR_TXPKTRDY bit is set even seconds later. The reason is that the host
did not try to receive it, it does so after the interface (on host side) has
been configured. Until this happens, that packet remains in musb's FIFO.
To fix this, two things are done:
- No DMA transfers for INT based endpoints. These transfer are usually
  very small and rare so it is likely better to skip the DMA engine and
  stuff the four bytes directly into the FIFO
- on HS we poll up to 25us and hope that bit goes away. If not we setup
  a hrtimer to poll for it. The 140us delay is a rule of thumb. In FS
  the command
  | ping 10.10.10.10 -c1 -s65130
  creates about 44 1514bytes transfers. About 19 of them need a second
  timer to complete.
Reported-by: Bin Liu <b-liu@ti.com>
Cc: stable@vger.kernel.org
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Signed-off-by: Felipe Balbi <balbi@ti.com>
											
										 
											2013-11-12 16:37:47 +01:00
										 |  |  | 		/*
 | 
					
						
							|  |  |  | 		 * On AM335x it has been observed that the TX interrupt fires | 
					
						
							|  |  |  | 		 * too early that means the TXFIFO is not yet empty but the DMA | 
					
						
							|  |  |  | 		 * engine says that it is done with the transfer. We don't | 
					
						
							|  |  |  | 		 * receive a FIFO empty interrupt so the only thing we can do is | 
					
						
							|  |  |  | 		 * to poll for the bit. On HS it usually takes 2us, on FS around | 
					
						
							|  |  |  | 		 * 110us - 150us depending on the transfer size. | 
					
						
							|  |  |  | 		 * We spin on HS (no longer than than 25us and setup a timer on | 
					
						
							|  |  |  | 		 * FS to check for the bit and complete the transfer. | 
					
						
							|  |  |  | 		 */ | 
					
						
							|  |  |  | 		controller = cppi41_channel->controller; | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2014-11-13 18:28:47 +01:00
										 |  |  | 		if (is_host_active(musb)) { | 
					
						
							|  |  |  | 			if (musb->port1_status & USB_PORT_STAT_HIGH_SPEED) | 
					
						
							|  |  |  | 				is_hs = 1; | 
					
						
							|  |  |  | 		} else { | 
					
						
							|  |  |  | 			if (musb->g.speed == USB_SPEED_HIGH) | 
					
						
							|  |  |  | 				is_hs = 1; | 
					
						
							|  |  |  | 		} | 
					
						
							|  |  |  | 		if (is_hs) { | 
					
						
							| 
									
										
											  
											
												usb: musb: musb_cppi41: handle pre-mature TX complete interrupt
The TX-complete interrupt of the CPPI41 on AM335x fires too early.
Adding a loop and counting how long it takes until the
MUSB_TXCSR_TXPKTRDY bit is cleared I see
FS:
|musb-hdrc musb-hdrc.0.auto: configure ep1/80 packet_sz=64, mode=0, dma_addr=0xadc54002, len=1514 is_tx=1
|cppi41_dma_callback() 74 loops
|musb-hdrc musb-hdrc.0.auto: configure ep1/80 packet_sz=64, mode=0, dma_addr=0xadcd8802, len=1514 is_tx=1
|cppi41_dma_callback() 66 loops
|musb-hdrc musb-hdrc.0.auto: configure ep1/80 packet_sz=64, mode=0, dma_addr=0xadcd8002, len=1514 is_tx=1
|cppi41_dma_callback() 136 loops
|musb-hdrc musb-hdrc.0.auto: configure ep1/80 packet_sz=64, mode=0, dma_addr=0xadf55802, len=1514 is_tx=1
|cppi41_dma_callback() 136 loops
avg: 110 - 150us
HS:
|musb-hdrc musb-hdrc.0.auto: configure ep1/80 packet_sz=512, mode=0, dma_addr=0xaca6f002, len=1514 is_tx=1
|cppi41_dma_callback() 0 loops
|musb-hdrc musb-hdrc.0.auto: configure ep1/80 packet_sz=512, mode=0, dma_addr=0xadd6f802, len=1514 is_tx=1
|cppi41_dma_callback() 2 loops
|musb-hdrc musb-hdrc.0.auto: configure ep1/80 packet_sz=512, mode=0, dma_addr=0xadd6f002, len=1514 is_tx=1
|cppi41_dma_callback() 13 loops
avg: 2us
for the same test case. One loop means a udelay(1). The delay seems to
depend on the packet size. On HS the bit is always cleared for small
packet sizes while on FS it is never the case, it mostly around 110us.
This testing has been performed with g_ether (musb as device) and using BULK
transfers.
INTR transfers are way more fun: during init the gadget sends a INT
packet to the host and cppi41 says "transfer done" shortly after. The
MUSB_TXCSR_TXPKTRDY bit is set even seconds later. The reason is that the host
did not try to receive it, it does so after the interface (on host side) has
been configured. Until this happens, that packet remains in musb's FIFO.
To fix this, two things are done:
- No DMA transfers for INT based endpoints. These transfer are usually
  very small and rare so it is likely better to skip the DMA engine and
  stuff the four bytes directly into the FIFO
- on HS we poll up to 25us and hope that bit goes away. If not we setup
  a hrtimer to poll for it. The 140us delay is a rule of thumb. In FS
  the command
  | ping 10.10.10.10 -c1 -s65130
  creates about 44 1514bytes transfers. About 19 of them need a second
  timer to complete.
Reported-by: Bin Liu <b-liu@ti.com>
Cc: stable@vger.kernel.org
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Signed-off-by: Felipe Balbi <balbi@ti.com>
											
										 
											2013-11-12 16:37:47 +01:00
										 |  |  | 			unsigned wait = 25; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 			do { | 
					
						
							|  |  |  | 				empty = musb_is_tx_fifo_empty(hw_ep); | 
					
						
							|  |  |  | 				if (empty) | 
					
						
							|  |  |  | 					break; | 
					
						
							|  |  |  | 				wait--; | 
					
						
							|  |  |  | 				if (!wait) | 
					
						
							|  |  |  | 					break; | 
					
						
							|  |  |  | 				udelay(1); | 
					
						
							|  |  |  | 			} while (1); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 			empty = musb_is_tx_fifo_empty(hw_ep); | 
					
						
							|  |  |  | 			if (empty) { | 
					
						
							|  |  |  | 				cppi41_trans_done(cppi41_channel); | 
					
						
							|  |  |  | 				goto out; | 
					
						
							|  |  |  | 			} | 
					
						
							|  |  |  | 		} | 
					
						
							|  |  |  | 		list_add_tail(&cppi41_channel->tx_check, | 
					
						
							|  |  |  | 				&controller->early_tx_list); | 
					
						
							| 
									
										
										
										
											2014-06-20 23:41:24 +02:00
										 |  |  | 		if (!hrtimer_is_queued(&controller->early_tx)) { | 
					
						
							| 
									
										
											  
											
												usb: musb: cppi41: fire hrtimer according to programmed channel length
The musb/cppi41 code installs a hrtimer to work around DMA completion
interrupts that have fired too early on AM335x hardware. This timer
is currently programmed to first fire 140 microseconds after the DMA
completion callback. According to the commit which introduced it
(a655f481d83, "usb: musb: musb_cppi41: handle pre-mature TX complete
interrupt"), that value is is considered a 'rule of thumb' that worked
well with the test case described in the commit log.
Test show, however, that for USB audio devices and much smaller packet
sizes, the timer has to fire earlier in order to correctly handle the audio
stream. The original test case had output transfer sizes of 1514 bytes, and
a delay of 140 microseconds. For audio devices with 24 bytes channel size, 3
microseconds seem to work well.
Hence, let's assume that the time it takes to clear the bit correlates with
the number of bytes transferred. The referenced commit log mentions such a
suspicion as well. Let the timer fire in cppi41_channel->total_len/10
microseconds to correctly handle both cases.
Also, shorten the interval in which the timer fires again in case of
a non-empty early_tx list.
With these changes in place, both FS and HS audio devices appear to work
well on AM335x hardware.
Signed-off-by: Daniel Mack <zonque@gmail.com>
Reported-by: Sebastian Reimers <sebastian.reimers@googlemail.com>
Signed-off-by: Felipe Balbi <balbi@ti.com>
											
										 
											2014-06-20 00:20:44 +02:00
										 |  |  | 			unsigned long usecs = cppi41_channel->total_len / 10; | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
											  
											
												usb: musb: musb_cppi41: handle pre-mature TX complete interrupt
The TX-complete interrupt of the CPPI41 on AM335x fires too early.
Adding a loop and counting how long it takes until the
MUSB_TXCSR_TXPKTRDY bit is cleared I see
FS:
|musb-hdrc musb-hdrc.0.auto: configure ep1/80 packet_sz=64, mode=0, dma_addr=0xadc54002, len=1514 is_tx=1
|cppi41_dma_callback() 74 loops
|musb-hdrc musb-hdrc.0.auto: configure ep1/80 packet_sz=64, mode=0, dma_addr=0xadcd8802, len=1514 is_tx=1
|cppi41_dma_callback() 66 loops
|musb-hdrc musb-hdrc.0.auto: configure ep1/80 packet_sz=64, mode=0, dma_addr=0xadcd8002, len=1514 is_tx=1
|cppi41_dma_callback() 136 loops
|musb-hdrc musb-hdrc.0.auto: configure ep1/80 packet_sz=64, mode=0, dma_addr=0xadf55802, len=1514 is_tx=1
|cppi41_dma_callback() 136 loops
avg: 110 - 150us
HS:
|musb-hdrc musb-hdrc.0.auto: configure ep1/80 packet_sz=512, mode=0, dma_addr=0xaca6f002, len=1514 is_tx=1
|cppi41_dma_callback() 0 loops
|musb-hdrc musb-hdrc.0.auto: configure ep1/80 packet_sz=512, mode=0, dma_addr=0xadd6f802, len=1514 is_tx=1
|cppi41_dma_callback() 2 loops
|musb-hdrc musb-hdrc.0.auto: configure ep1/80 packet_sz=512, mode=0, dma_addr=0xadd6f002, len=1514 is_tx=1
|cppi41_dma_callback() 13 loops
avg: 2us
for the same test case. One loop means a udelay(1). The delay seems to
depend on the packet size. On HS the bit is always cleared for small
packet sizes while on FS it is never the case, it mostly around 110us.
This testing has been performed with g_ether (musb as device) and using BULK
transfers.
INTR transfers are way more fun: during init the gadget sends a INT
packet to the host and cppi41 says "transfer done" shortly after. The
MUSB_TXCSR_TXPKTRDY bit is set even seconds later. The reason is that the host
did not try to receive it, it does so after the interface (on host side) has
been configured. Until this happens, that packet remains in musb's FIFO.
To fix this, two things are done:
- No DMA transfers for INT based endpoints. These transfer are usually
  very small and rare so it is likely better to skip the DMA engine and
  stuff the four bytes directly into the FIFO
- on HS we poll up to 25us and hope that bit goes away. If not we setup
  a hrtimer to poll for it. The 140us delay is a rule of thumb. In FS
  the command
  | ping 10.10.10.10 -c1 -s65130
  creates about 44 1514bytes transfers. About 19 of them need a second
  timer to complete.
Reported-by: Bin Liu <b-liu@ti.com>
Cc: stable@vger.kernel.org
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Signed-off-by: Felipe Balbi <balbi@ti.com>
											
										 
											2013-11-12 16:37:47 +01:00
										 |  |  | 			hrtimer_start_range_ns(&controller->early_tx, | 
					
						
							| 
									
										
											  
											
												usb: musb: cppi41: fire hrtimer according to programmed channel length
The musb/cppi41 code installs a hrtimer to work around DMA completion
interrupts that have fired too early on AM335x hardware. This timer
is currently programmed to first fire 140 microseconds after the DMA
completion callback. According to the commit which introduced it
(a655f481d83, "usb: musb: musb_cppi41: handle pre-mature TX complete
interrupt"), that value is is considered a 'rule of thumb' that worked
well with the test case described in the commit log.
Test show, however, that for USB audio devices and much smaller packet
sizes, the timer has to fire earlier in order to correctly handle the audio
stream. The original test case had output transfer sizes of 1514 bytes, and
a delay of 140 microseconds. For audio devices with 24 bytes channel size, 3
microseconds seem to work well.
Hence, let's assume that the time it takes to clear the bit correlates with
the number of bytes transferred. The referenced commit log mentions such a
suspicion as well. Let the timer fire in cppi41_channel->total_len/10
microseconds to correctly handle both cases.
Also, shorten the interval in which the timer fires again in case of
a non-empty early_tx list.
With these changes in place, both FS and HS audio devices appear to work
well on AM335x hardware.
Signed-off-by: Daniel Mack <zonque@gmail.com>
Reported-by: Sebastian Reimers <sebastian.reimers@googlemail.com>
Signed-off-by: Felipe Balbi <balbi@ti.com>
											
										 
											2014-06-20 00:20:44 +02:00
										 |  |  | 				ktime_set(0, usecs * NSEC_PER_USEC), | 
					
						
							| 
									
										
										
										
											2014-09-03 17:21:24 +02:00
										 |  |  | 				20 * NSEC_PER_USEC, | 
					
						
							| 
									
										
											  
											
												usb: musb: musb_cppi41: handle pre-mature TX complete interrupt
The TX-complete interrupt of the CPPI41 on AM335x fires too early.
Adding a loop and counting how long it takes until the
MUSB_TXCSR_TXPKTRDY bit is cleared I see
FS:
|musb-hdrc musb-hdrc.0.auto: configure ep1/80 packet_sz=64, mode=0, dma_addr=0xadc54002, len=1514 is_tx=1
|cppi41_dma_callback() 74 loops
|musb-hdrc musb-hdrc.0.auto: configure ep1/80 packet_sz=64, mode=0, dma_addr=0xadcd8802, len=1514 is_tx=1
|cppi41_dma_callback() 66 loops
|musb-hdrc musb-hdrc.0.auto: configure ep1/80 packet_sz=64, mode=0, dma_addr=0xadcd8002, len=1514 is_tx=1
|cppi41_dma_callback() 136 loops
|musb-hdrc musb-hdrc.0.auto: configure ep1/80 packet_sz=64, mode=0, dma_addr=0xadf55802, len=1514 is_tx=1
|cppi41_dma_callback() 136 loops
avg: 110 - 150us
HS:
|musb-hdrc musb-hdrc.0.auto: configure ep1/80 packet_sz=512, mode=0, dma_addr=0xaca6f002, len=1514 is_tx=1
|cppi41_dma_callback() 0 loops
|musb-hdrc musb-hdrc.0.auto: configure ep1/80 packet_sz=512, mode=0, dma_addr=0xadd6f802, len=1514 is_tx=1
|cppi41_dma_callback() 2 loops
|musb-hdrc musb-hdrc.0.auto: configure ep1/80 packet_sz=512, mode=0, dma_addr=0xadd6f002, len=1514 is_tx=1
|cppi41_dma_callback() 13 loops
avg: 2us
for the same test case. One loop means a udelay(1). The delay seems to
depend on the packet size. On HS the bit is always cleared for small
packet sizes while on FS it is never the case; it is mostly around 110us.
This testing has been performed with g_ether (musb as device) and using BULK
transfers.
INTR transfers are way more fun: during init the gadget sends an INT
packet to the host and cppi41 says "transfer done" shortly after. The
MUSB_TXCSR_TXPKTRDY bit is set even seconds later. The reason is that the host
did not try to receive it, it does so after the interface (on host side) has
been configured. Until this happens, that packet remains in musb's FIFO.
To fix this, two things are done:
- No DMA transfers for INT based endpoints. These transfers are usually
  very small and rare so it is likely better to skip the DMA engine and
  stuff the four bytes directly into the FIFO
- on HS we poll up to 25us and hope that bit goes away. If not we setup
  a hrtimer to poll for it. The 140us delay is a rule of thumb. In FS
  the command
  | ping 10.10.10.10 -c1 -s65130
  creates about 44 1514bytes transfers. About 19 of them need a second
  timer to complete.
Reported-by: Bin Liu <b-liu@ti.com>
Cc: stable@vger.kernel.org
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Signed-off-by: Felipe Balbi <balbi@ti.com>
											
										 
											2013-11-12 16:37:47 +01:00
										 |  |  | 				HRTIMER_MODE_REL); | 
					
						
							|  |  |  | 		} | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | out: | 
					
						
							| 
									
										
										
										
											2013-06-20 12:13:04 +02:00
										 |  |  | 	spin_unlock_irqrestore(&musb->lock, flags); | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | static u32 update_ep_mode(unsigned ep, unsigned mode, u32 old) | 
					
						
							|  |  |  | { | 
					
						
							|  |  |  | 	unsigned shift; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	shift = (ep - 1) * 2; | 
					
						
							|  |  |  | 	old &= ~(3 << shift); | 
					
						
							|  |  |  | 	old |= mode << shift; | 
					
						
							|  |  |  | 	return old; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | static void cppi41_set_dma_mode(struct cppi41_dma_channel *cppi41_channel, | 
					
						
							|  |  |  | 		unsigned mode) | 
					
						
							|  |  |  | { | 
					
						
							|  |  |  | 	struct cppi41_dma_controller *controller = cppi41_channel->controller; | 
					
						
							|  |  |  | 	u32 port; | 
					
						
							|  |  |  | 	u32 new_mode; | 
					
						
							|  |  |  | 	u32 old_mode; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	if (cppi41_channel->is_tx) | 
					
						
							|  |  |  | 		old_mode = controller->tx_mode; | 
					
						
							|  |  |  | 	else | 
					
						
							|  |  |  | 		old_mode = controller->rx_mode; | 
					
						
							|  |  |  | 	port = cppi41_channel->port_num; | 
					
						
							|  |  |  | 	new_mode = update_ep_mode(port, mode, old_mode); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	if (new_mode == old_mode) | 
					
						
							|  |  |  | 		return; | 
					
						
							|  |  |  | 	if (cppi41_channel->is_tx) { | 
					
						
							|  |  |  | 		controller->tx_mode = new_mode; | 
					
						
							|  |  |  | 		musb_writel(controller->musb->ctrl_base, USB_CTRL_TX_MODE, | 
					
						
							|  |  |  | 				new_mode); | 
					
						
							|  |  |  | 	} else { | 
					
						
							|  |  |  | 		controller->rx_mode = new_mode; | 
					
						
							|  |  |  | 		musb_writel(controller->musb->ctrl_base, USB_CTRL_RX_MODE, | 
					
						
							|  |  |  | 				new_mode); | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | static void cppi41_set_autoreq_mode(struct cppi41_dma_channel *cppi41_channel, | 
					
						
							|  |  |  | 		unsigned mode) | 
					
						
							|  |  |  | { | 
					
						
							|  |  |  | 	struct cppi41_dma_controller *controller = cppi41_channel->controller; | 
					
						
							|  |  |  | 	u32 port; | 
					
						
							|  |  |  | 	u32 new_mode; | 
					
						
							|  |  |  | 	u32 old_mode; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	old_mode = controller->auto_req; | 
					
						
							|  |  |  | 	port = cppi41_channel->port_num; | 
					
						
							|  |  |  | 	new_mode = update_ep_mode(port, mode, old_mode); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	if (new_mode == old_mode) | 
					
						
							|  |  |  | 		return; | 
					
						
							|  |  |  | 	controller->auto_req = new_mode; | 
					
						
							|  |  |  | 	musb_writel(controller->musb->ctrl_base, USB_CTRL_AUTOREQ, new_mode); | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | static bool cppi41_configure_channel(struct dma_channel *channel, | 
					
						
							|  |  |  | 				u16 packet_sz, u8 mode, | 
					
						
							|  |  |  | 				dma_addr_t dma_addr, u32 len) | 
					
						
							|  |  |  | { | 
					
						
							|  |  |  | 	struct cppi41_dma_channel *cppi41_channel = channel->private_data; | 
					
						
							|  |  |  | 	struct dma_chan *dc = cppi41_channel->dc; | 
					
						
							|  |  |  | 	struct dma_async_tx_descriptor *dma_desc; | 
					
						
							|  |  |  | 	enum dma_transfer_direction direction; | 
					
						
							|  |  |  | 	struct musb *musb = cppi41_channel->controller->musb; | 
					
						
							|  |  |  | 	unsigned use_gen_rndis = 0; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	dev_dbg(musb->controller, | 
					
						
							|  |  |  | 		"configure ep%d/%x packet_sz=%d, mode=%d, dma_addr=0x%llx, len=%d is_tx=%d\n", | 
					
						
							|  |  |  | 		cppi41_channel->port_num, RNDIS_REG(cppi41_channel->port_num), | 
					
						
							|  |  |  | 		packet_sz, mode, (unsigned long long) dma_addr, | 
					
						
							|  |  |  | 		len, cppi41_channel->is_tx); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	cppi41_channel->buf_addr = dma_addr; | 
					
						
							|  |  |  | 	cppi41_channel->total_len = len; | 
					
						
							|  |  |  | 	cppi41_channel->transferred = 0; | 
					
						
							|  |  |  | 	cppi41_channel->packet_sz = packet_sz; | 
					
						
							| 
									
										
										
										
											2014-08-12 14:18:43 -05:00
										 |  |  | 	cppi41_channel->tx_zlp = (cppi41_channel->is_tx && mode) ? 1 : 0; | 
					
						
							| 
									
										
										
										
											2013-06-20 12:13:04 +02:00
										 |  |  | 
 | 
					
						
							|  |  |  | 	/*
 | 
					
						
							|  |  |  | 	 * Due to AM335x' Advisory 1.0.13 we are not allowed to transfer more | 
					
						
							|  |  |  | 	 * than max packet size at a time. | 
					
						
							|  |  |  | 	 */ | 
					
						
							|  |  |  | 	if (cppi41_channel->is_tx) | 
					
						
							|  |  |  | 		use_gen_rndis = 1; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	if (use_gen_rndis) { | 
					
						
							|  |  |  | 		/* RNDIS mode */ | 
					
						
							|  |  |  | 		if (len > packet_sz) { | 
					
						
							|  |  |  | 			musb_writel(musb->ctrl_base, | 
					
						
							|  |  |  | 				RNDIS_REG(cppi41_channel->port_num), len); | 
					
						
							|  |  |  | 			/* gen rndis */ | 
					
						
							|  |  |  | 			cppi41_set_dma_mode(cppi41_channel, | 
					
						
							|  |  |  | 					EP_MODE_DMA_GEN_RNDIS); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 			/* auto req */ | 
					
						
							|  |  |  | 			cppi41_set_autoreq_mode(cppi41_channel, | 
					
						
							|  |  |  | 					EP_MODE_AUTOREG_ALL_NEOP); | 
					
						
							|  |  |  | 		} else { | 
					
						
							|  |  |  | 			musb_writel(musb->ctrl_base, | 
					
						
							|  |  |  | 					RNDIS_REG(cppi41_channel->port_num), 0); | 
					
						
							|  |  |  | 			cppi41_set_dma_mode(cppi41_channel, | 
					
						
							|  |  |  | 					EP_MODE_DMA_TRANSPARENT); | 
					
						
							|  |  |  | 			cppi41_set_autoreq_mode(cppi41_channel, | 
					
						
							|  |  |  | 					EP_MODE_AUTOREG_NONE); | 
					
						
							|  |  |  | 		} | 
					
						
							|  |  |  | 	} else { | 
					
						
							|  |  |  | 		/* fallback mode */ | 
					
						
							|  |  |  | 		cppi41_set_dma_mode(cppi41_channel, EP_MODE_DMA_TRANSPARENT); | 
					
						
							|  |  |  | 		cppi41_set_autoreq_mode(cppi41_channel, EP_MODE_AUTOREG_NONE); | 
					
						
							|  |  |  | 		len = min_t(u32, packet_sz, len); | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 	cppi41_channel->prog_len = len; | 
					
						
							|  |  |  | 	direction = cppi41_channel->is_tx ? DMA_MEM_TO_DEV : DMA_DEV_TO_MEM; | 
					
						
							|  |  |  | 	dma_desc = dmaengine_prep_slave_single(dc, dma_addr, len, direction, | 
					
						
							|  |  |  | 			DMA_PREP_INTERRUPT | DMA_CTRL_ACK); | 
					
						
							|  |  |  | 	if (!dma_desc) | 
					
						
							|  |  |  | 		return false; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	dma_desc->callback = cppi41_dma_callback; | 
					
						
							|  |  |  | 	dma_desc->callback_param = channel; | 
					
						
							|  |  |  | 	cppi41_channel->cookie = dma_desc->tx_submit(dma_desc); | 
					
						
							| 
									
										
										
										
											2014-05-26 14:52:38 +02:00
										 |  |  | 	cppi41_channel->channel.rx_packet_done = false; | 
					
						
							| 
									
										
										
										
											2013-06-20 12:13:04 +02:00
										 |  |  | 
 | 
					
						
							|  |  |  | 	save_rx_toggle(cppi41_channel); | 
					
						
							|  |  |  | 	dma_async_issue_pending(dc); | 
					
						
							|  |  |  | 	return true; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | static struct dma_channel *cppi41_dma_channel_allocate(struct dma_controller *c, | 
					
						
							|  |  |  | 				struct musb_hw_ep *hw_ep, u8 is_tx) | 
					
						
							|  |  |  | { | 
					
						
							|  |  |  | 	struct cppi41_dma_controller *controller = container_of(c, | 
					
						
							|  |  |  | 			struct cppi41_dma_controller, controller); | 
					
						
							|  |  |  | 	struct cppi41_dma_channel *cppi41_channel = NULL; | 
					
						
							|  |  |  | 	u8 ch_num = hw_ep->epnum - 1; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	if (ch_num >= MUSB_DMA_NUM_CHANNELS) | 
					
						
							|  |  |  | 		return NULL; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	if (is_tx) | 
					
						
							|  |  |  | 		cppi41_channel = &controller->tx_channel[ch_num]; | 
					
						
							|  |  |  | 	else | 
					
						
							|  |  |  | 		cppi41_channel = &controller->rx_channel[ch_num]; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	if (!cppi41_channel->dc) | 
					
						
							|  |  |  | 		return NULL; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	if (cppi41_channel->is_allocated) | 
					
						
							|  |  |  | 		return NULL; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	cppi41_channel->hw_ep = hw_ep; | 
					
						
							|  |  |  | 	cppi41_channel->is_allocated = 1; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	return &cppi41_channel->channel; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | static void cppi41_dma_channel_release(struct dma_channel *channel) | 
					
						
							|  |  |  | { | 
					
						
							|  |  |  | 	struct cppi41_dma_channel *cppi41_channel = channel->private_data; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	if (cppi41_channel->is_allocated) { | 
					
						
							|  |  |  | 		cppi41_channel->is_allocated = 0; | 
					
						
							|  |  |  | 		channel->status = MUSB_DMA_STATUS_FREE; | 
					
						
							|  |  |  | 		channel->actual_len = 0; | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | static int cppi41_dma_channel_program(struct dma_channel *channel, | 
					
						
							|  |  |  | 				u16 packet_sz, u8 mode, | 
					
						
							|  |  |  | 				dma_addr_t dma_addr, u32 len) | 
					
						
							|  |  |  | { | 
					
						
							|  |  |  | 	int ret; | 
					
						
							| 
									
										
										
										
											2014-01-27 15:07:25 +05:30
										 |  |  | 	struct cppi41_dma_channel *cppi41_channel = channel->private_data; | 
					
						
							|  |  |  | 	int hb_mult = 0; | 
					
						
							| 
									
										
										
										
											2013-06-20 12:13:04 +02:00
										 |  |  | 
 | 
					
						
							|  |  |  | 	BUG_ON(channel->status == MUSB_DMA_STATUS_UNKNOWN || | 
					
						
							|  |  |  | 		channel->status == MUSB_DMA_STATUS_BUSY); | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2014-01-27 15:07:25 +05:30
										 |  |  | 	if (is_host_active(cppi41_channel->controller->musb)) { | 
					
						
							|  |  |  | 		if (cppi41_channel->is_tx) | 
					
						
							|  |  |  | 			hb_mult = cppi41_channel->hw_ep->out_qh->hb_mult; | 
					
						
							|  |  |  | 		else | 
					
						
							|  |  |  | 			hb_mult = cppi41_channel->hw_ep->in_qh->hb_mult; | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2013-06-20 12:13:04 +02:00
										 |  |  | 	channel->status = MUSB_DMA_STATUS_BUSY; | 
					
						
							|  |  |  | 	channel->actual_len = 0; | 
					
						
							| 
									
										
										
										
											2014-01-27 15:07:25 +05:30
										 |  |  | 
 | 
					
						
							|  |  |  | 	if (hb_mult) | 
					
						
							|  |  |  | 		packet_sz = hb_mult * (packet_sz & 0x7FF); | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2013-06-20 12:13:04 +02:00
										 |  |  | 	ret = cppi41_configure_channel(channel, packet_sz, mode, dma_addr, len); | 
					
						
							|  |  |  | 	if (!ret) | 
					
						
							|  |  |  | 		channel->status = MUSB_DMA_STATUS_FREE; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	return ret; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | static int cppi41_is_compatible(struct dma_channel *channel, u16 maxpacket, | 
					
						
							|  |  |  | 		void *buf, u32 length) | 
					
						
							|  |  |  | { | 
					
						
							|  |  |  | 	struct cppi41_dma_channel *cppi41_channel = channel->private_data; | 
					
						
							|  |  |  | 	struct cppi41_dma_controller *controller = cppi41_channel->controller; | 
					
						
							|  |  |  | 	struct musb *musb = controller->musb; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	if (is_host_active(musb)) { | 
					
						
							|  |  |  | 		WARN_ON(1); | 
					
						
							|  |  |  | 		return 1; | 
					
						
							|  |  |  | 	} | 
					
						
							| 
									
										
											  
											
												usb: musb: musb_cppi41: handle pre-mature TX complete interrupt
The TX-complete interrupt of the CPPI41 on AM335x fires too early.
Adding a loop and counting how long it takes until the
MUSB_TXCSR_TXPKTRDY bit is cleared I see
FS:
|musb-hdrc musb-hdrc.0.auto: configure ep1/80 packet_sz=64, mode=0, dma_addr=0xadc54002, len=1514 is_tx=1
|cppi41_dma_callback() 74 loops
|musb-hdrc musb-hdrc.0.auto: configure ep1/80 packet_sz=64, mode=0, dma_addr=0xadcd8802, len=1514 is_tx=1
|cppi41_dma_callback() 66 loops
|musb-hdrc musb-hdrc.0.auto: configure ep1/80 packet_sz=64, mode=0, dma_addr=0xadcd8002, len=1514 is_tx=1
|cppi41_dma_callback() 136 loops
|musb-hdrc musb-hdrc.0.auto: configure ep1/80 packet_sz=64, mode=0, dma_addr=0xadf55802, len=1514 is_tx=1
|cppi41_dma_callback() 136 loops
avg: 110 - 150us
HS:
|musb-hdrc musb-hdrc.0.auto: configure ep1/80 packet_sz=512, mode=0, dma_addr=0xaca6f002, len=1514 is_tx=1
|cppi41_dma_callback() 0 loops
|musb-hdrc musb-hdrc.0.auto: configure ep1/80 packet_sz=512, mode=0, dma_addr=0xadd6f802, len=1514 is_tx=1
|cppi41_dma_callback() 2 loops
|musb-hdrc musb-hdrc.0.auto: configure ep1/80 packet_sz=512, mode=0, dma_addr=0xadd6f002, len=1514 is_tx=1
|cppi41_dma_callback() 13 loops
avg: 2us
for the same test case. One loop means a udelay(1). The delay seems to
depend on the packet size. On HS the bit is always cleared for small
packet sizes while on FS it is never the case, it mostly around 110us.
This testing has been performed with g_ether (musb as device) and using BULK
transfers.
INTR transfers are way more fun: during init the gadget sends a INT
packet to the host and cppi41 says "transfer done" shortly after. The
MUSB_TXCSR_TXPKTRDY bit is set even seconds later. The reason is that the host
did not try to receive it, it does so after the interface (on host side) has
been configured. Until this happens, that packet remains in musb's FIFO.
To fix this, two things are done:
- No DMA transfers for INT based endpoints. These transfer are usually
  very small and rare so it is likely better to skip the DMA engine and
  stuff the four bytes directly into the FIFO
- on HS we poll up to 25us and hope that bit goes away. If not we setup
  a hrtimer to poll for it. The 140us delay is a rule of thumb. In FS
  the command
  | ping 10.10.10.10 -c1 -s65130
  creates about 44 1514bytes transfers. About 19 of them need a second
  timer to complete.
Reported-by: Bin Liu <b-liu@ti.com>
Cc: stable@vger.kernel.org
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Signed-off-by: Felipe Balbi <balbi@ti.com>
											
										 
											2013-11-12 16:37:47 +01:00
										 |  |  | 	if (cppi41_channel->hw_ep->ep_in.type != USB_ENDPOINT_XFER_BULK) | 
					
						
							|  |  |  | 		return 0; | 
					
						
							| 
									
										
										
										
											2013-08-13 19:38:24 +02:00
										 |  |  | 	if (cppi41_channel->is_tx) | 
					
						
							|  |  |  | 		return 1; | 
					
						
							|  |  |  | 	/* AM335x Advisory 1.0.13. No workaround for device RX mode */ | 
					
						
							| 
									
										
										
										
											2013-06-20 12:13:04 +02:00
										 |  |  | 	return 0; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | static int cppi41_dma_channel_abort(struct dma_channel *channel) | 
					
						
							|  |  |  | { | 
					
						
							|  |  |  | 	struct cppi41_dma_channel *cppi41_channel = channel->private_data; | 
					
						
							|  |  |  | 	struct cppi41_dma_controller *controller = cppi41_channel->controller; | 
					
						
							|  |  |  | 	struct musb *musb = controller->musb; | 
					
						
							|  |  |  | 	void __iomem *epio = cppi41_channel->hw_ep->regs; | 
					
						
							|  |  |  | 	int tdbit; | 
					
						
							|  |  |  | 	int ret; | 
					
						
							|  |  |  | 	unsigned is_tx; | 
					
						
							|  |  |  | 	u16 csr; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	is_tx = cppi41_channel->is_tx; | 
					
						
							|  |  |  | 	dev_dbg(musb->controller, "abort channel=%d, is_tx=%d\n", | 
					
						
							|  |  |  | 			cppi41_channel->port_num, is_tx); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	if (cppi41_channel->channel.status == MUSB_DMA_STATUS_FREE) | 
					
						
							|  |  |  | 		return 0; | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
											  
											
												usb: musb: musb_cppi41: handle pre-mature TX complete interrupt
The TX-complete interrupt of the CPPI41 on AM335x fires too early.
Adding a loop and counting how long it takes until the
MUSB_TXCSR_TXPKTRDY bit is cleared I see
FS:
|musb-hdrc musb-hdrc.0.auto: configure ep1/80 packet_sz=64, mode=0, dma_addr=0xadc54002, len=1514 is_tx=1
|cppi41_dma_callback() 74 loops
|musb-hdrc musb-hdrc.0.auto: configure ep1/80 packet_sz=64, mode=0, dma_addr=0xadcd8802, len=1514 is_tx=1
|cppi41_dma_callback() 66 loops
|musb-hdrc musb-hdrc.0.auto: configure ep1/80 packet_sz=64, mode=0, dma_addr=0xadcd8002, len=1514 is_tx=1
|cppi41_dma_callback() 136 loops
|musb-hdrc musb-hdrc.0.auto: configure ep1/80 packet_sz=64, mode=0, dma_addr=0xadf55802, len=1514 is_tx=1
|cppi41_dma_callback() 136 loops
avg: 110 - 150us
HS:
|musb-hdrc musb-hdrc.0.auto: configure ep1/80 packet_sz=512, mode=0, dma_addr=0xaca6f002, len=1514 is_tx=1
|cppi41_dma_callback() 0 loops
|musb-hdrc musb-hdrc.0.auto: configure ep1/80 packet_sz=512, mode=0, dma_addr=0xadd6f802, len=1514 is_tx=1
|cppi41_dma_callback() 2 loops
|musb-hdrc musb-hdrc.0.auto: configure ep1/80 packet_sz=512, mode=0, dma_addr=0xadd6f002, len=1514 is_tx=1
|cppi41_dma_callback() 13 loops
avg: 2us
for the same test case. One loop means a udelay(1). The delay seems to
depend on the packet size. On HS the bit is always cleared for small
packet sizes while on FS it is never the case, it mostly around 110us.
This testing has been performed with g_ether (musb as device) and using BULK
transfers.
INTR transfers are way more fun: during init the gadget sends a INT
packet to the host and cppi41 says "transfer done" shortly after. The
MUSB_TXCSR_TXPKTRDY bit is set even seconds later. The reason is that the host
did not try to receive it, it does so after the interface (on host side) has
been configured. Until this happens, that packet remains in musb's FIFO.
To fix this, two things are done:
- No DMA transfers for INT based endpoints. These transfer are usually
  very small and rare so it is likely better to skip the DMA engine and
  stuff the four bytes directly into the FIFO
- on HS we poll up to 25us and hope that bit goes away. If not we setup
  a hrtimer to poll for it. The 140us delay is a rule of thumb. In FS
  the command
  | ping 10.10.10.10 -c1 -s65130
  creates about 44 1514bytes transfers. About 19 of them need a second
  timer to complete.
Reported-by: Bin Liu <b-liu@ti.com>
Cc: stable@vger.kernel.org
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Signed-off-by: Felipe Balbi <balbi@ti.com>
											
										 
											2013-11-12 16:37:47 +01:00
										 |  |  | 	list_del_init(&cppi41_channel->tx_check); | 
					
						
							| 
									
										
										
										
											2013-06-20 12:13:04 +02:00
										 |  |  | 	if (is_tx) { | 
					
						
							|  |  |  | 		csr = musb_readw(epio, MUSB_TXCSR); | 
					
						
							|  |  |  | 		csr &= ~MUSB_TXCSR_DMAENAB; | 
					
						
							|  |  |  | 		musb_writew(epio, MUSB_TXCSR, csr); | 
					
						
							|  |  |  | 	} else { | 
					
						
							|  |  |  | 		csr = musb_readw(epio, MUSB_RXCSR); | 
					
						
							|  |  |  | 		csr &= ~(MUSB_RXCSR_H_REQPKT | MUSB_RXCSR_DMAENAB); | 
					
						
							|  |  |  | 		musb_writew(epio, MUSB_RXCSR, csr); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 		csr = musb_readw(epio, MUSB_RXCSR); | 
					
						
							|  |  |  | 		if (csr & MUSB_RXCSR_RXPKTRDY) { | 
					
						
							|  |  |  | 			csr |= MUSB_RXCSR_FLUSHFIFO; | 
					
						
							|  |  |  | 			musb_writew(epio, MUSB_RXCSR, csr); | 
					
						
							|  |  |  | 			musb_writew(epio, MUSB_RXCSR, csr); | 
					
						
							|  |  |  | 		} | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	tdbit = 1 << cppi41_channel->port_num; | 
					
						
							|  |  |  | 	if (is_tx) | 
					
						
							|  |  |  | 		tdbit <<= 16; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	do { | 
					
						
							|  |  |  | 		musb_writel(musb->ctrl_base, USB_TDOWN, tdbit); | 
					
						
							|  |  |  | 		ret = dmaengine_terminate_all(cppi41_channel->dc); | 
					
						
							|  |  |  | 	} while (ret == -EAGAIN); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	musb_writel(musb->ctrl_base, USB_TDOWN, tdbit); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	if (is_tx) { | 
					
						
							|  |  |  | 		csr = musb_readw(epio, MUSB_TXCSR); | 
					
						
							|  |  |  | 		if (csr & MUSB_TXCSR_TXPKTRDY) { | 
					
						
							|  |  |  | 			csr |= MUSB_TXCSR_FLUSHFIFO; | 
					
						
							|  |  |  | 			musb_writew(epio, MUSB_TXCSR, csr); | 
					
						
							|  |  |  | 		} | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	cppi41_channel->channel.status = MUSB_DMA_STATUS_FREE; | 
					
						
							|  |  |  | 	return 0; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | static void cppi41_release_all_dma_chans(struct cppi41_dma_controller *ctrl) | 
					
						
							|  |  |  | { | 
					
						
							|  |  |  | 	struct dma_chan *dc; | 
					
						
							|  |  |  | 	int i; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	for (i = 0; i < MUSB_DMA_NUM_CHANNELS; i++) { | 
					
						
							|  |  |  | 		dc = ctrl->tx_channel[i].dc; | 
					
						
							|  |  |  | 		if (dc) | 
					
						
							|  |  |  | 			dma_release_channel(dc); | 
					
						
							|  |  |  | 		dc = ctrl->rx_channel[i].dc; | 
					
						
							|  |  |  | 		if (dc) | 
					
						
							|  |  |  | 			dma_release_channel(dc); | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | static void cppi41_dma_controller_stop(struct cppi41_dma_controller *controller) | 
					
						
							|  |  |  | { | 
					
						
							|  |  |  | 	cppi41_release_all_dma_chans(controller); | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | static int cppi41_dma_controller_start(struct cppi41_dma_controller *controller) | 
					
						
							|  |  |  | { | 
					
						
							|  |  |  | 	struct musb *musb = controller->musb; | 
					
						
							|  |  |  | 	struct device *dev = musb->controller; | 
					
						
							|  |  |  | 	struct device_node *np = dev->of_node; | 
					
						
							|  |  |  | 	struct cppi41_dma_channel *cppi41_channel; | 
					
						
							|  |  |  | 	int count; | 
					
						
							|  |  |  | 	int i; | 
					
						
							|  |  |  | 	int ret; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	count = of_property_count_strings(np, "dma-names"); | 
					
						
							|  |  |  | 	if (count < 0) | 
					
						
							|  |  |  | 		return count; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	for (i = 0; i < count; i++) { | 
					
						
							|  |  |  | 		struct dma_chan *dc; | 
					
						
							|  |  |  | 		struct dma_channel *musb_dma; | 
					
						
							|  |  |  | 		const char *str; | 
					
						
							|  |  |  | 		unsigned is_tx; | 
					
						
							|  |  |  | 		unsigned int port; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 		ret = of_property_read_string_index(np, "dma-names", i, &str); | 
					
						
							|  |  |  | 		if (ret) | 
					
						
							|  |  |  | 			goto err; | 
					
						
							| 
									
										
										
										
											2014-11-27 22:25:45 +01:00
										 |  |  | 		if (strstarts(str, "tx")) | 
					
						
							| 
									
										
										
										
											2013-06-20 12:13:04 +02:00
										 |  |  | 			is_tx = 1; | 
					
						
							| 
									
										
										
										
											2014-11-27 22:25:45 +01:00
										 |  |  | 		else if (strstarts(str, "rx")) | 
					
						
							| 
									
										
										
										
											2013-06-20 12:13:04 +02:00
										 |  |  | 			is_tx = 0; | 
					
						
							|  |  |  | 		else { | 
					
						
							|  |  |  | 			dev_err(dev, "Wrong dmatype %s\n", str); | 
					
						
							|  |  |  | 			goto err; | 
					
						
							|  |  |  | 		} | 
					
						
							|  |  |  | 		ret = kstrtouint(str + 2, 0, &port); | 
					
						
							|  |  |  | 		if (ret) | 
					
						
							|  |  |  | 			goto err; | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2013-10-16 12:50:08 +02:00
										 |  |  | 		ret = -EINVAL; | 
					
						
							| 
									
										
										
										
											2013-06-20 12:13:04 +02:00
										 |  |  | 		if (port > MUSB_DMA_NUM_CHANNELS || !port) | 
					
						
							|  |  |  | 			goto err; | 
					
						
							|  |  |  | 		if (is_tx) | 
					
						
							|  |  |  | 			cppi41_channel = &controller->tx_channel[port - 1]; | 
					
						
							|  |  |  | 		else | 
					
						
							|  |  |  | 			cppi41_channel = &controller->rx_channel[port - 1]; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 		cppi41_channel->controller = controller; | 
					
						
							|  |  |  | 		cppi41_channel->port_num = port; | 
					
						
							|  |  |  | 		cppi41_channel->is_tx = is_tx; | 
					
						
							| 
									
										
											  
											
												usb: musb: musb_cppi41: handle pre-mature TX complete interrupt
The TX-complete interrupt of the CPPI41 on AM335x fires too early.
Adding a loop and counting how long it takes until the
MUSB_TXCSR_TXPKTRDY bit is cleared I see
FS:
|musb-hdrc musb-hdrc.0.auto: configure ep1/80 packet_sz=64, mode=0, dma_addr=0xadc54002, len=1514 is_tx=1
|cppi41_dma_callback() 74 loops
|musb-hdrc musb-hdrc.0.auto: configure ep1/80 packet_sz=64, mode=0, dma_addr=0xadcd8802, len=1514 is_tx=1
|cppi41_dma_callback() 66 loops
|musb-hdrc musb-hdrc.0.auto: configure ep1/80 packet_sz=64, mode=0, dma_addr=0xadcd8002, len=1514 is_tx=1
|cppi41_dma_callback() 136 loops
|musb-hdrc musb-hdrc.0.auto: configure ep1/80 packet_sz=64, mode=0, dma_addr=0xadf55802, len=1514 is_tx=1
|cppi41_dma_callback() 136 loops
avg: 110 - 150us
HS:
|musb-hdrc musb-hdrc.0.auto: configure ep1/80 packet_sz=512, mode=0, dma_addr=0xaca6f002, len=1514 is_tx=1
|cppi41_dma_callback() 0 loops
|musb-hdrc musb-hdrc.0.auto: configure ep1/80 packet_sz=512, mode=0, dma_addr=0xadd6f802, len=1514 is_tx=1
|cppi41_dma_callback() 2 loops
|musb-hdrc musb-hdrc.0.auto: configure ep1/80 packet_sz=512, mode=0, dma_addr=0xadd6f002, len=1514 is_tx=1
|cppi41_dma_callback() 13 loops
avg: 2us
for the same test case. One loop means a udelay(1). The delay seems to
depend on the packet size. On HS the bit is always cleared for small
packet sizes while on FS it is never the case; it is mostly around 110us.
This testing has been performed with g_ether (musb as device) and using BULK
transfers.
INTR transfers are way more fun: during init the gadget sends an INT
packet to the host and cppi41 says "transfer done" shortly after. The
MUSB_TXCSR_TXPKTRDY bit is set even seconds later. The reason is that the host
did not try to receive it, it does so after the interface (on host side) has
been configured. Until this happens, that packet remains in musb's FIFO.
To fix this, two things are done:
- No DMA transfers for INT based endpoints. These transfers are usually
  very small and rare so it is likely better to skip the DMA engine and
  stuff the four bytes directly into the FIFO
- on HS we poll up to 25us and hope that bit goes away. If not we set up
  a hrtimer to poll for it. The 140us delay is a rule of thumb. In FS
  the command
  | ping 10.10.10.10 -c1 -s65130
  creates about 44 1514bytes transfers. About 19 of them need a second
  timer to complete.
Reported-by: Bin Liu <b-liu@ti.com>
Cc: stable@vger.kernel.org
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Signed-off-by: Felipe Balbi <balbi@ti.com>
											
										 
											2013-11-12 16:37:47 +01:00
										 |  |  | 		INIT_LIST_HEAD(&cppi41_channel->tx_check); | 
					
						
							| 
									
										
										
										
											2013-06-20 12:13:04 +02:00
										 |  |  | 
 | 
					
						
							|  |  |  | 		musb_dma = &cppi41_channel->channel; | 
					
						
							|  |  |  | 		musb_dma->private_data = cppi41_channel; | 
					
						
							|  |  |  | 		musb_dma->status = MUSB_DMA_STATUS_FREE; | 
					
						
							|  |  |  | 		musb_dma->max_len = SZ_4M; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 		dc = dma_request_slave_channel(dev, str); | 
					
						
							|  |  |  | 		if (!dc) { | 
					
						
							| 
									
										
										
										
											2014-01-02 19:27:47 +05:30
										 |  |  | 			dev_err(dev, "Failed to request %s.\n", str); | 
					
						
							| 
									
										
										
										
											2013-10-16 12:50:08 +02:00
										 |  |  | 			ret = -EPROBE_DEFER; | 
					
						
							| 
									
										
										
										
											2013-06-20 12:13:04 +02:00
										 |  |  | 			goto err; | 
					
						
							|  |  |  | 		} | 
					
						
							|  |  |  | 		cppi41_channel->dc = dc; | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 	return 0; | 
					
						
							|  |  |  | err: | 
					
						
							|  |  |  | 	cppi41_release_all_dma_chans(controller); | 
					
						
							| 
									
										
										
										
											2013-10-16 12:50:08 +02:00
										 |  |  | 	return ret; | 
					
						
							| 
									
										
										
										
											2013-06-20 12:13:04 +02:00
										 |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | void dma_controller_destroy(struct dma_controller *c) | 
					
						
							|  |  |  | { | 
					
						
							|  |  |  | 	struct cppi41_dma_controller *controller = container_of(c, | 
					
						
							|  |  |  | 			struct cppi41_dma_controller, controller); | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
											  
											
												usb: musb: musb_cppi41: handle pre-mature TX complete interrupt
The TX-complete interrupt of the CPPI41 on AM335x fires too early.
Adding a loop and counting how long it takes until the
MUSB_TXCSR_TXPKTRDY bit is cleared I see
FS:
|musb-hdrc musb-hdrc.0.auto: configure ep1/80 packet_sz=64, mode=0, dma_addr=0xadc54002, len=1514 is_tx=1
|cppi41_dma_callback() 74 loops
|musb-hdrc musb-hdrc.0.auto: configure ep1/80 packet_sz=64, mode=0, dma_addr=0xadcd8802, len=1514 is_tx=1
|cppi41_dma_callback() 66 loops
|musb-hdrc musb-hdrc.0.auto: configure ep1/80 packet_sz=64, mode=0, dma_addr=0xadcd8002, len=1514 is_tx=1
|cppi41_dma_callback() 136 loops
|musb-hdrc musb-hdrc.0.auto: configure ep1/80 packet_sz=64, mode=0, dma_addr=0xadf55802, len=1514 is_tx=1
|cppi41_dma_callback() 136 loops
avg: 110 - 150us
HS:
|musb-hdrc musb-hdrc.0.auto: configure ep1/80 packet_sz=512, mode=0, dma_addr=0xaca6f002, len=1514 is_tx=1
|cppi41_dma_callback() 0 loops
|musb-hdrc musb-hdrc.0.auto: configure ep1/80 packet_sz=512, mode=0, dma_addr=0xadd6f802, len=1514 is_tx=1
|cppi41_dma_callback() 2 loops
|musb-hdrc musb-hdrc.0.auto: configure ep1/80 packet_sz=512, mode=0, dma_addr=0xadd6f002, len=1514 is_tx=1
|cppi41_dma_callback() 13 loops
avg: 2us
for the same test case. One loop means a udelay(1). The delay seems to
depend on the packet size. On HS the bit is always cleared for small
packet sizes while on FS it is never the case, it mostly around 110us.
This testing has been performed with g_ether (musb as device) and using BULK
transfers.
INTR transfers are way more fun: during init the gadget sends a INT
packet to the host and cppi41 says "transfer done" shortly after. The
MUSB_TXCSR_TXPKTRDY bit is set even seconds later. The reason is that the host
did not try to receive it, it does so after the interface (on host side) has
been configured. Until this happens, that packet remains in musb's FIFO.
To fix this, two things are done:
- No DMA transfers for INT based endpoints. These transfer are usually
  very small and rare so it is likely better to skip the DMA engine and
  stuff the four bytes directly into the FIFO
- on HS we poll up to 25us and hope that bit goes away. If not we setup
  a hrtimer to poll for it. The 140us delay is a rule of thumb. In FS
  the command
  | ping 10.10.10.10 -c1 -s65130
  creates about 44 1514bytes transfers. About 19 of them need a second
  timer to complete.
Reported-by: Bin Liu <b-liu@ti.com>
Cc: stable@vger.kernel.org
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Signed-off-by: Felipe Balbi <balbi@ti.com>
											
										 
											2013-11-12 16:37:47 +01:00
										 |  |  | 	hrtimer_cancel(&controller->early_tx); | 
					
						
							| 
									
										
										
										
											2013-06-20 12:13:04 +02:00
										 |  |  | 	cppi41_dma_controller_stop(controller); | 
					
						
							|  |  |  | 	kfree(controller); | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | struct dma_controller *dma_controller_create(struct musb *musb, | 
					
						
							|  |  |  | 					void __iomem *base) | 
					
						
							|  |  |  | { | 
					
						
							|  |  |  | 	struct cppi41_dma_controller *controller; | 
					
						
							| 
									
										
										
										
											2013-10-16 12:50:08 +02:00
										 |  |  | 	int ret = 0; | 
					
						
							| 
									
										
										
										
											2013-06-20 12:13:04 +02:00
										 |  |  | 
 | 
					
						
							|  |  |  | 	if (!musb->controller->of_node) { | 
					
						
							|  |  |  | 		dev_err(musb->controller, "Need DT for the DMA engine.\n"); | 
					
						
							|  |  |  | 		return NULL; | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	controller = kzalloc(sizeof(*controller), GFP_KERNEL); | 
					
						
							|  |  |  | 	if (!controller) | 
					
						
							|  |  |  | 		goto kzalloc_fail; | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
											  
											
												usb: musb: musb_cppi41: handle pre-mature TX complete interrupt
The TX-complete interrupt of the CPPI41 on AM335x fires too early.
Adding a loop and counting how long it takes until the
MUSB_TXCSR_TXPKTRDY bit is cleared I see
FS:
|musb-hdrc musb-hdrc.0.auto: configure ep1/80 packet_sz=64, mode=0, dma_addr=0xadc54002, len=1514 is_tx=1
|cppi41_dma_callback() 74 loops
|musb-hdrc musb-hdrc.0.auto: configure ep1/80 packet_sz=64, mode=0, dma_addr=0xadcd8802, len=1514 is_tx=1
|cppi41_dma_callback() 66 loops
|musb-hdrc musb-hdrc.0.auto: configure ep1/80 packet_sz=64, mode=0, dma_addr=0xadcd8002, len=1514 is_tx=1
|cppi41_dma_callback() 136 loops
|musb-hdrc musb-hdrc.0.auto: configure ep1/80 packet_sz=64, mode=0, dma_addr=0xadf55802, len=1514 is_tx=1
|cppi41_dma_callback() 136 loops
avg: 110 - 150us
HS:
|musb-hdrc musb-hdrc.0.auto: configure ep1/80 packet_sz=512, mode=0, dma_addr=0xaca6f002, len=1514 is_tx=1
|cppi41_dma_callback() 0 loops
|musb-hdrc musb-hdrc.0.auto: configure ep1/80 packet_sz=512, mode=0, dma_addr=0xadd6f802, len=1514 is_tx=1
|cppi41_dma_callback() 2 loops
|musb-hdrc musb-hdrc.0.auto: configure ep1/80 packet_sz=512, mode=0, dma_addr=0xadd6f002, len=1514 is_tx=1
|cppi41_dma_callback() 13 loops
avg: 2us
for the same test case. One loop means a udelay(1). The delay seems to
depend on the packet size. On HS the bit is always cleared for small
packet sizes while on FS it is never the case, it mostly around 110us.
This testing has been performed with g_ether (musb as device) and using BULK
transfers.
INTR transfers are way more fun: during init the gadget sends a INT
packet to the host and cppi41 says "transfer done" shortly after. The
MUSB_TXCSR_TXPKTRDY bit is set even seconds later. The reason is that the host
did not try to receive it, it does so after the interface (on host side) has
been configured. Until this happens, that packet remains in musb's FIFO.
To fix this, two things are done:
- No DMA transfers for INT based endpoints. These transfer are usually
  very small and rare so it is likely better to skip the DMA engine and
  stuff the four bytes directly into the FIFO
- on HS we poll up to 25us and hope that bit goes away. If not we setup
  a hrtimer to poll for it. The 140us delay is a rule of thumb. In FS
  the command
  | ping 10.10.10.10 -c1 -s65130
  creates about 44 1514bytes transfers. About 19 of them need a second
  timer to complete.
Reported-by: Bin Liu <b-liu@ti.com>
Cc: stable@vger.kernel.org
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Signed-off-by: Felipe Balbi <balbi@ti.com>
											
										 
											2013-11-12 16:37:47 +01:00
										 |  |  | 	hrtimer_init(&controller->early_tx, CLOCK_MONOTONIC, HRTIMER_MODE_REL); | 
					
						
							|  |  |  | 	controller->early_tx.function = cppi41_recheck_tx_req; | 
					
						
							|  |  |  | 	INIT_LIST_HEAD(&controller->early_tx_list); | 
					
						
							| 
									
										
										
										
											2013-06-20 12:13:04 +02:00
										 |  |  | 	controller->musb = musb; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	controller->controller.channel_alloc = cppi41_dma_channel_allocate; | 
					
						
							|  |  |  | 	controller->controller.channel_release = cppi41_dma_channel_release; | 
					
						
							|  |  |  | 	controller->controller.channel_program = cppi41_dma_channel_program; | 
					
						
							|  |  |  | 	controller->controller.channel_abort = cppi41_dma_channel_abort; | 
					
						
							|  |  |  | 	controller->controller.is_compatible = cppi41_is_compatible; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	ret = cppi41_dma_controller_start(controller); | 
					
						
							|  |  |  | 	if (ret) | 
					
						
							|  |  |  | 		goto plat_get_fail; | 
					
						
							|  |  |  | 	return &controller->controller; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | plat_get_fail: | 
					
						
							|  |  |  | 	kfree(controller); | 
					
						
							|  |  |  | kzalloc_fail: | 
					
						
							| 
									
										
										
										
											2013-10-16 12:50:08 +02:00
										 |  |  | 	if (ret == -EPROBE_DEFER) | 
					
						
							|  |  |  | 		return ERR_PTR(ret); | 
					
						
							| 
									
										
										
										
											2013-06-20 12:13:04 +02:00
										 |  |  | 	return NULL; | 
					
						
							|  |  |  | } |