From 570ba1826f2962f63168e9b65aa5620956234901 Mon Sep 17 00:00:00 2001 From: Andre Przywara Date: Wed, 5 May 2021 11:33:40 +0100 Subject: [PATCH 07/29] mmc: sunxi: Increase MMIO FIFO read performance To avoid the complexity of DMA operations (with chained descriptors), we use repeated MMIO reads and writes to the SD_FIFO_REG, which allows us to drain or fill the MMC data buffer FIFO very easily. However those MMIO accesses are somewhat costly, so this limits our MMC performance, to around 20MB/s on most SoCs, but down to 10MB/s on others (H6, partly due to the lower AHB1 frequency). As it turns out we read the FIFO status register after *every* word we read or write, which effectively doubles the number of MMIO accesses, thus effectively more than halving our performance. To avoid this overhead, we can make use of the FIFO level bits, which are in the very same FIFO status registers. So for a read request, we now can collect as many words as the FIFO level originally indicated, and only then need to update the status register. We don't know for sure the size of the FIFO (and it seems to differ across SoCs anyway), so writing is more fragile, which is why we still use the old method for that. If we find a minimum FIFO size available on all SoCs, we could use that, in a later optimisation. This patch increases the eMMC read speed on a Pine64-LTS from about 21MB/s to 43 MB/s. SD card reads increase slightly from about 20MB/s to 23MB/s, which is the practical limit for a 3.3V SD card anyway. Signed-off-by: Andre Przywara --- arch/arm/include/asm/arch-sunxi/mmc.h | 1 + drivers/mmc/sunxi_mmc.c | 39 +++++++++++++++++++++------ 2 files changed, 32 insertions(+), 8 deletions(-) diff --git a/arch/arm/include/asm/arch-sunxi/mmc.h b/arch/arm/include/asm/arch-sunxi/mmc.h index 340e25b04d..5daacf10eb 100644 --- a/arch/arm/include/asm/arch-sunxi/mmc.h +++ b/arch/arm/include/asm/arch-sunxi/mmc.h @@ -119,6 +119,7 @@ struct sunxi_mmc { #define SUNXI_MMC_STATUS_CARD_PRESENT (0x1 << 8) #define SUNXI_MMC_STATUS_CARD_DATA_BUSY (0x1 << 9) #define SUNXI_MMC_STATUS_DATA_FSM_BUSY (0x1 << 10) +#define SUNXI_MMC_STATUS_FIFO_LEVEL(reg) (((reg) >> 17) & 0x3fff) #define SUNXI_MMC_NTSR_MODE_SEL_NEW (0x1 << 31) diff --git a/drivers/mmc/sunxi_mmc.c b/drivers/mmc/sunxi_mmc.c index a30fd8fbdb..ce085c69f5 100644 --- a/drivers/mmc/sunxi_mmc.c +++ b/drivers/mmc/sunxi_mmc.c @@ -311,8 +311,9 @@ static int mmc_trans_data_by_cpu(struct sunxi_mmc_priv *priv, struct mmc *mmc, SUNXI_MMC_STATUS_FIFO_FULL; unsigned i; unsigned *buff = (unsigned int *)(reading ? data->dest : data->src); - unsigned byte_cnt = data->blocksize * data->blocks; - unsigned timeout_msecs = byte_cnt >> 8; + unsigned word_cnt = (data->blocksize * data->blocks) >> 2; + unsigned timeout_msecs = word_cnt >> 6; + uint32_t status; unsigned long start; if (timeout_msecs < 2000) @@ -323,16 +324,38 @@ static int mmc_trans_data_by_cpu(struct sunxi_mmc_priv *priv, struct mmc *mmc, start = get_timer(0); - for (i = 0; i < (byte_cnt >> 2); i++) { - while (readl(&priv->reg->status) & status_bit) { + for (i = 0; i < word_cnt;) { + unsigned int in_fifo; + + while ((status = readl(&priv->reg->status)) & status_bit) { if (get_timer(start) > timeout_msecs) return -1; } - if (reading) - buff[i] = readl(&priv->reg->fifo); - else - writel(buff[i], &priv->reg->fifo); + /* + * For writing we do not easily know the FIFO size, so have + * to check the FIFO status after every word written. + * TODO: For optimisation we could work out a minimum FIFO + * size across all SoCs, and use that together with the current + * fill level to write chunks for words. + */ + if (!reading) { + writel(buff[i++], &priv->reg->fifo); + continue; + } + + /* + * The status register holds the current FIFO level, so we + * can be sure to collect as many words from the FIFO + * register without checking the status register after every + * read. That saves half of the costly MMIO reads, effectively + * doubling the read performance. + */ + for (in_fifo = SUNXI_MMC_STATUS_FIFO_LEVEL(status); + in_fifo > 0; + in_fifo--) + buff[i++] = readl_relaxed(&priv->reg->fifo); + dmb(); } return 0; -- 2.31.1