63e2807916
- switch sources to official repo https://source.denx.de/u-boot - use tag v2021.07 - extract patches from pine64-org - enable DMA transfers from eMMC and mSD (u-boot from Megi) [ci:skip-build] already built successfully in CI
114 lines
4.2 KiB
Diff
114 lines
4.2 KiB
Diff
From 570ba1826f2962f63168e9b65aa5620956234901 Mon Sep 17 00:00:00 2001
|
|
From: Andre Przywara <andre.przywara@arm.com>
|
|
Date: Wed, 5 May 2021 11:33:40 +0100
|
|
Subject: [PATCH 07/29] mmc: sunxi: Increase MMIO FIFO read performance
|
|
|
|
To avoid the complexity of DMA operations (with chained descriptors), we
|
|
use repeated MMIO reads and writes to the SD_FIFO_REG, which allows us
|
|
to drain or fill the MMC data buffer FIFO very easily.
|
|
|
|
However those MMIO accesses are somewhat costly, so this limits our MMC
|
|
performance, to around 20MB/s on most SoCs, but down to 10MB/s on others
|
|
(H6, partly due to the lower AHB1 frequency).
|
|
|
|
As it turns out we read the FIFO status register after *every* word we
|
|
read or write, which effectively doubles the number of MMIO accesses,
|
|
thus more than halving our performance.
|
|
|
|
To avoid this overhead, we can make use of the FIFO level bits, which are
|
|
in the very same FIFO status registers.
|
|
So for a read request, we now can collect as many words as the FIFO
|
|
level originally indicated, and only then need to update the status
|
|
register.
|
|
|
|
We don't know for sure the size of the FIFO (and it seems to differ
|
|
across SoCs anyway), so writing is more fragile, which is why we still
|
|
use the old method for that. If we find a minimum FIFO size available on
|
|
all SoCs, we could use that, in a later optimisation.
|
|
|
|
This patch increases the eMMC read speed on a Pine64-LTS from about
|
|
21MB/s to 43MB/s. SD card reads increase slightly from about 20MB/s to
|
|
23MB/s, which is the practical limit for a 3.3V SD card anyway.
|
|
|
|
Signed-off-by: Andre Przywara <andre.przywara@arm.com>
|
|
---
|
|
arch/arm/include/asm/arch-sunxi/mmc.h | 1 +
|
|
drivers/mmc/sunxi_mmc.c | 39 +++++++++++++++++++++------
|
|
2 files changed, 32 insertions(+), 8 deletions(-)
|
|
|
|
diff --git a/arch/arm/include/asm/arch-sunxi/mmc.h b/arch/arm/include/asm/arch-sunxi/mmc.h
|
|
index 340e25b04d..5daacf10eb 100644
|
|
--- a/arch/arm/include/asm/arch-sunxi/mmc.h
|
|
+++ b/arch/arm/include/asm/arch-sunxi/mmc.h
|
|
@@ -119,6 +119,7 @@ struct sunxi_mmc {
|
|
#define SUNXI_MMC_STATUS_CARD_PRESENT (0x1 << 8)
|
|
#define SUNXI_MMC_STATUS_CARD_DATA_BUSY (0x1 << 9)
|
|
#define SUNXI_MMC_STATUS_DATA_FSM_BUSY (0x1 << 10)
|
|
+#define SUNXI_MMC_STATUS_FIFO_LEVEL(reg) (((reg) >> 17) & 0x3fff)
|
|
|
|
#define SUNXI_MMC_NTSR_MODE_SEL_NEW (0x1 << 31)
|
|
|
|
diff --git a/drivers/mmc/sunxi_mmc.c b/drivers/mmc/sunxi_mmc.c
|
|
index a30fd8fbdb..ce085c69f5 100644
|
|
--- a/drivers/mmc/sunxi_mmc.c
|
|
+++ b/drivers/mmc/sunxi_mmc.c
|
|
@@ -311,8 +311,9 @@ static int mmc_trans_data_by_cpu(struct sunxi_mmc_priv *priv, struct mmc *mmc,
|
|
SUNXI_MMC_STATUS_FIFO_FULL;
|
|
unsigned i;
|
|
unsigned *buff = (unsigned int *)(reading ? data->dest : data->src);
|
|
- unsigned byte_cnt = data->blocksize * data->blocks;
|
|
- unsigned timeout_msecs = byte_cnt >> 8;
|
|
+ unsigned word_cnt = (data->blocksize * data->blocks) >> 2;
|
|
+ unsigned timeout_msecs = word_cnt >> 6;
|
|
+ uint32_t status;
|
|
unsigned long start;
|
|
|
|
if (timeout_msecs < 2000)
|
|
@@ -323,16 +324,38 @@ static int mmc_trans_data_by_cpu(struct sunxi_mmc_priv *priv, struct mmc *mmc,
|
|
|
|
start = get_timer(0);
|
|
|
|
- for (i = 0; i < (byte_cnt >> 2); i++) {
|
|
- while (readl(&priv->reg->status) & status_bit) {
|
|
+ for (i = 0; i < word_cnt;) {
|
|
+ unsigned int in_fifo;
|
|
+
|
|
+ while ((status = readl(&priv->reg->status)) & status_bit) {
|
|
if (get_timer(start) > timeout_msecs)
|
|
return -1;
|
|
}
|
|
|
|
- if (reading)
|
|
- buff[i] = readl(&priv->reg->fifo);
|
|
- else
|
|
- writel(buff[i], &priv->reg->fifo);
|
|
+ /*
|
|
+ * For writing we do not easily know the FIFO size, so have
|
|
+ * to check the FIFO status after every word written.
|
|
+ * TODO: For optimisation we could work out a minimum FIFO
|
|
+ * size across all SoCs, and use that together with the current
|
|
+ * fill level to write chunks of words.
|
|
+ */
|
|
+ if (!reading) {
|
|
+ writel(buff[i++], &priv->reg->fifo);
|
|
+ continue;
|
|
+ }
|
|
+
|
|
+ /*
|
|
+ * The status register holds the current FIFO level, so we
|
|
+ * can be sure to collect as many words from the FIFO
|
|
+ * register without checking the status register after every
|
|
+ * read. That saves half of the costly MMIO reads, effectively
|
|
+ * doubling the read performance.
|
|
+ */
|
|
+ for (in_fifo = SUNXI_MMC_STATUS_FIFO_LEVEL(status);
|
|
+ in_fifo > 0;
|
|
+ in_fifo--)
|
|
+ buff[i++] = readl_relaxed(&priv->reg->fifo);
|
|
+ dmb();
|
|
}
|
|
|
|
return 0;
|
|
--
|
|
2.31.1
|
|
|