Attention is currently required from: Angel Pons, Ashish Kumar Mishra, Saurabh Mishra, Subrata Banik.
Arthur Heymans has posted comments on this change. ( https://review.coreboot.org/c/coreboot/+/82079?usp=email )
Change subject: block/fast_spi: Use read32p/write32p for spi rw ......................................................................
Patch Set 2:
(1 comment)
File src/soc/intel/common/block/fast_spi/fast_spi_flash.c:
https://review.coreboot.org/c/coreboot/+/82079/comment/8d7e4d3c_9a229fb4 : PS2, Line 80: const uint8_t *byte_ptr = (const uint8_t *)data; : size_t bytes_to_copy; : union { : uint32_t full; : uint8_t bytes[4]; : } dword; : : for (size_t i = 0; i < len; i += 4) { : dword.full = 0; : : bytes_to_copy = (len - i < 4) ? len - i : 4; : for (size_t j = 0; j < bytes_to_copy; j++) : dword.bytes[j] = byte_ptr[i + j]; : : write32p(ctx->mmio_base + SPIBAR_FDATA(i >> 2), dword.full); : }
This is what I suggested in CB:81959 but back then I wasn't sure if the hardware can also accept smaller width transactions:
static void fill_xfer_fifo(struct fast_spi_flash_ctx *ctx, const uint8_t *data, size_t len) { /* FDATAn requires 32-bit accesses */ const uint32_t *dwords = (const uint32_t *)data; for (size_t i = 0; i < len / 4; i++) write32p(ctx->mmio_base + SPIBAR_FDATA(i), dwords[i]); if (len % 4) { uint32_t last_dword = 0; for (size_t i = 0; i < len % 4; i++) last_dword |= data[len / 4 + i] << (i * 8); write32p(ctx->mmio_base + SPIBAR_FDATA(len / 4), last_dword); } }
But if the hardware can tolerate byte-width transactions:
static void fill_xfer_fifo(struct fast_spi_flash_ctx *ctx, const uint8_t *data, size_t len) { const uint32_t *dwords = (const uint32_t *)data; for (size_t i = 0; i < len / 4; i++) write32p(ctx->mmio_base + SPIBAR_FDATA(i), dwords[i]); for (size_t i = 0; i < len % 4; i++) write8p(ctx->mmio_base + SPIBAR_FDATA(len / 4) + i, data[len / 4 + i]) }
Maximum code simplicity would be to only use byte reads/writes, but I'm not sure if there's a performance impact:
static void fill_xfer_fifo(struct fast_spi_flash_ctx *ctx, const uint8_t *data, size_t len) { for (size_t i = 0; i < len; i++) write8p(ctx->mmio_base + SPIBAR_FDATA(0) + i, data[i]) }
There usually is a performance impact as you have 4x the amount of transactions. Both flashrom and sb/intel/common do the following, which is similar to this code (maybe copy it to keep it consistent?):
if (len <= 0) return;
for (i = 0; i < len; i++) { if ((i % 4) == 0) temp32 = 0;
temp32 |= ((uint32_t)data[i]) << ((i % 4) * 8);
if ((i % 4) == 3) /* 32 bits are full, write them to regs. */ writel_(temp32, cntlr.data + (i - (i % 4))); } i--; if ((i % 4) != 3) /* Write remaining data to regs. */ writel_(temp32, cntlr.data + (i - (i % 4)));