[coreboot-gerrit] Patch set updated for coreboot: 71d4d0c load_payload: Use 32-bit accesses to speed up decompression.

Vladimir Serbinenko (phcoder@gmail.com) gerrit at coreboot.org
Wed Feb 5 17:06:35 CET 2014


Vladimir Serbinenko (phcoder at gmail.com) just uploaded a new patch set to gerrit, which you can find at http://review.coreboot.org/5144

-gerrit

commit 71d4d0c78051334134af4af7d937df007017160d
Author: Vladimir Serbinenko <phcoder at gmail.com>
Date:   Wed Feb 5 17:00:40 2014 +0100

    load_payload: Use 32-bit accesses to speed up decompression.
    
    Flash prefers 32-bit sequential access. On some platforms ROM is
    not cached due to, among other things, MTRR shortage. Moreover, ROM caching
    is not currently enabled by default. With this patch, payload decompression
    is sped up by a factor of 4.
    
    Tested on X201, with caching disabled:
    
    Before:
      90:load payload                  4,470,841 (24,505)
      99:selfboot jump                 6,073,812 (1,602,971)
    
    After:
      90:load payload                  4,530,979 (17,728)
      99:selfboot jump                 5,103,408 (572,429)
    
    Change-Id: Id17e61316dbbf73f4a837bf173f88bf26c01c62b
    Signed-off-by: Vladimir Serbinenko <phcoder at gmail.com>
---
 src/lib/cbfs_core.c  |  6 ++++++
 src/lib/lzmadecode.c | 11 ++++++++++-
 2 files changed, 16 insertions(+), 1 deletion(-)

diff --git a/src/lib/cbfs_core.c b/src/lib/cbfs_core.c
index 839b994..50c037e 100644
--- a/src/lib/cbfs_core.c
+++ b/src/lib/cbfs_core.c
@@ -202,6 +202,12 @@ int cbfs_decompress(int algo, void *src, void *dst, int len)
 {
 	switch (algo) {
 		case CBFS_COMPRESS_NONE:
+			/* Reads need to be aligned at 4 bytes to avoid
+			   poor flash performance.  */
+			while (len && ((u32)src & 3)) {
+				*(u8*)dst++ = *(u8*)src++;
+				len--;
+			}
 			memmove(dst, src, len);
 			return len;
 #ifdef CBFS_CORE_WITH_LZMA
diff --git a/src/lib/lzmadecode.c b/src/lib/lzmadecode.c
index 1cf647d..b27e811 100644
--- a/src/lib/lzmadecode.c
+++ b/src/lib/lzmadecode.c
@@ -28,7 +28,10 @@
 #define kBitModelTotal (1 << kNumBitModelTotalBits)
 #define kNumMoveBits 5
 
-#define RC_READ_BYTE (*Buffer++)
+/* Use 32-bit reads whenever possible to avoid bad flash performance.  */
+#define RC_READ_BYTE (look_ahead_ptr < 4 ? look_ahead.raw[look_ahead_ptr++] \
+		      : ((((UInt32) Buffer & 3) || ((unsigned long long) (BufferLim - Buffer) < 4)) ? (*Buffer++) \
+	   : ((look_ahead.dw = *(UInt32 *)Buffer), (Buffer += 4), (look_ahead_ptr = 1), look_ahead.raw[0])))
 
 #define RC_INIT2 Code = 0; Range = 0xFFFFFFFF; \
   { int i; for(i = 0; i < 5; i++) { RC_TEST; Code = (Code << 8) | RC_READ_BYTE; }}
@@ -149,6 +152,12 @@ int LzmaDecode(CLzmaDecoderState *vs,
   int len = 0;
   const Byte *Buffer;
   const Byte *BufferLim;
+  int look_ahead_ptr = 4;
+  union
+  {
+	  Byte raw[4];
+	  UInt32 dw;
+  } look_ahead;
   UInt32 Range;
   UInt32 Code;
 



More information about the coreboot-gerrit mailing list