Lean Sheng Tan has submitted this change. ( https://review.coreboot.org/c/coreboot/+/70175?usp=email )
Change subject: lib/lzmadecode: Allow for 8 byte reads on 64bit
......................................................................
lib/lzmadecode: Allow for 8 byte reads on 64bit
This adds an optimization to the LZMA decoder so that it reads from the boot medium in chunks of 8 bytes when that is the general-purpose register width, instead of always reading 4 bytes at a time. Whether this is faster depends on the cache, memory and SPI controller, but it is likely to be either the same speed or faster.
TESTED
- google/vilboz: cached boot medium
                   64bit before - 32bit  - 64bit after
    load FSP-M:    35,674       - 35,595 - 34,690
    load ramstage: 42,134       - 43,378 - 40,882
    load FSP-S:    24,954       - 25,496 - 24,368
- foxconn/g41m: uncached boot medium for testing
                   64bit before - 32bit  - 64bit after
    load ramstage: 51,164       - 51,872 - 51,894
Change-Id: I890c075307c0aec877618d9902ea352ae42a3bfa
Signed-off-by: Arthur Heymans <arthur@aheymans.xyz>
Reviewed-on: https://review.coreboot.org/c/coreboot/+/70175
Tested-by: build bot (Jenkins) <no-reply@coreboot.org>
Reviewed-by: Lean Sheng Tan <sheng.tan@9elements.com>
Reviewed-by: Julius Werner <jwerner@chromium.org>
---
M src/lib/lzmadecode.c
M src/lib/lzmadecode.h
2 files changed, 12 insertions(+), 10 deletions(-)
Approvals:
  Lean Sheng Tan: Looks good to me, but someone else must approve
  build bot (Jenkins): Verified
  Julius Werner: Looks good to me, approved
diff --git a/src/lib/lzmadecode.c b/src/lib/lzmadecode.c index cb86829..5c6baa4 100644 --- a/src/lib/lzmadecode.c +++ b/src/lib/lzmadecode.c @@ -35,15 +35,15 @@ #define kBitModelTotal (1 << kNumBitModelTotalBits) #define kNumMoveBits 5
-/* Use 32-bit reads whenever possible to avoid bad flash performance. Fall back - * to byte reads for last 4 bytes since RC_TEST returns an error when BufferLim +/* Use sizeof(SizeT) sized reads whenever possible to avoid bad flash performance. Fall back + * to byte reads for last sizeof(SizeT) bytes since RC_TEST returns an error when BufferLim * is *reached* (not surpassed!), meaning we can't allow that to happen while * there are still bytes to decode from the algorithm's point of view. */ #define RC_READ_BYTE \ - (look_ahead_ptr < 4 ? look_ahead.raw[look_ahead_ptr++] \ - : ((((uintptr_t) Buffer & 3) \ - || ((SizeT) (BufferLim - Buffer) <= 4)) ? (*Buffer++) \ - : ((look_ahead.dw = *(UInt32 *)Buffer), (Buffer += 4), \ + (look_ahead_ptr < sizeof(SizeT) ? look_ahead.raw[look_ahead_ptr++] \ + : ((((uintptr_t) Buffer & (sizeof(SizeT) - 1)) \ + || ((SizeT) (BufferLim - Buffer) <= sizeof(SizeT))) ? (*Buffer++) \ + : ((look_ahead.dw = *(SizeT *)Buffer), (Buffer += sizeof(SizeT)), \ (look_ahead_ptr = 1), look_ahead.raw[0])))
#define RC_INIT2 Code = 0; Range = 0xFFFFFFFF; \ @@ -207,10 +207,10 @@ int len = 0; const Byte *Buffer; const Byte *BufferLim; - int look_ahead_ptr = 4; + int look_ahead_ptr = sizeof(SizeT); union { - Byte raw[4]; - UInt32 dw; + Byte raw[sizeof(SizeT)]; + SizeT dw; } look_ahead; UInt32 Range; UInt32 Code; diff --git a/src/lib/lzmadecode.h b/src/lib/lzmadecode.h index 9ed352a..5498061 100644 --- a/src/lib/lzmadecode.h +++ b/src/lib/lzmadecode.h @@ -22,10 +22,12 @@ #ifndef __LZMADECODE_H #define __LZMADECODE_H
+#include <types.h> + typedef unsigned char Byte; typedef unsigned short UInt16; typedef unsigned int UInt32; -typedef UInt32 SizeT; +typedef size_t SizeT;
#define CProb UInt16