Arthur Heymans has uploaded this change for review. ( https://review.coreboot.org/c/coreboot/+/70175 )
Change subject: lib/lzmadecode: Allow for 8 byte reads on 64bit ......................................................................
lib/lzmadecode: Allow for 8 byte reads on 64bit
This adds an optimization to the LZMA decoder: it now reads from the boot medium in chunks of 8 bytes when that is the general-purpose register width, instead of always reading 4 bytes. Whether this is faster depends on the cache / memory / SPI controller, but it is likely to be either the same or faster.
TESTED
- google/vilboz: cached boot medium
                   64bit before - 32bit  - 64bit after
  load FSP-M:      35,674       - 35,595 - 34,690
  load ramstage:   42,134       - 43,378 - 40,882
  load FSP-S:      24,954       - 25,496 - 24,368

- foxconn/g41m: uncached boot medium for testing
                   64bit before - 32bit  - 64bit after
  load ramstage:   51,164       - 51,872 - 64,892
Change-Id: I890c075307c0aec877618d9902ea352ae42a3bfa
Signed-off-by: Arthur Heymans <arthur@aheymans.xyz>
---
M src/lib/lzmadecode.c
M src/lib/lzmadecode.h
2 files changed, 37 insertions(+), 10 deletions(-)
git pull ssh://review.coreboot.org:29418/coreboot refs/changes/75/70175/1
diff --git a/src/lib/lzmadecode.c b/src/lib/lzmadecode.c index cb86829..5c6baa4 100644 --- a/src/lib/lzmadecode.c +++ b/src/lib/lzmadecode.c @@ -35,15 +35,15 @@ #define kBitModelTotal (1 << kNumBitModelTotalBits) #define kNumMoveBits 5
-/* Use 32-bit reads whenever possible to avoid bad flash performance. Fall back - * to byte reads for last 4 bytes since RC_TEST returns an error when BufferLim +/* Use sizeof(SizeT) sized reads whenever possible to avoid bad flash performance. Fall back + * to byte reads for last sizeof(SizeT) bytes since RC_TEST returns an error when BufferLim * is *reached* (not surpassed!), meaning we can't allow that to happen while * there are still bytes to decode from the algorithm's point of view. */ #define RC_READ_BYTE \ - (look_ahead_ptr < 4 ? look_ahead.raw[look_ahead_ptr++] \ - : ((((uintptr_t) Buffer & 3) \ - || ((SizeT) (BufferLim - Buffer) <= 4)) ? (*Buffer++) \ - : ((look_ahead.dw = *(UInt32 *)Buffer), (Buffer += 4), \ + (look_ahead_ptr < sizeof(SizeT) ? look_ahead.raw[look_ahead_ptr++] \ + : ((((uintptr_t) Buffer & (sizeof(SizeT) - 1)) \ + || ((SizeT) (BufferLim - Buffer) <= sizeof(SizeT))) ? (*Buffer++) \ + : ((look_ahead.dw = *(SizeT *)Buffer), (Buffer += sizeof(SizeT)), \ (look_ahead_ptr = 1), look_ahead.raw[0])))
#define RC_INIT2 Code = 0; Range = 0xFFFFFFFF; \ @@ -207,10 +207,10 @@ int len = 0; const Byte *Buffer; const Byte *BufferLim; - int look_ahead_ptr = 4; + int look_ahead_ptr = sizeof(SizeT); union { - Byte raw[4]; - UInt32 dw; + Byte raw[sizeof(SizeT)]; + SizeT dw; } look_ahead; UInt32 Range; UInt32 Code; diff --git a/src/lib/lzmadecode.h b/src/lib/lzmadecode.h index 9ed352a..d897af7 100644 --- a/src/lib/lzmadecode.h +++ b/src/lib/lzmadecode.h @@ -25,7 +25,7 @@ typedef unsigned char Byte; typedef unsigned short UInt16; typedef unsigned int UInt32; -typedef UInt32 SizeT; +typedef unsigned long int SizeT;
#define CProb UInt16