New subject: [patch]: memcpy/memset inline asm & config_compress disabled when needed

21 Feb 2010

My fam10 board is quite slow when decompressing modules from ROM to RAM,
so I had to make some modifications. This is not the final solution, but
each of these two patches is useful on its own.
disable_compress_asneeded.patch:
Sometimes we need to disable CBFS_COMPRESS_FLAG. The default value
remains 1, as before. If CONFIG_COMPRESS is set to 0 in the mainboard
Kconfig, CBFS_COMPRESS_FLAG is disabled.
Kconfig:
   ------------------
    config COMPRESS
    	hex
    	default 0
    	depends on BOARD_AMD_MAHOGANY_FAM10
Signed-off-by: Zheng Bao zheng.bao@amd.com
Index: Makefile
===================================================================

--- Makefile	(revision 5133)
+++ Makefile	(working copy)
@@ -231,6 +231,9 @@
 CFLAGS += -fno-common -ffreestanding -fno-builtin -fomit-frame-pointer
CBFS_COMPRESS_FLAG:=l
+ifeq ($(CONFIG_COMPRESS),0)
+CBFS_COMPRESS_FLAG:=
+endif
 CBFS_PAYLOAD_COMPRESS_FLAG:=
 ifeq ($(CONFIG_COMPRESSED_PAYLOAD_LZMA),y)
 CBFS_PAYLOAD_COMPRESS_FLAG:=l
memcpy_memset_inline_asm.patch:
Rewrite memset/memcpy as inline assembly code. It makes the code
run much faster if memcpy runs in ROM.
Signed-off-by: Zheng Bao zheng.bao@amd.com
Index: src/lib/memcpy.c
===================================================================
--- src/lib/memcpy.c	(revision 5133)
+++ src/lib/memcpy.c	(working copy)
@@ -3,10 +3,14 @@
 {
    const char *src = vsrc;
    char *dest = vdest;
-	int i;
-	for (i = 0; i < (int)bytes; i++)
-		dest[i] = src[i];
+	__asm__ __volatile__ (				\
+		"cld \n\t"				\
+		"rep \n\t"				\
+		"movsb"					\
+		:		/* No output */		\
+		: "S"(src), "D"(dest), "c"(bytes)	\
+		);
return vdest;
 }
Index: src/lib/memset.c
===================================================================
--- src/lib/memset.c	(revision 5133)
+++ src/lib/memset.c	(working copy)
@@ -2,11 +2,15 @@
void *memset(void *s, int c, size_t n)
 {
-	int i;
    char *ss = (char *) s;
-	for (i = 0; i < (int)n; i++)
-		ss[i] = c;
+	__asm__ __volatile__ (			\
+		"cld\n\t"			\
+		"rep\n\t"			\
+		"stosb"				\
+		:				\
+		: "a"(c), "D"(ss), "c"(n)	\
+		);
return s;
 }