Jacob Garber has uploaded this change for review.

View Change

[TEST] Add support for link time optimization

- Code generation is deferred until link time, so the compiler driver
must be invoked at all linking stages instead of the linker. As a
consequence, all linker arguments must be passed with the -Wl, prefix.
- Partial linking is not supported. Instead, object files are collected
into thin archives, which are then passed to the final link.
- The dead_code() macro causes linking errors, since dead functions
aren't optimized out until after linking has begun. This macro could be
replaced with the preprocessor if necessary, or just disabled for LTO
builds.
- Wrapping libgcc functions causes a symbol mismatch when using LTO.
Wrapping these functions was originally done to support alternate
regparm values, though AFAICT this isn't used anywhere.

Using LTO leads to a ~10% decrease in stage size for QEMU and ~18% for
the Thinkpad T500, and both targets boot successfully.

Change-Id: I48c31ea8b1b57276125cffdac44c7c16642547ac
Signed-off-by: Jacob Garber <jgarber1@ualberta.ca>
---
M Makefile.inc
M src/Kconfig
M src/arch/x86/Makefile.inc
M src/cpu/x86/smm/Makefile.inc
M src/include/assert.h
M src/lib/Makefile.inc
M src/lib/gcc.c
M toolchain.inc
M util/xcompile/xcompile
9 files changed, 42 insertions(+), 24 deletions(-)

git pull ssh://review.coreboot.org:29418/coreboot refs/changes/89/38989/1
diff --git a/Makefile.inc b/Makefile.inc
index 1f18726..b85c11b 100644
--- a/Makefile.inc
+++ b/Makefile.inc
@@ -434,6 +434,10 @@
endif
endif

+ifeq ($(CONFIG_LTO),y)
+CFLAGS_common += -flto -fuse-linker-plugin -fno-fat-lto-objects
+endif
+
ADAFLAGS_common += -gnatp
ADAFLAGS_common += -Wuninitialized -Wall -Werror
ADAFLAGS_common += -pipe -g -nostdinc
@@ -478,7 +482,7 @@
# Disable style checks for now
ADAFLAGS_common += -gnatyN

-LDFLAGS_common := --gc-sections -nostdlib -nostartfiles -static --emit-relocs
+LDFLAGS_common := -nostdlib -nostartfiles -static -Wl,--emit-relocs,--gc-sections

ifeq ($(CONFIG_WARNINGS_ARE_ERRORS),y)
CFLAGS_common += -Werror
diff --git a/src/Kconfig b/src/Kconfig
index f75f942..4b81818 100644
--- a/src/Kconfig
+++ b/src/Kconfig
@@ -116,6 +116,15 @@

For details see https://ccache.samba.org.

+config LTO
+ bool "Use link time optimization"
+ # Enable now for testing
+ default y
+ depends on COMPILER_GCC
+ help
+ Compile with link time optimization. This can often decrease the
+ final binary size, but may increase compilation time.
+
config FMD_GENPARSER
bool "Generate flashmap descriptor parser using flex and bison"
default n
diff --git a/src/arch/x86/Makefile.inc b/src/arch/x86/Makefile.inc
index 534f2ce..75f96d2 100644
--- a/src/arch/x86/Makefile.inc
+++ b/src/arch/x86/Makefile.inc
@@ -80,7 +80,7 @@

$$(objcbfs)/$(1).debug: $$$$($(1)-libs) $$$$($(1)-objs)
@printf " LINK $$(subst $$(obj)/,,$$(@))\n"
- $$(LD_$(1)) $$(LDFLAGS_$(1)) -o $$@ -L$$(obj) $$(COMPILER_RT_FLAGS_$(1)) --whole-archive --start-group $$(filter-out %.ld,$$($(1)-objs)) $$($(1)-libs) --no-whole-archive $$(COMPILER_RT_$(1)) --end-group -T $(call src-to-obj,$(1),$(dir)/memlayout.ld) --oformat $(2)
+ $$(LD_$(1)) $$(LDFLAGS_$(1)) -o $$@ -L$$(obj) $$(COMPILER_RT_FLAGS_$(1)) -Wl,--whole-archive,--start-group $$(filter-out %.ld,$$($(1)-objs)) $$($(1)-libs) -Wl,--no-whole-archive $$(COMPILER_RT_$(1)) -Wl,--end-group -T $(call src-to-obj,$(1),$(dir)/memlayout.ld) -Wl,--oformat=$(2)
-LANG=C LC_ALL= $$(OBJCOPY_$(1)) --only-section .illegal_globals $$(@) $$(objcbfs)/$(1)_null.offenders >/dev/null 2>&1
if [ -z "$$$$($$(NM_$(1)) $$(objcbfs)/$(1)_null.offenders 2>&1 | grep 'no symbols')" ];then \
echo "Forbidden global variables in $(1):"; \
@@ -212,11 +212,11 @@
postcar-y += postcar.c
postcar-$(CONFIG_COLLECT_TIMESTAMPS_TSC) += timestamp.c

-LDFLAGS_postcar += -Map $(objcbfs)/postcar.map
+LDFLAGS_postcar += -Wl,-Map,$(objcbfs)/postcar.map

$(objcbfs)/postcar.debug: $$(postcar-objs)
@printf " LINK $(subst $(obj)/,,$(@))\n"
- $(LD_postcar) $(LDFLAGS_postcar) -o $@ -L$(obj) $(COMPILER_RT_FLAGS_postcar) --whole-archive --start-group $(filter-out %.ld,$^) --no-whole-archive $(COMPILER_RT_postcar) --end-group -T $(call src-to-obj,postcar,src/arch/x86/memlayout.ld)
+ $(LD_postcar) $(LDFLAGS_postcar) -o $@ -L$(obj) $(COMPILER_RT_FLAGS_postcar) -Wl,--whole-archive,--start-group $(filter-out %.ld,$^) -Wl,--no-whole-archive $(COMPILER_RT_postcar) -Wl,--end-group -T $(call src-to-obj,postcar,src/arch/x86/memlayout.ld)

$(objcbfs)/postcar.elf: $(objcbfs)/postcar.debug.rmod
cp $< $@
@@ -309,18 +309,20 @@

endif

-$(objcbfs)/ramstage.debug: $(objgenerated)/ramstage.o $(call src-to-obj,ramstage,src/arch/x86/memlayout.ld)
- @printf " CC $(subst $(obj)/,,$(@))\n"
- $(LD_ramstage) $(CPPFLAGS) $(LDFLAGS_ramstage) -o $@ -L$(obj) $< -T $(call src-to-obj,ramstage,src/arch/x86/memlayout.ld)
-
-$(objgenerated)/ramstage.o: $$(ramstage-objs) $(COMPILER_RT_ramstage) $$(ramstage-libs)
- @printf " CC $(subst $(obj)/,,$(@))\n"
ifeq ($(CONFIG_ARCH_RAMSTAGE_X86_32),y)
- $(LD_ramstage) -m elf_i386 -r -o $@ $(COMPILER_RT_FLAGS_ramstage) --whole-archive --start-group $(filter-out %.ld,$(ramstage-objs)) $(ramstage-libs) --no-whole-archive $(COMPILER_RT_ramstage) --end-group
+LDFLAGS_ramstage += -Wl,-m,elf_i386
else
- $(LD_ramstage) -m elf_x86_64 -r -o $@ $(COMPILER_RT_FLAGS_ramstage) --whole-archive --start-group $(filter-out %.ld,$(ramstage-objs)) $(ramstage-libs) --no-whole-archive $(COMPILER_RT_ramstage) --end-group
+LDFLAGS_ramstage += -Wl,-m,elf_x86_64
endif

+$(objcbfs)/ramstage.debug: $(objgenerated)/ramstage.a $(call src-to-obj,ramstage,src/arch/x86/memlayout.ld)
+ @printf " CC $(subst $(obj)/,,$(@))\n"
+ $(LD_ramstage) $(CPPFLAGS) $(LDFLAGS_ramstage) $(COMPILER_RT_FLAGS_ramstage) -o $@ -L$(obj) -Wl,--whole-archive,--start-group $< -Wl,--no-whole-archive $(COMPILER_RT_ramstage) -Wl,--end-group -T $(call src-to-obj,ramstage,src/arch/x86/memlayout.ld)
+
+$(objgenerated)/ramstage.a: $$(ramstage-objs) $$(ramstage-libs)
+ @printf " AR $(subst $(obj)/,,$(@))\n"
+ $(AR_ramstage) rcT $@ $(filter-out %.ld,$(ramstage-objs)) $(ramstage-libs)
+
endif # CONFIG_ARCH_RAMSTAGE_X86_32 / CONFIG_ARCH_RAMSTAGE_X86_64

smm-$(CONFIG_IDT_IN_EVERY_STAGE) += exception.c
diff --git a/src/cpu/x86/smm/Makefile.inc b/src/cpu/x86/smm/Makefile.inc
index 11a4e67..f582a31 100644
--- a/src/cpu/x86/smm/Makefile.inc
+++ b/src/cpu/x86/smm/Makefile.inc
@@ -25,8 +25,8 @@
smm-generic-ccopts += -D__SMM__
smm-c-deps:=$$(OPTION_TABLE_H)

-$(obj)/smm/smm.o: $$(smm-objs) $(COMPILER_RT_smm)
- $(LD_smm) -nostdlib -r -o $@ $(COMPILER_RT_FLAGS_smm) --whole-archive --start-group $(smm-objs) --no-whole-archive $(COMPILER_RT_smm) --end-group
+$(obj)/smm/smm.a: $$(smm-objs)
+ $(AR_smm) rcT $@ $^

# change to the target path because objcopy will use the path name in its
# ELF symbol names.
@@ -53,7 +53,7 @@
# SMM Stub Module. The stub is used as a trampoline for relocation and normal
# SMM handling.
$(obj)/smmstub/smmstub.o: $$(smmstub-objs) $(COMPILER_RT_smmstub)
- $(LD_smmstub) -nostdlib -r -o $@ $(COMPILER_RT_FLAGS_smmstub) --whole-archive --start-group $(smmstub-objs) --no-whole-archive $(COMPILER_RT_smmstub) --end-group
+ $(LD_smmstub) -nostdlib -r -o $@ $(COMPILER_RT_FLAGS_smmstub) -Wl,--whole-archive,--start-group $(smmstub-objs) -Wl,--no-whole-archive $(COMPILER_RT_smmstub) -Wl,--end-group

# Link the SMM stub module with a 0-byte heap.
ifeq ($(CONFIG_ARCH_RAMSTAGE_X86_32),y)
@@ -72,9 +72,9 @@
# C-based SMM handler.

ifeq ($(CONFIG_ARCH_RAMSTAGE_X86_32),y)
-$(eval $(call rmodule_link,$(obj)/smm/smm.elf, $(obj)/smm/smm.o, $(CONFIG_SMM_MODULE_HEAP_SIZE),x86_32))
+$(eval $(call rmodule_link,$(obj)/smm/smm.elf, $(obj)/smm/smm.a, $(CONFIG_SMM_MODULE_HEAP_SIZE),x86_32))
else
-$(eval $(call rmodule_link,$(obj)/smm/smm.elf, $(obj)/smm/smm.o, $(CONFIG_SMM_MODULE_HEAP_SIZE),x86_64))
+$(eval $(call rmodule_link,$(obj)/smm/smm.elf, $(obj)/smm/smm.a, $(CONFIG_SMM_MODULE_HEAP_SIZE),x86_64))
endif

$(obj)/smm/smm: $(obj)/smm/smm.elf.rmod
@@ -82,8 +82,8 @@

else # CONFIG_SMM_TSEG

-$(obj)/smm/smm: $(obj)/smm/smm.o $(src)/cpu/x86/smm/smm.ld
- $(LD_smm) $(LDFLAGS_smm) -o $(obj)/smm/smm.elf -T $(src)/cpu/x86/smm/smm.ld $(obj)/smm/smm.o
+$(obj)/smm/smm: $(obj)/smm/smm.a $(src)/cpu/x86/smm/smm.ld
+ $(LD_smm) $(LDFLAGS_smm) -o $(obj)/smm/smm.elf -T $(src)/cpu/x86/smm/smm.ld -Wl,--whole-archive,--start-group $(obj)/smm/smm.a -Wl,--no-whole-archive $(COMPILER_RT_smm) -Wl,--end-group
$(NM_smm) -n $(obj)/smm/smm.elf | sort > $(obj)/smm/smm.map
$(OBJCOPY_smm) -O binary $(obj)/smm/smm.elf $@

diff --git a/src/include/assert.h b/src/include/assert.h
index e0db0bc..6901953 100644
--- a/src/include/assert.h
+++ b/src/include/assert.h
@@ -61,7 +61,8 @@
dead_code_assertion_failed_at_line_##line(); \
} while (0)
#define _dead_code(line) __dead_code(line)
-#define dead_code() _dead_code(__LINE__)
+//#define dead_code() _dead_code(__LINE__)
+#define dead_code()

/* This can be used in the context of an expression of type 'type'. */
#define dead_code_t(type) ({ \
diff --git a/src/lib/Makefile.inc b/src/lib/Makefile.inc
index 2333f64..ac6d624 100644
--- a/src/lib/Makefile.inc
+++ b/src/lib/Makefile.inc
@@ -291,7 +291,7 @@
# rmdoule is named $(1).rmod
define rmodule_link
$(strip $(1)): $(strip $(2)) $$(COMPILER_RT_rmodules_$(4)) $(call src-to-obj,rmodules_$(4),src/lib/rmodule.ld) | $$(RMODTOOL)
- $$(LD_rmodules_$(4)) $$(LDFLAGS_rmodules_$(4)) $(RMODULE_LDFLAGS) -T $(call src-to-obj,rmodules_$(4),src/lib/rmodule.ld) --defsym=__heap_size=$(strip $(3)) -o $$@ --whole-archive --start-group $(filter-out %.ld,$(2)) --end-group
+ $$(LD_rmodules_$(4)) $$(LDFLAGS_rmodules_$(4)) $(RMODULE_LDFLAGS) -T $(call src-to-obj,rmodules_$(4),src/lib/rmodule.ld) -Wl,--defsym=__heap_size=$(strip $(3)) -o $$@ -Wl,--whole-archive,--start-group $(filter-out %.ld,$(2)) -Wl,--no-whole-archive $$(COMPILER_RT_rmodules_$(4)) -Wl,--end-group
$$(NM_rmodules_$(4)) -n $$@ > $$(basename $$@).map
endef

diff --git a/src/lib/gcc.c b/src/lib/gcc.c
index 5a93f45..66a7788 100644
--- a/src/lib/gcc.c
+++ b/src/lib/gcc.c
@@ -24,6 +24,7 @@
/* TODO: maybe this code should move to arch/x86 as architecture
* specific implementations may vary
*/
+#if 0
#define WRAP_LIBGCC_CALL(type, name) \
asmlinkage type __real_##name(type a, type b); \
type __wrap_##name(type a, type b); \
@@ -33,3 +34,4 @@
WRAP_LIBGCC_CALL(unsigned long long, __udivdi3)
WRAP_LIBGCC_CALL(long long, __moddi3)
WRAP_LIBGCC_CALL(unsigned long long, __umoddi3)
+#endif
diff --git a/toolchain.inc b/toolchain.inc
index 865227b..f52a518 100644
--- a/toolchain.inc
+++ b/toolchain.inc
@@ -117,7 +117,7 @@
$(error Check your .config file for CONFIG_ARCH_$(1)_* settings))
CC_$(1) := $(CC_$(2))
GCC_$(1) := $(GCC_CC_$(2))
-LD_$(1) := $(LD_$(2))
+LD_$(1) := $(CC_$(2))
NM_$(1) := $(NM_$(2))
AR_$(1) := $(AR_$(2))
GNATBIND_$(1) := $(GNATBIND_$(2))
@@ -130,7 +130,7 @@
CPPFLAGS_$(1) = $$(CPPFLAGS_common) $$(CPPFLAGS_$(2)) -D__ARCH_$(2)__
COMPILER_RT_$(1) := $$(COMPILER_RT_$(2))
COMPILER_RT_FLAGS_$(1) := $$(COMPILER_RT_FLAGS_$(2))
-LDFLAGS_$(1) = $$(LDFLAGS_common) $$(LDFLAGS_$(2))
+LDFLAGS_$(1) = $$(CFLAGS_$(1)) $$(LDFLAGS_common) $$(LDFLAGS_$(2))
endef

# define_class: Allows defining any program as dynamic class and compiler tool
diff --git a/util/xcompile/xcompile b/util/xcompile/xcompile
index 3203d71..8515813 100755
--- a/util/xcompile/xcompile
+++ b/util/xcompile/xcompile
@@ -363,7 +363,7 @@
TCLIST="i386 x86_64"
TWIDTH="32"
TABI="elf"
- CC_RT_EXTRA_GCC="--wrap __divdi3 --wrap __udivdi3 --wrap __moddi3 --wrap __umoddi3"
+ #CC_RT_EXTRA_GCC="-Wl,--wrap=__divdi3,--wrap=__udivdi3,--wrap=__moddi3,--wrap=__umoddi3"
}

arch_config_ppc64() {

To view, visit change 38989. To unsubscribe, or for help writing mail filters, visit settings.

Gerrit-Project: coreboot
Gerrit-Branch: master
Gerrit-Change-Id: I48c31ea8b1b57276125cffdac44c7c16642547ac
Gerrit-Change-Number: 38989
Gerrit-PatchSet: 1
Gerrit-Owner: Jacob Garber <jgarber1@ualberta.ca>
Gerrit-Reviewer: Jacob Garber <jgarber1@ualberta.ca>
Gerrit-Reviewer: Martin Roth <martinroth@google.com>
Gerrit-Reviewer: Patrick Georgi <pgeorgi@google.com>
Gerrit-MessageType: newchange