Jacob Garber has uploaded this change for review. ( https://review.coreboot.org/c/coreboot/+/38989 )
Change subject: [TEST] Add support for link time optimization ......................................................................
[TEST] Add support for link time optimization
- Code generation is not done until after linking, so the compiler must be invoked at all linking stages instead of the linker. As a consequence all linker arguments must be prefixed with -Wl. - Partial linking is not supported. Instead, object files are collected into thin archives that are linked instead. - The dead_code() macro causes linking errors, since dead functions aren't optimized out until after linking has begun. This macro could be replaced with the preprocessor if necessary, or just disabled for LTO builds. - Wrapping libgcc functions causes a symbol mismatch when using LTO. Wrapping these functions was originally done to support alternate regparam values, though AFAICT this isn't used anywhere.
Using LTO leads to a ~10% decrease in stage size for QEMU and ~18% for the Thinkpad T500, and both targets boot successfully.
Change-Id: I48c31ea8b1b57276125cffdac44c7c16642547ac Signed-off-by: Jacob Garber jgarber1@ualberta.ca --- M Makefile.inc M src/Kconfig M src/arch/x86/Makefile.inc M src/cpu/x86/smm/Makefile.inc M src/include/assert.h M src/lib/Makefile.inc M src/lib/gcc.c M toolchain.inc M util/xcompile/xcompile 9 files changed, 42 insertions(+), 24 deletions(-)
git pull ssh://review.coreboot.org:29418/coreboot refs/changes/89/38989/1
diff --git a/Makefile.inc b/Makefile.inc index 1f18726..b85c11b 100644 --- a/Makefile.inc +++ b/Makefile.inc @@ -434,6 +434,10 @@ endif endif
+ifeq ($(CONFIG_LTO),y) +CFLAGS_common += -flto -fuse-linker-plugin -fno-fat-lto-objects +endif + ADAFLAGS_common += -gnatp ADAFLAGS_common += -Wuninitialized -Wall -Werror ADAFLAGS_common += -pipe -g -nostdinc @@ -478,7 +482,7 @@ # Disable style checks for now ADAFLAGS_common += -gnatyN
-LDFLAGS_common := --gc-sections -nostdlib -nostartfiles -static --emit-relocs +LDFLAGS_common := -nostdlib -nostartfiles -static -Wl,--emit-relocs,--gc-sections
ifeq ($(CONFIG_WARNINGS_ARE_ERRORS),y) CFLAGS_common += -Werror diff --git a/src/Kconfig b/src/Kconfig index f75f942..4b81818 100644 --- a/src/Kconfig +++ b/src/Kconfig @@ -116,6 +116,15 @@
For details see https://ccache.samba.org.
+config LTO + bool "Use link time optimization" + # Enable now for testing + default y + depends on COMPILER_GCC + help + Compile with link time optimization. This can often decrease the + final binary size, but may increase compilation time. + config FMD_GENPARSER bool "Generate flashmap descriptor parser using flex and bison" default n diff --git a/src/arch/x86/Makefile.inc b/src/arch/x86/Makefile.inc index 534f2ce..75f96d2 100644 --- a/src/arch/x86/Makefile.inc +++ b/src/arch/x86/Makefile.inc @@ -80,7 +80,7 @@
$$(objcbfs)/$(1).debug: $$$$($(1)-libs) $$$$($(1)-objs) @printf " LINK $$(subst $$(obj)/,,$$(@))\n" - $$(LD_$(1)) $$(LDFLAGS_$(1)) -o $$@ -L$$(obj) $$(COMPILER_RT_FLAGS_$(1)) --whole-archive --start-group $$(filter-out %.ld,$$($(1)-objs)) $$($(1)-libs) --no-whole-archive $$(COMPILER_RT_$(1)) --end-group -T $(call src-to-obj,$(1),$(dir)/memlayout.ld) --oformat $(2) + $$(LD_$(1)) $$(LDFLAGS_$(1)) -o $$@ -L$$(obj) $$(COMPILER_RT_FLAGS_$(1)) -Wl,--whole-archive,--start-group $$(filter-out %.ld,$$($(1)-objs)) $$($(1)-libs) -Wl,--no-whole-archive $$(COMPILER_RT_$(1)) -Wl,--end-group -T $(call src-to-obj,$(1),$(dir)/memlayout.ld) -Wl,--oformat=$(2) -LANG=C LC_ALL= $$(OBJCOPY_$(1)) --only-section .illegal_globals $$(@) $$(objcbfs)/$(1)_null.offenders >/dev/null 2>&1 if [ -z "$$$$($$(NM_$(1)) $$(objcbfs)/$(1)_null.offenders 2>&1 | grep 'no symbols')" ];then \ echo "Forbidden global variables in $(1):"; \ @@ -212,11 +212,11 @@ postcar-y += postcar.c postcar-$(CONFIG_COLLECT_TIMESTAMPS_TSC) += timestamp.c
-LDFLAGS_postcar += -Map $(objcbfs)/postcar.map +LDFLAGS_postcar += -Wl,-Map,$(objcbfs)/postcar.map
$(objcbfs)/postcar.debug: $$(postcar-objs) @printf " LINK $(subst $(obj)/,,$(@))\n" - $(LD_postcar) $(LDFLAGS_postcar) -o $@ -L$(obj) $(COMPILER_RT_FLAGS_postcar) --whole-archive --start-group $(filter-out %.ld,$^) --no-whole-archive $(COMPILER_RT_postcar) --end-group -T $(call src-to-obj,postcar,src/arch/x86/memlayout.ld) + $(LD_postcar) $(LDFLAGS_postcar) -o $@ -L$(obj) $(COMPILER_RT_FLAGS_postcar) -Wl,--whole-archive,--start-group $(filter-out %.ld,$^) -Wl,--no-whole-archive $(COMPILER_RT_postcar) -Wl,--end-group -T $(call src-to-obj,postcar,src/arch/x86/memlayout.ld)
$(objcbfs)/postcar.elf: $(objcbfs)/postcar.debug.rmod cp $< $@ @@ -309,18 +309,20 @@
endif
-$(objcbfs)/ramstage.debug: $(objgenerated)/ramstage.o $(call src-to-obj,ramstage,src/arch/x86/memlayout.ld) - @printf " CC $(subst $(obj)/,,$(@))\n" - $(LD_ramstage) $(CPPFLAGS) $(LDFLAGS_ramstage) -o $@ -L$(obj) $< -T $(call src-to-obj,ramstage,src/arch/x86/memlayout.ld) - -$(objgenerated)/ramstage.o: $$(ramstage-objs) $(COMPILER_RT_ramstage) $$(ramstage-libs) - @printf " CC $(subst $(obj)/,,$(@))\n" ifeq ($(CONFIG_ARCH_RAMSTAGE_X86_32),y) - $(LD_ramstage) -m elf_i386 -r -o $@ $(COMPILER_RT_FLAGS_ramstage) --whole-archive --start-group $(filter-out %.ld,$(ramstage-objs)) $(ramstage-libs) --no-whole-archive $(COMPILER_RT_ramstage) --end-group +LDFLAGS_ramstage += -Wl,-m,elf_i386 else - $(LD_ramstage) -m elf_x86_64 -r -o $@ $(COMPILER_RT_FLAGS_ramstage) --whole-archive --start-group $(filter-out %.ld,$(ramstage-objs)) $(ramstage-libs) --no-whole-archive $(COMPILER_RT_ramstage) --end-group +LDFLAGS_ramstage += -Wl,-m,elf_x86_64 endif
+$(objcbfs)/ramstage.debug: $(objgenerated)/ramstage.a $(call src-to-obj,ramstage,src/arch/x86/memlayout.ld) + @printf " CC $(subst $(obj)/,,$(@))\n" + $(LD_ramstage) $(CPPFLAGS) $(LDFLAGS_ramstage) $(COMPILER_RT_FLAGS_ramstage) -o $@ -L$(obj) -Wl,--whole-archive,--start-group $< -Wl,--no-whole-archive $(COMPILER_RT_ramstage) -Wl,--end-group -T $(call src-to-obj,ramstage,src/arch/x86/memlayout.ld) + +$(objgenerated)/ramstage.a: $$(ramstage-objs) $$(ramstage-libs) + @printf " AR $(subst $(obj)/,,$(@))\n" + $(AR_ramstage) rcT $@ $(filter-out %.ld,$(ramstage-objs)) $(ramstage-libs) + endif # CONFIG_ARCH_RAMSTAGE_X86_32 / CONFIG_ARCH_RAMSTAGE_X86_64
smm-$(CONFIG_IDT_IN_EVERY_STAGE) += exception.c diff --git a/src/cpu/x86/smm/Makefile.inc b/src/cpu/x86/smm/Makefile.inc index 11a4e67..f582a31 100644 --- a/src/cpu/x86/smm/Makefile.inc +++ b/src/cpu/x86/smm/Makefile.inc @@ -25,8 +25,8 @@ smm-generic-ccopts += -D__SMM__ smm-c-deps:=$$(OPTION_TABLE_H)
-$(obj)/smm/smm.o: $$(smm-objs) $(COMPILER_RT_smm) - $(LD_smm) -nostdlib -r -o $@ $(COMPILER_RT_FLAGS_smm) --whole-archive --start-group $(smm-objs) --no-whole-archive $(COMPILER_RT_smm) --end-group +$(obj)/smm/smm.a: $$(smm-objs) + $(AR_smm) rcT $@ $^
# change to the target path because objcopy will use the path name in its # ELF symbol names. @@ -53,7 +53,7 @@ # SMM Stub Module. The stub is used as a trampoline for relocation and normal # SMM handling. $(obj)/smmstub/smmstub.o: $$(smmstub-objs) $(COMPILER_RT_smmstub) - $(LD_smmstub) -nostdlib -r -o $@ $(COMPILER_RT_FLAGS_smmstub) --whole-archive --start-group $(smmstub-objs) --no-whole-archive $(COMPILER_RT_smmstub) --end-group + $(LD_smmstub) -nostdlib -r -o $@ $(COMPILER_RT_FLAGS_smmstub) -Wl,--whole-archive,--start-group $(smmstub-objs) -Wl,--no-whole-archive $(COMPILER_RT_smmstub) -Wl,--end-group
# Link the SMM stub module with a 0-byte heap. ifeq ($(CONFIG_ARCH_RAMSTAGE_X86_32),y) @@ -72,9 +72,9 @@ # C-based SMM handler.
ifeq ($(CONFIG_ARCH_RAMSTAGE_X86_32),y) -$(eval $(call rmodule_link,$(obj)/smm/smm.elf, $(obj)/smm/smm.o, $(CONFIG_SMM_MODULE_HEAP_SIZE),x86_32)) +$(eval $(call rmodule_link,$(obj)/smm/smm.elf, $(obj)/smm/smm.a, $(CONFIG_SMM_MODULE_HEAP_SIZE),x86_32)) else -$(eval $(call rmodule_link,$(obj)/smm/smm.elf, $(obj)/smm/smm.o, $(CONFIG_SMM_MODULE_HEAP_SIZE),x86_64)) +$(eval $(call rmodule_link,$(obj)/smm/smm.elf, $(obj)/smm/smm.a, $(CONFIG_SMM_MODULE_HEAP_SIZE),x86_64)) endif
$(obj)/smm/smm: $(obj)/smm/smm.elf.rmod @@ -82,8 +82,8 @@
else # CONFIG_SMM_TSEG
-$(obj)/smm/smm: $(obj)/smm/smm.o $(src)/cpu/x86/smm/smm.ld - $(LD_smm) $(LDFLAGS_smm) -o $(obj)/smm/smm.elf -T $(src)/cpu/x86/smm/smm.ld $(obj)/smm/smm.o +$(obj)/smm/smm: $(obj)/smm/smm.a $(src)/cpu/x86/smm/smm.ld + $(LD_smm) $(LDFLAGS_smm) -o $(obj)/smm/smm.elf -T $(src)/cpu/x86/smm/smm.ld -Wl,--whole-archive,--start-group $(obj)/smm/smm.a -Wl,--no-whole-archive $(COMPILER_RT_smm) --end-group $(NM_smm) -n $(obj)/smm/smm.elf | sort > $(obj)/smm/smm.map $(OBJCOPY_smm) -O binary $(obj)/smm/smm.elf $@
diff --git a/src/include/assert.h b/src/include/assert.h index e0db0bc..6901953 100644 --- a/src/include/assert.h +++ b/src/include/assert.h @@ -61,7 +61,8 @@ dead_code_assertion_failed_at_line_##line(); \ } while (0) #define _dead_code(line) __dead_code(line) -#define dead_code() _dead_code(__LINE__) +//#define dead_code() _dead_code(__LINE__) +#define dead_code()
/* This can be used in the context of an expression of type 'type'. */ #define dead_code_t(type) ({ \ diff --git a/src/lib/Makefile.inc b/src/lib/Makefile.inc index 2333f64..ac6d624 100644 --- a/src/lib/Makefile.inc +++ b/src/lib/Makefile.inc @@ -291,7 +291,7 @@ # rmdoule is named $(1).rmod define rmodule_link $(strip $(1)): $(strip $(2)) $$(COMPILER_RT_rmodules_$(4)) $(call src-to-obj,rmodules_$(4),src/lib/rmodule.ld) | $$(RMODTOOL) - $$(LD_rmodules_$(4)) $$(LDFLAGS_rmodules_$(4)) $(RMODULE_LDFLAGS) -T $(call src-to-obj,rmodules_$(4),src/lib/rmodule.ld) --defsym=__heap_size=$(strip $(3)) -o $$@ --whole-archive --start-group $(filter-out %.ld,$(2)) --end-group + $$(LD_rmodules_$(4)) $$(LDFLAGS_rmodules_$(4)) $(RMODULE_LDFLAGS) -T $(call src-to-obj,rmodules_$(4),src/lib/rmodule.ld) -Wl,--defsym=__heap_size=$(strip $(3)) -o $$@ -Wl,--whole-archive,--start-group $(filter-out %.ld,$(2)) -Wl,--no-whole-archive $$(COMPILER_RT_rmodules_$(4)) -Wl,--end-group $$(NM_rmodules_$(4)) -n $$@ > $$(basename $$@).map endef
diff --git a/src/lib/gcc.c b/src/lib/gcc.c index 5a93f45..66a7788 100644 --- a/src/lib/gcc.c +++ b/src/lib/gcc.c @@ -24,6 +24,7 @@ /* TODO: maybe this code should move to arch/x86 as architecture * specific implementations may vary */ +#if 0 #define WRAP_LIBGCC_CALL(type, name) \ asmlinkage type __real_##name(type a, type b); \ type __wrap_##name(type a, type b); \ @@ -33,3 +34,4 @@ WRAP_LIBGCC_CALL(unsigned long long, __udivdi3) WRAP_LIBGCC_CALL(long long, __moddi3) WRAP_LIBGCC_CALL(unsigned long long, __umoddi3) +#endif diff --git a/toolchain.inc b/toolchain.inc index 865227b..f52a518 100644 --- a/toolchain.inc +++ b/toolchain.inc @@ -117,7 +117,7 @@ $(error Check your .config file for CONFIG_ARCH_$(1)_* settings)) CC_$(1) := $(CC_$(2)) GCC_$(1) := $(GCC_CC_$(2)) -LD_$(1) := $(LD_$(2)) +LD_$(1) := $(CC_$(2)) NM_$(1) := $(NM_$(2)) AR_$(1) := $(AR_$(2)) GNATBIND_$(1) := $(GNATBIND_$(2)) @@ -130,7 +130,7 @@ CPPFLAGS_$(1) = $$(CPPFLAGS_common) $$(CPPFLAGS_$(2)) -D__ARCH_$(2)__ COMPILER_RT_$(1) := $$(COMPILER_RT_$(2)) COMPILER_RT_FLAGS_$(1) := $$(COMPILER_RT_FLAGS_$(2)) -LDFLAGS_$(1) = $$(LDFLAGS_common) $$(LDFLAGS_$(2)) +LDFLAGS_$(1) = $$(CFLAGS_$(1)) $$(LDFLAGS_common) $$(LDFLAGS_$(2)) endef
# define_class: Allows defining any program as dynamic class and compiler tool diff --git a/util/xcompile/xcompile b/util/xcompile/xcompile index 3203d71..8515813 100755 --- a/util/xcompile/xcompile +++ b/util/xcompile/xcompile @@ -363,7 +363,7 @@ TCLIST="i386 x86_64" TWIDTH="32" TABI="elf" - CC_RT_EXTRA_GCC="--wrap __divdi3 --wrap __udivdi3 --wrap __moddi3 --wrap __umoddi3" + #CC_RT_EXTRA_GCC="-Wl,--wrap=__divdi3,--wrap=__udivdi3,--wrap=__moddi3,--wrap=__umoddi3" }
arch_config_ppc64() {