kbuild: add support for clang LTO

Adds CONFIG_LTO / CONFIG_LTO_CLANG and the build plumbing they need:

  - Makefile: exports LLVM_AR/LLVM_NM, adds -flto -fvisibility=hidden to
    KBUILD_CFLAGS, exports DISABLE_LTO, and removes *.*.symversions on clean.
  - arch/Kconfig: adds the LTO choice; LTO_CLANG depends on
    ARCH_SUPPORTS_LTO_CLANG, !FTRACE_MCOUNT_RECORD, CC_IS_CLANG && LD_IS_LLD.
  - scripts/Makefile.build: writes <obj>.o.symversions instead of relinking
    each object, falls back to llvm-nm when objdump cannot read a file, and
    rebuilds archive symbol tables with llvm-ar.
  - scripts/Makefile.modpost: links each module to <mod>.lto.o before modpost
    and performs the final .ko link from the original .o.
  - scripts/link-vmlinux.sh: collects .symversions into .tmp_symversions and
    links vmlinux from vmlinux.o when LTO is enabled.
  - scripts/mod/modpost.c: strips a trailing ".lto" from module/symbol names.

Review fixes folded into this revision:

  - Makefile.modpost: the LTO cmd_ld_ko_o now uses $(KBUILD_LDFLAGS) and runs
    the optional $(ARCH_POSTLINK) pass, matching the non-LTO command.
  - link-vmlinux.sh: modversions() only emits "-T .tmp_symversions" when the
    file exists and is non-empty.
  - link-vmlinux.sh: modpost_link() prints "LD vmlinux.o" for non-LTO builds,
    since the unconditional message is removed from the main flow.

NOTE(review): whitespace in context lines was reconstructed; the post-image
blob ids on the "index" lines of scripts/Makefile.modpost and
scripts/link-vmlinux.sh predate the review fixes.

diff --git a/Makefile b/Makefile
index 840bde10c9b4..f70ef17a3d2f 100644
--- a/Makefile
+++ b/Makefile
@@ -608,6 +608,16 @@ ifdef CONFIG_FUNCTION_TRACER
   CC_FLAGS_FTRACE := -pg
 endif
 
+# Make toolchain changes before including arch/$(SRCARCH)/Makefile to ensure
+# ar/cc/ld-* macros return correct values.
+ifdef CONFIG_LTO_CLANG
+# use llvm-ar for building symbol tables from IR files, and llvm-nm instead
+# of objdump for processing symbol versions and exports
+LLVM_AR		:= llvm-ar
+LLVM_NM		:= llvm-nm
+export LLVM_AR LLVM_NM
+endif
+
 # The arch Makefile can set ARCH_{CPP,A,C}FLAGS to override the default
 # values of the respective KBUILD_* variables
 ARCH_CPPFLAGS :=
@@ -795,6 +805,22 @@ KBUILD_CFLAGS_KERNEL += -ffunction-sections -fdata-sections
 LDFLAGS_vmlinux += --gc-sections
 endif
 
+ifdef CONFIG_LTO_CLANG
+lto-clang-flags	:= -flto -fvisibility=hidden
+
+# allow disabling only clang LTO where needed
+DISABLE_LTO_CLANG := -fno-lto -fvisibility=default
+export DISABLE_LTO_CLANG
+endif
+
+ifdef CONFIG_LTO
+LTO_CFLAGS	:= $(lto-clang-flags)
+KBUILD_CFLAGS	+= $(LTO_CFLAGS)
+
+DISABLE_LTO	:= $(DISABLE_LTO_CLANG)
+export LTO_CFLAGS DISABLE_LTO
+endif
+
 # arch Makefile may override CC so keep this after arch Makefile is included
 NOSTDINC_FLAGS += -nostdinc -isystem $(shell $(CC) -print-file-name=include)
 
@@ -1583,7 +1609,8 @@ clean: $(clean-dirs)
 		-o -name modules.builtin -o -name '.tmp_*.o.*' \
 		-o -name '*.c.[012]*.*' \
 		-o -name '*.ll' \
-		-o -name '*.gcno' \) -type f -print | xargs rm -f
+		-o -name '*.gcno' \
+		-o -name '*.*.symversions' \) -type f -print | xargs rm -f
 
 # Generate tags for editors
 # ---------------------------------------------------------------------------
diff --git a/arch/Kconfig b/arch/Kconfig
index 6801123932a5..4572791825b6 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -474,6 +474,44 @@ config STACKPROTECTOR_STRONG
 	  about 20% of all kernel functions, which increases the kernel code
 	  size by about 2%.
 
+config LTO
+	def_bool n
+
+config ARCH_SUPPORTS_LTO_CLANG
+	bool
+	help
+	  An architecture should select this option if it supports:
+	  - compiling with clang,
+	  - compiling inline assembly with clang's integrated assembler,
+	  - and linking with LLD.
+
+choice
+	prompt "Link-Time Optimization (LTO) (EXPERIMENTAL)"
+	default LTO_NONE
+	help
+	  This option turns on Link-Time Optimization (LTO).
+
+config LTO_NONE
+	bool "None"
+
+config LTO_CLANG
+	bool "Use clang Link Time Optimization (LTO) (EXPERIMENTAL)"
+	depends on ARCH_SUPPORTS_LTO_CLANG
+	depends on !FTRACE_MCOUNT_RECORD
+	depends on CC_IS_CLANG && LD_IS_LLD
+	select LTO
+	help
+	  This option enables clang's Link Time Optimization (LTO), which allows
+	  the compiler to optimize the kernel globally at link time. If you
+	  enable this option, the compiler generates LLVM IR instead of object
+	  files, and the actual compilation from IR occurs at the LTO link step,
+	  which may take several minutes.
+
+	  If you select this option, you must compile the kernel with clang and
+	  LLD.
+
+endchoice
+
 config HAVE_ARCH_WITHIN_STACK_FRAMES
 	bool
 	help
diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h
index dd38c97933f1..65cbde21a9f4 100644
--- a/include/asm-generic/vmlinux.lds.h
+++ b/include/asm-generic/vmlinux.lds.h
@@ -66,7 +66,7 @@
  * RODATA_MAIN is not used because existing code already defines .rodata.x
  * sections to be brought in with rodata.
  */
-#ifdef CONFIG_LD_DEAD_CODE_DATA_ELIMINATION
+#if defined(CONFIG_LD_DEAD_CODE_DATA_ELIMINATION) || defined(CONFIG_LTO_CLANG)
 #define TEXT_MAIN .text .text.[0-9a-zA-Z_]*
 #define DATA_MAIN .data .data.[0-9a-zA-Z_]* .data..LPBX*
 #define SDATA_MAIN .sdata .sdata.[0-9a-zA-Z_]*
diff --git a/scripts/Makefile.build b/scripts/Makefile.build
index 64fac0ad32d6..30b9860ece7b 100644
--- a/scripts/Makefile.build
+++ b/scripts/Makefile.build
@@ -185,6 +185,23 @@ else
 
 cmd_cc_o_c = $(CC) $(c_flags) -c -o $(@D)/.tmp_$(@F) $<
 
+ifdef CONFIG_LTO_CLANG
+# Generate .o.symversions files for each .o with exported symbols, and link these
+# to the kernel and/or modules at the end.
+cmd_modversions_c =								\
+	if $(OBJDUMP) -h $(@D)/.tmp_$(@F) >/dev/null 2>/dev/null; then		\
+		if $(OBJDUMP) -h $(@D)/.tmp_$(@F) | grep -q __ksymtab; then	\
+			$(call cmd_gensymtypes_c,$(KBUILD_SYMTYPES),$(@:.o=.symtypes)) \
+			    > $(@D)/$(@F).symversions;				\
+		fi;								\
+	else									\
+		if $(LLVM_NM) $(@D)/.tmp_$(@F) | grep -q __ksymtab; then	\
+			$(call cmd_gensymtypes_c,$(KBUILD_SYMTYPES),$(@:.o=.symtypes)) \
+			    > $(@D)/$(@F).symversions;				\
+		fi;								\
+	fi;									\
+	mv -f $(@D)/.tmp_$(@F) $@;
+else
 cmd_modversions_c =								\
 	if $(OBJDUMP) -h $(@D)/.tmp_$(@F) | grep -q __ksymtab; then		\
 		$(call cmd_gensymtypes_c,$(KBUILD_SYMTYPES),$(@:.o=.symtypes))	\
@@ -197,6 +214,7 @@ cmd_modversions_c =								\
 		mv -f $(@D)/.tmp_$(@F) $@;					\
 	fi;
 endif
+endif
 
 ifdef CONFIG_FTRACE_MCOUNT_RECORD
 ifndef CC_USING_RECORD_MCOUNT
@@ -429,6 +447,26 @@ $(obj)/%.asn1.c $(obj)/%.asn1.h: $(src)/%.asn1 $(objtree)/scripts/asn1_compiler
 # To build objects in subdirs, we need to descend into the directories
 $(sort $(subdir-obj-y)): $(subdir-ym) ;
 
+ifdef CONFIG_LTO_CLANG
+  ifdef CONFIG_MODVERSIONS
+    # combine symversions for later processing
+    update_lto_symversions =					\
+	rm -f $@.symversions;					\
+	for i in $(filter-out FORCE,$^); do			\
+		if [ -f $$i.symversions ]; then			\
+			cat $$i.symversions			\
+				>> $@.symversions;		\
+		fi;						\
+	done;
+  endif
+  # rebuild the symbol table with llvm-ar to include IR files
+  update_lto_symtable = ;					\
+	mv -f $@ $@.tmp;					\
+	$(LLVM_AR) rcsT$(KBUILD_ARFLAGS) $@			\
+		$$($(AR) t $@.tmp);				\
+	rm -f $@.tmp
+endif
+
 #
 # Rule to compile a set of .o files into one .o file
 #
@@ -439,7 +477,8 @@ ifdef builtin-target
 # scripts/link-vmlinux.sh builds an aggregate built-in.a with a symbol
 # table and index.
 quiet_cmd_ar_builtin = AR      $@
-      cmd_ar_builtin = rm -f $@; \
+      cmd_ar_builtin = $(update_lto_symversions) \
+                     rm -f $@; \
                      $(AR) rcSTP$(KBUILD_ARFLAGS) $@ $(filter $(real-obj-y), $^)
 
 $(builtin-target): $(real-obj-y) FORCE
@@ -468,7 +507,11 @@ ifdef lib-target
 quiet_cmd_link_l_target = AR      $@
 
 # lib target archives do get a symbol table and index
-cmd_link_l_target = rm -f $@; $(AR) rcsTP$(KBUILD_ARFLAGS) $@ $(lib-y)
+cmd_link_l_target =					\
+	$(update_lto_symversions)			\
+	rm -f $@;					\
+	$(AR) rcsTP$(KBUILD_ARFLAGS) $@ $(lib-y)	\
+	$(update_lto_symtable)
 
 $(lib-target): $(lib-y) FORCE
 	$(call if_changed,link_l_target)
@@ -479,13 +522,34 @@ dummy-object = $(obj)/.lib_exports.o
 ksyms-lds = $(dot-target).lds
 
 quiet_cmd_export_list = EXPORTS $@
-cmd_export_list = $(OBJDUMP) -h $< | \
-	sed -ne '/___ksymtab/s/.*+\([^ ]*\).*/EXTERN(\1)/p' >$(ksyms-lds);\
-	rm -f $(dummy-object);\
+filter_export_list = sed -ne '/___ksymtab/s/.*+\([^ ]*\).*/EXTERN(\1)/p'
+link_export_list = rm -f $(dummy-object); \
 	echo | $(CC) $(a_flags) -c -o $(dummy-object) -x assembler -;\
 	$(LD) $(ld_flags) -r -o $@ -T $(ksyms-lds) $(dummy-object);\
 	rm $(dummy-object) $(ksyms-lds)
 
+ifdef CONFIG_LTO_CLANG
+# objdump doesn't understand IR files and llvm-nm doesn't support archives,
+# so we'll walk through each file in the archive separately
+cmd_export_list =					\
+	rm -f $(ksyms-lds);				\
+	for o in $$($(AR) t $<); do			\
+		if $(OBJDUMP) -h $$o >/dev/null 2>/dev/null; then \
+			$(OBJDUMP) -h $$o |		\
+				$(filter_export_list)	\
+				>>$(ksyms-lds);		\
+		else					\
+			$(LLVM_NM) $$o |		\
+				$(filter_export_list)	\
+				>>$(ksyms-lds);		\
+		fi;					\
+	done;						\
+	$(link_export_list)
+else
+cmd_export_list = $(OBJDUMP) -h $< | $(filter_export_list) >$(ksyms-lds); \
+	$(link_export_list)
+endif
+
 $(obj)/lib-ksyms.o: $(lib-target) FORCE
 	$(call if_changed,export_list)
 
@@ -509,13 +573,31 @@ $($(subst $(obj)/,,$(@:.o=-objs)))    \
 $($(subst $(obj)/,,$(@:.o=-y)))       \
 $($(subst $(obj)/,,$(@:.o=-m)))), $^)
 
+cmd_link_multi-link = $(LD) $(ld_flags) -r -o $@ $(link_multi_deps) $(cmd_secanalysis)
+
+quiet_cmd_link_multi-y = AR      $@
+cmd_link_multi-y = $(update_lto_symversions) \
+	rm -f $@; $(AR) rcSTP$(KBUILD_ARFLAGS) $@ $(link_multi_deps) \
+	$(update_lto_symtable)
+
 quiet_cmd_link_multi-m = LD [M]  $@
-cmd_link_multi-m = $(LD) $(ld_flags) -r -o $@ $(link_multi_deps) $(cmd_secanalysis)
+
+ifdef CONFIG_LTO_CLANG
+  # don't compile IR until needed
+  cmd_link_multi-m = $(cmd_link_multi-y)
+else
+  cmd_link_multi-m = $(cmd_link_multi-link)
+endif
+
+$(multi-used-y): FORCE
+	$(call if_changed,link_multi-y)
 
 $(multi-used-m): FORCE
 	$(call if_changed,link_multi-m)
 	@{ echo $(@:.o=.ko); echo $(link_multi_deps); \
 	   $(cmd_undef_syms); } > $(MODVERDIR)/$(@F:.o=.mod)
+
+$(call multi_depend, $(multi-used-y), .o, -objs -y)
 $(call multi_depend, $(multi-used-m), .o, -objs -y -m)
 
 targets += $(multi-used-m)
diff --git a/scripts/Makefile.modpost b/scripts/Makefile.modpost
index 7d4af0d0accb..dc2d4be47ac8 100644
--- a/scripts/Makefile.modpost
+++ b/scripts/Makefile.modpost
@@ -83,12 +83,28 @@ modpost = scripts/mod/modpost                    \
 
 MODPOST_OPT=$(subst -i,-n,$(filter -i,$(MAKEFLAGS)))
 
+# If CONFIG_LTO_CLANG is enabled, .o files are either LLVM IR, or empty, so we
+# need to link them into actual objects before passing them to modpost
+modpost-ext = $(if $(CONFIG_LTO_CLANG),.lto,)
+
+ifdef CONFIG_LTO_CLANG
+quiet_cmd_cc_lto_link_modules = LD [M]  $@
+cmd_cc_lto_link_modules =						\
+	$(LD) $(ld_flags) -r -o $(@)					\
+		$(shell [ -s $(@:$(modpost-ext).o=.o.symversions) ] &&	\
+			echo -T $(@:$(modpost-ext).o=.o.symversions))	\
+		--whole-archive $(filter-out FORCE,$^)
+
+$(modules:.ko=$(modpost-ext).o): %$(modpost-ext).o: %.o FORCE
+	$(call if_changed,cc_lto_link_modules)
+endif
+
 # We can go over command line length here, so be careful.
 quiet_cmd_modpost = MODPOST $(words $(filter-out vmlinux FORCE, $^)) modules
-      cmd_modpost = $(MODLISTCMD) | sed 's/\.ko$$/.o/' | $(modpost) $(MODPOST_OPT) -s -T -
+      cmd_modpost = $(MODLISTCMD) | sed 's/\.ko$$/$(modpost-ext)\.o/' | $(modpost) $(MODPOST_OPT) -s -T -
 
 PHONY += __modpost
-__modpost: $(modules:.ko=.o) FORCE
+__modpost: $(modules:.ko=$(modpost-ext).o) FORCE
 	$(call cmd,modpost) $(wildcard vmlinux)
 
 quiet_cmd_kernel-mod = MODPOST $@
@@ -100,7 +116,6 @@ vmlinux.o: FORCE
 # Declare generated files as targets for modpost
 $(modules:.ko=.mod.c): __modpost ;
 
-
 # Step 5), compile all *.mod.c files
 
 # modname is set to make c_flags define KBUILD_MODNAME
@@ -119,13 +134,25 @@ ARCH_POSTLINK := $(wildcard $(srctree)/arch/$(SRCARCH)/Makefile.postlink)
 
 # Step 6), final link of the modules with optional arch pass after final link
 quiet_cmd_ld_ko_o = LD [M]  $@
+
+ifdef CONFIG_LTO_CLANG
+      cmd_ld_ko_o =							\
+	$(LD) -r $(KBUILD_LDFLAGS)					\
+		 $(KBUILD_LDFLAGS_MODULE) $(LDFLAGS_MODULE)		\
+		 $(shell [ -s $(@:.ko=.o.symversions) ] &&		\
+			echo -T $(@:.ko=.o.symversions))		\
+		 -o $@ --whole-archive					\
+		 $(filter-out FORCE,$(^:$(modpost-ext).o=.o)) ;		\
+	$(if $(ARCH_POSTLINK), $(MAKE) -f $(ARCH_POSTLINK) $@, true)
+else
       cmd_ld_ko_o =							\
 	$(LD) -r $(KBUILD_LDFLAGS)					\
 		 $(KBUILD_LDFLAGS_MODULE) $(LDFLAGS_MODULE)		\
 		 -o $@ $(filter-out FORCE,$^) ;				\
 	$(if $(ARCH_POSTLINK), $(MAKE) -f $(ARCH_POSTLINK) $@, true)
+endif
 
-$(modules): %.ko :%.o %.mod.o FORCE
+$(modules): %.ko: %$(modpost-ext).o %.mod.o FORCE
 	+$(call if_changed,ld_ko_o)
 
 targets += $(modules)
diff --git a/scripts/link-vmlinux.sh b/scripts/link-vmlinux.sh
index c8cf45362bd6..a64e3fd412ab 100755
--- a/scripts/link-vmlinux.sh
+++ b/scripts/link-vmlinux.sh
@@ -60,6 +60,41 @@ archive_builtin()
 	${AR} rcsTP${KBUILD_ARFLAGS} built-in.a			\
 				${KBUILD_VMLINUX_INIT}		\
 				${KBUILD_VMLINUX_MAIN}
+
+	# rebuild with llvm-ar to update the symbol table
+	if [ -n "${CONFIG_LTO_CLANG}" ]; then
+		mv -f built-in.a built-in.a.tmp
+		${LLVM_AR} rcsT${KBUILD_ARFLAGS} built-in.a $(${AR} t built-in.a.tmp)
+		rm -f built-in.a.tmp
+	fi
+}
+
+# If CONFIG_LTO_CLANG is selected, collect generated symbol versions into
+# .tmp_symversions
+modversions()
+{
+	if [ -z "${CONFIG_LTO_CLANG}" ]; then
+		return
+	fi
+
+	if [ -z "${CONFIG_MODVERSIONS}" ]; then
+		return
+	fi
+
+	rm -f .tmp_symversions
+
+	for a in built-in.a ${KBUILD_VMLINUX_LIBS}; do
+		for o in $(${AR} t $a); do
+			if [ -f ${o}.symversions ]; then
+				cat ${o}.symversions >> .tmp_symversions
+			fi
+		done
+	done
+
+	# only hand the script to the linker if any symbol versions were found
+	if [ -s .tmp_symversions ]; then
+		echo "-T .tmp_symversions"
+	fi
 }
 
 # Link of vmlinux.o used for section mismatch analysis
@@ -75,7 +110,15 @@ modpost_link()
 		${KBUILD_VMLINUX_LIBS}				\
 		--end-group"
 
-	${LD} ${KBUILD_LDFLAGS} -r -o ${1} ${objects}
+	if [ -n "${CONFIG_LTO_CLANG}" ]; then
+		# This might take a while, so indicate that we're doing
+		# an LTO link
+		info LTO vmlinux.o
+	else
+		info LD vmlinux.o
+	fi
+
+	${LD} ${KBUILD_LDFLAGS} -r -o ${1} $(modversions) ${objects}
 }
 
 # Link of vmlinux
@@ -87,13 +130,20 @@ vmlinux_link()
 	local objects
 
 	if [ "${SRCARCH}" != "um" ]; then
-		objects="--whole-archive			\
-			built-in.a				\
-			--no-whole-archive			\
-			--start-group				\
-			${KBUILD_VMLINUX_LIBS}			\
-			--end-group				\
-			${1}"
+		if [ -z "${CONFIG_LTO_CLANG}" ]; then
+			objects="--whole-archive		\
+				built-in.a			\
+				--no-whole-archive		\
+				--start-group			\
+				${KBUILD_VMLINUX_LIBS}		\
+				--end-group			\
+				${1}"
+		else
+			objects="--start-group			\
+				vmlinux.o			\
+				--end-group			\
+				${1}"
+		fi
 
 		${LD} ${KBUILD_LDFLAGS} ${LDFLAGS_vmlinux} -o ${2}	\
 			-T ${lds} ${objects}
@@ -114,7 +164,6 @@ vmlinux_link()
 	fi
 }
 
-
 # Create ${2} .o file with all symbols from the ${1} object file
 kallsyms()
 {
@@ -159,6 +208,7 @@ cleanup()
 {
 	rm -f .tmp_System.map
 	rm -f .tmp_kallsyms*
+	rm -f .tmp_symversions
 	rm -f .tmp_vmlinux*
 	rm -f built-in.a
 	rm -f System.map
@@ -220,7 +270,6 @@ ${MAKE} -f "${srctree}/scripts/Makefile.build" obj=init
 archive_builtin
 
 #link vmlinux.o
-info LD vmlinux.o
 modpost_link vmlinux.o
 
 # modpost vmlinux.o to check for section mismatches
diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c
index 858cbe56b100..ca5bb33d7460 100644
--- a/scripts/mod/modpost.c
+++ b/scripts/mod/modpost.c
@@ -145,6 +145,9 @@ static struct module *new_module(const char *modname)
 		p[strlen(p) - 2] = '\0';
 		mod->is_dot_o = 1;
 	}
+	/* strip trailing .lto */
+	if (strends(p, ".lto"))
+		p[strlen(p) - 4] = '\0';
 
 	/* add to list */
 	mod->name = p;
@@ -1927,6 +1930,10 @@ static char *remove_dot(char *s)
 		size_t m = strspn(s + n + 1, "0123456789");
 		if (m && (s[n + m] == '.' || s[n + m] == 0))
 			s[n] = 0;
+
+		/* strip trailing .lto */
+		if (strends(s, ".lto"))
+			s[strlen(s) - 4] = '\0';
 	}
 	return s;
 }