mirror of
https://github.com/AuxXxilium/linux_dsm_epyc7002.git
synced 2024-12-21 20:20:12 +07:00
77b0bf55bc
Using macros in inline assembly allows us to work around bugs in GCC's inlining decisions. Compile macros.S and use it to assemble all C files. Currently only x86 will use it. Background: The inlining pass of GCC doesn't include an assembler, so it's not aware of basic properties of the generated code, such as its size in bytes, or that there are such things as discontiuous blocks of code and data due to the newfangled linker feature called 'sections' ... Instead GCC uses a lazy and fragile heuristic: it does a linear count of certain syntactic and whitespace elements in inlined assembly block source code, such as a count of new-lines and semicolons (!), as a poor substitute for "code size and complexity". Unsurprisingly this heuristic falls over and breaks its neck whith certain common types of kernel code that use inline assembly, such as the frequent practice of putting useful information into alternative sections. As a result of this fresh, 20+ years old GCC bug, GCC's inlining decisions are effectively disabled for inlined functions that make use of such asm() blocks, because GCC thinks those sections of code are "large" - when in reality they are often result in just a very low number of machine instructions. This absolute lack of inlining provess when GCC comes across such asm() blocks both increases generated kernel code size and causes performance overhead, which is particularly noticeable on paravirt kernels, which make frequent use of these inlining facilities in attempt to stay out of the way when running on baremetal hardware. Instead of fixing the compiler we use a workaround: we set an assembly macro and call it from the inlined assembly block. As a result GCC considers the inline assembly block as a single instruction. (Which it often isn't but I digress.) This uglifies and bloats the source code - for example just the refcount related changes have this impact: Makefile | 9 +++++++-- arch/x86/Makefile | 7 +++++++ arch/x86/kernel/macros.S | 7 +++++++ scripts/Kbuild.include | 4 +++- scripts/mod/Makefile | 2 ++ 5 files changed, 26 insertions(+), 3 deletions(-) Yay readability and maintainability, it's not like assembly code is hard to read and maintain ... We also hope that GCC will eventually get fixed, but we are not holding our breath for that. Yet we are optimistic, it might still happen, any decade now. [ mingo: Wrote new changelog describing the background. ] Tested-by: Kees Cook <keescook@chromium.org> Signed-off-by: Nadav Amit <namit@vmware.com> Acked-by: Masahiro Yamada <yamada.masahiro@socionext.com> Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org> Cc: Andy Lutomirski <luto@amacapital.net> Cc: Borislav Petkov <bp@alien8.de> Cc: Brian Gerst <brgerst@gmail.com> Cc: Denys Vlasenko <dvlasenk@redhat.com> Cc: H. Peter Anvin <hpa@zytor.com> Cc: Linus Torvalds <torvalds@linux-foundation.org> Cc: Michal Marek <michal.lkml@markovi.net> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Sam Ravnborg <sam@ravnborg.org> Cc: Thomas Gleixner <tglx@linutronix.de> Cc: linux-kbuild@vger.kernel.org Link: http://lkml.kernel.org/r/20181003213100.189959-3-namit@vmware.com Signed-off-by: Ingo Molnar <mingo@kernel.org>
332 lines
12 KiB
Makefile
332 lines
12 KiB
Makefile
# SPDX-License-Identifier: GPL-2.0
|
|
# Unified Makefile for i386 and x86_64
|
|
|
|
# select defconfig based on actual architecture
|
|
ifeq ($(ARCH),x86)
|
|
ifeq ($(shell uname -m),x86_64)
|
|
KBUILD_DEFCONFIG := x86_64_defconfig
|
|
else
|
|
KBUILD_DEFCONFIG := i386_defconfig
|
|
endif
|
|
else
|
|
KBUILD_DEFCONFIG := $(ARCH)_defconfig
|
|
endif
|
|
|
|
# For gcc stack alignment is specified with -mpreferred-stack-boundary,
|
|
# clang has the option -mstack-alignment for that purpose.
|
|
ifneq ($(call cc-option, -mpreferred-stack-boundary=4),)
|
|
cc_stack_align4 := -mpreferred-stack-boundary=2
|
|
cc_stack_align8 := -mpreferred-stack-boundary=3
|
|
else ifneq ($(call cc-option, -mstack-alignment=16),)
|
|
cc_stack_align4 := -mstack-alignment=4
|
|
cc_stack_align8 := -mstack-alignment=8
|
|
endif
|
|
|
|
# How to compile the 16-bit code. Note we always compile for -march=i386;
|
|
# that way we can complain to the user if the CPU is insufficient.
|
|
#
|
|
# The -m16 option is supported by GCC >= 4.9 and clang >= 3.5. For
|
|
# older versions of GCC, include an *assembly* header to make sure that
|
|
# gcc doesn't play any games behind our back.
|
|
CODE16GCC_CFLAGS := -m32 -Wa,$(srctree)/arch/x86/boot/code16gcc.h
|
|
M16_CFLAGS := $(call cc-option, -m16, $(CODE16GCC_CFLAGS))
|
|
|
|
REALMODE_CFLAGS := $(M16_CFLAGS) -g -Os -DDISABLE_BRANCH_PROFILING \
|
|
-Wall -Wstrict-prototypes -march=i386 -mregparm=3 \
|
|
-fno-strict-aliasing -fomit-frame-pointer -fno-pic \
|
|
-mno-mmx -mno-sse
|
|
|
|
REALMODE_CFLAGS += $(call __cc-option, $(CC), $(REALMODE_CFLAGS), -ffreestanding)
|
|
REALMODE_CFLAGS += $(call __cc-option, $(CC), $(REALMODE_CFLAGS), -fno-stack-protector)
|
|
REALMODE_CFLAGS += $(call __cc-option, $(CC), $(REALMODE_CFLAGS), $(cc_stack_align4))
|
|
export REALMODE_CFLAGS
|
|
|
|
# BITS is used as extension for files which are available in a 32 bit
|
|
# and a 64 bit version to simplify shared Makefiles.
|
|
# e.g.: obj-y += foo_$(BITS).o
|
|
export BITS
|
|
|
|
ifdef CONFIG_X86_NEED_RELOCS
|
|
LDFLAGS_vmlinux := --emit-relocs
|
|
endif
|
|
|
|
#
|
|
# Prevent GCC from generating any FP code by mistake.
|
|
#
|
|
# This must happen before we try the -mpreferred-stack-boundary, see:
|
|
#
|
|
# https://gcc.gnu.org/bugzilla/show_bug.cgi?id=53383
|
|
#
|
|
KBUILD_CFLAGS += -mno-sse -mno-mmx -mno-sse2 -mno-3dnow
|
|
KBUILD_CFLAGS += $(call cc-option,-mno-avx,)
|
|
|
|
ifeq ($(CONFIG_X86_32),y)
|
|
BITS := 32
|
|
UTS_MACHINE := i386
|
|
CHECKFLAGS += -D__i386__
|
|
|
|
biarch := $(call cc-option,-m32)
|
|
KBUILD_AFLAGS += $(biarch)
|
|
KBUILD_CFLAGS += $(biarch)
|
|
|
|
KBUILD_CFLAGS += -msoft-float -mregparm=3 -freg-struct-return
|
|
|
|
# Never want PIC in a 32-bit kernel, prevent breakage with GCC built
|
|
# with nonstandard options
|
|
KBUILD_CFLAGS += -fno-pic
|
|
|
|
# Align the stack to the register width instead of using the default
|
|
# alignment of 16 bytes. This reduces stack usage and the number of
|
|
# alignment instructions.
|
|
KBUILD_CFLAGS += $(call cc-option,$(cc_stack_align4))
|
|
|
|
# CPU-specific tuning. Anything which can be shared with UML should go here.
|
|
include arch/x86/Makefile_32.cpu
|
|
KBUILD_CFLAGS += $(cflags-y)
|
|
|
|
# temporary until string.h is fixed
|
|
KBUILD_CFLAGS += -ffreestanding
|
|
else
|
|
BITS := 64
|
|
UTS_MACHINE := x86_64
|
|
CHECKFLAGS += -D__x86_64__
|
|
|
|
biarch := -m64
|
|
KBUILD_AFLAGS += -m64
|
|
KBUILD_CFLAGS += -m64
|
|
|
|
# Align jump targets to 1 byte, not the default 16 bytes:
|
|
KBUILD_CFLAGS += $(call cc-option,-falign-jumps=1)
|
|
|
|
# Pack loops tightly as well:
|
|
KBUILD_CFLAGS += $(call cc-option,-falign-loops=1)
|
|
|
|
# Don't autogenerate traditional x87 instructions
|
|
KBUILD_CFLAGS += $(call cc-option,-mno-80387)
|
|
KBUILD_CFLAGS += $(call cc-option,-mno-fp-ret-in-387)
|
|
|
|
# By default gcc and clang use a stack alignment of 16 bytes for x86.
|
|
# However the standard kernel entry on x86-64 leaves the stack on an
|
|
# 8-byte boundary. If the compiler isn't informed about the actual
|
|
# alignment it will generate extra alignment instructions for the
|
|
# default alignment which keep the stack *mis*aligned.
|
|
# Furthermore an alignment to the register width reduces stack usage
|
|
# and the number of alignment instructions.
|
|
KBUILD_CFLAGS += $(call cc-option,$(cc_stack_align8))
|
|
|
|
# Use -mskip-rax-setup if supported.
|
|
KBUILD_CFLAGS += $(call cc-option,-mskip-rax-setup)
|
|
|
|
# FIXME - should be integrated in Makefile.cpu (Makefile_32.cpu)
|
|
cflags-$(CONFIG_MK8) += $(call cc-option,-march=k8)
|
|
cflags-$(CONFIG_MPSC) += $(call cc-option,-march=nocona)
|
|
|
|
cflags-$(CONFIG_MCORE2) += \
|
|
$(call cc-option,-march=core2,$(call cc-option,-mtune=generic))
|
|
cflags-$(CONFIG_MATOM) += $(call cc-option,-march=atom) \
|
|
$(call cc-option,-mtune=atom,$(call cc-option,-mtune=generic))
|
|
cflags-$(CONFIG_GENERIC_CPU) += $(call cc-option,-mtune=generic)
|
|
KBUILD_CFLAGS += $(cflags-y)
|
|
|
|
KBUILD_CFLAGS += -mno-red-zone
|
|
KBUILD_CFLAGS += -mcmodel=kernel
|
|
|
|
# -funit-at-a-time shrinks the kernel .text considerably
|
|
# unfortunately it makes reading oopses harder.
|
|
KBUILD_CFLAGS += $(call cc-option,-funit-at-a-time)
|
|
endif
|
|
|
|
ifdef CONFIG_X86_X32
|
|
x32_ld_ok := $(call try-run,\
|
|
/bin/echo -e '1: .quad 1b' | \
|
|
$(CC) $(KBUILD_AFLAGS) -c -x assembler -o "$$TMP" - && \
|
|
$(OBJCOPY) -O elf32-x86-64 "$$TMP" "$$TMPO" && \
|
|
$(LD) -m elf32_x86_64 "$$TMPO" -o "$$TMP",y,n)
|
|
ifeq ($(x32_ld_ok),y)
|
|
CONFIG_X86_X32_ABI := y
|
|
KBUILD_AFLAGS += -DCONFIG_X86_X32_ABI
|
|
KBUILD_CFLAGS += -DCONFIG_X86_X32_ABI
|
|
else
|
|
$(warning CONFIG_X86_X32 enabled but no binutils support)
|
|
endif
|
|
endif
|
|
export CONFIG_X86_X32_ABI
|
|
|
|
#
|
|
# If the function graph tracer is used with mcount instead of fentry,
|
|
# '-maccumulate-outgoing-args' is needed to prevent a GCC bug
|
|
# (https://gcc.gnu.org/bugzilla/show_bug.cgi?id=42109)
|
|
#
|
|
ifdef CONFIG_FUNCTION_GRAPH_TRACER
|
|
ifndef CONFIG_HAVE_FENTRY
|
|
ACCUMULATE_OUTGOING_ARGS := 1
|
|
else
|
|
ifeq ($(call cc-option-yn, -mfentry), n)
|
|
ACCUMULATE_OUTGOING_ARGS := 1
|
|
|
|
# GCC ignores '-maccumulate-outgoing-args' when used with '-Os'.
|
|
# If '-Os' is enabled, disable it and print a warning.
|
|
ifdef CONFIG_CC_OPTIMIZE_FOR_SIZE
|
|
undefine CONFIG_CC_OPTIMIZE_FOR_SIZE
|
|
$(warning Disabling CONFIG_CC_OPTIMIZE_FOR_SIZE. Your compiler does not have -mfentry so you cannot optimize for size with CONFIG_FUNCTION_GRAPH_TRACER.)
|
|
endif
|
|
|
|
endif
|
|
endif
|
|
endif
|
|
|
|
ifeq ($(ACCUMULATE_OUTGOING_ARGS), 1)
|
|
# This compiler flag is not supported by Clang:
|
|
KBUILD_CFLAGS += $(call cc-option,-maccumulate-outgoing-args,)
|
|
endif
|
|
|
|
# Stackpointer is addressed different for 32 bit and 64 bit x86
|
|
sp-$(CONFIG_X86_32) := esp
|
|
sp-$(CONFIG_X86_64) := rsp
|
|
|
|
# do binutils support CFI?
|
|
cfi := $(call as-instr,.cfi_startproc\n.cfi_rel_offset $(sp-y)$(comma)0\n.cfi_endproc,-DCONFIG_AS_CFI=1)
|
|
# is .cfi_signal_frame supported too?
|
|
cfi-sigframe := $(call as-instr,.cfi_startproc\n.cfi_signal_frame\n.cfi_endproc,-DCONFIG_AS_CFI_SIGNAL_FRAME=1)
|
|
cfi-sections := $(call as-instr,.cfi_sections .debug_frame,-DCONFIG_AS_CFI_SECTIONS=1)
|
|
|
|
# does binutils support specific instructions?
|
|
asinstr := $(call as-instr,fxsaveq (%rax),-DCONFIG_AS_FXSAVEQ=1)
|
|
asinstr += $(call as-instr,pshufb %xmm0$(comma)%xmm0,-DCONFIG_AS_SSSE3=1)
|
|
avx_instr := $(call as-instr,vxorps %ymm0$(comma)%ymm1$(comma)%ymm2,-DCONFIG_AS_AVX=1)
|
|
avx2_instr :=$(call as-instr,vpbroadcastb %xmm0$(comma)%ymm1,-DCONFIG_AS_AVX2=1)
|
|
avx512_instr :=$(call as-instr,vpmovm2b %k1$(comma)%zmm5,-DCONFIG_AS_AVX512=1)
|
|
sha1_ni_instr :=$(call as-instr,sha1msg1 %xmm0$(comma)%xmm1,-DCONFIG_AS_SHA1_NI=1)
|
|
sha256_ni_instr :=$(call as-instr,sha256msg1 %xmm0$(comma)%xmm1,-DCONFIG_AS_SHA256_NI=1)
|
|
|
|
KBUILD_AFLAGS += $(cfi) $(cfi-sigframe) $(cfi-sections) $(asinstr) $(avx_instr) $(avx2_instr) $(avx512_instr) $(sha1_ni_instr) $(sha256_ni_instr)
|
|
KBUILD_CFLAGS += $(cfi) $(cfi-sigframe) $(cfi-sections) $(asinstr) $(avx_instr) $(avx2_instr) $(avx512_instr) $(sha1_ni_instr) $(sha256_ni_instr)
|
|
|
|
KBUILD_LDFLAGS := -m elf_$(UTS_MACHINE)
|
|
|
|
#
|
|
# The 64-bit kernel must be aligned to 2MB. Pass -z max-page-size=0x200000 to
|
|
# the linker to force 2MB page size regardless of the default page size used
|
|
# by the linker.
|
|
#
|
|
ifdef CONFIG_X86_64
|
|
KBUILD_LDFLAGS += $(call ld-option, -z max-page-size=0x200000)
|
|
endif
|
|
|
|
# Speed up the build
|
|
KBUILD_CFLAGS += -pipe
|
|
# Workaround for a gcc prelease that unfortunately was shipped in a suse release
|
|
KBUILD_CFLAGS += -Wno-sign-compare
|
|
#
|
|
KBUILD_CFLAGS += -fno-asynchronous-unwind-tables
|
|
|
|
# Avoid indirect branches in kernel to deal with Spectre
|
|
ifdef CONFIG_RETPOLINE
|
|
ifneq ($(RETPOLINE_CFLAGS),)
|
|
KBUILD_CFLAGS += $(RETPOLINE_CFLAGS) -DRETPOLINE
|
|
endif
|
|
endif
|
|
|
|
archscripts: scripts_basic
|
|
$(Q)$(MAKE) $(build)=arch/x86/tools relocs
|
|
|
|
###
|
|
# Syscall table generation
|
|
|
|
archheaders:
|
|
$(Q)$(MAKE) $(build)=arch/x86/entry/syscalls all
|
|
|
|
archmacros:
|
|
$(Q)$(MAKE) $(build)=arch/x86/kernel arch/x86/kernel/macros.s
|
|
|
|
ASM_MACRO_FLAGS = -Wa,arch/x86/kernel/macros.s -Wa,-
|
|
export ASM_MACRO_FLAGS
|
|
KBUILD_CFLAGS += $(ASM_MACRO_FLAGS)
|
|
|
|
###
|
|
# Kernel objects
|
|
|
|
head-y := arch/x86/kernel/head_$(BITS).o
|
|
head-y += arch/x86/kernel/head$(BITS).o
|
|
head-y += arch/x86/kernel/ebda.o
|
|
head-y += arch/x86/kernel/platform-quirks.o
|
|
|
|
libs-y += arch/x86/lib/
|
|
|
|
# See arch/x86/Kbuild for content of core part of the kernel
|
|
core-y += arch/x86/
|
|
|
|
# drivers-y are linked after core-y
|
|
drivers-$(CONFIG_MATH_EMULATION) += arch/x86/math-emu/
|
|
drivers-$(CONFIG_PCI) += arch/x86/pci/
|
|
|
|
# must be linked after kernel/
|
|
drivers-$(CONFIG_OPROFILE) += arch/x86/oprofile/
|
|
|
|
# suspend and hibernation support
|
|
drivers-$(CONFIG_PM) += arch/x86/power/
|
|
|
|
drivers-$(CONFIG_FB) += arch/x86/video/
|
|
|
|
####
|
|
# boot loader support. Several targets are kept for legacy purposes
|
|
|
|
boot := arch/x86/boot
|
|
|
|
BOOT_TARGETS = bzlilo bzdisk fdimage fdimage144 fdimage288 isoimage
|
|
|
|
PHONY += bzImage $(BOOT_TARGETS)
|
|
|
|
# Default kernel to build
|
|
all: bzImage
|
|
|
|
# KBUILD_IMAGE specify target image being built
|
|
KBUILD_IMAGE := $(boot)/bzImage
|
|
|
|
bzImage: vmlinux
|
|
ifeq ($(CONFIG_X86_DECODER_SELFTEST),y)
|
|
$(Q)$(MAKE) $(build)=arch/x86/tools posttest
|
|
endif
|
|
$(Q)$(MAKE) $(build)=$(boot) $(KBUILD_IMAGE)
|
|
$(Q)mkdir -p $(objtree)/arch/$(UTS_MACHINE)/boot
|
|
$(Q)ln -fsn ../../x86/boot/bzImage $(objtree)/arch/$(UTS_MACHINE)/boot/$@
|
|
|
|
$(BOOT_TARGETS): vmlinux
|
|
$(Q)$(MAKE) $(build)=$(boot) $@
|
|
|
|
PHONY += install
|
|
install:
|
|
$(Q)$(MAKE) $(build)=$(boot) $@
|
|
|
|
PHONY += vdso_install
|
|
vdso_install:
|
|
$(Q)$(MAKE) $(build)=arch/x86/entry/vdso $@
|
|
|
|
archprepare: checkbin
|
|
checkbin:
|
|
ifndef CC_HAVE_ASM_GOTO
|
|
@echo Compiler lacks asm-goto support.
|
|
@exit 1
|
|
endif
|
|
|
|
archclean:
|
|
$(Q)rm -rf $(objtree)/arch/i386
|
|
$(Q)rm -rf $(objtree)/arch/x86_64
|
|
$(Q)$(MAKE) $(clean)=$(boot)
|
|
$(Q)$(MAKE) $(clean)=arch/x86/tools
|
|
|
|
define archhelp
|
|
echo '* bzImage - Compressed kernel image (arch/x86/boot/bzImage)'
|
|
echo ' install - Install kernel using'
|
|
echo ' (your) ~/bin/$(INSTALLKERNEL) or'
|
|
echo ' (distribution) /sbin/$(INSTALLKERNEL) or'
|
|
echo ' install to $$(INSTALL_PATH) and run lilo'
|
|
echo ' fdimage - Create 1.4MB boot floppy image (arch/x86/boot/fdimage)'
|
|
echo ' fdimage144 - Create 1.4MB boot floppy image (arch/x86/boot/fdimage)'
|
|
echo ' fdimage288 - Create 2.8MB boot floppy image (arch/x86/boot/fdimage)'
|
|
echo ' isoimage - Create a boot CD-ROM image (arch/x86/boot/image.iso)'
|
|
echo ' bzdisk/fdimage*/isoimage also accept:'
|
|
echo ' FDARGS="..." arguments for the booted kernel'
|
|
echo ' FDINITRD=file initrd for the booted kernel'
|
|
endef
|