Commit 505569d208e61ab14f4b87957be0970ab33eb319
Exists in ti-lsk-linux-4.1.y and in 10 other branches
Merge branch 'x86-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull x86 fixes from Ingo Molnar:
 "Misc fixes: two vdso fixes, two kbuild fixes and a boot failure fix
  with certain odd memory mappings"

* 'x86-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  x86, vdso: Use asm volatile in __getcpu
  x86/build: Clean auto-generated processor feature files
  x86: Fix mkcapflags.sh bash-ism
  x86: Fix step size adjustment during initial memory mapping
  x86_64, vdso: Fix the vdso address randomization algorithm
Showing 6 changed files
arch/x86/boot/Makefile
1 | # | 1 | # |
2 | # arch/x86/boot/Makefile | 2 | # arch/x86/boot/Makefile |
3 | # | 3 | # |
4 | # This file is subject to the terms and conditions of the GNU General Public | 4 | # This file is subject to the terms and conditions of the GNU General Public |
5 | # License. See the file "COPYING" in the main directory of this archive | 5 | # License. See the file "COPYING" in the main directory of this archive |
6 | # for more details. | 6 | # for more details. |
7 | # | 7 | # |
8 | # Copyright (C) 1994 by Linus Torvalds | 8 | # Copyright (C) 1994 by Linus Torvalds |
9 | # Changed by many, many contributors over the years. | 9 | # Changed by many, many contributors over the years. |
10 | # | 10 | # |
11 | 11 | ||
12 | # If you want to preset the SVGA mode, uncomment the next line and | 12 | # If you want to preset the SVGA mode, uncomment the next line and |
13 | # set SVGA_MODE to whatever number you want. | 13 | # set SVGA_MODE to whatever number you want. |
14 | # Set it to -DSVGA_MODE=NORMAL_VGA if you just want the EGA/VGA mode. | 14 | # Set it to -DSVGA_MODE=NORMAL_VGA if you just want the EGA/VGA mode. |
15 | # The number is the same as you would ordinarily press at bootup. | 15 | # The number is the same as you would ordinarily press at bootup. |
16 | 16 | ||
17 | SVGA_MODE := -DSVGA_MODE=NORMAL_VGA | 17 | SVGA_MODE := -DSVGA_MODE=NORMAL_VGA |
18 | 18 | ||
19 | targets := vmlinux.bin setup.bin setup.elf bzImage | 19 | targets := vmlinux.bin setup.bin setup.elf bzImage |
20 | targets += fdimage fdimage144 fdimage288 image.iso mtools.conf | 20 | targets += fdimage fdimage144 fdimage288 image.iso mtools.conf |
21 | subdir- := compressed | 21 | subdir- := compressed |
22 | 22 | ||
23 | setup-y += a20.o bioscall.o cmdline.o copy.o cpu.o cpuflags.o cpucheck.o | 23 | setup-y += a20.o bioscall.o cmdline.o copy.o cpu.o cpuflags.o cpucheck.o |
24 | setup-y += early_serial_console.o edd.o header.o main.o mca.o memory.o | 24 | setup-y += early_serial_console.o edd.o header.o main.o mca.o memory.o |
25 | setup-y += pm.o pmjump.o printf.o regs.o string.o tty.o video.o | 25 | setup-y += pm.o pmjump.o printf.o regs.o string.o tty.o video.o |
26 | setup-y += video-mode.o version.o | 26 | setup-y += video-mode.o version.o |
27 | setup-$(CONFIG_X86_APM_BOOT) += apm.o | 27 | setup-$(CONFIG_X86_APM_BOOT) += apm.o |
28 | 28 | ||
29 | # The link order of the video-*.o modules can matter. In particular, | 29 | # The link order of the video-*.o modules can matter. In particular, |
30 | # video-vga.o *must* be listed first, followed by video-vesa.o. | 30 | # video-vga.o *must* be listed first, followed by video-vesa.o. |
31 | # Hardware-specific drivers should follow in the order they should be | 31 | # Hardware-specific drivers should follow in the order they should be |
32 | # probed, and video-bios.o should typically be last. | 32 | # probed, and video-bios.o should typically be last. |
33 | setup-y += video-vga.o | 33 | setup-y += video-vga.o |
34 | setup-y += video-vesa.o | 34 | setup-y += video-vesa.o |
35 | setup-y += video-bios.o | 35 | setup-y += video-bios.o |
36 | 36 | ||
37 | targets += $(setup-y) | 37 | targets += $(setup-y) |
38 | hostprogs-y := tools/build | 38 | hostprogs-y := tools/build |
39 | hostprogs-$(CONFIG_X86_FEATURE_NAMES) += mkcpustr | 39 | hostprogs-$(CONFIG_X86_FEATURE_NAMES) += mkcpustr |
40 | 40 | ||
41 | HOST_EXTRACFLAGS += -I$(srctree)/tools/include \ | 41 | HOST_EXTRACFLAGS += -I$(srctree)/tools/include \ |
42 | -include include/generated/autoconf.h \ | 42 | -include include/generated/autoconf.h \ |
43 | -D__EXPORTED_HEADERS__ | 43 | -D__EXPORTED_HEADERS__ |
44 | 44 | ||
45 | ifdef CONFIG_X86_FEATURE_NAMES | 45 | ifdef CONFIG_X86_FEATURE_NAMES |
46 | $(obj)/cpu.o: $(obj)/cpustr.h | 46 | $(obj)/cpu.o: $(obj)/cpustr.h |
47 | 47 | ||
48 | quiet_cmd_cpustr = CPUSTR $@ | 48 | quiet_cmd_cpustr = CPUSTR $@ |
49 | cmd_cpustr = $(obj)/mkcpustr > $@ | 49 | cmd_cpustr = $(obj)/mkcpustr > $@ |
50 | targets += cpustr.h | 50 | targets += cpustr.h |
51 | $(obj)/cpustr.h: $(obj)/mkcpustr FORCE | 51 | $(obj)/cpustr.h: $(obj)/mkcpustr FORCE |
52 | $(call if_changed,cpustr) | 52 | $(call if_changed,cpustr) |
53 | endif | 53 | endif |
54 | clean-files += cpustr.h | ||
54 | 55 | ||
55 | # --------------------------------------------------------------------------- | 56 | # --------------------------------------------------------------------------- |
56 | 57 | ||
57 | KBUILD_CFLAGS := $(USERINCLUDE) $(REALMODE_CFLAGS) -D_SETUP | 58 | KBUILD_CFLAGS := $(USERINCLUDE) $(REALMODE_CFLAGS) -D_SETUP |
58 | KBUILD_AFLAGS := $(KBUILD_CFLAGS) -D__ASSEMBLY__ | 59 | KBUILD_AFLAGS := $(KBUILD_CFLAGS) -D__ASSEMBLY__ |
59 | GCOV_PROFILE := n | 60 | GCOV_PROFILE := n |
60 | 61 | ||
61 | $(obj)/bzImage: asflags-y := $(SVGA_MODE) | 62 | $(obj)/bzImage: asflags-y := $(SVGA_MODE) |
62 | 63 | ||
63 | quiet_cmd_image = BUILD $@ | 64 | quiet_cmd_image = BUILD $@ |
64 | cmd_image = $(obj)/tools/build $(obj)/setup.bin $(obj)/vmlinux.bin \ | 65 | cmd_image = $(obj)/tools/build $(obj)/setup.bin $(obj)/vmlinux.bin \ |
65 | $(obj)/zoffset.h $@ | 66 | $(obj)/zoffset.h $@ |
66 | 67 | ||
67 | $(obj)/bzImage: $(obj)/setup.bin $(obj)/vmlinux.bin $(obj)/tools/build FORCE | 68 | $(obj)/bzImage: $(obj)/setup.bin $(obj)/vmlinux.bin $(obj)/tools/build FORCE |
68 | $(call if_changed,image) | 69 | $(call if_changed,image) |
69 | @echo 'Kernel: $@ is ready' ' (#'`cat .version`')' | 70 | @echo 'Kernel: $@ is ready' ' (#'`cat .version`')' |
70 | 71 | ||
71 | OBJCOPYFLAGS_vmlinux.bin := -O binary -R .note -R .comment -S | 72 | OBJCOPYFLAGS_vmlinux.bin := -O binary -R .note -R .comment -S |
72 | $(obj)/vmlinux.bin: $(obj)/compressed/vmlinux FORCE | 73 | $(obj)/vmlinux.bin: $(obj)/compressed/vmlinux FORCE |
73 | $(call if_changed,objcopy) | 74 | $(call if_changed,objcopy) |
74 | 75 | ||
75 | SETUP_OBJS = $(addprefix $(obj)/,$(setup-y)) | 76 | SETUP_OBJS = $(addprefix $(obj)/,$(setup-y)) |
76 | 77 | ||
77 | sed-voffset := -e 's/^\([0-9a-fA-F]*\) [ABCDGRSTVW] \(_text\|_end\)$$/\#define VO_\2 0x\1/p' | 78 | sed-voffset := -e 's/^\([0-9a-fA-F]*\) [ABCDGRSTVW] \(_text\|_end\)$$/\#define VO_\2 0x\1/p' |
78 | 79 | ||
79 | quiet_cmd_voffset = VOFFSET $@ | 80 | quiet_cmd_voffset = VOFFSET $@ |
80 | cmd_voffset = $(NM) $< | sed -n $(sed-voffset) > $@ | 81 | cmd_voffset = $(NM) $< | sed -n $(sed-voffset) > $@ |
81 | 82 | ||
82 | targets += voffset.h | 83 | targets += voffset.h |
83 | $(obj)/voffset.h: vmlinux FORCE | 84 | $(obj)/voffset.h: vmlinux FORCE |
84 | $(call if_changed,voffset) | 85 | $(call if_changed,voffset) |
85 | 86 | ||
86 | sed-zoffset := -e 's/^\([0-9a-fA-F]*\) [ABCDGRSTVW] \(startup_32\|startup_64\|efi32_stub_entry\|efi64_stub_entry\|efi_pe_entry\|input_data\|_end\|z_.*\)$$/\#define ZO_\2 0x\1/p' | 87 | sed-zoffset := -e 's/^\([0-9a-fA-F]*\) [ABCDGRSTVW] \(startup_32\|startup_64\|efi32_stub_entry\|efi64_stub_entry\|efi_pe_entry\|input_data\|_end\|z_.*\)$$/\#define ZO_\2 0x\1/p' |
87 | 88 | ||
88 | quiet_cmd_zoffset = ZOFFSET $@ | 89 | quiet_cmd_zoffset = ZOFFSET $@ |
89 | cmd_zoffset = $(NM) $< | sed -n $(sed-zoffset) > $@ | 90 | cmd_zoffset = $(NM) $< | sed -n $(sed-zoffset) > $@ |
90 | 91 | ||
91 | targets += zoffset.h | 92 | targets += zoffset.h |
92 | $(obj)/zoffset.h: $(obj)/compressed/vmlinux FORCE | 93 | $(obj)/zoffset.h: $(obj)/compressed/vmlinux FORCE |
93 | $(call if_changed,zoffset) | 94 | $(call if_changed,zoffset) |
94 | 95 | ||
95 | 96 | ||
96 | AFLAGS_header.o += -I$(obj) | 97 | AFLAGS_header.o += -I$(obj) |
97 | $(obj)/header.o: $(obj)/voffset.h $(obj)/zoffset.h | 98 | $(obj)/header.o: $(obj)/voffset.h $(obj)/zoffset.h |
98 | 99 | ||
99 | LDFLAGS_setup.elf := -T | 100 | LDFLAGS_setup.elf := -T |
100 | $(obj)/setup.elf: $(src)/setup.ld $(SETUP_OBJS) FORCE | 101 | $(obj)/setup.elf: $(src)/setup.ld $(SETUP_OBJS) FORCE |
101 | $(call if_changed,ld) | 102 | $(call if_changed,ld) |
102 | 103 | ||
103 | OBJCOPYFLAGS_setup.bin := -O binary | 104 | OBJCOPYFLAGS_setup.bin := -O binary |
104 | $(obj)/setup.bin: $(obj)/setup.elf FORCE | 105 | $(obj)/setup.bin: $(obj)/setup.elf FORCE |
105 | $(call if_changed,objcopy) | 106 | $(call if_changed,objcopy) |
106 | 107 | ||
107 | $(obj)/compressed/vmlinux: FORCE | 108 | $(obj)/compressed/vmlinux: FORCE |
108 | $(Q)$(MAKE) $(build)=$(obj)/compressed $@ | 109 | $(Q)$(MAKE) $(build)=$(obj)/compressed $@ |
109 | 110 | ||
110 | # Set this if you want to pass append arguments to the | 111 | # Set this if you want to pass append arguments to the |
111 | # bzdisk/fdimage/isoimage kernel | 112 | # bzdisk/fdimage/isoimage kernel |
112 | FDARGS = | 113 | FDARGS = |
113 | # Set this if you want an initrd included with the | 114 | # Set this if you want an initrd included with the |
114 | # bzdisk/fdimage/isoimage kernel | 115 | # bzdisk/fdimage/isoimage kernel |
115 | FDINITRD = | 116 | FDINITRD = |
116 | 117 | ||
117 | image_cmdline = default linux $(FDARGS) $(if $(FDINITRD),initrd=initrd.img,) | 118 | image_cmdline = default linux $(FDARGS) $(if $(FDINITRD),initrd=initrd.img,) |
118 | 119 | ||
119 | $(obj)/mtools.conf: $(src)/mtools.conf.in | 120 | $(obj)/mtools.conf: $(src)/mtools.conf.in |
120 | sed -e 's|@OBJ@|$(obj)|g' < $< > $@ | 121 | sed -e 's|@OBJ@|$(obj)|g' < $< > $@ |
121 | 122 | ||
122 | # This requires write access to /dev/fd0 | 123 | # This requires write access to /dev/fd0 |
123 | bzdisk: $(obj)/bzImage $(obj)/mtools.conf | 124 | bzdisk: $(obj)/bzImage $(obj)/mtools.conf |
124 | MTOOLSRC=$(obj)/mtools.conf mformat a: ; sync | 125 | MTOOLSRC=$(obj)/mtools.conf mformat a: ; sync |
125 | syslinux /dev/fd0 ; sync | 126 | syslinux /dev/fd0 ; sync |
126 | echo '$(image_cmdline)' | \ | 127 | echo '$(image_cmdline)' | \ |
127 | MTOOLSRC=$(src)/mtools.conf mcopy - a:syslinux.cfg | 128 | MTOOLSRC=$(src)/mtools.conf mcopy - a:syslinux.cfg |
128 | if [ -f '$(FDINITRD)' ] ; then \ | 129 | if [ -f '$(FDINITRD)' ] ; then \ |
129 | MTOOLSRC=$(obj)/mtools.conf mcopy '$(FDINITRD)' a:initrd.img ; \ | 130 | MTOOLSRC=$(obj)/mtools.conf mcopy '$(FDINITRD)' a:initrd.img ; \ |
130 | fi | 131 | fi |
131 | MTOOLSRC=$(obj)/mtools.conf mcopy $(obj)/bzImage a:linux ; sync | 132 | MTOOLSRC=$(obj)/mtools.conf mcopy $(obj)/bzImage a:linux ; sync |
132 | 133 | ||
133 | # These require being root or having syslinux 2.02 or higher installed | 134 | # These require being root or having syslinux 2.02 or higher installed |
134 | fdimage fdimage144: $(obj)/bzImage $(obj)/mtools.conf | 135 | fdimage fdimage144: $(obj)/bzImage $(obj)/mtools.conf |
135 | dd if=/dev/zero of=$(obj)/fdimage bs=1024 count=1440 | 136 | dd if=/dev/zero of=$(obj)/fdimage bs=1024 count=1440 |
136 | MTOOLSRC=$(obj)/mtools.conf mformat v: ; sync | 137 | MTOOLSRC=$(obj)/mtools.conf mformat v: ; sync |
137 | syslinux $(obj)/fdimage ; sync | 138 | syslinux $(obj)/fdimage ; sync |
138 | echo '$(image_cmdline)' | \ | 139 | echo '$(image_cmdline)' | \ |
139 | MTOOLSRC=$(obj)/mtools.conf mcopy - v:syslinux.cfg | 140 | MTOOLSRC=$(obj)/mtools.conf mcopy - v:syslinux.cfg |
140 | if [ -f '$(FDINITRD)' ] ; then \ | 141 | if [ -f '$(FDINITRD)' ] ; then \ |
141 | MTOOLSRC=$(obj)/mtools.conf mcopy '$(FDINITRD)' v:initrd.img ; \ | 142 | MTOOLSRC=$(obj)/mtools.conf mcopy '$(FDINITRD)' v:initrd.img ; \ |
142 | fi | 143 | fi |
143 | MTOOLSRC=$(obj)/mtools.conf mcopy $(obj)/bzImage v:linux ; sync | 144 | MTOOLSRC=$(obj)/mtools.conf mcopy $(obj)/bzImage v:linux ; sync |
144 | 145 | ||
145 | fdimage288: $(obj)/bzImage $(obj)/mtools.conf | 146 | fdimage288: $(obj)/bzImage $(obj)/mtools.conf |
146 | dd if=/dev/zero of=$(obj)/fdimage bs=1024 count=2880 | 147 | dd if=/dev/zero of=$(obj)/fdimage bs=1024 count=2880 |
147 | MTOOLSRC=$(obj)/mtools.conf mformat w: ; sync | 148 | MTOOLSRC=$(obj)/mtools.conf mformat w: ; sync |
148 | syslinux $(obj)/fdimage ; sync | 149 | syslinux $(obj)/fdimage ; sync |
149 | echo '$(image_cmdline)' | \ | 150 | echo '$(image_cmdline)' | \ |
150 | MTOOLSRC=$(obj)/mtools.conf mcopy - w:syslinux.cfg | 151 | MTOOLSRC=$(obj)/mtools.conf mcopy - w:syslinux.cfg |
151 | if [ -f '$(FDINITRD)' ] ; then \ | 152 | if [ -f '$(FDINITRD)' ] ; then \ |
152 | MTOOLSRC=$(obj)/mtools.conf mcopy '$(FDINITRD)' w:initrd.img ; \ | 153 | MTOOLSRC=$(obj)/mtools.conf mcopy '$(FDINITRD)' w:initrd.img ; \ |
153 | fi | 154 | fi |
154 | MTOOLSRC=$(obj)/mtools.conf mcopy $(obj)/bzImage w:linux ; sync | 155 | MTOOLSRC=$(obj)/mtools.conf mcopy $(obj)/bzImage w:linux ; sync |
155 | 156 | ||
156 | isoimage: $(obj)/bzImage | 157 | isoimage: $(obj)/bzImage |
157 | -rm -rf $(obj)/isoimage | 158 | -rm -rf $(obj)/isoimage |
158 | mkdir $(obj)/isoimage | 159 | mkdir $(obj)/isoimage |
159 | for i in lib lib64 share end ; do \ | 160 | for i in lib lib64 share end ; do \ |
160 | if [ -f /usr/$$i/syslinux/isolinux.bin ] ; then \ | 161 | if [ -f /usr/$$i/syslinux/isolinux.bin ] ; then \ |
161 | cp /usr/$$i/syslinux/isolinux.bin $(obj)/isoimage ; \ | 162 | cp /usr/$$i/syslinux/isolinux.bin $(obj)/isoimage ; \ |
162 | break ; \ | 163 | break ; \ |
163 | fi ; \ | 164 | fi ; \ |
164 | if [ $$i = end ] ; then exit 1 ; fi ; \ | 165 | if [ $$i = end ] ; then exit 1 ; fi ; \ |
165 | done | 166 | done |
166 | cp $(obj)/bzImage $(obj)/isoimage/linux | 167 | cp $(obj)/bzImage $(obj)/isoimage/linux |
167 | echo '$(image_cmdline)' > $(obj)/isoimage/isolinux.cfg | 168 | echo '$(image_cmdline)' > $(obj)/isoimage/isolinux.cfg |
168 | if [ -f '$(FDINITRD)' ] ; then \ | 169 | if [ -f '$(FDINITRD)' ] ; then \ |
169 | cp '$(FDINITRD)' $(obj)/isoimage/initrd.img ; \ | 170 | cp '$(FDINITRD)' $(obj)/isoimage/initrd.img ; \ |
170 | fi | 171 | fi |
171 | mkisofs -J -r -o $(obj)/image.iso -b isolinux.bin -c boot.cat \ | 172 | mkisofs -J -r -o $(obj)/image.iso -b isolinux.bin -c boot.cat \ |
172 | -no-emul-boot -boot-load-size 4 -boot-info-table \ | 173 | -no-emul-boot -boot-load-size 4 -boot-info-table \ |
173 | $(obj)/isoimage | 174 | $(obj)/isoimage |
174 | isohybrid $(obj)/image.iso 2>/dev/null || true | 175 | isohybrid $(obj)/image.iso 2>/dev/null || true |
175 | rm -rf $(obj)/isoimage | 176 | rm -rf $(obj)/isoimage |
176 | 177 | ||
177 | bzlilo: $(obj)/bzImage | 178 | bzlilo: $(obj)/bzImage |
178 | if [ -f $(INSTALL_PATH)/vmlinuz ]; then mv $(INSTALL_PATH)/vmlinuz $(INSTALL_PATH)/vmlinuz.old; fi | 179 | if [ -f $(INSTALL_PATH)/vmlinuz ]; then mv $(INSTALL_PATH)/vmlinuz $(INSTALL_PATH)/vmlinuz.old; fi |
179 | if [ -f $(INSTALL_PATH)/System.map ]; then mv $(INSTALL_PATH)/System.map $(INSTALL_PATH)/System.old; fi | 180 | if [ -f $(INSTALL_PATH)/System.map ]; then mv $(INSTALL_PATH)/System.map $(INSTALL_PATH)/System.old; fi |
180 | cat $(obj)/bzImage > $(INSTALL_PATH)/vmlinuz | 181 | cat $(obj)/bzImage > $(INSTALL_PATH)/vmlinuz |
181 | cp System.map $(INSTALL_PATH)/ | 182 | cp System.map $(INSTALL_PATH)/ |
182 | if [ -x /sbin/lilo ]; then /sbin/lilo; else /etc/lilo/install; fi | 183 | if [ -x /sbin/lilo ]; then /sbin/lilo; else /etc/lilo/install; fi |
183 | 184 | ||
184 | install: | 185 | install: |
185 | sh $(srctree)/$(src)/install.sh $(KERNELRELEASE) $(obj)/bzImage \ | 186 | sh $(srctree)/$(src)/install.sh $(KERNELRELEASE) $(obj)/bzImage \ |
186 | System.map "$(INSTALL_PATH)" | 187 | System.map "$(INSTALL_PATH)" |
187 | 188 |
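Note on the hunk above: "clean-files += cpustr.h" is added after the endif rather than inside it (the cpu/Makefile below gets the same treatment for capflags.c). The generation rule and the "targets" registration only exist while CONFIG_X86_FEATURE_NAMES is enabled, so once the option was switched off, a previously generated cpustr.h was never removed by "make clean". A hedged sketch of the pattern in plain GNU make (hypothetical file and variable names, not kbuild itself; recipe lines are tab-indented):

    # The build rule exists only when FEATURE=y, but cleaning must not
    # depend on the current configuration.
    ifeq ($(FEATURE),y)
    generated.h:
	echo '/* generated */' > $@
    endif

    clean-files += generated.h   # registered unconditionally

    clean:
	rm -f $(clean-files)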
arch/x86/include/asm/vgtod.h
1 | #ifndef _ASM_X86_VGTOD_H | 1 | #ifndef _ASM_X86_VGTOD_H |
2 | #define _ASM_X86_VGTOD_H | 2 | #define _ASM_X86_VGTOD_H |
3 | 3 | ||
4 | #include <linux/compiler.h> | 4 | #include <linux/compiler.h> |
5 | #include <linux/clocksource.h> | 5 | #include <linux/clocksource.h> |
6 | 6 | ||
7 | #ifdef BUILD_VDSO32_64 | 7 | #ifdef BUILD_VDSO32_64 |
8 | typedef u64 gtod_long_t; | 8 | typedef u64 gtod_long_t; |
9 | #else | 9 | #else |
10 | typedef unsigned long gtod_long_t; | 10 | typedef unsigned long gtod_long_t; |
11 | #endif | 11 | #endif |
12 | /* | 12 | /* |
13 | * vsyscall_gtod_data will be accessed by 32 and 64 bit code at the same time | 13 | * vsyscall_gtod_data will be accessed by 32 and 64 bit code at the same time |
14 | * so be carefull by modifying this structure. | 14 | * so be carefull by modifying this structure. |
15 | */ | 15 | */ |
16 | struct vsyscall_gtod_data { | 16 | struct vsyscall_gtod_data { |
17 | unsigned seq; | 17 | unsigned seq; |
18 | 18 | ||
19 | int vclock_mode; | 19 | int vclock_mode; |
20 | cycle_t cycle_last; | 20 | cycle_t cycle_last; |
21 | cycle_t mask; | 21 | cycle_t mask; |
22 | u32 mult; | 22 | u32 mult; |
23 | u32 shift; | 23 | u32 shift; |
24 | 24 | ||
25 | /* open coded 'struct timespec' */ | 25 | /* open coded 'struct timespec' */ |
26 | u64 wall_time_snsec; | 26 | u64 wall_time_snsec; |
27 | gtod_long_t wall_time_sec; | 27 | gtod_long_t wall_time_sec; |
28 | gtod_long_t monotonic_time_sec; | 28 | gtod_long_t monotonic_time_sec; |
29 | u64 monotonic_time_snsec; | 29 | u64 monotonic_time_snsec; |
30 | gtod_long_t wall_time_coarse_sec; | 30 | gtod_long_t wall_time_coarse_sec; |
31 | gtod_long_t wall_time_coarse_nsec; | 31 | gtod_long_t wall_time_coarse_nsec; |
32 | gtod_long_t monotonic_time_coarse_sec; | 32 | gtod_long_t monotonic_time_coarse_sec; |
33 | gtod_long_t monotonic_time_coarse_nsec; | 33 | gtod_long_t monotonic_time_coarse_nsec; |
34 | 34 | ||
35 | int tz_minuteswest; | 35 | int tz_minuteswest; |
36 | int tz_dsttime; | 36 | int tz_dsttime; |
37 | }; | 37 | }; |
38 | extern struct vsyscall_gtod_data vsyscall_gtod_data; | 38 | extern struct vsyscall_gtod_data vsyscall_gtod_data; |
39 | 39 | ||
40 | static inline unsigned gtod_read_begin(const struct vsyscall_gtod_data *s) | 40 | static inline unsigned gtod_read_begin(const struct vsyscall_gtod_data *s) |
41 | { | 41 | { |
42 | unsigned ret; | 42 | unsigned ret; |
43 | 43 | ||
44 | repeat: | 44 | repeat: |
45 | ret = ACCESS_ONCE(s->seq); | 45 | ret = ACCESS_ONCE(s->seq); |
46 | if (unlikely(ret & 1)) { | 46 | if (unlikely(ret & 1)) { |
47 | cpu_relax(); | 47 | cpu_relax(); |
48 | goto repeat; | 48 | goto repeat; |
49 | } | 49 | } |
50 | smp_rmb(); | 50 | smp_rmb(); |
51 | return ret; | 51 | return ret; |
52 | } | 52 | } |
53 | 53 | ||
54 | static inline int gtod_read_retry(const struct vsyscall_gtod_data *s, | 54 | static inline int gtod_read_retry(const struct vsyscall_gtod_data *s, |
55 | unsigned start) | 55 | unsigned start) |
56 | { | 56 | { |
57 | smp_rmb(); | 57 | smp_rmb(); |
58 | return unlikely(s->seq != start); | 58 | return unlikely(s->seq != start); |
59 | } | 59 | } |
60 | 60 | ||
61 | static inline void gtod_write_begin(struct vsyscall_gtod_data *s) | 61 | static inline void gtod_write_begin(struct vsyscall_gtod_data *s) |
62 | { | 62 | { |
63 | ++s->seq; | 63 | ++s->seq; |
64 | smp_wmb(); | 64 | smp_wmb(); |
65 | } | 65 | } |
66 | 66 | ||
67 | static inline void gtod_write_end(struct vsyscall_gtod_data *s) | 67 | static inline void gtod_write_end(struct vsyscall_gtod_data *s) |
68 | { | 68 | { |
69 | smp_wmb(); | 69 | smp_wmb(); |
70 | ++s->seq; | 70 | ++s->seq; |
71 | } | 71 | } |
72 | 72 | ||
73 | #ifdef CONFIG_X86_64 | 73 | #ifdef CONFIG_X86_64 |
74 | 74 | ||
75 | #define VGETCPU_CPU_MASK 0xfff | 75 | #define VGETCPU_CPU_MASK 0xfff |
76 | 76 | ||
77 | static inline unsigned int __getcpu(void) | 77 | static inline unsigned int __getcpu(void) |
78 | { | 78 | { |
79 | unsigned int p; | 79 | unsigned int p; |
80 | 80 | ||
81 | /* | 81 | /* |
82 | * Load per CPU data from GDT. LSL is faster than RDTSCP and | 82 | * Load per CPU data from GDT. LSL is faster than RDTSCP and |
83 | * works on all CPUs. | 83 | * works on all CPUs. This is volatile so that it orders |
84 | * correctly wrt barrier() and to keep gcc from cleverly | ||
85 | * hoisting it out of the calling function. | ||
84 | */ | 86 | */ |
85 | asm("lsl %1,%0" : "=r" (p) : "r" (__PER_CPU_SEG)); | 87 | asm volatile ("lsl %1,%0" : "=r" (p) : "r" (__PER_CPU_SEG)); |
86 | 88 | ||
87 | return p; | 89 | return p; |
88 | } | 90 | } |
89 | 91 | ||
90 | #endif /* CONFIG_X86_64 */ | 92 | #endif /* CONFIG_X86_64 */ |
91 | 93 | ||
92 | #endif /* _ASM_X86_VGTOD_H */ | 94 | #endif /* _ASM_X86_VGTOD_H */ |
93 | 95 |
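For context on the __getcpu() change above: without "volatile", GCC treats an asm statement with outputs as a pure function of its inputs, so it may hoist the LSL out of a loop or merge two identical calls into one, and a task that migrates between the two reads would see a stale CPU number. A hedged user-space sketch of the hazard (0x7b is a hypothetical selector standing in for __PER_CPU_SEG; this is not kernel code):

    /* Non-volatile: GCC may CSE the two reads into one. */
    static inline unsigned int getcpu_hoistable(void)
    {
            unsigned int p;
            asm("lsl %1,%0" : "=r" (p) : "r" (0x7bU));
            return p;
    }

    /* Volatile: each call re-executes the LSL. */
    static inline unsigned int getcpu_volatile(void)
    {
            unsigned int p;
            asm volatile("lsl %1,%0" : "=r" (p) : "r" (0x7bU));
            return p;
    }

    /* With the non-volatile version this can constant-fold to 0. */
    unsigned int cpu_changed(void)
    {
            return getcpu_hoistable() != getcpu_hoistable();
    }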
arch/x86/kernel/cpu/Makefile
1 | # | 1 | # |
2 | # Makefile for x86-compatible CPU details, features and quirks | 2 | # Makefile for x86-compatible CPU details, features and quirks |
3 | # | 3 | # |
4 | 4 | ||
5 | # Don't trace early stages of a secondary CPU boot | 5 | # Don't trace early stages of a secondary CPU boot |
6 | ifdef CONFIG_FUNCTION_TRACER | 6 | ifdef CONFIG_FUNCTION_TRACER |
7 | CFLAGS_REMOVE_common.o = -pg | 7 | CFLAGS_REMOVE_common.o = -pg |
8 | CFLAGS_REMOVE_perf_event.o = -pg | 8 | CFLAGS_REMOVE_perf_event.o = -pg |
9 | endif | 9 | endif |
10 | 10 | ||
11 | # Make sure load_percpu_segment has no stackprotector | 11 | # Make sure load_percpu_segment has no stackprotector |
12 | nostackp := $(call cc-option, -fno-stack-protector) | 12 | nostackp := $(call cc-option, -fno-stack-protector) |
13 | CFLAGS_common.o := $(nostackp) | 13 | CFLAGS_common.o := $(nostackp) |
14 | 14 | ||
15 | obj-y := intel_cacheinfo.o scattered.o topology.o | 15 | obj-y := intel_cacheinfo.o scattered.o topology.o |
16 | obj-y += common.o | 16 | obj-y += common.o |
17 | obj-y += rdrand.o | 17 | obj-y += rdrand.o |
18 | obj-y += match.o | 18 | obj-y += match.o |
19 | 19 | ||
20 | obj-$(CONFIG_PROC_FS) += proc.o | 20 | obj-$(CONFIG_PROC_FS) += proc.o |
21 | obj-$(CONFIG_X86_FEATURE_NAMES) += capflags.o powerflags.o | 21 | obj-$(CONFIG_X86_FEATURE_NAMES) += capflags.o powerflags.o |
22 | 22 | ||
23 | obj-$(CONFIG_X86_32) += bugs.o | 23 | obj-$(CONFIG_X86_32) += bugs.o |
24 | obj-$(CONFIG_X86_64) += bugs_64.o | 24 | obj-$(CONFIG_X86_64) += bugs_64.o |
25 | 25 | ||
26 | obj-$(CONFIG_CPU_SUP_INTEL) += intel.o | 26 | obj-$(CONFIG_CPU_SUP_INTEL) += intel.o |
27 | obj-$(CONFIG_CPU_SUP_AMD) += amd.o | 27 | obj-$(CONFIG_CPU_SUP_AMD) += amd.o |
28 | obj-$(CONFIG_CPU_SUP_CYRIX_32) += cyrix.o | 28 | obj-$(CONFIG_CPU_SUP_CYRIX_32) += cyrix.o |
29 | obj-$(CONFIG_CPU_SUP_CENTAUR) += centaur.o | 29 | obj-$(CONFIG_CPU_SUP_CENTAUR) += centaur.o |
30 | obj-$(CONFIG_CPU_SUP_TRANSMETA_32) += transmeta.o | 30 | obj-$(CONFIG_CPU_SUP_TRANSMETA_32) += transmeta.o |
31 | obj-$(CONFIG_CPU_SUP_UMC_32) += umc.o | 31 | obj-$(CONFIG_CPU_SUP_UMC_32) += umc.o |
32 | 32 | ||
33 | obj-$(CONFIG_PERF_EVENTS) += perf_event.o | 33 | obj-$(CONFIG_PERF_EVENTS) += perf_event.o |
34 | 34 | ||
35 | ifdef CONFIG_PERF_EVENTS | 35 | ifdef CONFIG_PERF_EVENTS |
36 | obj-$(CONFIG_CPU_SUP_AMD) += perf_event_amd.o perf_event_amd_uncore.o | 36 | obj-$(CONFIG_CPU_SUP_AMD) += perf_event_amd.o perf_event_amd_uncore.o |
37 | ifdef CONFIG_AMD_IOMMU | 37 | ifdef CONFIG_AMD_IOMMU |
38 | obj-$(CONFIG_CPU_SUP_AMD) += perf_event_amd_iommu.o | 38 | obj-$(CONFIG_CPU_SUP_AMD) += perf_event_amd_iommu.o |
39 | endif | 39 | endif |
40 | obj-$(CONFIG_CPU_SUP_INTEL) += perf_event_p6.o perf_event_knc.o perf_event_p4.o | 40 | obj-$(CONFIG_CPU_SUP_INTEL) += perf_event_p6.o perf_event_knc.o perf_event_p4.o |
41 | obj-$(CONFIG_CPU_SUP_INTEL) += perf_event_intel_lbr.o perf_event_intel_ds.o perf_event_intel.o | 41 | obj-$(CONFIG_CPU_SUP_INTEL) += perf_event_intel_lbr.o perf_event_intel_ds.o perf_event_intel.o |
42 | obj-$(CONFIG_CPU_SUP_INTEL) += perf_event_intel_rapl.o | 42 | obj-$(CONFIG_CPU_SUP_INTEL) += perf_event_intel_rapl.o |
43 | 43 | ||
44 | obj-$(CONFIG_PERF_EVENTS_INTEL_UNCORE) += perf_event_intel_uncore.o \ | 44 | obj-$(CONFIG_PERF_EVENTS_INTEL_UNCORE) += perf_event_intel_uncore.o \ |
45 | perf_event_intel_uncore_snb.o \ | 45 | perf_event_intel_uncore_snb.o \ |
46 | perf_event_intel_uncore_snbep.o \ | 46 | perf_event_intel_uncore_snbep.o \ |
47 | perf_event_intel_uncore_nhmex.o | 47 | perf_event_intel_uncore_nhmex.o |
48 | endif | 48 | endif |
49 | 49 | ||
50 | 50 | ||
51 | obj-$(CONFIG_X86_MCE) += mcheck/ | 51 | obj-$(CONFIG_X86_MCE) += mcheck/ |
52 | obj-$(CONFIG_MTRR) += mtrr/ | 52 | obj-$(CONFIG_MTRR) += mtrr/ |
53 | obj-$(CONFIG_MICROCODE) += microcode/ | 53 | obj-$(CONFIG_MICROCODE) += microcode/ |
54 | 54 | ||
55 | obj-$(CONFIG_X86_LOCAL_APIC) += perfctr-watchdog.o perf_event_amd_ibs.o | 55 | obj-$(CONFIG_X86_LOCAL_APIC) += perfctr-watchdog.o perf_event_amd_ibs.o |
56 | 56 | ||
57 | obj-$(CONFIG_HYPERVISOR_GUEST) += vmware.o hypervisor.o mshyperv.o | 57 | obj-$(CONFIG_HYPERVISOR_GUEST) += vmware.o hypervisor.o mshyperv.o |
58 | 58 | ||
59 | ifdef CONFIG_X86_FEATURE_NAMES | 59 | ifdef CONFIG_X86_FEATURE_NAMES |
60 | quiet_cmd_mkcapflags = MKCAP $@ | 60 | quiet_cmd_mkcapflags = MKCAP $@ |
61 | cmd_mkcapflags = $(CONFIG_SHELL) $(srctree)/$(src)/mkcapflags.sh $< $@ | 61 | cmd_mkcapflags = $(CONFIG_SHELL) $(srctree)/$(src)/mkcapflags.sh $< $@ |
62 | 62 | ||
63 | cpufeature = $(src)/../../include/asm/cpufeature.h | 63 | cpufeature = $(src)/../../include/asm/cpufeature.h |
64 | 64 | ||
65 | targets += capflags.c | 65 | targets += capflags.c |
66 | $(obj)/capflags.c: $(cpufeature) $(src)/mkcapflags.sh FORCE | 66 | $(obj)/capflags.c: $(cpufeature) $(src)/mkcapflags.sh FORCE |
67 | $(call if_changed,mkcapflags) | 67 | $(call if_changed,mkcapflags) |
68 | endif | 68 | endif |
69 | clean-files += capflags.c | ||
69 | 70 |
arch/x86/kernel/cpu/mkcapflags.sh
1 | #!/bin/sh | 1 | #!/bin/sh |
2 | # | 2 | # |
3 | # Generate the x86_cap/bug_flags[] arrays from include/asm/cpufeature.h | 3 | # Generate the x86_cap/bug_flags[] arrays from include/asm/cpufeature.h |
4 | # | 4 | # |
5 | 5 | ||
6 | IN=$1 | 6 | IN=$1 |
7 | OUT=$2 | 7 | OUT=$2 |
8 | 8 | ||
9 | function dump_array() | 9 | function dump_array() |
10 | { | 10 | { |
11 | ARRAY=$1 | 11 | ARRAY=$1 |
12 | SIZE=$2 | 12 | SIZE=$2 |
13 | PFX=$3 | 13 | PFX=$3 |
14 | POSTFIX=$4 | 14 | POSTFIX=$4 |
15 | 15 | ||
16 | PFX_SZ=$(echo $PFX | wc -c) | 16 | PFX_SZ=$(echo $PFX | wc -c) |
17 | TABS="$(printf '\t\t\t\t\t')" | 17 | TABS="$(printf '\t\t\t\t\t')" |
18 | 18 | ||
19 | echo "const char * const $ARRAY[$SIZE] = {" | 19 | echo "const char * const $ARRAY[$SIZE] = {" |
20 | 20 | ||
21 | # Iterate through any input lines starting with #define $PFX | 21 | # Iterate through any input lines starting with #define $PFX |
22 | sed -n -e 's/\t/ /g' -e "s/^ *# *define *$PFX//p" $IN | | 22 | sed -n -e 's/\t/ /g' -e "s/^ *# *define *$PFX//p" $IN | |
23 | while read i | 23 | while read i |
24 | do | 24 | do |
25 | # Name is everything up to the first whitespace | 25 | # Name is everything up to the first whitespace |
26 | NAME="$(echo "$i" | sed 's/ .*//')" | 26 | NAME="$(echo "$i" | sed 's/ .*//')" |
27 | 27 | ||
28 | # If the /* comment */ starts with a quote string, grab that. | 28 | # If the /* comment */ starts with a quote string, grab that. |
29 | VALUE="$(echo "$i" | sed -n 's@.*/\* *\("[^"]*"\).*\*/@\1@p')" | 29 | VALUE="$(echo "$i" | sed -n 's@.*/\* *\("[^"]*"\).*\*/@\1@p')" |
30 | [ -z "$VALUE" ] && VALUE="\"$NAME\"" | 30 | [ -z "$VALUE" ] && VALUE="\"$NAME\"" |
31 | [ "$VALUE" == '""' ] && continue | 31 | [ "$VALUE" = '""' ] && continue |
32 | 32 | ||
33 | # Name is uppercase, VALUE is all lowercase | 33 | # Name is uppercase, VALUE is all lowercase |
34 | VALUE="$(echo "$VALUE" | tr A-Z a-z)" | 34 | VALUE="$(echo "$VALUE" | tr A-Z a-z)" |
35 | 35 | ||
36 | if [ -n "$POSTFIX" ]; then | 36 | if [ -n "$POSTFIX" ]; then |
37 | T=$(( $PFX_SZ + $(echo $POSTFIX | wc -c) + 2 )) | 37 | T=$(( $PFX_SZ + $(echo $POSTFIX | wc -c) + 2 )) |
38 | TABS="$(printf '\t\t\t\t\t\t')" | 38 | TABS="$(printf '\t\t\t\t\t\t')" |
39 | TABCOUNT=$(( ( 6*8 - ($T + 1) - $(echo "$NAME" | wc -c) ) / 8 )) | 39 | TABCOUNT=$(( ( 6*8 - ($T + 1) - $(echo "$NAME" | wc -c) ) / 8 )) |
40 | printf "\t[%s - %s]%.*s = %s,\n" "$PFX$NAME" "$POSTFIX" "$TABCOUNT" "$TABS" "$VALUE" | 40 | printf "\t[%s - %s]%.*s = %s,\n" "$PFX$NAME" "$POSTFIX" "$TABCOUNT" "$TABS" "$VALUE" |
41 | else | 41 | else |
42 | TABCOUNT=$(( ( 5*8 - ($PFX_SZ + 1) - $(echo "$NAME" | wc -c) ) / 8 )) | 42 | TABCOUNT=$(( ( 5*8 - ($PFX_SZ + 1) - $(echo "$NAME" | wc -c) ) / 8 )) |
43 | printf "\t[%s]%.*s = %s,\n" "$PFX$NAME" "$TABCOUNT" "$TABS" "$VALUE" | 43 | printf "\t[%s]%.*s = %s,\n" "$PFX$NAME" "$TABCOUNT" "$TABS" "$VALUE" |
44 | fi | 44 | fi |
45 | done | 45 | done |
46 | echo "};" | 46 | echo "};" |
47 | } | 47 | } |
48 | 48 | ||
49 | trap 'rm "$OUT"' EXIT | 49 | trap 'rm "$OUT"' EXIT |
50 | 50 | ||
51 | ( | 51 | ( |
52 | echo "#ifndef _ASM_X86_CPUFEATURE_H" | 52 | echo "#ifndef _ASM_X86_CPUFEATURE_H" |
53 | echo "#include <asm/cpufeature.h>" | 53 | echo "#include <asm/cpufeature.h>" |
54 | echo "#endif" | 54 | echo "#endif" |
55 | echo "" | 55 | echo "" |
56 | 56 | ||
57 | dump_array "x86_cap_flags" "NCAPINTS*32" "X86_FEATURE_" "" | 57 | dump_array "x86_cap_flags" "NCAPINTS*32" "X86_FEATURE_" "" |
58 | echo "" | 58 | echo "" |
59 | 59 | ||
60 | dump_array "x86_bug_flags" "NBUGINTS*32" "X86_BUG_" "NCAPINTS*32" | 60 | dump_array "x86_bug_flags" "NBUGINTS*32" "X86_BUG_" "NCAPINTS*32" |
61 | 61 | ||
62 | ) > $OUT | 62 | ) > $OUT |
63 | 63 | ||
64 | trap - EXIT | 64 | trap - EXIT |
65 | 65 |
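The one-character change at line 31 ("==" to "=") is the bash-ism the commit title refers to: POSIX test(1) only defines "=" for string equality, so when /bin/sh is dash (as on many Debian-derived systems) the old form aborts with an "unexpected operator" error. A hedged stand-alone demo (any throwaway string works):

    #!/bin/sh
    # '=' is POSIX; '==' is a bash extension that dash rejects.
    VALUE='""'
    [ "$VALUE" = '""' ] && echo 'portable test: match'
    # Uncommenting the next line under dash fails with
    # "[: unexpected operator"; bash accepts it, which is how
    # the bug went unnoticed.
    # [ "$VALUE" == '""' ] && echo 'bashism: match'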
arch/x86/mm/init.c
1 | #include <linux/gfp.h> | 1 | #include <linux/gfp.h> |
2 | #include <linux/initrd.h> | 2 | #include <linux/initrd.h> |
3 | #include <linux/ioport.h> | 3 | #include <linux/ioport.h> |
4 | #include <linux/swap.h> | 4 | #include <linux/swap.h> |
5 | #include <linux/memblock.h> | 5 | #include <linux/memblock.h> |
6 | #include <linux/bootmem.h> /* for max_low_pfn */ | 6 | #include <linux/bootmem.h> /* for max_low_pfn */ |
7 | 7 | ||
8 | #include <asm/cacheflush.h> | 8 | #include <asm/cacheflush.h> |
9 | #include <asm/e820.h> | 9 | #include <asm/e820.h> |
10 | #include <asm/init.h> | 10 | #include <asm/init.h> |
11 | #include <asm/page.h> | 11 | #include <asm/page.h> |
12 | #include <asm/page_types.h> | 12 | #include <asm/page_types.h> |
13 | #include <asm/sections.h> | 13 | #include <asm/sections.h> |
14 | #include <asm/setup.h> | 14 | #include <asm/setup.h> |
15 | #include <asm/tlbflush.h> | 15 | #include <asm/tlbflush.h> |
16 | #include <asm/tlb.h> | 16 | #include <asm/tlb.h> |
17 | #include <asm/proto.h> | 17 | #include <asm/proto.h> |
18 | #include <asm/dma.h> /* for MAX_DMA_PFN */ | 18 | #include <asm/dma.h> /* for MAX_DMA_PFN */ |
19 | #include <asm/microcode.h> | 19 | #include <asm/microcode.h> |
20 | 20 | ||
21 | /* | 21 | /* |
22 | * We need to define the tracepoints somewhere, and tlb.c | 22 | * We need to define the tracepoints somewhere, and tlb.c |
23 | * is only compied when SMP=y. | 23 | * is only compied when SMP=y. |
24 | */ | 24 | */ |
25 | #define CREATE_TRACE_POINTS | 25 | #define CREATE_TRACE_POINTS |
26 | #include <trace/events/tlb.h> | 26 | #include <trace/events/tlb.h> |
27 | 27 | ||
28 | #include "mm_internal.h" | 28 | #include "mm_internal.h" |
29 | 29 | ||
30 | /* | 30 | /* |
31 | * Tables translating between page_cache_type_t and pte encoding. | 31 | * Tables translating between page_cache_type_t and pte encoding. |
32 | * Minimal supported modes are defined statically, modified if more supported | 32 | * Minimal supported modes are defined statically, modified if more supported |
33 | * cache modes are available. | 33 | * cache modes are available. |
34 | * Index into __cachemode2pte_tbl is the cachemode. | 34 | * Index into __cachemode2pte_tbl is the cachemode. |
35 | * Index into __pte2cachemode_tbl are the caching attribute bits of the pte | 35 | * Index into __pte2cachemode_tbl are the caching attribute bits of the pte |
36 | * (_PAGE_PWT, _PAGE_PCD, _PAGE_PAT) at index bit positions 0, 1, 2. | 36 | * (_PAGE_PWT, _PAGE_PCD, _PAGE_PAT) at index bit positions 0, 1, 2. |
37 | */ | 37 | */ |
38 | uint16_t __cachemode2pte_tbl[_PAGE_CACHE_MODE_NUM] = { | 38 | uint16_t __cachemode2pte_tbl[_PAGE_CACHE_MODE_NUM] = { |
39 | [_PAGE_CACHE_MODE_WB] = 0, | 39 | [_PAGE_CACHE_MODE_WB] = 0, |
40 | [_PAGE_CACHE_MODE_WC] = _PAGE_PWT, | 40 | [_PAGE_CACHE_MODE_WC] = _PAGE_PWT, |
41 | [_PAGE_CACHE_MODE_UC_MINUS] = _PAGE_PCD, | 41 | [_PAGE_CACHE_MODE_UC_MINUS] = _PAGE_PCD, |
42 | [_PAGE_CACHE_MODE_UC] = _PAGE_PCD | _PAGE_PWT, | 42 | [_PAGE_CACHE_MODE_UC] = _PAGE_PCD | _PAGE_PWT, |
43 | [_PAGE_CACHE_MODE_WT] = _PAGE_PCD, | 43 | [_PAGE_CACHE_MODE_WT] = _PAGE_PCD, |
44 | [_PAGE_CACHE_MODE_WP] = _PAGE_PCD, | 44 | [_PAGE_CACHE_MODE_WP] = _PAGE_PCD, |
45 | }; | 45 | }; |
46 | EXPORT_SYMBOL_GPL(__cachemode2pte_tbl); | 46 | EXPORT_SYMBOL_GPL(__cachemode2pte_tbl); |
47 | uint8_t __pte2cachemode_tbl[8] = { | 47 | uint8_t __pte2cachemode_tbl[8] = { |
48 | [__pte2cm_idx(0)] = _PAGE_CACHE_MODE_WB, | 48 | [__pte2cm_idx(0)] = _PAGE_CACHE_MODE_WB, |
49 | [__pte2cm_idx(_PAGE_PWT)] = _PAGE_CACHE_MODE_WC, | 49 | [__pte2cm_idx(_PAGE_PWT)] = _PAGE_CACHE_MODE_WC, |
50 | [__pte2cm_idx(_PAGE_PCD)] = _PAGE_CACHE_MODE_UC_MINUS, | 50 | [__pte2cm_idx(_PAGE_PCD)] = _PAGE_CACHE_MODE_UC_MINUS, |
51 | [__pte2cm_idx(_PAGE_PWT | _PAGE_PCD)] = _PAGE_CACHE_MODE_UC, | 51 | [__pte2cm_idx(_PAGE_PWT | _PAGE_PCD)] = _PAGE_CACHE_MODE_UC, |
52 | [__pte2cm_idx(_PAGE_PAT)] = _PAGE_CACHE_MODE_WB, | 52 | [__pte2cm_idx(_PAGE_PAT)] = _PAGE_CACHE_MODE_WB, |
53 | [__pte2cm_idx(_PAGE_PWT | _PAGE_PAT)] = _PAGE_CACHE_MODE_WC, | 53 | [__pte2cm_idx(_PAGE_PWT | _PAGE_PAT)] = _PAGE_CACHE_MODE_WC, |
54 | [__pte2cm_idx(_PAGE_PCD | _PAGE_PAT)] = _PAGE_CACHE_MODE_UC_MINUS, | 54 | [__pte2cm_idx(_PAGE_PCD | _PAGE_PAT)] = _PAGE_CACHE_MODE_UC_MINUS, |
55 | [__pte2cm_idx(_PAGE_PWT | _PAGE_PCD | _PAGE_PAT)] = _PAGE_CACHE_MODE_UC, | 55 | [__pte2cm_idx(_PAGE_PWT | _PAGE_PCD | _PAGE_PAT)] = _PAGE_CACHE_MODE_UC, |
56 | }; | 56 | }; |
57 | EXPORT_SYMBOL_GPL(__pte2cachemode_tbl); | 57 | EXPORT_SYMBOL_GPL(__pte2cachemode_tbl); |
58 | 58 | ||
59 | static unsigned long __initdata pgt_buf_start; | 59 | static unsigned long __initdata pgt_buf_start; |
60 | static unsigned long __initdata pgt_buf_end; | 60 | static unsigned long __initdata pgt_buf_end; |
61 | static unsigned long __initdata pgt_buf_top; | 61 | static unsigned long __initdata pgt_buf_top; |
62 | 62 | ||
63 | static unsigned long min_pfn_mapped; | 63 | static unsigned long min_pfn_mapped; |
64 | 64 | ||
65 | static bool __initdata can_use_brk_pgt = true; | 65 | static bool __initdata can_use_brk_pgt = true; |
66 | 66 | ||
67 | /* | 67 | /* |
68 | * Pages returned are already directly mapped. | 68 | * Pages returned are already directly mapped. |
69 | * | 69 | * |
70 | * Changing that is likely to break Xen, see commit: | 70 | * Changing that is likely to break Xen, see commit: |
71 | * | 71 | * |
72 | * 279b706 x86,xen: introduce x86_init.mapping.pagetable_reserve | 72 | * 279b706 x86,xen: introduce x86_init.mapping.pagetable_reserve |
73 | * | 73 | * |
74 | * for detailed information. | 74 | * for detailed information. |
75 | */ | 75 | */ |
76 | __ref void *alloc_low_pages(unsigned int num) | 76 | __ref void *alloc_low_pages(unsigned int num) |
77 | { | 77 | { |
78 | unsigned long pfn; | 78 | unsigned long pfn; |
79 | int i; | 79 | int i; |
80 | 80 | ||
81 | if (after_bootmem) { | 81 | if (after_bootmem) { |
82 | unsigned int order; | 82 | unsigned int order; |
83 | 83 | ||
84 | order = get_order((unsigned long)num << PAGE_SHIFT); | 84 | order = get_order((unsigned long)num << PAGE_SHIFT); |
85 | return (void *)__get_free_pages(GFP_ATOMIC | __GFP_NOTRACK | | 85 | return (void *)__get_free_pages(GFP_ATOMIC | __GFP_NOTRACK | |
86 | __GFP_ZERO, order); | 86 | __GFP_ZERO, order); |
87 | } | 87 | } |
88 | 88 | ||
89 | if ((pgt_buf_end + num) > pgt_buf_top || !can_use_brk_pgt) { | 89 | if ((pgt_buf_end + num) > pgt_buf_top || !can_use_brk_pgt) { |
90 | unsigned long ret; | 90 | unsigned long ret; |
91 | if (min_pfn_mapped >= max_pfn_mapped) | 91 | if (min_pfn_mapped >= max_pfn_mapped) |
92 | panic("alloc_low_pages: ran out of memory"); | 92 | panic("alloc_low_pages: ran out of memory"); |
93 | ret = memblock_find_in_range(min_pfn_mapped << PAGE_SHIFT, | 93 | ret = memblock_find_in_range(min_pfn_mapped << PAGE_SHIFT, |
94 | max_pfn_mapped << PAGE_SHIFT, | 94 | max_pfn_mapped << PAGE_SHIFT, |
95 | PAGE_SIZE * num , PAGE_SIZE); | 95 | PAGE_SIZE * num , PAGE_SIZE); |
96 | if (!ret) | 96 | if (!ret) |
97 | panic("alloc_low_pages: can not alloc memory"); | 97 | panic("alloc_low_pages: can not alloc memory"); |
98 | memblock_reserve(ret, PAGE_SIZE * num); | 98 | memblock_reserve(ret, PAGE_SIZE * num); |
99 | pfn = ret >> PAGE_SHIFT; | 99 | pfn = ret >> PAGE_SHIFT; |
100 | } else { | 100 | } else { |
101 | pfn = pgt_buf_end; | 101 | pfn = pgt_buf_end; |
102 | pgt_buf_end += num; | 102 | pgt_buf_end += num; |
103 | printk(KERN_DEBUG "BRK [%#010lx, %#010lx] PGTABLE\n", | 103 | printk(KERN_DEBUG "BRK [%#010lx, %#010lx] PGTABLE\n", |
104 | pfn << PAGE_SHIFT, (pgt_buf_end << PAGE_SHIFT) - 1); | 104 | pfn << PAGE_SHIFT, (pgt_buf_end << PAGE_SHIFT) - 1); |
105 | } | 105 | } |
106 | 106 | ||
107 | for (i = 0; i < num; i++) { | 107 | for (i = 0; i < num; i++) { |
108 | void *adr; | 108 | void *adr; |
109 | 109 | ||
110 | adr = __va((pfn + i) << PAGE_SHIFT); | 110 | adr = __va((pfn + i) << PAGE_SHIFT); |
111 | clear_page(adr); | 111 | clear_page(adr); |
112 | } | 112 | } |
113 | 113 | ||
114 | return __va(pfn << PAGE_SHIFT); | 114 | return __va(pfn << PAGE_SHIFT); |
115 | } | 115 | } |
116 | 116 | ||
117 | /* need 3 4k for initial PMD_SIZE, 3 4k for 0-ISA_END_ADDRESS */ | 117 | /* need 3 4k for initial PMD_SIZE, 3 4k for 0-ISA_END_ADDRESS */ |
118 | #define INIT_PGT_BUF_SIZE (6 * PAGE_SIZE) | 118 | #define INIT_PGT_BUF_SIZE (6 * PAGE_SIZE) |
119 | RESERVE_BRK(early_pgt_alloc, INIT_PGT_BUF_SIZE); | 119 | RESERVE_BRK(early_pgt_alloc, INIT_PGT_BUF_SIZE); |
120 | void __init early_alloc_pgt_buf(void) | 120 | void __init early_alloc_pgt_buf(void) |
121 | { | 121 | { |
122 | unsigned long tables = INIT_PGT_BUF_SIZE; | 122 | unsigned long tables = INIT_PGT_BUF_SIZE; |
123 | phys_addr_t base; | 123 | phys_addr_t base; |
124 | 124 | ||
125 | base = __pa(extend_brk(tables, PAGE_SIZE)); | 125 | base = __pa(extend_brk(tables, PAGE_SIZE)); |
126 | 126 | ||
127 | pgt_buf_start = base >> PAGE_SHIFT; | 127 | pgt_buf_start = base >> PAGE_SHIFT; |
128 | pgt_buf_end = pgt_buf_start; | 128 | pgt_buf_end = pgt_buf_start; |
129 | pgt_buf_top = pgt_buf_start + (tables >> PAGE_SHIFT); | 129 | pgt_buf_top = pgt_buf_start + (tables >> PAGE_SHIFT); |
130 | } | 130 | } |
131 | 131 | ||
132 | int after_bootmem; | 132 | int after_bootmem; |
133 | 133 | ||
134 | int direct_gbpages | 134 | int direct_gbpages |
135 | #ifdef CONFIG_DIRECT_GBPAGES | 135 | #ifdef CONFIG_DIRECT_GBPAGES |
136 | = 1 | 136 | = 1 |
137 | #endif | 137 | #endif |
138 | ; | 138 | ; |
139 | 139 | ||
140 | static void __init init_gbpages(void) | 140 | static void __init init_gbpages(void) |
141 | { | 141 | { |
142 | #ifdef CONFIG_X86_64 | 142 | #ifdef CONFIG_X86_64 |
143 | if (direct_gbpages && cpu_has_gbpages) | 143 | if (direct_gbpages && cpu_has_gbpages) |
144 | printk(KERN_INFO "Using GB pages for direct mapping\n"); | 144 | printk(KERN_INFO "Using GB pages for direct mapping\n"); |
145 | else | 145 | else |
146 | direct_gbpages = 0; | 146 | direct_gbpages = 0; |
147 | #endif | 147 | #endif |
148 | } | 148 | } |
149 | 149 | ||
150 | struct map_range { | 150 | struct map_range { |
151 | unsigned long start; | 151 | unsigned long start; |
152 | unsigned long end; | 152 | unsigned long end; |
153 | unsigned page_size_mask; | 153 | unsigned page_size_mask; |
154 | }; | 154 | }; |
155 | 155 | ||
156 | static int page_size_mask; | 156 | static int page_size_mask; |
157 | 157 | ||
158 | static void __init probe_page_size_mask(void) | 158 | static void __init probe_page_size_mask(void) |
159 | { | 159 | { |
160 | init_gbpages(); | 160 | init_gbpages(); |
161 | 161 | ||
162 | #if !defined(CONFIG_DEBUG_PAGEALLOC) && !defined(CONFIG_KMEMCHECK) | 162 | #if !defined(CONFIG_DEBUG_PAGEALLOC) && !defined(CONFIG_KMEMCHECK) |
163 | /* | 163 | /* |
164 | * For CONFIG_DEBUG_PAGEALLOC, identity mapping will use small pages. | 164 | * For CONFIG_DEBUG_PAGEALLOC, identity mapping will use small pages. |
165 | * This will simplify cpa(), which otherwise needs to support splitting | 165 | * This will simplify cpa(), which otherwise needs to support splitting |
166 | * large pages into small in interrupt context, etc. | 166 | * large pages into small in interrupt context, etc. |
167 | */ | 167 | */ |
168 | if (direct_gbpages) | 168 | if (direct_gbpages) |
169 | page_size_mask |= 1 << PG_LEVEL_1G; | 169 | page_size_mask |= 1 << PG_LEVEL_1G; |
170 | if (cpu_has_pse) | 170 | if (cpu_has_pse) |
171 | page_size_mask |= 1 << PG_LEVEL_2M; | 171 | page_size_mask |= 1 << PG_LEVEL_2M; |
172 | #endif | 172 | #endif |
173 | 173 | ||
174 | /* Enable PSE if available */ | 174 | /* Enable PSE if available */ |
175 | if (cpu_has_pse) | 175 | if (cpu_has_pse) |
176 | set_in_cr4(X86_CR4_PSE); | 176 | set_in_cr4(X86_CR4_PSE); |
177 | 177 | ||
178 | /* Enable PGE if available */ | 178 | /* Enable PGE if available */ |
179 | if (cpu_has_pge) { | 179 | if (cpu_has_pge) { |
180 | set_in_cr4(X86_CR4_PGE); | 180 | set_in_cr4(X86_CR4_PGE); |
181 | __supported_pte_mask |= _PAGE_GLOBAL; | 181 | __supported_pte_mask |= _PAGE_GLOBAL; |
182 | } | 182 | } |
183 | } | 183 | } |
184 | 184 | ||
185 | #ifdef CONFIG_X86_32 | 185 | #ifdef CONFIG_X86_32 |
186 | #define NR_RANGE_MR 3 | 186 | #define NR_RANGE_MR 3 |
187 | #else /* CONFIG_X86_64 */ | 187 | #else /* CONFIG_X86_64 */ |
188 | #define NR_RANGE_MR 5 | 188 | #define NR_RANGE_MR 5 |
189 | #endif | 189 | #endif |
190 | 190 | ||
191 | static int __meminit save_mr(struct map_range *mr, int nr_range, | 191 | static int __meminit save_mr(struct map_range *mr, int nr_range, |
192 | unsigned long start_pfn, unsigned long end_pfn, | 192 | unsigned long start_pfn, unsigned long end_pfn, |
193 | unsigned long page_size_mask) | 193 | unsigned long page_size_mask) |
194 | { | 194 | { |
195 | if (start_pfn < end_pfn) { | 195 | if (start_pfn < end_pfn) { |
196 | if (nr_range >= NR_RANGE_MR) | 196 | if (nr_range >= NR_RANGE_MR) |
197 | panic("run out of range for init_memory_mapping\n"); | 197 | panic("run out of range for init_memory_mapping\n"); |
198 | mr[nr_range].start = start_pfn<<PAGE_SHIFT; | 198 | mr[nr_range].start = start_pfn<<PAGE_SHIFT; |
199 | mr[nr_range].end = end_pfn<<PAGE_SHIFT; | 199 | mr[nr_range].end = end_pfn<<PAGE_SHIFT; |
200 | mr[nr_range].page_size_mask = page_size_mask; | 200 | mr[nr_range].page_size_mask = page_size_mask; |
201 | nr_range++; | 201 | nr_range++; |
202 | } | 202 | } |
203 | 203 | ||
204 | return nr_range; | 204 | return nr_range; |
205 | } | 205 | } |
206 | 206 | ||
207 | /* | 207 | /* |
208 | * adjust the page_size_mask for small range to go with | 208 | * adjust the page_size_mask for small range to go with |
209 | * big page size instead small one if nearby are ram too. | 209 | * big page size instead small one if nearby are ram too. |
210 | */ | 210 | */ |
211 | static void __init_refok adjust_range_page_size_mask(struct map_range *mr, | 211 | static void __init_refok adjust_range_page_size_mask(struct map_range *mr, |
212 | int nr_range) | 212 | int nr_range) |
213 | { | 213 | { |
214 | int i; | 214 | int i; |
215 | 215 | ||
216 | for (i = 0; i < nr_range; i++) { | 216 | for (i = 0; i < nr_range; i++) { |
217 | if ((page_size_mask & (1<<PG_LEVEL_2M)) && | 217 | if ((page_size_mask & (1<<PG_LEVEL_2M)) && |
218 | !(mr[i].page_size_mask & (1<<PG_LEVEL_2M))) { | 218 | !(mr[i].page_size_mask & (1<<PG_LEVEL_2M))) { |
219 | unsigned long start = round_down(mr[i].start, PMD_SIZE); | 219 | unsigned long start = round_down(mr[i].start, PMD_SIZE); |
220 | unsigned long end = round_up(mr[i].end, PMD_SIZE); | 220 | unsigned long end = round_up(mr[i].end, PMD_SIZE); |
221 | 221 | ||
222 | #ifdef CONFIG_X86_32 | 222 | #ifdef CONFIG_X86_32 |
223 | if ((end >> PAGE_SHIFT) > max_low_pfn) | 223 | if ((end >> PAGE_SHIFT) > max_low_pfn) |
224 | continue; | 224 | continue; |
225 | #endif | 225 | #endif |
226 | 226 | ||
227 | if (memblock_is_region_memory(start, end - start)) | 227 | if (memblock_is_region_memory(start, end - start)) |
228 | mr[i].page_size_mask |= 1<<PG_LEVEL_2M; | 228 | mr[i].page_size_mask |= 1<<PG_LEVEL_2M; |
229 | } | 229 | } |
230 | if ((page_size_mask & (1<<PG_LEVEL_1G)) && | 230 | if ((page_size_mask & (1<<PG_LEVEL_1G)) && |
231 | !(mr[i].page_size_mask & (1<<PG_LEVEL_1G))) { | 231 | !(mr[i].page_size_mask & (1<<PG_LEVEL_1G))) { |
232 | unsigned long start = round_down(mr[i].start, PUD_SIZE); | 232 | unsigned long start = round_down(mr[i].start, PUD_SIZE); |
233 | unsigned long end = round_up(mr[i].end, PUD_SIZE); | 233 | unsigned long end = round_up(mr[i].end, PUD_SIZE); |
234 | 234 | ||
235 | if (memblock_is_region_memory(start, end - start)) | 235 | if (memblock_is_region_memory(start, end - start)) |
236 | mr[i].page_size_mask |= 1<<PG_LEVEL_1G; | 236 | mr[i].page_size_mask |= 1<<PG_LEVEL_1G; |
237 | } | 237 | } |
238 | } | 238 | } |
239 | } | 239 | } |
240 | 240 | ||
241 | static int __meminit split_mem_range(struct map_range *mr, int nr_range, | 241 | static int __meminit split_mem_range(struct map_range *mr, int nr_range, |
242 | unsigned long start, | 242 | unsigned long start, |
243 | unsigned long end) | 243 | unsigned long end) |
244 | { | 244 | { |
245 | unsigned long start_pfn, end_pfn, limit_pfn; | 245 | unsigned long start_pfn, end_pfn, limit_pfn; |
246 | unsigned long pfn; | 246 | unsigned long pfn; |
247 | int i; | 247 | int i; |
248 | 248 | ||
249 | limit_pfn = PFN_DOWN(end); | 249 | limit_pfn = PFN_DOWN(end); |
250 | 250 | ||
251 | /* head if not big page alignment ? */ | 251 | /* head if not big page alignment ? */ |
252 | pfn = start_pfn = PFN_DOWN(start); | 252 | pfn = start_pfn = PFN_DOWN(start); |
253 | #ifdef CONFIG_X86_32 | 253 | #ifdef CONFIG_X86_32 |
254 | /* | 254 | /* |
255 | * Don't use a large page for the first 2/4MB of memory | 255 | * Don't use a large page for the first 2/4MB of memory |
256 | * because there are often fixed size MTRRs in there | 256 | * because there are often fixed size MTRRs in there |
257 | * and overlapping MTRRs into large pages can cause | 257 | * and overlapping MTRRs into large pages can cause |
258 | * slowdowns. | 258 | * slowdowns. |
259 | */ | 259 | */ |
260 | if (pfn == 0) | 260 | if (pfn == 0) |
261 | end_pfn = PFN_DOWN(PMD_SIZE); | 261 | end_pfn = PFN_DOWN(PMD_SIZE); |
262 | else | 262 | else |
263 | end_pfn = round_up(pfn, PFN_DOWN(PMD_SIZE)); | 263 | end_pfn = round_up(pfn, PFN_DOWN(PMD_SIZE)); |
264 | #else /* CONFIG_X86_64 */ | 264 | #else /* CONFIG_X86_64 */ |
265 | end_pfn = round_up(pfn, PFN_DOWN(PMD_SIZE)); | 265 | end_pfn = round_up(pfn, PFN_DOWN(PMD_SIZE)); |
266 | #endif | 266 | #endif |
267 | if (end_pfn > limit_pfn) | 267 | if (end_pfn > limit_pfn) |
268 | end_pfn = limit_pfn; | 268 | end_pfn = limit_pfn; |
269 | if (start_pfn < end_pfn) { | 269 | if (start_pfn < end_pfn) { |
270 | nr_range = save_mr(mr, nr_range, start_pfn, end_pfn, 0); | 270 | nr_range = save_mr(mr, nr_range, start_pfn, end_pfn, 0); |
271 | pfn = end_pfn; | 271 | pfn = end_pfn; |
272 | } | 272 | } |
273 | 273 | ||
274 | /* big page (2M) range */ | 274 | /* big page (2M) range */ |
275 | start_pfn = round_up(pfn, PFN_DOWN(PMD_SIZE)); | 275 | start_pfn = round_up(pfn, PFN_DOWN(PMD_SIZE)); |
276 | #ifdef CONFIG_X86_32 | 276 | #ifdef CONFIG_X86_32 |
277 | end_pfn = round_down(limit_pfn, PFN_DOWN(PMD_SIZE)); | 277 | end_pfn = round_down(limit_pfn, PFN_DOWN(PMD_SIZE)); |
278 | #else /* CONFIG_X86_64 */ | 278 | #else /* CONFIG_X86_64 */ |
279 | end_pfn = round_up(pfn, PFN_DOWN(PUD_SIZE)); | 279 | end_pfn = round_up(pfn, PFN_DOWN(PUD_SIZE)); |
280 | if (end_pfn > round_down(limit_pfn, PFN_DOWN(PMD_SIZE))) | 280 | if (end_pfn > round_down(limit_pfn, PFN_DOWN(PMD_SIZE))) |
281 | end_pfn = round_down(limit_pfn, PFN_DOWN(PMD_SIZE)); | 281 | end_pfn = round_down(limit_pfn, PFN_DOWN(PMD_SIZE)); |
282 | #endif | 282 | #endif |
283 | 283 | ||
284 | if (start_pfn < end_pfn) { | 284 | if (start_pfn < end_pfn) { |
285 | nr_range = save_mr(mr, nr_range, start_pfn, end_pfn, | 285 | nr_range = save_mr(mr, nr_range, start_pfn, end_pfn, |
286 | page_size_mask & (1<<PG_LEVEL_2M)); | 286 | page_size_mask & (1<<PG_LEVEL_2M)); |
287 | pfn = end_pfn; | 287 | pfn = end_pfn; |
288 | } | 288 | } |
289 | 289 | ||
290 | #ifdef CONFIG_X86_64 | 290 | #ifdef CONFIG_X86_64 |
291 | /* big page (1G) range */ | 291 | /* big page (1G) range */ |
292 | start_pfn = round_up(pfn, PFN_DOWN(PUD_SIZE)); | 292 | start_pfn = round_up(pfn, PFN_DOWN(PUD_SIZE)); |
293 | end_pfn = round_down(limit_pfn, PFN_DOWN(PUD_SIZE)); | 293 | end_pfn = round_down(limit_pfn, PFN_DOWN(PUD_SIZE)); |
294 | if (start_pfn < end_pfn) { | 294 | if (start_pfn < end_pfn) { |
295 | nr_range = save_mr(mr, nr_range, start_pfn, end_pfn, | 295 | nr_range = save_mr(mr, nr_range, start_pfn, end_pfn, |
296 | page_size_mask & | 296 | page_size_mask & |
297 | ((1<<PG_LEVEL_2M)|(1<<PG_LEVEL_1G))); | 297 | ((1<<PG_LEVEL_2M)|(1<<PG_LEVEL_1G))); |
298 | pfn = end_pfn; | 298 | pfn = end_pfn; |
299 | } | 299 | } |
300 | 300 | ||
301 | /* tail is not big page (1G) alignment */ | 301 | /* tail is not big page (1G) alignment */ |
302 | start_pfn = round_up(pfn, PFN_DOWN(PMD_SIZE)); | 302 | start_pfn = round_up(pfn, PFN_DOWN(PMD_SIZE)); |
303 | end_pfn = round_down(limit_pfn, PFN_DOWN(PMD_SIZE)); | 303 | end_pfn = round_down(limit_pfn, PFN_DOWN(PMD_SIZE)); |
304 | if (start_pfn < end_pfn) { | 304 | if (start_pfn < end_pfn) { |
305 | nr_range = save_mr(mr, nr_range, start_pfn, end_pfn, | 305 | nr_range = save_mr(mr, nr_range, start_pfn, end_pfn, |
306 | page_size_mask & (1<<PG_LEVEL_2M)); | 306 | page_size_mask & (1<<PG_LEVEL_2M)); |
307 | pfn = end_pfn; | 307 | pfn = end_pfn; |
308 | } | 308 | } |
309 | #endif | 309 | #endif |
310 | 310 | ||
311 | /* tail is not big page (2M) alignment */ | 311 | /* tail is not big page (2M) alignment */ |
312 | start_pfn = pfn; | 312 | start_pfn = pfn; |
313 | end_pfn = limit_pfn; | 313 | end_pfn = limit_pfn; |
314 | nr_range = save_mr(mr, nr_range, start_pfn, end_pfn, 0); | 314 | nr_range = save_mr(mr, nr_range, start_pfn, end_pfn, 0); |
315 | 315 | ||
316 | if (!after_bootmem) | 316 | if (!after_bootmem) |
317 | adjust_range_page_size_mask(mr, nr_range); | 317 | adjust_range_page_size_mask(mr, nr_range); |
318 | 318 | ||
319 | /* try to merge same page size and continuous */ | 319 | /* try to merge same page size and continuous */ |
320 | for (i = 0; nr_range > 1 && i < nr_range - 1; i++) { | 320 | for (i = 0; nr_range > 1 && i < nr_range - 1; i++) { |
321 | unsigned long old_start; | 321 | unsigned long old_start; |
322 | if (mr[i].end != mr[i+1].start || | 322 | if (mr[i].end != mr[i+1].start || |
323 | mr[i].page_size_mask != mr[i+1].page_size_mask) | 323 | mr[i].page_size_mask != mr[i+1].page_size_mask) |
324 | continue; | 324 | continue; |
325 | /* move it */ | 325 | /* move it */ |
326 | old_start = mr[i].start; | 326 | old_start = mr[i].start; |
327 | memmove(&mr[i], &mr[i+1], | 327 | memmove(&mr[i], &mr[i+1], |
328 | (nr_range - 1 - i) * sizeof(struct map_range)); | 328 | (nr_range - 1 - i) * sizeof(struct map_range)); |
329 | mr[i--].start = old_start; | 329 | mr[i--].start = old_start; |
330 | nr_range--; | 330 | nr_range--; |
331 | } | 331 | } |
332 | 332 | ||
333 | for (i = 0; i < nr_range; i++) | 333 | for (i = 0; i < nr_range; i++) |
334 | printk(KERN_DEBUG " [mem %#010lx-%#010lx] page %s\n", | 334 | printk(KERN_DEBUG " [mem %#010lx-%#010lx] page %s\n", |
335 | mr[i].start, mr[i].end - 1, | 335 | mr[i].start, mr[i].end - 1, |
336 | (mr[i].page_size_mask & (1<<PG_LEVEL_1G))?"1G":( | 336 | (mr[i].page_size_mask & (1<<PG_LEVEL_1G))?"1G":( |
337 | (mr[i].page_size_mask & (1<<PG_LEVEL_2M))?"2M":"4k")); | 337 | (mr[i].page_size_mask & (1<<PG_LEVEL_2M))?"2M":"4k")); |
338 | 338 | ||
339 | return nr_range; | 339 | return nr_range; |
340 | } | 340 | } |
341 | 341 | ||
342 | struct range pfn_mapped[E820_X_MAX]; | 342 | struct range pfn_mapped[E820_X_MAX]; |
343 | int nr_pfn_mapped; | 343 | int nr_pfn_mapped; |
344 | 344 | ||
345 | static void add_pfn_range_mapped(unsigned long start_pfn, unsigned long end_pfn) | 345 | static void add_pfn_range_mapped(unsigned long start_pfn, unsigned long end_pfn) |
346 | { | 346 | { |
347 | nr_pfn_mapped = add_range_with_merge(pfn_mapped, E820_X_MAX, | 347 | nr_pfn_mapped = add_range_with_merge(pfn_mapped, E820_X_MAX, |
348 | nr_pfn_mapped, start_pfn, end_pfn); | 348 | nr_pfn_mapped, start_pfn, end_pfn); |
349 | nr_pfn_mapped = clean_sort_range(pfn_mapped, E820_X_MAX); | 349 | nr_pfn_mapped = clean_sort_range(pfn_mapped, E820_X_MAX); |
350 | 350 | ||
351 | max_pfn_mapped = max(max_pfn_mapped, end_pfn); | 351 | max_pfn_mapped = max(max_pfn_mapped, end_pfn); |
352 | 352 | ||
353 | if (start_pfn < (1UL<<(32-PAGE_SHIFT))) | 353 | if (start_pfn < (1UL<<(32-PAGE_SHIFT))) |
354 | max_low_pfn_mapped = max(max_low_pfn_mapped, | 354 | max_low_pfn_mapped = max(max_low_pfn_mapped, |
355 | min(end_pfn, 1UL<<(32-PAGE_SHIFT))); | 355 | min(end_pfn, 1UL<<(32-PAGE_SHIFT))); |
356 | } | 356 | } |
357 | 357 | ||
358 | bool pfn_range_is_mapped(unsigned long start_pfn, unsigned long end_pfn) | 358 | bool pfn_range_is_mapped(unsigned long start_pfn, unsigned long end_pfn) |
359 | { | 359 | { |
360 | int i; | 360 | int i; |
361 | 361 | ||
362 | for (i = 0; i < nr_pfn_mapped; i++) | 362 | for (i = 0; i < nr_pfn_mapped; i++) |
363 | if ((start_pfn >= pfn_mapped[i].start) && | 363 | if ((start_pfn >= pfn_mapped[i].start) && |
364 | (end_pfn <= pfn_mapped[i].end)) | 364 | (end_pfn <= pfn_mapped[i].end)) |
365 | return true; | 365 | return true; |
366 | 366 | ||
367 | return false; | 367 | return false; |
368 | } | 368 | } |
369 | 369 | ||
370 | /* | 370 | /* |
371 | * Setup the direct mapping of the physical memory at PAGE_OFFSET. | 371 | * Setup the direct mapping of the physical memory at PAGE_OFFSET. |
372 | * This runs before bootmem is initialized and gets pages directly from | 372 | * This runs before bootmem is initialized and gets pages directly from |
373 | * the physical memory. To access them they are temporarily mapped. | 373 | * the physical memory. To access them they are temporarily mapped. |
374 | */ | 374 | */ |
375 | unsigned long __init_refok init_memory_mapping(unsigned long start, | 375 | unsigned long __init_refok init_memory_mapping(unsigned long start, |
376 | unsigned long end) | 376 | unsigned long end) |
377 | { | 377 | { |
378 | struct map_range mr[NR_RANGE_MR]; | 378 | struct map_range mr[NR_RANGE_MR]; |
379 | unsigned long ret = 0; | 379 | unsigned long ret = 0; |
380 | int nr_range, i; | 380 | int nr_range, i; |
381 | 381 | ||
382 | pr_info("init_memory_mapping: [mem %#010lx-%#010lx]\n", | 382 | pr_info("init_memory_mapping: [mem %#010lx-%#010lx]\n", |
383 | start, end - 1); | 383 | start, end - 1); |
384 | 384 | ||
385 | memset(mr, 0, sizeof(mr)); | 385 | memset(mr, 0, sizeof(mr)); |
386 | nr_range = split_mem_range(mr, 0, start, end); | 386 | nr_range = split_mem_range(mr, 0, start, end); |
387 | 387 | ||
388 | for (i = 0; i < nr_range; i++) | 388 | for (i = 0; i < nr_range; i++) |
389 | ret = kernel_physical_mapping_init(mr[i].start, mr[i].end, | 389 | ret = kernel_physical_mapping_init(mr[i].start, mr[i].end, |
390 | mr[i].page_size_mask); | 390 | mr[i].page_size_mask); |
391 | 391 | ||
392 | add_pfn_range_mapped(start >> PAGE_SHIFT, ret >> PAGE_SHIFT); | 392 | add_pfn_range_mapped(start >> PAGE_SHIFT, ret >> PAGE_SHIFT); |
393 | 393 | ||
394 | return ret >> PAGE_SHIFT; | 394 | return ret >> PAGE_SHIFT; |
395 | } | 395 | } |
396 | 396 | ||
397 | /* | 397 | /* |
398 | * We need to iterate through the E820 memory map and create direct mappings | 398 | * We need to iterate through the E820 memory map and create direct mappings |
399 | * for only E820_RAM and E820_KERN_RESERVED regions. We cannot simply | 399 | * for only E820_RAM and E820_KERN_RESERVED regions. We cannot simply |
400 | * create direct mappings for all pfns from [0 to max_low_pfn) and | 400 | * create direct mappings for all pfns from [0 to max_low_pfn) and |
401 | * [4GB to max_pfn) because of possible memory holes in high addresses | 401 | * [4GB to max_pfn) because of possible memory holes in high addresses |
402 | * that cannot be marked as UC by fixed/variable range MTRRs. | 402 | * that cannot be marked as UC by fixed/variable range MTRRs. |
403 | * Depending on the alignment of E820 ranges, this may result in | 403 | * Depending on the alignment of E820 ranges, this may result in |
404 | * using smaller page sizes (i.e. 4K instead of 2M or 1G). | 404 | * using smaller page sizes (i.e. 4K instead of 2M or 1G). |
405 | * | 405 | * |
406 | * init_mem_mapping() calls init_range_memory_mapping() with a big range. | 406 | * init_mem_mapping() calls init_range_memory_mapping() with a big range. |
407 | * That range can have holes in the middle or at the ends, and only the | 407 | * That range can have holes in the middle or at the ends, and only the |
408 | * RAM parts will be mapped in init_range_memory_mapping(). | 408 | * RAM parts will be mapped in init_range_memory_mapping(). |
409 | */ | 409 | */ |
410 | static unsigned long __init init_range_memory_mapping( | 410 | static unsigned long __init init_range_memory_mapping( |
411 | unsigned long r_start, | 411 | unsigned long r_start, |
412 | unsigned long r_end) | 412 | unsigned long r_end) |
413 | { | 413 | { |
414 | unsigned long start_pfn, end_pfn; | 414 | unsigned long start_pfn, end_pfn; |
415 | unsigned long mapped_ram_size = 0; | 415 | unsigned long mapped_ram_size = 0; |
416 | int i; | 416 | int i; |
417 | 417 | ||
418 | for_each_mem_pfn_range(i, MAX_NUMNODES, &start_pfn, &end_pfn, NULL) { | 418 | for_each_mem_pfn_range(i, MAX_NUMNODES, &start_pfn, &end_pfn, NULL) { |
419 | u64 start = clamp_val(PFN_PHYS(start_pfn), r_start, r_end); | 419 | u64 start = clamp_val(PFN_PHYS(start_pfn), r_start, r_end); |
420 | u64 end = clamp_val(PFN_PHYS(end_pfn), r_start, r_end); | 420 | u64 end = clamp_val(PFN_PHYS(end_pfn), r_start, r_end); |
421 | if (start >= end) | 421 | if (start >= end) |
422 | continue; | 422 | continue; |
423 | 423 | ||
424 | /* | 424 | /* |
425 | * If it overlaps with the brk pgt, we need to allocate | 425 | * If it overlaps with the brk pgt, we need to allocate |
426 | * the pgt buf from memblock instead. | 426 | * the pgt buf from memblock instead. |
427 | */ | 427 | */ |
428 | can_use_brk_pgt = max(start, (u64)pgt_buf_end<<PAGE_SHIFT) >= | 428 | can_use_brk_pgt = max(start, (u64)pgt_buf_end<<PAGE_SHIFT) >= |
429 | min(end, (u64)pgt_buf_top<<PAGE_SHIFT); | 429 | min(end, (u64)pgt_buf_top<<PAGE_SHIFT); |
430 | init_memory_mapping(start, end); | 430 | init_memory_mapping(start, end); |
431 | mapped_ram_size += end - start; | 431 | mapped_ram_size += end - start; |
432 | can_use_brk_pgt = true; | 432 | can_use_brk_pgt = true; |
433 | } | 433 | } |
434 | 434 | ||
435 | return mapped_ram_size; | 435 | return mapped_ram_size; |
436 | } | 436 | } |
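
The clamp_val() pair in the loop above is what trims each memblock range to the window being mapped; anything wholly outside is skipped by the `start >= end` test. A tiny stand-alone illustration, with clamp_val spelled out as its min/max composition:

    #include <stdio.h>

    #define clamp_val(v, lo, hi)  ((v) < (lo) ? (lo) : ((v) > (hi) ? (hi) : (v)))

    int main(void)
    {
            /* Window [1G, 2G); a RAM range [0.5G, 1.5G) contributes only
             * the overlap [1G, 1.5G). */
            unsigned long long r_start = 0x40000000ULL, r_end = 0x80000000ULL;
            unsigned long long start = clamp_val(0x20000000ULL, r_start, r_end);
            unsigned long long end = clamp_val(0x60000000ULL, r_start, r_end);

            if (start < end)
                    printf("map [%#llx, %#llx)\n", start, end);
            return 0;
    }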
437 | 437 | ||
438 | static unsigned long __init get_new_step_size(unsigned long step_size) | 438 | static unsigned long __init get_new_step_size(unsigned long step_size) |
439 | { | 439 | { |
440 | /* | 440 | /* |
441 | * Explain why we shift by 5 and why we don't have to worry about | 441 | * Initial mapped size is PMD_SIZE (2M). |
442 | * 'step_size << 5' overflowing: | ||
443 | * | ||
444 | * initial mapped size is PMD_SIZE (2M). | ||
445 | * We cannot set step_size to be PUD_SIZE (1G) yet. | 442 | * We cannot set step_size to be PUD_SIZE (1G) yet. |
446 | * In the worst case, when we cross the 1G boundary and | 443 | * In the worst case, when we cross the 1G boundary and |
447 | * PG_LEVEL_2M is not set, we will need 1+1+512 pages (2M + 8k) | 444 | * PG_LEVEL_2M is not set, we will need 1+1+512 pages (2M + 8k) |
448 | * to map a 1G range with PTEs. Use 5 as shift for now. | 445 | * to map a 1G range with PTEs. Hence we use one less than the |
446 | * difference of page table level shifts. | ||
449 | * | 447 | * |
450 | * Don't need to worry about overflow, on 32bit, when step_size | 448 | * Don't need to worry about overflow in the top-down case, on 32bit, |
451 | * is 0, round_down() returns 0 for start, and that turns it | 449 | * when step_size is 0, round_down() returns 0 for start, and that |
452 | * into 0x100000000ULL. | 450 | * turns it into 0x100000000ULL. |
451 | * In the bottom-up case, round_up(x, 0) returns 0 though too, which | ||
452 | * needs to be taken into consideration by the code below. | ||
453 | */ | 453 | */ |
454 | return step_size << 5; | 454 | return step_size << (PMD_SHIFT - PAGE_SHIFT - 1); |
455 | } | 455 | } |
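
Plugging in the common 4K-page constants: PMD_SHIFT - PAGE_SHIFT - 1 = 21 - 12 - 1 = 8, so each call multiplies step_size by 2^8 = 256 and the sequence runs 2M, 512M, 128G, and so on. The removed constant 5 grew the step only 32x per call; the new form derives the factor from the page-table geometry instead of a magic number.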
456 | 456 | ||
457 | /** | 457 | /** |
458 | * memory_map_top_down - Map [map_start, map_end) top down | 458 | * memory_map_top_down - Map [map_start, map_end) top down |
459 | * @map_start: start address of the target memory range | 459 | * @map_start: start address of the target memory range |
460 | * @map_end: end address of the target memory range | 460 | * @map_end: end address of the target memory range |
461 | * | 461 | * |
462 | * This function will set up the direct mapping for the memory range | 462 | * This function will set up the direct mapping for the memory range |
463 | * [map_start, map_end) top-down. That is, the page tables | 463 | * [map_start, map_end) top-down. That is, the page tables |
464 | * will be allocated at the end of the memory, and we map the | 464 | * will be allocated at the end of the memory, and we map the |
465 | * memory top-down. | 465 | * memory top-down. |
466 | */ | 466 | */ |
467 | static void __init memory_map_top_down(unsigned long map_start, | 467 | static void __init memory_map_top_down(unsigned long map_start, |
468 | unsigned long map_end) | 468 | unsigned long map_end) |
469 | { | 469 | { |
470 | unsigned long real_end, start, last_start; | 470 | unsigned long real_end, start, last_start; |
471 | unsigned long step_size; | 471 | unsigned long step_size; |
472 | unsigned long addr; | 472 | unsigned long addr; |
473 | unsigned long mapped_ram_size = 0; | 473 | unsigned long mapped_ram_size = 0; |
474 | unsigned long new_mapped_ram_size; | ||
475 | 474 | ||
476 | /* Xen has a big reserved range near the end of RAM; skip it at first. */ | 475 | /* Xen has a big reserved range near the end of RAM; skip it at first. */ |
477 | addr = memblock_find_in_range(map_start, map_end, PMD_SIZE, PMD_SIZE); | 476 | addr = memblock_find_in_range(map_start, map_end, PMD_SIZE, PMD_SIZE); |
478 | real_end = addr + PMD_SIZE; | 477 | real_end = addr + PMD_SIZE; |
479 | 478 | ||
480 | /* step_size needs to be small so the pgt_buf from BRK can cover it */ | 479 | /* step_size needs to be small so the pgt_buf from BRK can cover it */ |
481 | step_size = PMD_SIZE; | 480 | step_size = PMD_SIZE; |
482 | max_pfn_mapped = 0; /* will get exact value next */ | 481 | max_pfn_mapped = 0; /* will get exact value next */ |
483 | min_pfn_mapped = real_end >> PAGE_SHIFT; | 482 | min_pfn_mapped = real_end >> PAGE_SHIFT; |
484 | last_start = start = real_end; | 483 | last_start = start = real_end; |
485 | 484 | ||
486 | /* | 485 | /* |
487 | * We start from the top (end of memory) and go to the bottom. | 486 | * We start from the top (end of memory) and go to the bottom. |
488 | * The memblock_find_in_range() gets us a block of RAM from the | 487 | * The memblock_find_in_range() gets us a block of RAM from the |
489 | * end of RAM in [min_pfn_mapped, max_pfn_mapped) to be used as | 488 | * end of RAM in [min_pfn_mapped, max_pfn_mapped) to be used as |
490 | * new pages for the page tables. | 489 | * new pages for the page tables. |
491 | */ | 490 | */ |
492 | while (last_start > map_start) { | 491 | while (last_start > map_start) { |
493 | if (last_start > step_size) { | 492 | if (last_start > step_size) { |
494 | start = round_down(last_start - 1, step_size); | 493 | start = round_down(last_start - 1, step_size); |
495 | if (start < map_start) | 494 | if (start < map_start) |
496 | start = map_start; | 495 | start = map_start; |
497 | } else | 496 | } else |
498 | start = map_start; | 497 | start = map_start; |
499 | new_mapped_ram_size = init_range_memory_mapping(start, | 498 | mapped_ram_size += init_range_memory_mapping(start, |
500 | last_start); | 499 | last_start); |
501 | last_start = start; | 500 | last_start = start; |
502 | min_pfn_mapped = last_start >> PAGE_SHIFT; | 501 | min_pfn_mapped = last_start >> PAGE_SHIFT; |
503 | /* only increase step_size after a big range gets mapped */ | 502 | if (mapped_ram_size >= step_size) |
504 | if (new_mapped_ram_size > mapped_ram_size) | ||
505 | step_size = get_new_step_size(step_size); | 503 | step_size = get_new_step_size(step_size); |
506 | mapped_ram_size += new_mapped_ram_size; | ||
507 | } | 504 | } |
508 | 505 | ||
509 | if (real_end < map_end) | 506 | if (real_end < map_end) |
510 | init_range_memory_mapping(real_end, map_end); | 507 | init_range_memory_mapping(real_end, map_end); |
511 | } | 508 | } |
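
To see why the `mapped_ram_size >= step_size` test (rather than the old comparison against the previous chunk alone) keeps the iteration count small, here is a stand-alone simulation of the loop above, assuming a 64-bit unsigned long and a made-up 8G ceiling; it converges in three chunks of 2M, 510M, and then the rest:

    #include <stdio.h>

    #define PAGE_SHIFT      12
    #define PMD_SHIFT       21
    #define PMD_SIZE        (1UL << PMD_SHIFT)

    static unsigned long get_new_step_size(unsigned long step_size)
    {
            return step_size << (PMD_SHIFT - PAGE_SHIFT - 1);
    }

    static unsigned long round_down_pow2(unsigned long x, unsigned long a)
    {
            return x & ~(a - 1);    /* a must be a power of two */
    }

    int main(void)
    {
            unsigned long map_start = 0x100000;             /* above the ISA range */
            unsigned long last_start = 0x200000000UL;       /* pretend real_end = 8G */
            unsigned long start, step_size = PMD_SIZE, mapped = 0;

            while (last_start > map_start) {
                    if (last_start > step_size) {
                            start = round_down_pow2(last_start - 1, step_size);
                            if (start < map_start)
                                    start = map_start;
                    } else
                            start = map_start;
                    printf("map [%#lx, %#lx)\n", start, last_start);
                    mapped += last_start - start;
                    last_start = start;
                    if (mapped >= step_size)
                            step_size = get_new_step_size(step_size);
            }
            return 0;
    }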
512 | 509 | ||
513 | /** | 510 | /** |
514 | * memory_map_bottom_up - Map [map_start, map_end) bottom up | 511 | * memory_map_bottom_up - Map [map_start, map_end) bottom up |
515 | * @map_start: start address of the target memory range | 512 | * @map_start: start address of the target memory range |
516 | * @map_end: end address of the target memory range | 513 | * @map_end: end address of the target memory range |
517 | * | 514 | * |
518 | * This function will set up the direct mapping for the memory range | 515 | * This function will set up the direct mapping for the memory range |
519 | * [map_start, map_end) bottom-up. Since we have limited the | 516 | * [map_start, map_end) bottom-up. Since we have limited the |
520 | * bottom-up allocation to above the kernel, the page tables will | 517 | * bottom-up allocation to above the kernel, the page tables will |
521 | * be allocated just above the kernel, and we map the memory | 518 | * be allocated just above the kernel, and we map the memory |
522 | * in [map_start, map_end) bottom-up. | 519 | * in [map_start, map_end) bottom-up. |
523 | */ | 520 | */ |
524 | static void __init memory_map_bottom_up(unsigned long map_start, | 521 | static void __init memory_map_bottom_up(unsigned long map_start, |
525 | unsigned long map_end) | 522 | unsigned long map_end) |
526 | { | 523 | { |
527 | unsigned long next, new_mapped_ram_size, start; | 524 | unsigned long next, start; |
528 | unsigned long mapped_ram_size = 0; | 525 | unsigned long mapped_ram_size = 0; |
529 | /* step_size needs to be small so the pgt_buf from BRK can cover it */ | 526 | /* step_size needs to be small so the pgt_buf from BRK can cover it */ |
530 | unsigned long step_size = PMD_SIZE; | 527 | unsigned long step_size = PMD_SIZE; |
531 | 528 | ||
532 | start = map_start; | 529 | start = map_start; |
533 | min_pfn_mapped = start >> PAGE_SHIFT; | 530 | min_pfn_mapped = start >> PAGE_SHIFT; |
534 | 531 | ||
535 | /* | 532 | /* |
536 | * We start from the bottom (@map_start) and go to the top (@map_end). | 533 | * We start from the bottom (@map_start) and go to the top (@map_end). |
537 | * The memblock_find_in_range() gets us a block of RAM from the | 534 | * The memblock_find_in_range() gets us a block of RAM from the |
538 | * end of RAM in [min_pfn_mapped, max_pfn_mapped) to be used as | 535 | * end of RAM in [min_pfn_mapped, max_pfn_mapped) to be used as |
539 | * new pages for the page tables. | 536 | * new pages for the page tables. |
540 | */ | 537 | */ |
541 | while (start < map_end) { | 538 | while (start < map_end) { |
542 | if (map_end - start > step_size) { | 539 | if (step_size && map_end - start > step_size) { |
543 | next = round_up(start + 1, step_size); | 540 | next = round_up(start + 1, step_size); |
544 | if (next > map_end) | 541 | if (next > map_end) |
545 | next = map_end; | 542 | next = map_end; |
546 | } else | 543 | } else { |
547 | next = map_end; | 544 | next = map_end; |
545 | } | ||
548 | 546 | ||
549 | new_mapped_ram_size = init_range_memory_mapping(start, next); | 547 | mapped_ram_size += init_range_memory_mapping(start, next); |
550 | start = next; | 548 | start = next; |
551 | 549 | ||
552 | if (new_mapped_ram_size > mapped_ram_size) | 550 | if (mapped_ram_size >= step_size) |
553 | step_size = get_new_step_size(step_size); | 551 | step_size = get_new_step_size(step_size); |
554 | mapped_ram_size += new_mapped_ram_size; | ||
555 | } | 552 | } |
556 | } | 553 | } |
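
The added `step_size &&` guard is not cosmetic: each adjustment shifts step_size left by 8, so on 32-bit it wraps to 0 after only two adjustments (2M -> 512M -> 0), and the kernel's round_up() degenerates for a 0 alignment. A minimal stand-alone demonstration, assuming the usual round_up() macro:

    #include <stdio.h>

    /* The kernel's round_up(), which collapses to 0 when y == 0. */
    #define round_up(x, y)  ((((x) - 1) | ((y) - 1)) + 1)

    int main(void)
    {
            unsigned long start = 0x40000000UL;

            /* Normal case: advance to the next 2M boundary. */
            printf("%#lx\n", round_up(start + 1, 0x200000UL));      /* 0x40200000 */

            /* Once step_size has wrapped to 0, round_up() returns 0, which
             * would send `next` below `start`; the step_size check above
             * falls back to mapping straight to map_end instead. */
            printf("%#lx\n", round_up(start + 1, 0UL));             /* 0 */
            return 0;
    }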
557 | 554 | ||
558 | void __init init_mem_mapping(void) | 555 | void __init init_mem_mapping(void) |
559 | { | 556 | { |
560 | unsigned long end; | 557 | unsigned long end; |
561 | 558 | ||
562 | probe_page_size_mask(); | 559 | probe_page_size_mask(); |
563 | 560 | ||
564 | #ifdef CONFIG_X86_64 | 561 | #ifdef CONFIG_X86_64 |
565 | end = max_pfn << PAGE_SHIFT; | 562 | end = max_pfn << PAGE_SHIFT; |
566 | #else | 563 | #else |
567 | end = max_low_pfn << PAGE_SHIFT; | 564 | end = max_low_pfn << PAGE_SHIFT; |
568 | #endif | 565 | #endif |
569 | 566 | ||
570 | /* the ISA range is always mapped regardless of memory holes */ | 567 | /* the ISA range is always mapped regardless of memory holes */ |
571 | init_memory_mapping(0, ISA_END_ADDRESS); | 568 | init_memory_mapping(0, ISA_END_ADDRESS); |
572 | 569 | ||
573 | /* | 570 | /* |
574 | * If the allocation is in the bottom-up direction, we set up the direct | 571 | * If the allocation is in the bottom-up direction, we set up the direct |
575 | * mapping bottom-up; otherwise we set it up top-down. | 572 | * mapping bottom-up; otherwise we set it up top-down. |
576 | */ | 573 | */ |
577 | if (memblock_bottom_up()) { | 574 | if (memblock_bottom_up()) { |
578 | unsigned long kernel_end = __pa_symbol(_end); | 575 | unsigned long kernel_end = __pa_symbol(_end); |
579 | 576 | ||
580 | /* | 577 | /* |
581 | * We need two separate calls here because we want to allocate | 578 | * We need two separate calls here because we want to allocate |
582 | * the page tables above the kernel. So we first map | 579 | * the page tables above the kernel. So we first map |
583 | * [kernel_end, end) to get the memory above the kernel mapped | 580 | * [kernel_end, end) to get the memory above the kernel mapped |
584 | * as soon as possible, and then use the page tables allocated | 581 | * as soon as possible, and then use the page tables allocated |
585 | * above the kernel to map [ISA_END_ADDRESS, kernel_end). | 582 | * above the kernel to map [ISA_END_ADDRESS, kernel_end). |
586 | */ | 583 | */ |
587 | memory_map_bottom_up(kernel_end, end); | 584 | memory_map_bottom_up(kernel_end, end); |
588 | memory_map_bottom_up(ISA_END_ADDRESS, kernel_end); | 585 | memory_map_bottom_up(ISA_END_ADDRESS, kernel_end); |
589 | } else { | 586 | } else { |
590 | memory_map_top_down(ISA_END_ADDRESS, end); | 587 | memory_map_top_down(ISA_END_ADDRESS, end); |
591 | } | 588 | } |
592 | 589 | ||
593 | #ifdef CONFIG_X86_64 | 590 | #ifdef CONFIG_X86_64 |
594 | if (max_pfn > max_low_pfn) { | 591 | if (max_pfn > max_low_pfn) { |
595 | /* can we preserve max_low_pfn? */ | 592 | /* can we preserve max_low_pfn? */ |
596 | max_low_pfn = max_pfn; | 593 | max_low_pfn = max_pfn; |
597 | } | 594 | } |
598 | #else | 595 | #else |
599 | early_ioremap_page_table_range_init(); | 596 | early_ioremap_page_table_range_init(); |
600 | #endif | 597 | #endif |
601 | 598 | ||
602 | load_cr3(swapper_pg_dir); | 599 | load_cr3(swapper_pg_dir); |
603 | __flush_tlb_all(); | 600 | __flush_tlb_all(); |
604 | 601 | ||
605 | early_memtest(0, max_pfn_mapped << PAGE_SHIFT); | 602 | early_memtest(0, max_pfn_mapped << PAGE_SHIFT); |
606 | } | 603 | } |
607 | 604 | ||
608 | /* | 605 | /* |
609 | * devmem_is_allowed() checks to see if /dev/mem access to a certain address | 606 | * devmem_is_allowed() checks to see if /dev/mem access to a certain address |
610 | * is valid. The argument is a physical page number. | 607 | * is valid. The argument is a physical page number. |
611 | * | 608 | * |
612 | * | 609 | * |
613 | * On x86, access has to be given to the first megabyte of RAM because that area | 610 | * On x86, access has to be given to the first megabyte of RAM because that area |
614 | * contains BIOS code and data regions used by X, dosemu, and similar apps. | 611 | * contains BIOS code and data regions used by X, dosemu, and similar apps. |
615 | * Access has to be given to non-kernel-RAM areas as well; these contain the PCI | 612 | * Access has to be given to non-kernel-RAM areas as well; these contain the PCI |
616 | * MMIO resources as well as potential BIOS/ACPI data regions. | 613 | * MMIO resources as well as potential BIOS/ACPI data regions. |
617 | */ | 614 | */ |
618 | int devmem_is_allowed(unsigned long pagenr) | 615 | int devmem_is_allowed(unsigned long pagenr) |
619 | { | 616 | { |
620 | if (pagenr < 256) | 617 | if (pagenr < 256) |
621 | return 1; | 618 | return 1; |
622 | if (iomem_is_exclusive(pagenr << PAGE_SHIFT)) | 619 | if (iomem_is_exclusive(pagenr << PAGE_SHIFT)) |
623 | return 0; | 620 | return 0; |
624 | if (!page_is_ram(pagenr)) | 621 | if (!page_is_ram(pagenr)) |
625 | return 1; | 622 | return 1; |
626 | return 0; | 623 | return 0; |
627 | } | 624 | } |
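
The `pagenr < 256` test is the first-megabyte rule from the comment above: 256 pages x 4096 bytes = 1MB. A hypothetical user-space sketch that stays inside that window (the 0xF0000 BIOS shadow address is an assumption about a conventional PC layout; needs root):

    #include <fcntl.h>
    #include <stdio.h>
    #include <sys/mman.h>
    #include <unistd.h>

    int main(void)
    {
            int fd = open("/dev/mem", O_RDONLY);
            if (fd < 0)
                    return 1;
            /* Map the classic BIOS ROM shadow at 0xF0000 (page 0xF0 < 256). */
            unsigned char *p = mmap(NULL, 4096, PROT_READ, MAP_SHARED,
                                    fd, 0xF0000);
            if (p != MAP_FAILED) {
                    printf("first byte: %#x\n", p[0]);
                    munmap(p, 4096);
            }
            close(fd);
            return 0;
    }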
628 | 625 | ||
629 | void free_init_pages(char *what, unsigned long begin, unsigned long end) | 626 | void free_init_pages(char *what, unsigned long begin, unsigned long end) |
630 | { | 627 | { |
631 | unsigned long begin_aligned, end_aligned; | 628 | unsigned long begin_aligned, end_aligned; |
632 | 629 | ||
633 | /* Make sure boundaries are page aligned */ | 630 | /* Make sure boundaries are page aligned */ |
634 | begin_aligned = PAGE_ALIGN(begin); | 631 | begin_aligned = PAGE_ALIGN(begin); |
635 | end_aligned = end & PAGE_MASK; | 632 | end_aligned = end & PAGE_MASK; |
636 | 633 | ||
637 | if (WARN_ON(begin_aligned != begin || end_aligned != end)) { | 634 | if (WARN_ON(begin_aligned != begin || end_aligned != end)) { |
638 | begin = begin_aligned; | 635 | begin = begin_aligned; |
639 | end = end_aligned; | 636 | end = end_aligned; |
640 | } | 637 | } |
641 | 638 | ||
642 | if (begin >= end) | 639 | if (begin >= end) |
643 | return; | 640 | return; |
644 | 641 | ||
645 | /* | 642 | /* |
646 | * If debugging page accesses, do not free this memory; mark it | 643 | * If debugging page accesses, do not free this memory; mark it |
647 | * not present instead - any buggy init-section access will | 644 | * not present instead - any buggy init-section access will |
648 | * create a kernel page fault: | 645 | * create a kernel page fault: |
649 | */ | 646 | */ |
650 | #ifdef CONFIG_DEBUG_PAGEALLOC | 647 | #ifdef CONFIG_DEBUG_PAGEALLOC |
651 | printk(KERN_INFO "debug: unmapping init [mem %#010lx-%#010lx]\n", | 648 | printk(KERN_INFO "debug: unmapping init [mem %#010lx-%#010lx]\n", |
652 | begin, end - 1); | 649 | begin, end - 1); |
653 | set_memory_np(begin, (end - begin) >> PAGE_SHIFT); | 650 | set_memory_np(begin, (end - begin) >> PAGE_SHIFT); |
654 | #else | 651 | #else |
655 | /* | 652 | /* |
656 | * We just marked the kernel text read-only above; now that | 653 | * We just marked the kernel text read-only above; now that |
657 | * we are going to free part of it, we need to make it | 654 | * we are going to free part of it, we need to make it |
658 | * writable and non-executable first. | 655 | * writable and non-executable first. |
659 | */ | 656 | */ |
660 | set_memory_nx(begin, (end - begin) >> PAGE_SHIFT); | 657 | set_memory_nx(begin, (end - begin) >> PAGE_SHIFT); |
661 | set_memory_rw(begin, (end - begin) >> PAGE_SHIFT); | 658 | set_memory_rw(begin, (end - begin) >> PAGE_SHIFT); |
662 | 659 | ||
663 | free_reserved_area((void *)begin, (void *)end, POISON_FREE_INITMEM, what); | 660 | free_reserved_area((void *)begin, (void *)end, POISON_FREE_INITMEM, what); |
664 | #endif | 661 | #endif |
665 | } | 662 | } |
666 | 663 | ||
667 | void free_initmem(void) | 664 | void free_initmem(void) |
668 | { | 665 | { |
669 | free_init_pages("unused kernel", | 666 | free_init_pages("unused kernel", |
670 | (unsigned long)(&__init_begin), | 667 | (unsigned long)(&__init_begin), |
671 | (unsigned long)(&__init_end)); | 668 | (unsigned long)(&__init_end)); |
672 | } | 669 | } |
673 | 670 | ||
674 | #ifdef CONFIG_BLK_DEV_INITRD | 671 | #ifdef CONFIG_BLK_DEV_INITRD |
675 | void __init free_initrd_mem(unsigned long start, unsigned long end) | 672 | void __init free_initrd_mem(unsigned long start, unsigned long end) |
676 | { | 673 | { |
677 | #ifdef CONFIG_MICROCODE_EARLY | 674 | #ifdef CONFIG_MICROCODE_EARLY |
678 | /* | 675 | /* |
679 | * Remember, initrd memory may contain microcode or other useful things. | 676 | * Remember, initrd memory may contain microcode or other useful things. |
680 | * Before we lose initrd mem, we need to find a place to hold them | 677 | * Before we lose initrd mem, we need to find a place to hold them |
681 | * now that normal virtual memory is enabled. | 678 | * now that normal virtual memory is enabled. |
682 | */ | 679 | */ |
683 | save_microcode_in_initrd(); | 680 | save_microcode_in_initrd(); |
684 | #endif | 681 | #endif |
685 | 682 | ||
686 | /* | 683 | /* |
687 | * end may not be aligned, and we cannot align it; the | 684 | * end may not be aligned, and we cannot align it; the |
688 | * decompressor could be confused by an aligned initrd_end. | 685 | * decompressor could be confused by an aligned initrd_end. |
689 | * We already reserved the trailing partial page earlier in | 686 | * We already reserved the trailing partial page earlier in |
690 | * - i386_start_kernel() | 687 | * - i386_start_kernel() |
691 | * - x86_64_start_kernel() | 688 | * - x86_64_start_kernel() |
692 | * - relocate_initrd() | 689 | * - relocate_initrd() |
693 | * so here we can safely PAGE_ALIGN() to get the partial page freed too. | 690 | * so here we can safely PAGE_ALIGN() to get the partial page freed too. |
694 | */ | 691 | */ |
695 | free_init_pages("initrd", start, PAGE_ALIGN(end)); | 692 | free_init_pages("initrd", start, PAGE_ALIGN(end)); |
696 | } | 693 | } |
697 | #endif | 694 | #endif |
698 | 695 | ||
699 | void __init zone_sizes_init(void) | 696 | void __init zone_sizes_init(void) |
700 | { | 697 | { |
701 | unsigned long max_zone_pfns[MAX_NR_ZONES]; | 698 | unsigned long max_zone_pfns[MAX_NR_ZONES]; |
702 | 699 | ||
703 | memset(max_zone_pfns, 0, sizeof(max_zone_pfns)); | 700 | memset(max_zone_pfns, 0, sizeof(max_zone_pfns)); |
704 | 701 | ||
705 | #ifdef CONFIG_ZONE_DMA | 702 | #ifdef CONFIG_ZONE_DMA |
706 | max_zone_pfns[ZONE_DMA] = min(MAX_DMA_PFN, max_low_pfn); | 703 | max_zone_pfns[ZONE_DMA] = min(MAX_DMA_PFN, max_low_pfn); |
707 | #endif | 704 | #endif |
708 | #ifdef CONFIG_ZONE_DMA32 | 705 | #ifdef CONFIG_ZONE_DMA32 |
709 | max_zone_pfns[ZONE_DMA32] = min(MAX_DMA32_PFN, max_low_pfn); | 706 | max_zone_pfns[ZONE_DMA32] = min(MAX_DMA32_PFN, max_low_pfn); |
710 | #endif | 707 | #endif |
711 | max_zone_pfns[ZONE_NORMAL] = max_low_pfn; | 708 | max_zone_pfns[ZONE_NORMAL] = max_low_pfn; |
712 | #ifdef CONFIG_HIGHMEM | 709 | #ifdef CONFIG_HIGHMEM |
713 | max_zone_pfns[ZONE_HIGHMEM] = max_pfn; | 710 | max_zone_pfns[ZONE_HIGHMEM] = max_pfn; |
714 | #endif | 711 | #endif |
715 | 712 | ||
716 | free_area_init_nodes(max_zone_pfns); | 713 | free_area_init_nodes(max_zone_pfns); |
717 | } | 714 | } |
718 | 715 | ||
719 | void update_cache_mode_entry(unsigned entry, enum page_cache_mode cache) | 716 | void update_cache_mode_entry(unsigned entry, enum page_cache_mode cache) |
720 | { | 717 | { |
721 | /* entry 0 MUST be WB (hardwired to speed up translations) */ | 718 | /* entry 0 MUST be WB (hardwired to speed up translations) */ |
722 | BUG_ON(!entry && cache != _PAGE_CACHE_MODE_WB); | 719 | BUG_ON(!entry && cache != _PAGE_CACHE_MODE_WB); |
723 | 720 |
arch/x86/vdso/vma.c
1 | /* | 1 | /* |
2 | * Copyright 2007 Andi Kleen, SUSE Labs. | 2 | * Copyright 2007 Andi Kleen, SUSE Labs. |
3 | * Subject to the GPL, v.2 | 3 | * Subject to the GPL, v.2 |
4 | * | 4 | * |
5 | * This contains most of the x86 vDSO kernel-side code. | 5 | * This contains most of the x86 vDSO kernel-side code. |
6 | */ | 6 | */ |
7 | #include <linux/mm.h> | 7 | #include <linux/mm.h> |
8 | #include <linux/err.h> | 8 | #include <linux/err.h> |
9 | #include <linux/sched.h> | 9 | #include <linux/sched.h> |
10 | #include <linux/slab.h> | 10 | #include <linux/slab.h> |
11 | #include <linux/init.h> | 11 | #include <linux/init.h> |
12 | #include <linux/random.h> | 12 | #include <linux/random.h> |
13 | #include <linux/elf.h> | 13 | #include <linux/elf.h> |
14 | #include <linux/cpu.h> | 14 | #include <linux/cpu.h> |
15 | #include <asm/vgtod.h> | 15 | #include <asm/vgtod.h> |
16 | #include <asm/proto.h> | 16 | #include <asm/proto.h> |
17 | #include <asm/vdso.h> | 17 | #include <asm/vdso.h> |
18 | #include <asm/vvar.h> | 18 | #include <asm/vvar.h> |
19 | #include <asm/page.h> | 19 | #include <asm/page.h> |
20 | #include <asm/hpet.h> | 20 | #include <asm/hpet.h> |
21 | #include <asm/desc.h> | 21 | #include <asm/desc.h> |
22 | 22 | ||
23 | #if defined(CONFIG_X86_64) | 23 | #if defined(CONFIG_X86_64) |
24 | unsigned int __read_mostly vdso64_enabled = 1; | 24 | unsigned int __read_mostly vdso64_enabled = 1; |
25 | #endif | 25 | #endif |
26 | 26 | ||
27 | void __init init_vdso_image(const struct vdso_image *image) | 27 | void __init init_vdso_image(const struct vdso_image *image) |
28 | { | 28 | { |
29 | int i; | 29 | int i; |
30 | int npages = (image->size) / PAGE_SIZE; | 30 | int npages = (image->size) / PAGE_SIZE; |
31 | 31 | ||
32 | BUG_ON(image->size % PAGE_SIZE != 0); | 32 | BUG_ON(image->size % PAGE_SIZE != 0); |
33 | for (i = 0; i < npages; i++) | 33 | for (i = 0; i < npages; i++) |
34 | image->text_mapping.pages[i] = | 34 | image->text_mapping.pages[i] = |
35 | virt_to_page(image->data + i*PAGE_SIZE); | 35 | virt_to_page(image->data + i*PAGE_SIZE); |
36 | 36 | ||
37 | apply_alternatives((struct alt_instr *)(image->data + image->alt), | 37 | apply_alternatives((struct alt_instr *)(image->data + image->alt), |
38 | (struct alt_instr *)(image->data + image->alt + | 38 | (struct alt_instr *)(image->data + image->alt + |
39 | image->alt_len)); | 39 | image->alt_len)); |
40 | } | 40 | } |
41 | 41 | ||
42 | struct linux_binprm; | 42 | struct linux_binprm; |
43 | 43 | ||
44 | /* Put the vdso above the (randomized) stack with another randomized offset. | 44 | /* |
45 | This way there is no hole in the middle of address space. | 45 | * Put the vdso above the (randomized) stack with another randomized |
46 | To save memory make sure it is still in the same PTE as the stack top. | 46 | * offset. This way there is no hole in the middle of address space. |
47 | This doesn't give that many random bits. | 47 | * To save memory make sure it is still in the same PTE as the stack |
48 | 48 | * top. This doesn't give that many random bits. | |
49 | Only used for the 64-bit and x32 vdsos. */ | 49 | * |
50 | * Note that this algorithm is imperfect: the distribution of the vdso | ||
51 | * start address within a PMD is biased toward the end. | ||
52 | * | ||
53 | * Only used for the 64-bit and x32 vdsos. | ||
54 | */ | ||
50 | static unsigned long vdso_addr(unsigned long start, unsigned len) | 55 | static unsigned long vdso_addr(unsigned long start, unsigned len) |
51 | { | 56 | { |
52 | #ifdef CONFIG_X86_32 | 57 | #ifdef CONFIG_X86_32 |
53 | return 0; | 58 | return 0; |
54 | #else | 59 | #else |
55 | unsigned long addr, end; | 60 | unsigned long addr, end; |
56 | unsigned offset; | 61 | unsigned offset; |
57 | end = (start + PMD_SIZE - 1) & PMD_MASK; | 62 | |
63 | /* | ||
64 | * Round up the start address. It can start out unaligned as a result | ||
65 | * of stack start randomization. | ||
66 | */ | ||
67 | start = PAGE_ALIGN(start); | ||
68 | |||
69 | /* Round the lowest possible end address up to a PMD boundary. */ | ||
70 | end = (start + len + PMD_SIZE - 1) & PMD_MASK; | ||
58 | if (end >= TASK_SIZE_MAX) | 71 | if (end >= TASK_SIZE_MAX) |
59 | end = TASK_SIZE_MAX; | 72 | end = TASK_SIZE_MAX; |
60 | end -= len; | 73 | end -= len; |
61 | /* This loses some more bits than a modulo, but is cheaper */ | ||
62 | offset = get_random_int() & (PTRS_PER_PTE - 1); | ||
63 | addr = start + (offset << PAGE_SHIFT); | ||
64 | if (addr >= end) | ||
65 | addr = end; | ||
66 | 74 | ||
75 | if (end > start) { | ||
76 | offset = get_random_int() % (((end - start) >> PAGE_SHIFT) + 1); | ||
77 | addr = start + (offset << PAGE_SHIFT); | ||
78 | } else { | ||
79 | addr = start; | ||
80 | } | ||
81 | |||
67 | /* | 82 | /* |
68 | * page-align it here so that get_unmapped_area doesn't | 83 | * Forcibly align the final address in case we have a hardware |
69 | * align it wrongfully again to the next page. addr can come in 4K | 84 | * issue that requires alignment for performance reasons. |
70 | * unaligned here as a result of stack start randomization. | ||
71 | */ | 85 | */ |
72 | addr = PAGE_ALIGN(addr); | ||
73 | addr = align_vdso_addr(addr); | 86 | addr = align_vdso_addr(addr); |
74 | 87 | ||
75 | return addr; | 88 | return addr; |
76 | #endif | 89 | #endif |
77 | } | 90 | } |
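
The replacement above swaps the mask-and-clamp offset for a modulo over the actual number of candidate page slots. A stand-alone sketch (hypothetical slot count of 4; rand() standing in for get_random_int()) shows how badly the clamp skewed the old distribution:

    #include <stdio.h>
    #include <stdlib.h>

    int main(void)
    {
            unsigned long slots = 4;        /* assumed pages between start and end */
            unsigned long ptrs_per_pte = 512;
            unsigned long hist_old[4] = {0}, hist_new[4] = {0};
            unsigned long i;

            srand(1);
            for (i = 0; i < 1000000; i++) {
                    unsigned long r = (unsigned long)rand();
                    unsigned long off = r & (ptrs_per_pte - 1);

                    /* Old scheme: any offset past the window clamps to `end`. */
                    hist_old[off >= slots ? slots - 1 : off]++;
                    /* New scheme: uniform over the available slots. */
                    hist_new[r % slots]++;
            }
            for (i = 0; i < slots; i++)
                    printf("slot %lu: old %lu  new %lu\n",
                           i, hist_old[i], hist_new[i]);
            return 0;
    }

With these numbers the old scheme lands on the last slot about 99.4% of the time (509 of the 512 mask values clamp there), while the new scheme is flat across all four slots.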
78 | 91 | ||
79 | static int map_vdso(const struct vdso_image *image, bool calculate_addr) | 92 | static int map_vdso(const struct vdso_image *image, bool calculate_addr) |
80 | { | 93 | { |
81 | struct mm_struct *mm = current->mm; | 94 | struct mm_struct *mm = current->mm; |
82 | struct vm_area_struct *vma; | 95 | struct vm_area_struct *vma; |
83 | unsigned long addr, text_start; | 96 | unsigned long addr, text_start; |
84 | int ret = 0; | 97 | int ret = 0; |
85 | static struct page *no_pages[] = {NULL}; | 98 | static struct page *no_pages[] = {NULL}; |
86 | static struct vm_special_mapping vvar_mapping = { | 99 | static struct vm_special_mapping vvar_mapping = { |
87 | .name = "[vvar]", | 100 | .name = "[vvar]", |
88 | .pages = no_pages, | 101 | .pages = no_pages, |
89 | }; | 102 | }; |
90 | 103 | ||
91 | if (calculate_addr) { | 104 | if (calculate_addr) { |
92 | addr = vdso_addr(current->mm->start_stack, | 105 | addr = vdso_addr(current->mm->start_stack, |
93 | image->size - image->sym_vvar_start); | 106 | image->size - image->sym_vvar_start); |
94 | } else { | 107 | } else { |
95 | addr = 0; | 108 | addr = 0; |
96 | } | 109 | } |
97 | 110 | ||
98 | down_write(&mm->mmap_sem); | 111 | down_write(&mm->mmap_sem); |
99 | 112 | ||
100 | addr = get_unmapped_area(NULL, addr, | 113 | addr = get_unmapped_area(NULL, addr, |
101 | image->size - image->sym_vvar_start, 0, 0); | 114 | image->size - image->sym_vvar_start, 0, 0); |
102 | if (IS_ERR_VALUE(addr)) { | 115 | if (IS_ERR_VALUE(addr)) { |
103 | ret = addr; | 116 | ret = addr; |
104 | goto up_fail; | 117 | goto up_fail; |
105 | } | 118 | } |
106 | 119 | ||
107 | text_start = addr - image->sym_vvar_start; | 120 | text_start = addr - image->sym_vvar_start; |
108 | current->mm->context.vdso = (void __user *)text_start; | 121 | current->mm->context.vdso = (void __user *)text_start; |
109 | 122 | ||
110 | /* | 123 | /* |
111 | * MAYWRITE to allow gdb to COW and set breakpoints | 124 | * MAYWRITE to allow gdb to COW and set breakpoints |
112 | */ | 125 | */ |
113 | vma = _install_special_mapping(mm, | 126 | vma = _install_special_mapping(mm, |
114 | text_start, | 127 | text_start, |
115 | image->size, | 128 | image->size, |
116 | VM_READ|VM_EXEC| | 129 | VM_READ|VM_EXEC| |
117 | VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC, | 130 | VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC, |
118 | &image->text_mapping); | 131 | &image->text_mapping); |
119 | 132 | ||
120 | if (IS_ERR(vma)) { | 133 | if (IS_ERR(vma)) { |
121 | ret = PTR_ERR(vma); | 134 | ret = PTR_ERR(vma); |
122 | goto up_fail; | 135 | goto up_fail; |
123 | } | 136 | } |
124 | 137 | ||
125 | vma = _install_special_mapping(mm, | 138 | vma = _install_special_mapping(mm, |
126 | addr, | 139 | addr, |
127 | -image->sym_vvar_start, | 140 | -image->sym_vvar_start, |
128 | VM_READ|VM_MAYREAD, | 141 | VM_READ|VM_MAYREAD, |
129 | &vvar_mapping); | 142 | &vvar_mapping); |
130 | 143 | ||
131 | if (IS_ERR(vma)) { | 144 | if (IS_ERR(vma)) { |
132 | ret = PTR_ERR(vma); | 145 | ret = PTR_ERR(vma); |
133 | goto up_fail; | 146 | goto up_fail; |
134 | } | 147 | } |
135 | 148 | ||
136 | if (image->sym_vvar_page) | 149 | if (image->sym_vvar_page) |
137 | ret = remap_pfn_range(vma, | 150 | ret = remap_pfn_range(vma, |
138 | text_start + image->sym_vvar_page, | 151 | text_start + image->sym_vvar_page, |
139 | __pa_symbol(&__vvar_page) >> PAGE_SHIFT, | 152 | __pa_symbol(&__vvar_page) >> PAGE_SHIFT, |
140 | PAGE_SIZE, | 153 | PAGE_SIZE, |
141 | PAGE_READONLY); | 154 | PAGE_READONLY); |
142 | 155 | ||
143 | if (ret) | 156 | if (ret) |
144 | goto up_fail; | 157 | goto up_fail; |
145 | 158 | ||
146 | #ifdef CONFIG_HPET_TIMER | 159 | #ifdef CONFIG_HPET_TIMER |
147 | if (hpet_address && image->sym_hpet_page) { | 160 | if (hpet_address && image->sym_hpet_page) { |
148 | ret = io_remap_pfn_range(vma, | 161 | ret = io_remap_pfn_range(vma, |
149 | text_start + image->sym_hpet_page, | 162 | text_start + image->sym_hpet_page, |
150 | hpet_address >> PAGE_SHIFT, | 163 | hpet_address >> PAGE_SHIFT, |
151 | PAGE_SIZE, | 164 | PAGE_SIZE, |
152 | pgprot_noncached(PAGE_READONLY)); | 165 | pgprot_noncached(PAGE_READONLY)); |
153 | 166 | ||
154 | if (ret) | 167 | if (ret) |
155 | goto up_fail; | 168 | goto up_fail; |
156 | } | 169 | } |
157 | #endif | 170 | #endif |
158 | 171 | ||
159 | up_fail: | 172 | up_fail: |
160 | if (ret) | 173 | if (ret) |
161 | current->mm->context.vdso = NULL; | 174 | current->mm->context.vdso = NULL; |
162 | 175 | ||
163 | up_write(&mm->mmap_sem); | 176 | up_write(&mm->mmap_sem); |
164 | return ret; | 177 | return ret; |
165 | } | 178 | } |
166 | 179 | ||
167 | #if defined(CONFIG_X86_32) || defined(CONFIG_COMPAT) | 180 | #if defined(CONFIG_X86_32) || defined(CONFIG_COMPAT) |
168 | static int load_vdso32(void) | 181 | static int load_vdso32(void) |
169 | { | 182 | { |
170 | int ret; | 183 | int ret; |
171 | 184 | ||
172 | if (vdso32_enabled != 1) /* Other values all mean "disabled" */ | 185 | if (vdso32_enabled != 1) /* Other values all mean "disabled" */ |
173 | return 0; | 186 | return 0; |
174 | 187 | ||
175 | ret = map_vdso(selected_vdso32, false); | 188 | ret = map_vdso(selected_vdso32, false); |
176 | if (ret) | 189 | if (ret) |
177 | return ret; | 190 | return ret; |
178 | 191 | ||
179 | if (selected_vdso32->sym_VDSO32_SYSENTER_RETURN) | 192 | if (selected_vdso32->sym_VDSO32_SYSENTER_RETURN) |
180 | current_thread_info()->sysenter_return = | 193 | current_thread_info()->sysenter_return = |
181 | current->mm->context.vdso + | 194 | current->mm->context.vdso + |
182 | selected_vdso32->sym_VDSO32_SYSENTER_RETURN; | 195 | selected_vdso32->sym_VDSO32_SYSENTER_RETURN; |
183 | 196 | ||
184 | return 0; | 197 | return 0; |
185 | } | 198 | } |
186 | #endif | 199 | #endif |
187 | 200 | ||
188 | #ifdef CONFIG_X86_64 | 201 | #ifdef CONFIG_X86_64 |
189 | int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp) | 202 | int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp) |
190 | { | 203 | { |
191 | if (!vdso64_enabled) | 204 | if (!vdso64_enabled) |
192 | return 0; | 205 | return 0; |
193 | 206 | ||
194 | return map_vdso(&vdso_image_64, true); | 207 | return map_vdso(&vdso_image_64, true); |
195 | } | 208 | } |
196 | 209 | ||
197 | #ifdef CONFIG_COMPAT | 210 | #ifdef CONFIG_COMPAT |
198 | int compat_arch_setup_additional_pages(struct linux_binprm *bprm, | 211 | int compat_arch_setup_additional_pages(struct linux_binprm *bprm, |
199 | int uses_interp) | 212 | int uses_interp) |
200 | { | 213 | { |
201 | #ifdef CONFIG_X86_X32_ABI | 214 | #ifdef CONFIG_X86_X32_ABI |
202 | if (test_thread_flag(TIF_X32)) { | 215 | if (test_thread_flag(TIF_X32)) { |
203 | if (!vdso64_enabled) | 216 | if (!vdso64_enabled) |
204 | return 0; | 217 | return 0; |
205 | 218 | ||
206 | return map_vdso(&vdso_image_x32, true); | 219 | return map_vdso(&vdso_image_x32, true); |
207 | } | 220 | } |
208 | #endif | 221 | #endif |
209 | 222 | ||
210 | return load_vdso32(); | 223 | return load_vdso32(); |
211 | } | 224 | } |
212 | #endif | 225 | #endif |
213 | #else | 226 | #else |
214 | int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp) | 227 | int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp) |
215 | { | 228 | { |
216 | return load_vdso32(); | 229 | return load_vdso32(); |
217 | } | 230 | } |
218 | #endif | 231 | #endif |
219 | 232 | ||
220 | #ifdef CONFIG_X86_64 | 233 | #ifdef CONFIG_X86_64 |
221 | static __init int vdso_setup(char *s) | 234 | static __init int vdso_setup(char *s) |
222 | { | 235 | { |
223 | vdso64_enabled = simple_strtoul(s, NULL, 0); | 236 | vdso64_enabled = simple_strtoul(s, NULL, 0); |
224 | return 0; | 237 | return 0; |
225 | } | 238 | } |
226 | __setup("vdso=", vdso_setup); | 239 | __setup("vdso=", vdso_setup); |
227 | #endif | 240 | #endif |
228 | 241 | ||
229 | #ifdef CONFIG_X86_64 | 242 | #ifdef CONFIG_X86_64 |
230 | static void vgetcpu_cpu_init(void *arg) | 243 | static void vgetcpu_cpu_init(void *arg) |
231 | { | 244 | { |
232 | int cpu = smp_processor_id(); | 245 | int cpu = smp_processor_id(); |
233 | struct desc_struct d = { }; | 246 | struct desc_struct d = { }; |
234 | unsigned long node = 0; | 247 | unsigned long node = 0; |
235 | #ifdef CONFIG_NUMA | 248 | #ifdef CONFIG_NUMA |
236 | node = cpu_to_node(cpu); | 249 | node = cpu_to_node(cpu); |
237 | #endif | 250 | #endif |
238 | if (cpu_has(&cpu_data(cpu), X86_FEATURE_RDTSCP)) | 251 | if (cpu_has(&cpu_data(cpu), X86_FEATURE_RDTSCP)) |
239 | write_rdtscp_aux((node << 12) | cpu); | 252 | write_rdtscp_aux((node << 12) | cpu); |
240 | 253 | ||
241 | /* | 254 | /* |
242 | * Store cpu number in limit so that it can be loaded | 255 | * Store cpu number in limit so that it can be loaded |
243 | * quickly in user space in vgetcpu. (12 bits for the CPU | 256 | * quickly in user space in vgetcpu. (12 bits for the CPU |
244 | * and 8 bits for the node) | 257 | * and 8 bits for the node) |
245 | */ | 258 | */ |
246 | d.limit0 = cpu | ((node & 0xf) << 12); | 259 | d.limit0 = cpu | ((node & 0xf) << 12); |
247 | d.limit = node >> 4; | 260 | d.limit = node >> 4; |
248 | d.type = 5; /* RO data, expand down, accessed */ | 261 | d.type = 5; /* RO data, expand down, accessed */ |
249 | d.dpl = 3; /* Visible to user code */ | 262 | d.dpl = 3; /* Visible to user code */ |
250 | d.s = 1; /* Not a system segment */ | 263 | d.s = 1; /* Not a system segment */ |
251 | d.p = 1; /* Present */ | 264 | d.p = 1; /* Present */ |
252 | d.d = 1; /* 32-bit */ | 265 | d.d = 1; /* 32-bit */ |
253 | 266 | ||
254 | write_gdt_entry(get_cpu_gdt_table(cpu), GDT_ENTRY_PER_CPU, &d, DESCTYPE_S); | 267 | write_gdt_entry(get_cpu_gdt_table(cpu), GDT_ENTRY_PER_CPU, &d, DESCTYPE_S); |
255 | } | 268 | } |
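
For reference, user space reads this descriptor's limit back with the LSL instruction, the way the vDSO's getcpu code does. A hypothetical user-space equivalent, assuming GDT_ENTRY_PER_CPU is slot 15 and the usual RPL-3 selector encoding:

    #include <stdio.h>

    #define GDT_ENTRY_PER_CPU   15  /* assumed slot, per this kernel's GDT */

    int main(void)
    {
            unsigned int p;

            /* LSL loads the segment limit set up by vgetcpu_cpu_init():
             * the low 12 bits hold the CPU, the rest hold the node. */
            asm volatile("lsl %1, %0"
                         : "=r" (p)
                         : "r" ((unsigned int)(GDT_ENTRY_PER_CPU * 8 + 3)));
            printf("cpu %u node %u\n", p & 0xfff, p >> 12);
            return 0;
    }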
256 | 269 | ||
257 | static int | 270 | static int |
258 | vgetcpu_cpu_notifier(struct notifier_block *n, unsigned long action, void *arg) | 271 | vgetcpu_cpu_notifier(struct notifier_block *n, unsigned long action, void *arg) |
259 | { | 272 | { |
260 | long cpu = (long)arg; | 273 | long cpu = (long)arg; |
261 | 274 | ||
262 | if (action == CPU_ONLINE || action == CPU_ONLINE_FROZEN) | 275 | if (action == CPU_ONLINE || action == CPU_ONLINE_FROZEN) |
263 | smp_call_function_single(cpu, vgetcpu_cpu_init, NULL, 1); | 276 | smp_call_function_single(cpu, vgetcpu_cpu_init, NULL, 1); |
264 | 277 | ||
265 | return NOTIFY_DONE; | 278 | return NOTIFY_DONE; |
266 | } | 279 | } |
267 | 280 | ||
268 | static int __init init_vdso(void) | 281 | static int __init init_vdso(void) |
269 | { | 282 | { |
270 | init_vdso_image(&vdso_image_64); | 283 | init_vdso_image(&vdso_image_64); |
271 | 284 | ||
272 | #ifdef CONFIG_X86_X32_ABI | 285 | #ifdef CONFIG_X86_X32_ABI |
273 | init_vdso_image(&vdso_image_x32); | 286 | init_vdso_image(&vdso_image_x32); |
274 | #endif | 287 | #endif |
275 | 288 | ||
276 | cpu_notifier_register_begin(); | 289 | cpu_notifier_register_begin(); |
277 | 290 | ||
278 | on_each_cpu(vgetcpu_cpu_init, NULL, 1); | 291 | on_each_cpu(vgetcpu_cpu_init, NULL, 1); |
279 | /* notifier priority > KVM */ | 292 | /* notifier priority > KVM */ |
280 | __hotcpu_notifier(vgetcpu_cpu_notifier, 30); | 293 | __hotcpu_notifier(vgetcpu_cpu_notifier, 30); |
281 | 294 |