Commit 3eb5b893ebec7325ac9e6b8e4864af89a9ca1ed1
Exists in ti-lsk-linux-4.1.y and in 10 other branches
Merge branch 'x86-mpx-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull x86 MPX support from Thomas Gleixner:
 "This enables support for x86 MPX. MPX is a new debug feature for
  bound checking in user space. It requires kernel support to handle
  the bound tables and decode the bound violating instruction in the
  trap handler"

* 'x86-mpx-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  asm-generic: Remove asm-generic arch_bprm_mm_init()
  mm: Make arch_unmap()/bprm_mm_init() available to all architectures
  x86: Cleanly separate use of asm-generic/mm_hooks.h
  x86 mpx: Change return type of get_reg_offset()
  fs: Do not include mpx.h in exec.c
  x86, mpx: Add documentation on Intel MPX
  x86, mpx: Cleanup unused bound tables
  x86, mpx: On-demand kernel allocation of bounds tables
  x86, mpx: Decode MPX instruction to get bound violation information
  x86, mpx: Add MPX-specific mmap interface
  x86, mpx: Introduce VM_MPX to indicate that a VMA is MPX specific
  x86, mpx: Add MPX to disabled features
  ia64: Sync struct siginfo with general version
  mips: Sync struct siginfo with general version
  mpx: Extend siginfo structure to include bound violation information
  x86, mpx: Rename cfg_reg_u and status_reg
  x86: mpx: Give bndX registers actual names
  x86: Remove arbitrary instruction size limit in instruction decoder
35 changed files:
- Documentation/x86/intel_mpx.txt
- arch/ia64/include/uapi/asm/siginfo.h
- arch/mips/include/uapi/asm/siginfo.h
- arch/s390/include/asm/mmu_context.h
- arch/um/include/asm/mmu_context.h
- arch/unicore32/include/asm/mmu_context.h
- arch/x86/Kconfig
- arch/x86/include/asm/disabled-features.h
- arch/x86/include/asm/insn.h
- arch/x86/include/asm/mmu_context.h
- arch/x86/include/asm/mpx.h
- arch/x86/include/asm/paravirt.h
- arch/x86/include/asm/processor.h
- arch/x86/kernel/cpu/perf_event_intel_ds.c
- arch/x86/kernel/cpu/perf_event_intel_lbr.c
- arch/x86/kernel/kprobes/core.c
- arch/x86/kernel/kprobes/opt.c
- arch/x86/kernel/setup.c
- arch/x86/kernel/traps.c
- arch/x86/kernel/uprobes.c
- arch/x86/lib/insn.c
- arch/x86/mm/Makefile
- arch/x86/mm/mpx.c
- arch/x86/tools/insn_sanity.c
- arch/x86/tools/test_get_len.c
- fs/exec.c
- fs/proc/task_mmu.c
- include/asm-generic/mm_hooks.h
- include/linux/mm.h
- include/linux/mm_types.h
- include/uapi/asm-generic/siginfo.h
- include/uapi/linux/prctl.h
- kernel/signal.c
- kernel/sys.c
- mm/mmap.c
Documentation/x86/intel_mpx.txt
1 | +1. Intel(R) MPX Overview | |
2 | +======================== | |
3 | + | |
4 | +Intel(R) Memory Protection Extensions (Intel(R) MPX) is a new capability | |
5 | +introduced into Intel Architecture. Intel MPX provides hardware features | |
6 | +that can be used in conjunction with compiler changes to check memory | |
7 | +references, for those references whose normal compile-time intentions are | |
8 | +usurped at runtime due to buffer overflow or underflow. | |
9 | + | |
10 | +For more information, please refer to Intel(R) Architecture Instruction | |
11 | +Set Extensions Programming Reference, Chapter 9: Intel(R) Memory Protection | |
12 | +Extensions. | |
13 | + | |
14 | +Note: Currently no hardware with MPX ISA is available, but it is always | |
15 | +possible to use SDE (Intel(R) Software Development Emulator) instead, which | |
16 | +can be downloaded from | |
17 | +http://software.intel.com/en-us/articles/intel-software-development-emulator | |
18 | + | |
19 | + | |
20 | +2. How to get the advantage of MPX | |
21 | +================================== | |
22 | + | |
23 | +For MPX to work, changes are required in the kernel, binutils and compiler. | |
24 | +No source changes are required for applications, just a recompile. | |
25 | + | |
26 | +There are a lot of moving parts that must all work together. The following | |
27 | +is how we expect the compiler, application and kernel to work together. | |
28 | + | |
29 | +1) Application developer compiles with -fmpx. The compiler will add the | |
30 | + instrumentation as well as some setup code called early after the app | |
31 | + starts. New instruction prefixes are noops for old CPUs. | |
32 | +2) That setup code allocates (virtual) space for the "bounds directory", | |
33 | + points the "bndcfgu" register to the directory and notifies the kernel | |
34 | + (via the new prctl(PR_MPX_ENABLE_MANAGEMENT)) that the app will be using | |
35 | + MPX. | |
36 | +3) The kernel detects that the CPU has MPX, allows the new prctl() to | |
37 | + succeed, and notes the location of the bounds directory. Userspace is | |
38 | + expected to keep the bounds directory at that location. We note it | |
39 | + instead of reading it each time because the 'xsave' operation needed | |
40 | + to access the bounds directory register is an expensive operation. | |
41 | +4) If the application needs to spill bounds out of the 4 registers, it | |
42 | + issues a bndstx instruction. Since the bounds directory is empty at | |
43 | + this point, a bounds fault (#BR) is raised, the kernel allocates a | |
44 | + bounds table (in the user address space) and makes the relevant entry | |
45 | + in the bounds directory point to the new table. | |
46 | +5) If the application violates the bounds specified in the bounds registers, | |
47 | + a separate kind of #BR is raised which will deliver a signal with | |
48 | + information about the violation in the 'struct siginfo'. | |
49 | +6) Whenever memory is freed, we know that it can no longer contain valid | |
50 | + pointers, and we attempt to free the associated space in the bounds | |
51 | + tables. If an entire table becomes unused, we will attempt to free | |
52 | + the table and remove the entry in the directory. | |
53 | + | |
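As a rough sketch of steps 2 and 3 above (not part of this patch): the
runtime's setup code boils down to something like the C below. The
PR_MPX_ENABLE_MANAGEMENT value comes from the prctl.h hunk in this
series, the 2GB directory size is the 64-bit value from the mpx.h hunk,
and actually pointing BNDCFGU at the directory happens via xrstor, which
plain C cannot express and is only noted in a comment:

        #include <sys/mman.h>
        #include <sys/prctl.h>

        #ifndef PR_MPX_ENABLE_MANAGEMENT
        #define PR_MPX_ENABLE_MANAGEMENT 43
        #endif

        /* 64-bit bounds directory: 1UL << (28 + 3) == 2GB of virtual space */
        #define MPX_BD_SIZE_BYTES (1UL << 31)

        int mpx_runtime_setup(void)
        {
                /* 2) allocate (virtual) space for the bounds directory */
                void *bd = mmap(NULL, MPX_BD_SIZE_BYTES,
                                PROT_READ | PROT_WRITE,
                                MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
                if (bd == MAP_FAILED)
                        return -1;
                /* ...point BNDCFGU at 'bd' and set its enable bit (xrstor)... */
                /* 3) notify the kernel that it should manage the tables */
                return prctl(PR_MPX_ENABLE_MANAGEMENT, 0, 0, 0, 0);
        }
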
54 | +To summarize, there are essentially three things interacting here: | |
55 | + | |
56 | +GCC with -fmpx: | |
57 | + * enables annotation of code with MPX instructions and prefixes | |
58 | + * inserts code early in the application to call in to the "gcc runtime" | |
59 | +GCC MPX Runtime: | |
60 | + * Checks for hardware MPX support in cpuid leaf | |
61 | + * allocates virtual space for the bounds directory (malloc() essentially) | |
62 | + * points the hardware BNDCFGU register at the directory | |
63 | + * calls a new prctl(PR_MPX_ENABLE_MANAGEMENT) to notify the kernel to | |
64 | + start managing the bounds directories | |
65 | +Kernel MPX Code: | |
66 | + * Checks for hardware MPX support in cpuid leaf | |
67 | + * Handles #BR exceptions and sends SIGSEGV to the app when it violates | |
68 | + bounds, like during a buffer overflow. | |
69 | + * When bounds are spilled into an unallocated bounds table, the kernel | |
70 | + notices in the #BR exception, allocates the virtual space, then | |
71 | + updates the bounds directory to point to the new table. It keeps | |
72 | + special track of the memory with a VM_MPX flag. | |
73 | + * Frees unused bounds tables at the time that the memory they described | |
74 | + is unmapped. | |
75 | + | |
76 | + | |
77 | +3. How does MPX kernel code work | |
78 | +================================ | |
79 | + | |
80 | +Handling #BR faults caused by MPX | |
81 | +--------------------------------- | |
82 | + | |
83 | +When MPX is enabled, there are two new situations that can generate | |
84 | +#BR faults: | |
85 | + * new bounds tables (BT) need to be allocated to save bounds. | |
86 | + * a bounds violation is caused by an MPX instruction. | |
87 | + | |
88 | +We hook the #BR handler to handle these two new situations. | |
89 | + | |
90 | +On-demand kernel allocation of bounds tables | |
91 | +-------------------------------------------- | |
92 | + | |
93 | +MPX only has 4 hardware registers for storing bounds information. If | |
94 | +MPX-enabled code needs more than these 4 registers, it needs to spill | |
95 | +them somewhere. It has two special instructions for this which allow | |
96 | +the bounds to be moved between the bounds registers and some new "bounds | |
97 | +tables". | |
98 | + | |
99 | +#BR exceptions are a new class of exceptions just for MPX. They are | |
100 | +similar conceptually to a page fault and will be raised by the MPX | |
101 | +hardware both during bounds violations and when the tables are not | |
102 | +present. The kernel handles those #BR exceptions for not-present tables | |
103 | +by carving the space out of the normal process's address space and then | |
104 | +pointing the bounds-directory over to it. | |
105 | + | |
106 | +The tables need to be accessed and controlled by userspace because | |
107 | +the instructions for moving bounds in and out of them are extremely | |
108 | +frequent. They potentially happen every time a register points to | |
109 | +memory. Any direct kernel involvement (like a syscall) to access the | |
110 | +tables would obviously destroy performance. | |
111 | + | |
112 | +Why not do this in userspace? MPX does not strictly require anything in | |
113 | +the kernel. It can theoretically be done completely from userspace. Here | |
114 | +are a few ways this could be done. We don't think any of them are practical | |
115 | +in the real world, but here they are. | |
116 | + | |
117 | +Q: Can virtual space simply be reserved for the bounds tables so that we | |
118 | + never have to allocate them? | |
119 | +A: An MPX-enabled application will possibly create a lot of bounds tables in | |
120 | + process address space to save bounds information. These tables can take | |
121 | + up huge swaths of memory (as much as 80% of the memory on the system) | |
122 | + even if we clean them up aggressively. In the worst-case scenario, the | |
123 | + tables can be 4x the size of the data structure being tracked. IOW, a | |
124 | + 1-page structure can require 4 bounds-table pages. An X-GB virtual | |
125 | + area needs 4*X GB of virtual space, plus 2GB for the bounds directory. | |
126 | + If we were to preallocate them for the 128TB of user virtual address | |
127 | + space, we would need to reserve 512TB+2GB, which is larger than the | |
128 | + entire virtual address space today. This means they can not be reserved | |
129 | + ahead of time. Also, a single process's pre-populated bounds directory | |
130 | + consumes 2GB of virtual *AND* physical memory. IOW, it's completely | |
131 | + infeasible to prepopulate bounds directories. | |
132 | + | |
133 | +Q: Can we preallocate bounds table space at the same time memory is | |
134 | + allocated which might contain pointers that might eventually need | |
135 | + bounds tables? | |
136 | +A: This would work if we could hook the site of each and every memory | |
137 | + allocation syscall. This can be done for small, constrained applications. | |
138 | + But, it isn't practical at a larger scale since a given app has no | |
139 | + way of controlling how all the parts of the app might allocate memory | |
140 | + (think libraries). The kernel is really the only place to intercept | |
141 | + these calls. | |
142 | + | |
143 | +Q: Could a bounds fault be handed to userspace and the tables allocated | |
144 | + there in a signal handler instead of in the kernel? | |
145 | +A: mmap() is not on the list of async-signal-safe functions, and even | |
146 | + if mmap() did work, it would still require locking or nasty tricks | |
147 | + to keep track of the allocation state there. | |
148 | + | |
149 | +Having ruled out all of the userspace-only approaches for managing | |
150 | +bounds tables that we could think of, we create them on demand in | |
151 | +the kernel. | |
152 | + | |
153 | +Decoding MPX instructions | |
154 | +------------------------- | |
155 | + | |
156 | +If a #BR is generated due to a bounds violation caused by MPX, | |
157 | +we need to decode the MPX instruction to get the violation address | |
158 | +and set this address in the extended struct siginfo. | |
159 | + | |
160 | +The _sigfault field of struct siginfo is extended as follows: | |
161 | + | |
162 | +87 /* SIGILL, SIGFPE, SIGSEGV, SIGBUS */ | |
163 | +88 struct { | |
164 | +89 void __user *_addr; /* faulting insn/memory ref. */ | |
165 | +90 #ifdef __ARCH_SI_TRAPNO | |
166 | +91 int _trapno; /* TRAP # which caused the signal */ | |
167 | +92 #endif | |
168 | +93 short _addr_lsb; /* LSB of the reported address */ | |
169 | +94 struct { | |
170 | +95 void __user *_lower; | |
171 | +96 void __user *_upper; | |
172 | +97 } _addr_bnd; | |
173 | +98 } _sigfault; | |
174 | + | |
175 | +The '_addr' field refers to the violation address, and the new | |
176 | +'_addr_bnd' field holds the upper/lower bounds when a #BR is raised. | |
177 | + | |
178 | +Glibc will also be updated to support this new siginfo, so users | |
179 | +can get the violation address and bounds when bounds violations occur. | |
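
A sketch of a userspace handler consuming the new fields (not part of
this patch; it assumes a glibc new enough to expose si_lower/si_upper
and SEGV_BNDERR, which this series introduces, and fprintf() is not
async-signal-safe, so it is used here only for illustration):

        #include <signal.h>
        #include <stdio.h>
        #include <unistd.h>

        static void br_handler(int sig, siginfo_t *si, void *ctx)
        {
                if (si->si_code == SEGV_BNDERR)
                        /* the faulting access and the bounds it violated */
                        fprintf(stderr, "bounds violation at %p [%p, %p]\n",
                                si->si_addr, si->si_lower, si->si_upper);
                _exit(1);
        }

        int main(void)
        {
                struct sigaction sa = { .sa_flags = SA_SIGINFO,
                                        .sa_sigaction = br_handler };

                sigaction(SIGSEGV, &sa, NULL);
                /* ...run MPX-instrumented code... */
                return 0;
        }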
180 | + | |
181 | +Cleanup unused bounds tables | |
182 | +---------------------------- | |
183 | + | |
184 | +When a BNDSTX instruction attempts to save bounds to a bounds directory | |
185 | +entry marked as invalid, a #BR is generated. This is an indication that | |
186 | +no bounds table exists for this entry. In this case the fault handler | |
187 | +will allocate a new bounds table on demand. | |
188 | + | |
189 | +Since the kernel allocated those tables on-demand without userspace | |
190 | +knowledge, it is also responsible for freeing them when the associated | |
191 | +mappings go away. | |
192 | + | |
193 | +The solution for this issue is to hook do_munmap() to check whether | |
194 | +the process is MPX-enabled. If so, any bounds tables covering the | |
195 | +virtual address region being unmapped are freed as well. | |
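
For reference, the hook chain that later patches in this series wire up
looks like this (a simplified restatement of the x86 arch_unmap() shown
in the mmu_context.h hunk below):

        /* do_munmap() in mm/mmap.c calls the new arch hook: */
        static inline void arch_unmap(struct mm_struct *mm,
                                      struct vm_area_struct *vma,
                                      unsigned long start, unsigned long end)
        {
                /*
                 * A no-op unless this mm has kernel-managed bounds
                 * tables; otherwise, bounds tables covering
                 * [start, end) are freed and their directory
                 * entries cleared.
                 */
                mpx_notify_unmap(mm, vma, start, end);
        }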
196 | + | |
197 | +Adding new prctl commands | |
198 | +------------------------- | |
199 | + | |
200 | +Two new prctl commands are added to enable and disable MPX bounds tables | |
201 | +management in the kernel. | |
202 | + | |
203 | +155 #define PR_MPX_ENABLE_MANAGEMENT 43 | |
204 | +156 #define PR_MPX_DISABLE_MANAGEMENT 44 | |
205 | + | |
206 | +The runtime library in userspace is responsible for allocating the | |
207 | +bounds directory, so the kernel has to use the XSAVE instruction to | |
208 | +get the base of the bounds directory from the BNDCFG register. | |
209 | + | |
210 | +But XSAVE is expected to be very expensive. As a performance | |
211 | +optimization, we read the base of the bounds directory once, during | |
212 | +execution of the PR_MPX_ENABLE_MANAGEMENT command, and cache it in | |
213 | +struct mm_struct for later use. | |
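
In userspace, the pair is used like this (a sketch, not part of this
patch; note from the mpx.c hunk below that the enable call fails with
-ENXIO unless BNDCFGU has already been pointed at a bounds directory):

        #include <sys/prctl.h>

        /* ask the kernel to manage this process's bounds tables */
        prctl(PR_MPX_ENABLE_MANAGEMENT, 0, 0, 0, 0);
        /* ...run MPX-instrumented code... */
        /* hand table management back to userspace */
        prctl(PR_MPX_DISABLE_MANAGEMENT, 0, 0, 0, 0);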
214 | + | |
215 | + | |
216 | +4. Special rules | |
217 | +================ | |
218 | + | |
219 | +1) If userspace is requesting help from the kernel to do the management | |
220 | +of bounds tables, it may not create or modify entries in the bounds directory. | |
221 | + | |
222 | +Certainly users can allocate bounds tables and forcibly point the bounds | |
223 | +directory at them through the XSAVE instruction, and then set the valid | |
224 | +bit of a bounds directory entry by hand. But the kernel will decline | |
225 | +to assist in managing these tables. | |
226 | + | |
227 | +2) Userspace may not take multiple bounds directory entries and point | |
228 | +them at the same bounds table. | |
229 | + | |
230 | +This is allowed architecturally. For more information, see the "Intel(R) | |
231 | +Architecture Instruction Set Extensions Programming Reference" (9.3.4). | |
232 | + | |
233 | +However, if users did this, the kernel might be fooled into unmapping an | |
234 | +in-use bounds table since it does not recognize sharing. |
arch/ia64/include/uapi/asm/siginfo.h
... | ... | @@ -63,6 +63,10 @@ |
63 | 63 | unsigned int _flags; /* see below */ |
64 | 64 | unsigned long _isr; /* isr */ |
65 | 65 | short _addr_lsb; /* lsb of faulting address */ |
66 | + struct { | |
67 | + void __user *_lower; | |
68 | + void __user *_upper; | |
69 | + } _addr_bnd; | |
66 | 70 | } _sigfault; |
67 | 71 | |
68 | 72 | /* SIGPOLL */ |
69 | 73 | |
... | ... | @@ -110,9 +114,9 @@ |
110 | 114 | /* |
111 | 115 | * SIGSEGV si_codes |
112 | 116 | */ |
113 | -#define __SEGV_PSTKOVF (__SI_FAULT|3) /* paragraph stack overflow */ | |
117 | +#define __SEGV_PSTKOVF (__SI_FAULT|4) /* paragraph stack overflow */ | |
114 | 118 | #undef NSIGSEGV |
115 | -#define NSIGSEGV 3 | |
119 | +#define NSIGSEGV 4 | |
116 | 120 | |
117 | 121 | #undef NSIGTRAP |
118 | 122 | #define NSIGTRAP 4 |
arch/mips/include/uapi/asm/siginfo.h
arch/s390/include/asm/mmu_context.h
... | ... | @@ -120,5 +120,16 @@ |
120 | 120 | { |
121 | 121 | } |
122 | 122 | |
123 | +static inline void arch_unmap(struct mm_struct *mm, | |
124 | + struct vm_area_struct *vma, | |
125 | + unsigned long start, unsigned long end) | |
126 | +{ | |
127 | +} | |
128 | + | |
129 | +static inline void arch_bprm_mm_init(struct mm_struct *mm, | |
130 | + struct vm_area_struct *vma) | |
131 | +{ | |
132 | +} | |
133 | + | |
123 | 134 | #endif /* __S390_MMU_CONTEXT_H */ |
arch/um/include/asm/mmu_context.h
... | ... | @@ -10,7 +10,26 @@ |
10 | 10 | #include <asm/mmu.h> |
11 | 11 | |
12 | 12 | extern void uml_setup_stubs(struct mm_struct *mm); |
13 | +/* | |
14 | + * Needed since we do not use the asm-generic/mm_hooks.h: | |
15 | + */ | |
16 | +static inline void arch_dup_mmap(struct mm_struct *oldmm, struct mm_struct *mm) | |
17 | +{ | |
18 | + uml_setup_stubs(mm); | |
19 | +} | |
13 | 20 | extern void arch_exit_mmap(struct mm_struct *mm); |
21 | +static inline void arch_unmap(struct mm_struct *mm, | |
22 | + struct vm_area_struct *vma, | |
23 | + unsigned long start, unsigned long end) | |
24 | +{ | |
25 | +} | |
26 | +static inline void arch_bprm_mm_init(struct mm_struct *mm, | |
27 | + struct vm_area_struct *vma) | |
28 | +{ | |
29 | +} | |
30 | +/* | |
31 | + * end asm-generic/mm_hooks.h functions | |
32 | + */ | |
14 | 33 | |
15 | 34 | #define deactivate_mm(tsk,mm) do { } while (0) |
16 | 35 | |
... | ... | @@ -39,11 +58,6 @@ |
39 | 58 | if(next != &init_mm) |
40 | 59 | __switch_mm(&next->context.id); |
41 | 60 | } |
42 | -} | |
43 | - | |
44 | -static inline void arch_dup_mmap(struct mm_struct *oldmm, struct mm_struct *mm) | |
45 | -{ | |
46 | - uml_setup_stubs(mm); | |
47 | 61 | } |
48 | 62 | |
49 | 63 | static inline void enter_lazy_tlb(struct mm_struct *mm, |
arch/unicore32/include/asm/mmu_context.h
... | ... | @@ -86,5 +86,16 @@ |
86 | 86 | { |
87 | 87 | } |
88 | 88 | |
89 | +static inline void arch_unmap(struct mm_struct *mm, | |
90 | + struct vm_area_struct *vma, | |
91 | + unsigned long start, unsigned long end) | |
92 | +{ | |
93 | +} | |
94 | + | |
95 | +static inline void arch_bprm_mm_init(struct mm_struct *mm, | |
96 | + struct vm_area_struct *vma) | |
97 | +{ | |
98 | +} | |
99 | + | |
89 | 100 | #endif |
arch/x86/Kconfig
arch/x86/include/asm/disabled-features.h
... | ... | @@ -10,6 +10,12 @@ |
10 | 10 | * cpu_feature_enabled(). |
11 | 11 | */ |
12 | 12 | |
13 | +#ifdef CONFIG_X86_INTEL_MPX | |
14 | +# define DISABLE_MPX 0 | |
15 | +#else | |
16 | +# define DISABLE_MPX (1<<(X86_FEATURE_MPX & 31)) | |
17 | +#endif | |
18 | + | |
13 | 19 | #ifdef CONFIG_X86_64 |
14 | 20 | # define DISABLE_VME (1<<(X86_FEATURE_VME & 31)) |
15 | 21 | # define DISABLE_K6_MTRR (1<<(X86_FEATURE_K6_MTRR & 31)) |
... | ... | @@ -34,7 +40,7 @@ |
34 | 40 | #define DISABLED_MASK6 0 |
35 | 41 | #define DISABLED_MASK7 0 |
36 | 42 | #define DISABLED_MASK8 0 |
37 | -#define DISABLED_MASK9 0 | |
43 | +#define DISABLED_MASK9 (DISABLE_MPX) | |
38 | 44 | |
39 | 45 | #endif /* _ASM_X86_DISABLED_FEATURES_H */ |
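The net effect is that cpu_feature_enabled(X86_FEATURE_MPX) becomes a
compile-time constant zero when CONFIG_X86_INTEL_MPX is off, so guarded
code is dropped entirely. A sketch of the pattern (do_bounds() in the
traps.c hunk below uses this exact check):

        if (!cpu_feature_enabled(X86_FEATURE_MPX)) {
                /* statically false config: compiler elides the MPX path */
                goto exit_trap;
        }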
arch/x86/include/asm/insn.h
... | ... | @@ -65,6 +65,7 @@ |
65 | 65 | unsigned char x86_64; |
66 | 66 | |
67 | 67 | const insn_byte_t *kaddr; /* kernel address of insn to analyze */ |
68 | + const insn_byte_t *end_kaddr; /* kernel address of last insn in buffer */ | |
68 | 69 | const insn_byte_t *next_byte; |
69 | 70 | }; |
70 | 71 | |
... | ... | @@ -96,7 +97,7 @@ |
96 | 97 | #define X86_VEX_P(vex) ((vex) & 0x03) /* VEX3 Byte2, VEX2 Byte1 */ |
97 | 98 | #define X86_VEX_M_MAX 0x1f /* VEX3.M Maximum value */ |
98 | 99 | |
99 | -extern void insn_init(struct insn *insn, const void *kaddr, int x86_64); | |
100 | +extern void insn_init(struct insn *insn, const void *kaddr, int buf_len, int x86_64); | |
100 | 101 | extern void insn_get_prefixes(struct insn *insn); |
101 | 102 | extern void insn_get_opcode(struct insn *insn); |
102 | 103 | extern void insn_get_modrm(struct insn *insn); |
103 | 104 | |
104 | 105 | |
... | ... | @@ -115,12 +116,13 @@ |
115 | 116 | extern int insn_rip_relative(struct insn *insn); |
116 | 117 | |
117 | 118 | /* Init insn for kernel text */ |
118 | -static inline void kernel_insn_init(struct insn *insn, const void *kaddr) | |
119 | +static inline void kernel_insn_init(struct insn *insn, | |
120 | + const void *kaddr, int buf_len) | |
119 | 121 | { |
120 | 122 | #ifdef CONFIG_X86_64 |
121 | - insn_init(insn, kaddr, 1); | |
123 | + insn_init(insn, kaddr, buf_len, 1); | |
122 | 124 | #else /* CONFIG_X86_32 */ |
123 | - insn_init(insn, kaddr, 0); | |
125 | + insn_init(insn, kaddr, buf_len, 0); | |
124 | 126 | #endif |
125 | 127 | } |
126 | 128 |
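Callers now pass the number of valid bytes at 'kaddr', so the decoder can
stop at the end of the buffer instead of assuming that MAX_INSN_SIZE
bytes are always readable. A sketch of the usage pattern after this
change (insn_complete() is the existing completeness check in this
header):

        struct insn insn;

        kernel_insn_init(&insn, kaddr, MAX_INSN_SIZE);  /* whole buffer valid */
        insn_get_length(&insn);
        if (!insn_complete(&insn))
                return -EINVAL;  /* decode ran past the end of the buffer */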
arch/x86/include/asm/mmu_context.h
... | ... | @@ -10,9 +10,8 @@ |
10 | 10 | #include <asm/pgalloc.h> |
11 | 11 | #include <asm/tlbflush.h> |
12 | 12 | #include <asm/paravirt.h> |
13 | +#include <asm/mpx.h> | |
13 | 14 | #ifndef CONFIG_PARAVIRT |
14 | -#include <asm-generic/mm_hooks.h> | |
15 | - | |
16 | 15 | static inline void paravirt_activate_mm(struct mm_struct *prev, |
17 | 16 | struct mm_struct *next) |
18 | 17 | { |
... | ... | @@ -101,6 +100,29 @@ |
101 | 100 | loadsegment(fs, 0); \ |
102 | 101 | } while (0) |
103 | 102 | #endif |
103 | + | |
104 | +static inline void arch_dup_mmap(struct mm_struct *oldmm, | |
105 | + struct mm_struct *mm) | |
106 | +{ | |
107 | + paravirt_arch_dup_mmap(oldmm, mm); | |
108 | +} | |
109 | + | |
110 | +static inline void arch_exit_mmap(struct mm_struct *mm) | |
111 | +{ | |
112 | + paravirt_arch_exit_mmap(mm); | |
113 | +} | |
114 | + | |
115 | +static inline void arch_bprm_mm_init(struct mm_struct *mm, | |
116 | + struct vm_area_struct *vma) | |
117 | +{ | |
118 | + mpx_mm_init(mm); | |
119 | +} | |
120 | + | |
121 | +static inline void arch_unmap(struct mm_struct *mm, struct vm_area_struct *vma, | |
122 | + unsigned long start, unsigned long end) | |
123 | +{ | |
124 | + mpx_notify_unmap(mm, vma, start, end); | |
125 | +} | |
104 | 126 | |
105 | 127 | #endif /* _ASM_X86_MMU_CONTEXT_H */ |
arch/x86/include/asm/mpx.h
1 | +#ifndef _ASM_X86_MPX_H | |
2 | +#define _ASM_X86_MPX_H | |
3 | + | |
4 | +#include <linux/types.h> | |
5 | +#include <asm/ptrace.h> | |
6 | +#include <asm/insn.h> | |
7 | + | |
8 | +/* | |
9 | + * NULL is theoretically a valid place to put the bounds | |
10 | + * directory, so point this at an invalid address. | |
11 | + */ | |
12 | +#define MPX_INVALID_BOUNDS_DIR ((void __user *)-1) | |
13 | +#define MPX_BNDCFG_ENABLE_FLAG 0x1 | |
14 | +#define MPX_BD_ENTRY_VALID_FLAG 0x1 | |
15 | + | |
16 | +#ifdef CONFIG_X86_64 | |
17 | + | |
18 | +/* upper 28 bits [47:20] of the virtual address in 64-bit used to | |
19 | + * index into bounds directory (BD). | |
20 | + */ | |
21 | +#define MPX_BD_ENTRY_OFFSET 28 | |
22 | +#define MPX_BD_ENTRY_SHIFT 3 | |
23 | +/* bits [19:3] of the virtual address in 64-bit used to index into | |
24 | + * bounds table (BT). | |
25 | + */ | |
26 | +#define MPX_BT_ENTRY_OFFSET 17 | |
27 | +#define MPX_BT_ENTRY_SHIFT 5 | |
28 | +#define MPX_IGN_BITS 3 | |
29 | +#define MPX_BD_ENTRY_TAIL 3 | |
30 | + | |
31 | +#else | |
32 | + | |
33 | +#define MPX_BD_ENTRY_OFFSET 20 | |
34 | +#define MPX_BD_ENTRY_SHIFT 2 | |
35 | +#define MPX_BT_ENTRY_OFFSET 10 | |
36 | +#define MPX_BT_ENTRY_SHIFT 4 | |
37 | +#define MPX_IGN_BITS 2 | |
38 | +#define MPX_BD_ENTRY_TAIL 2 | |
39 | + | |
40 | +#endif | |
41 | + | |
42 | +#define MPX_BD_SIZE_BYTES (1UL<<(MPX_BD_ENTRY_OFFSET+MPX_BD_ENTRY_SHIFT)) | |
43 | +#define MPX_BT_SIZE_BYTES (1UL<<(MPX_BT_ENTRY_OFFSET+MPX_BT_ENTRY_SHIFT)) | |
44 | + | |
45 | +#define MPX_BNDSTA_TAIL 2 | |
46 | +#define MPX_BNDCFG_TAIL 12 | |
47 | +#define MPX_BNDSTA_ADDR_MASK (~((1UL<<MPX_BNDSTA_TAIL)-1)) | |
48 | +#define MPX_BNDCFG_ADDR_MASK (~((1UL<<MPX_BNDCFG_TAIL)-1)) | |
49 | +#define MPX_BT_ADDR_MASK (~((1UL<<MPX_BD_ENTRY_TAIL)-1)) | |
50 | + | |
51 | +#define MPX_BNDCFG_ADDR_MASK (~((1UL<<MPX_BNDCFG_TAIL)-1)) | |
52 | +#define MPX_BNDSTA_ERROR_CODE 0x3 | |
53 | + | |
54 | +#define MPX_BD_ENTRY_MASK ((1<<MPX_BD_ENTRY_OFFSET)-1) | |
55 | +#define MPX_BT_ENTRY_MASK ((1<<MPX_BT_ENTRY_OFFSET)-1) | |
56 | +#define MPX_GET_BD_ENTRY_OFFSET(addr) ((((addr)>>(MPX_BT_ENTRY_OFFSET+ \ | |
57 | + MPX_IGN_BITS)) & MPX_BD_ENTRY_MASK) << MPX_BD_ENTRY_SHIFT) | |
58 | +#define MPX_GET_BT_ENTRY_OFFSET(addr) ((((addr)>>MPX_IGN_BITS) & \ | |
59 | + MPX_BT_ENTRY_MASK) << MPX_BT_ENTRY_SHIFT) | |
60 | + | |
61 | +#ifdef CONFIG_X86_INTEL_MPX | |
62 | +siginfo_t *mpx_generate_siginfo(struct pt_regs *regs, | |
63 | + struct xsave_struct *xsave_buf); | |
64 | +int mpx_handle_bd_fault(struct xsave_struct *xsave_buf); | |
65 | +static inline int kernel_managing_mpx_tables(struct mm_struct *mm) | |
66 | +{ | |
67 | + return (mm->bd_addr != MPX_INVALID_BOUNDS_DIR); | |
68 | +} | |
69 | +static inline void mpx_mm_init(struct mm_struct *mm) | |
70 | +{ | |
71 | + /* | |
72 | + * NULL is theoretically a valid place to put the bounds | |
73 | + * directory, so point this at an invalid address. | |
74 | + */ | |
75 | + mm->bd_addr = MPX_INVALID_BOUNDS_DIR; | |
76 | +} | |
77 | +void mpx_notify_unmap(struct mm_struct *mm, struct vm_area_struct *vma, | |
78 | + unsigned long start, unsigned long end); | |
79 | +#else | |
80 | +static inline siginfo_t *mpx_generate_siginfo(struct pt_regs *regs, | |
81 | + struct xsave_struct *xsave_buf) | |
82 | +{ | |
83 | + return NULL; | |
84 | +} | |
85 | +static inline int mpx_handle_bd_fault(struct xsave_struct *xsave_buf) | |
86 | +{ | |
87 | + return -EINVAL; | |
88 | +} | |
89 | +static inline int kernel_managing_mpx_tables(struct mm_struct *mm) | |
90 | +{ | |
91 | + return 0; | |
92 | +} | |
93 | +static inline void mpx_mm_init(struct mm_struct *mm) | |
94 | +{ | |
95 | +} | |
96 | +static inline void mpx_notify_unmap(struct mm_struct *mm, | |
97 | + struct vm_area_struct *vma, | |
98 | + unsigned long start, unsigned long end) | |
99 | +{ | |
100 | +} | |
101 | +#endif /* CONFIG_X86_INTEL_MPX */ | |
102 | + | |
103 | +#endif /* _ASM_X86_MPX_H */ |
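A quick sanity check of the 64-bit constants above (a worked example, not
part of the header): the directory indexes address bits [47:20] with
8-byte entries and each table indexes bits [19:3] with 32-byte entries,
so:

        MPX_BD_SIZE_BYTES == 1UL << (28 + 3) == 2GB   /* bounds directory */
        MPX_BT_SIZE_BYTES == 1UL << (17 + 5) == 4MB   /* one bounds table */

        /* byte offsets of the slots describing pointer 'addr': */
        unsigned long bd_offset = MPX_GET_BD_ENTRY_OFFSET(addr);
        unsigned long bt_offset = MPX_GET_BT_ENTRY_OFFSET(addr);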
arch/x86/include/asm/paravirt.h
... | ... | @@ -330,13 +330,13 @@ |
330 | 330 | PVOP_VCALL2(pv_mmu_ops.activate_mm, prev, next); |
331 | 331 | } |
332 | 332 | |
333 | -static inline void arch_dup_mmap(struct mm_struct *oldmm, | |
334 | - struct mm_struct *mm) | |
333 | +static inline void paravirt_arch_dup_mmap(struct mm_struct *oldmm, | |
334 | + struct mm_struct *mm) | |
335 | 335 | { |
336 | 336 | PVOP_VCALL2(pv_mmu_ops.dup_mmap, oldmm, mm); |
337 | 337 | } |
338 | 338 | |
339 | -static inline void arch_exit_mmap(struct mm_struct *mm) | |
339 | +static inline void paravirt_arch_exit_mmap(struct mm_struct *mm) | |
340 | 340 | { |
341 | 341 | PVOP_VCALL1(pv_mmu_ops.exit_mmap, mm); |
342 | 342 | } |
... | ... | @@ -986,6 +986,16 @@ |
986 | 986 | #endif /* __ASSEMBLY__ */ |
987 | 987 | #else /* CONFIG_PARAVIRT */ |
988 | 988 | # define default_banner x86_init_noop |
989 | +#ifndef __ASSEMBLY__ | |
990 | +static inline void paravirt_arch_dup_mmap(struct mm_struct *oldmm, | |
991 | + struct mm_struct *mm) | |
992 | +{ | |
993 | +} | |
994 | + | |
995 | +static inline void paravirt_arch_exit_mmap(struct mm_struct *mm) | |
996 | +{ | |
997 | +} | |
998 | +#endif /* __ASSEMBLY__ */ | |
989 | 999 | #endif /* !CONFIG_PARAVIRT */ |
990 | 1000 | #endif /* _ASM_X86_PARAVIRT_H */ |
arch/x86/include/asm/processor.h
... | ... | @@ -374,13 +374,14 @@ |
374 | 374 | u8 reserved[128]; |
375 | 375 | }; |
376 | 376 | |
377 | -struct bndregs_struct { | |
378 | - u64 bndregs[8]; | |
377 | +struct bndreg { | |
378 | + u64 lower_bound; | |
379 | + u64 upper_bound; | |
379 | 380 | } __packed; |
380 | 381 | |
381 | -struct bndcsr_struct { | |
382 | - u64 cfg_reg_u; | |
383 | - u64 status_reg; | |
382 | +struct bndcsr { | |
383 | + u64 bndcfgu; | |
384 | + u64 bndstatus; | |
384 | 385 | } __packed; |
385 | 386 | |
386 | 387 | struct xsave_hdr_struct { |
... | ... | @@ -394,8 +395,8 @@ |
394 | 395 | struct xsave_hdr_struct xsave_hdr; |
395 | 396 | struct ymmh_struct ymmh; |
396 | 397 | struct lwp_struct lwp; |
397 | - struct bndregs_struct bndregs; | |
398 | - struct bndcsr_struct bndcsr; | |
398 | + struct bndreg bndreg[4]; | |
399 | + struct bndcsr bndcsr; | |
399 | 400 | /* new processor state extensions will go here */ |
400 | 401 | } __attribute__ ((packed, aligned (64))); |
401 | 402 | |
... | ... | @@ -952,6 +953,24 @@ |
952 | 953 | |
953 | 954 | extern int get_tsc_mode(unsigned long adr); |
954 | 955 | extern int set_tsc_mode(unsigned int val); |
956 | + | |
957 | +/* Register/unregister a process' MPX related resource */ | |
958 | +#define MPX_ENABLE_MANAGEMENT(tsk) mpx_enable_management((tsk)) | |
959 | +#define MPX_DISABLE_MANAGEMENT(tsk) mpx_disable_management((tsk)) | |
960 | + | |
961 | +#ifdef CONFIG_X86_INTEL_MPX | |
962 | +extern int mpx_enable_management(struct task_struct *tsk); | |
963 | +extern int mpx_disable_management(struct task_struct *tsk); | |
964 | +#else | |
965 | +static inline int mpx_enable_management(struct task_struct *tsk) | |
966 | +{ | |
967 | + return -EINVAL; | |
968 | +} | |
969 | +static inline int mpx_disable_management(struct task_struct *tsk) | |
970 | +{ | |
971 | + return -EINVAL; | |
972 | +} | |
973 | +#endif /* CONFIG_X86_INTEL_MPX */ | |
955 | 974 | |
956 | 975 | extern u16 amd_get_nb_id(int cpu); |
957 | 976 |
arch/x86/kernel/cpu/perf_event_intel_ds.c
... | ... | @@ -724,6 +724,7 @@ |
724 | 724 | unsigned long ip = regs->ip; |
725 | 725 | int is_64bit = 0; |
726 | 726 | void *kaddr; |
727 | + int size; | |
727 | 728 | |
728 | 729 | /* |
729 | 730 | * We don't need to fixup if the PEBS assist is fault like |
730 | 731 | |
731 | 732 | |
... | ... | @@ -758,11 +759,12 @@ |
758 | 759 | return 1; |
759 | 760 | } |
760 | 761 | |
762 | + size = ip - to; | |
761 | 763 | if (!kernel_ip(ip)) { |
762 | - int size, bytes; | |
764 | + int bytes; | |
763 | 765 | u8 *buf = this_cpu_read(insn_buffer); |
764 | 766 | |
765 | - size = ip - to; /* Must fit our buffer, see above */ | |
767 | + /* 'size' must fit our buffer, see above */ | |
766 | 768 | bytes = copy_from_user_nmi(buf, (void __user *)to, size); |
767 | 769 | if (bytes != 0) |
768 | 770 | return 0; |
769 | 771 | |
770 | 772 | |
... | ... | @@ -780,11 +782,20 @@ |
780 | 782 | #ifdef CONFIG_X86_64 |
781 | 783 | is_64bit = kernel_ip(to) || !test_thread_flag(TIF_IA32); |
782 | 784 | #endif |
783 | - insn_init(&insn, kaddr, is_64bit); | |
785 | + insn_init(&insn, kaddr, size, is_64bit); | |
784 | 786 | insn_get_length(&insn); |
787 | + /* | |
788 | + * Make sure there was not a problem decoding the | |
789 | + * instruction and getting the length. This is | |
790 | + * doubly important because we have an infinite | |
791 | + * loop if insn.length=0. | |
792 | + */ | |
793 | + if (!insn.length) | |
794 | + break; | |
785 | 795 | |
786 | 796 | to += insn.length; |
787 | 797 | kaddr += insn.length; |
798 | + size -= insn.length; | |
788 | 799 | } while (to < ip); |
789 | 800 | |
790 | 801 | if (to == ip) { |
arch/x86/kernel/cpu/perf_event_intel_lbr.c
... | ... | @@ -465,7 +465,7 @@ |
465 | 465 | { |
466 | 466 | struct insn insn; |
467 | 467 | void *addr; |
468 | - int bytes, size = MAX_INSN_SIZE; | |
468 | + int bytes_read, bytes_left; | |
469 | 469 | int ret = X86_BR_NONE; |
470 | 470 | int ext, to_plm, from_plm; |
471 | 471 | u8 buf[MAX_INSN_SIZE]; |
... | ... | @@ -493,8 +493,10 @@ |
493 | 493 | return X86_BR_NONE; |
494 | 494 | |
495 | 495 | /* may fail if text not present */ |
496 | - bytes = copy_from_user_nmi(buf, (void __user *)from, size); | |
497 | - if (bytes != 0) | |
496 | + bytes_left = copy_from_user_nmi(buf, (void __user *)from, | |
497 | + MAX_INSN_SIZE); | |
498 | + bytes_read = MAX_INSN_SIZE - bytes_left; | |
499 | + if (!bytes_read) | |
498 | 500 | return X86_BR_NONE; |
499 | 501 | |
500 | 502 | addr = buf; |
501 | 503 | |
502 | 504 | |
... | ... | @@ -505,10 +507,19 @@ |
505 | 507 | * Ensure we don't blindy read any address by validating it is |
506 | 508 | * a known text address. |
507 | 509 | */ |
508 | - if (kernel_text_address(from)) | |
510 | + if (kernel_text_address(from)) { | |
509 | 511 | addr = (void *)from; |
510 | - else | |
512 | + /* | |
513 | + * Assume we can get the maximum possible size | |
514 | + * when grabbing kernel data. This is not | |
515 | + * _strictly_ true since we could possibly be | |
516 | + * executing up next to a memory hole, but | |
517 | + * it is very unlikely to be a problem. | |
518 | + */ | |
519 | + bytes_read = MAX_INSN_SIZE; | |
520 | + } else { | |
511 | 521 | return X86_BR_NONE; |
522 | + } | |
512 | 523 | } |
513 | 524 | |
514 | 525 | /* |
515 | 526 | |
... | ... | @@ -518,8 +529,10 @@ |
518 | 529 | #ifdef CONFIG_X86_64 |
519 | 530 | is64 = kernel_ip((unsigned long)addr) || !test_thread_flag(TIF_IA32); |
520 | 531 | #endif |
521 | - insn_init(&insn, addr, is64); | |
532 | + insn_init(&insn, addr, bytes_read, is64); | |
522 | 533 | insn_get_opcode(&insn); |
534 | + if (!insn.opcode.got) | |
535 | + return X86_BR_ABORT; | |
523 | 536 | |
524 | 537 | switch (insn.opcode.bytes[0]) { |
525 | 538 | case 0xf: |
arch/x86/kernel/kprobes/core.c
... | ... | @@ -285,7 +285,7 @@ |
285 | 285 | * normally used, we just go through if there is no kprobe. |
286 | 286 | */ |
287 | 287 | __addr = recover_probed_instruction(buf, addr); |
288 | - kernel_insn_init(&insn, (void *)__addr); | |
288 | + kernel_insn_init(&insn, (void *)__addr, MAX_INSN_SIZE); | |
289 | 289 | insn_get_length(&insn); |
290 | 290 | |
291 | 291 | /* |
292 | 292 | |
... | ... | @@ -330,8 +330,10 @@ |
330 | 330 | { |
331 | 331 | struct insn insn; |
332 | 332 | kprobe_opcode_t buf[MAX_INSN_SIZE]; |
333 | + unsigned long recovered_insn = | |
334 | + recover_probed_instruction(buf, (unsigned long)src); | |
333 | 335 | |
334 | - kernel_insn_init(&insn, (void *)recover_probed_instruction(buf, (unsigned long)src)); | |
336 | + kernel_insn_init(&insn, (void *)recovered_insn, MAX_INSN_SIZE); | |
335 | 337 | insn_get_length(&insn); |
336 | 338 | /* Another subsystem puts a breakpoint, failed to recover */ |
337 | 339 | if (insn.opcode.bytes[0] == BREAKPOINT_INSTRUCTION) |
... | ... | @@ -342,7 +344,7 @@ |
342 | 344 | if (insn_rip_relative(&insn)) { |
343 | 345 | s64 newdisp; |
344 | 346 | u8 *disp; |
345 | - kernel_insn_init(&insn, dest); | |
347 | + kernel_insn_init(&insn, dest, insn.length); | |
346 | 348 | insn_get_displacement(&insn); |
347 | 349 | /* |
348 | 350 | * The copied instruction uses the %rip-relative addressing |
arch/x86/kernel/kprobes/opt.c
... | ... | @@ -251,13 +251,15 @@ |
251 | 251 | /* Decode instructions */ |
252 | 252 | addr = paddr - offset; |
253 | 253 | while (addr < paddr - offset + size) { /* Decode until function end */ |
254 | + unsigned long recovered_insn; | |
254 | 255 | if (search_exception_tables(addr)) |
255 | 256 | /* |
256 | 257 | * Since some fixup code will jumps into this function, |
257 | 258 | * we can't optimize kprobe in this function. |
258 | 259 | */ |
259 | 260 | return 0; |
260 | - kernel_insn_init(&insn, (void *)recover_probed_instruction(buf, addr)); | |
261 | + recovered_insn = recover_probed_instruction(buf, addr); | |
262 | + kernel_insn_init(&insn, (void *)recovered_insn, MAX_INSN_SIZE); | |
261 | 263 | insn_get_length(&insn); |
262 | 264 | /* Another subsystem puts a breakpoint */ |
263 | 265 | if (insn.opcode.bytes[0] == BREAKPOINT_INSTRUCTION) |
arch/x86/kernel/setup.c
... | ... | @@ -960,6 +960,8 @@ |
960 | 960 | init_mm.end_data = (unsigned long) _edata; |
961 | 961 | init_mm.brk = _brk_end; |
962 | 962 | |
963 | + mpx_mm_init(&init_mm); | |
964 | + | |
963 | 965 | code_resource.start = __pa_symbol(_text); |
964 | 966 | code_resource.end = __pa_symbol(_etext)-1; |
965 | 967 | data_resource.start = __pa_symbol(_etext); |
arch/x86/kernel/traps.c
... | ... | @@ -60,6 +60,7 @@ |
60 | 60 | #include <asm/fixmap.h> |
61 | 61 | #include <asm/mach_traps.h> |
62 | 62 | #include <asm/alternative.h> |
63 | +#include <asm/mpx.h> | |
63 | 64 | |
64 | 65 | #ifdef CONFIG_X86_64 |
65 | 66 | #include <asm/x86_init.h> |
... | ... | @@ -228,7 +229,6 @@ |
228 | 229 | |
229 | 230 | DO_ERROR(X86_TRAP_DE, SIGFPE, "divide error", divide_error) |
230 | 231 | DO_ERROR(X86_TRAP_OF, SIGSEGV, "overflow", overflow) |
231 | -DO_ERROR(X86_TRAP_BR, SIGSEGV, "bounds", bounds) | |
232 | 232 | DO_ERROR(X86_TRAP_UD, SIGILL, "invalid opcode", invalid_op) |
233 | 233 | DO_ERROR(X86_TRAP_OLD_MF, SIGFPE, "coprocessor segment overrun",coprocessor_segment_overrun) |
234 | 234 | DO_ERROR(X86_TRAP_TS, SIGSEGV, "invalid TSS", invalid_TSS) |
... | ... | @@ -285,6 +285,89 @@ |
285 | 285 | die(str, regs, error_code); |
286 | 286 | } |
287 | 287 | #endif |
288 | + | |
289 | +dotraplinkage void do_bounds(struct pt_regs *regs, long error_code) | |
290 | +{ | |
291 | + struct task_struct *tsk = current; | |
292 | + struct xsave_struct *xsave_buf; | |
293 | + enum ctx_state prev_state; | |
294 | + struct bndcsr *bndcsr; | |
295 | + siginfo_t *info; | |
296 | + | |
297 | + prev_state = exception_enter(); | |
298 | + if (notify_die(DIE_TRAP, "bounds", regs, error_code, | |
299 | + X86_TRAP_BR, SIGSEGV) == NOTIFY_STOP) | |
300 | + goto exit; | |
301 | + conditional_sti(regs); | |
302 | + | |
303 | + if (!user_mode(regs)) | |
304 | + die("bounds", regs, error_code); | |
305 | + | |
306 | + if (!cpu_feature_enabled(X86_FEATURE_MPX)) { | |
307 | + /* The exception is not from Intel MPX */ | |
308 | + goto exit_trap; | |
309 | + } | |
310 | + | |
311 | + /* | |
312 | + * We need to look at BNDSTATUS to resolve this exception. | |
313 | + * It is not directly accessible, though, so we need to | |
314 | + * do an xsave and then pull it out of the xsave buffer. | |
315 | + */ | |
316 | + fpu_save_init(&tsk->thread.fpu); | |
317 | + xsave_buf = &(tsk->thread.fpu.state->xsave); | |
318 | + bndcsr = get_xsave_addr(xsave_buf, XSTATE_BNDCSR); | |
319 | + if (!bndcsr) | |
320 | + goto exit_trap; | |
321 | + | |
322 | + /* | |
323 | + * The error code field of the BNDSTATUS register communicates status | |
324 | + * information of a bound range exception #BR or operation involving | |
325 | + * bound directory. | |
326 | + */ | |
327 | + switch (bndcsr->bndstatus & MPX_BNDSTA_ERROR_CODE) { | |
328 | + case 2: /* Bound directory has invalid entry. */ | |
329 | + if (mpx_handle_bd_fault(xsave_buf)) | |
330 | + goto exit_trap; | |
331 | + break; /* Success, it was handled */ | |
332 | + case 1: /* Bound violation. */ | |
333 | + info = mpx_generate_siginfo(regs, xsave_buf); | |
334 | + if (PTR_ERR(info)) { | |
335 | + /* | |
336 | + * We failed to decode the MPX instruction. Act as if | |
337 | + * the exception was not caused by MPX. | |
338 | + */ | |
339 | + goto exit_trap; | |
340 | + } | |
341 | + /* | |
342 | + * Success, we decoded the instruction and retrieved | |
343 | + * an 'info' containing the address being accessed | |
344 | + * which caused the exception. This information | |
345 | + * allows an application to possibly handle the | |
346 | + * #BR exception itself. | |
347 | + */ | |
348 | + do_trap(X86_TRAP_BR, SIGSEGV, "bounds", regs, error_code, info); | |
349 | + kfree(info); | |
350 | + break; | |
351 | + case 0: /* No exception caused by Intel MPX operations. */ | |
352 | + goto exit_trap; | |
353 | + default: | |
354 | + die("bounds", regs, error_code); | |
355 | + } | |
356 | + | |
357 | +exit: | |
358 | + exception_exit(prev_state); | |
359 | + return; | |
360 | +exit_trap: | |
361 | + /* | |
362 | + * This path out is for all the cases where we could not | |
363 | + * handle the exception in some way (like allocating a | |
364 | + * table or telling userspace about it). We will also end | |
365 | + * up here if the kernel has MPX turned off at compile | |
366 | + * time. | |
367 | + */ | |
368 | + do_trap(X86_TRAP_BR, SIGSEGV, "bounds", regs, error_code, NULL); | |
369 | + exception_exit(prev_state); | |
370 | +} | |
288 | 371 | |
289 | 372 | dotraplinkage void |
290 | 373 | do_general_protection(struct pt_regs *regs, long error_code) |
arch/x86/kernel/uprobes.c
... | ... | @@ -219,7 +219,7 @@ |
219 | 219 | { |
220 | 220 | u32 volatile *good_insns; |
221 | 221 | |
222 | - insn_init(insn, auprobe->insn, x86_64); | |
222 | + insn_init(insn, auprobe->insn, sizeof(auprobe->insn), x86_64); | |
223 | 223 | /* has the side-effect of processing the entire instruction */ |
224 | 224 | insn_get_length(insn); |
225 | 225 | if (WARN_ON_ONCE(!insn_complete(insn))) |
arch/x86/lib/insn.c
... | ... | @@ -28,7 +28,7 @@ |
28 | 28 | |
29 | 29 | /* Verify next sizeof(t) bytes can be on the same instruction */ |
30 | 30 | #define validate_next(t, insn, n) \ |
31 | - ((insn)->next_byte + sizeof(t) + n - (insn)->kaddr <= MAX_INSN_SIZE) | |
31 | + ((insn)->next_byte + sizeof(t) + n < (insn)->end_kaddr) | |
32 | 32 | |
33 | 33 | #define __get_next(t, insn) \ |
34 | 34 | ({ t r = *(t*)insn->next_byte; insn->next_byte += sizeof(t); r; }) |
35 | 35 | |
... | ... | @@ -50,10 +50,11 @@ |
50 | 50 | * @kaddr: address (in kernel memory) of instruction (or copy thereof) |
51 | 51 | * @x86_64: !0 for 64-bit kernel or 64-bit app |
52 | 52 | */ |
53 | -void insn_init(struct insn *insn, const void *kaddr, int x86_64) | |
53 | +void insn_init(struct insn *insn, const void *kaddr, int buf_len, int x86_64) | |
54 | 54 | { |
55 | 55 | memset(insn, 0, sizeof(*insn)); |
56 | 56 | insn->kaddr = kaddr; |
57 | + insn->end_kaddr = kaddr + buf_len; | |
57 | 58 | insn->next_byte = kaddr; |
58 | 59 | insn->x86_64 = x86_64 ? 1 : 0; |
59 | 60 | insn->opnd_bytes = 4; |
arch/x86/mm/Makefile
arch/x86/mm/mpx.c
1 | +/* | |
2 | + * mpx.c - Memory Protection eXtensions | |
3 | + * | |
4 | + * Copyright (c) 2014, Intel Corporation. | |
5 | + * Qiaowei Ren <qiaowei.ren@intel.com> | |
6 | + * Dave Hansen <dave.hansen@intel.com> | |
7 | + */ | |
8 | +#include <linux/kernel.h> | |
9 | +#include <linux/slab.h> | |
10 | +#include <linux/syscalls.h> | |
11 | +#include <linux/sched/sysctl.h> | |
12 | + | |
13 | +#include <asm/i387.h> | |
14 | +#include <asm/insn.h> | |
15 | +#include <asm/mman.h> | |
16 | +#include <asm/mmu_context.h> | |
17 | +#include <asm/mpx.h> | |
18 | +#include <asm/processor.h> | |
19 | +#include <asm/fpu-internal.h> | |
20 | + | |
21 | +static const char *mpx_mapping_name(struct vm_area_struct *vma) | |
22 | +{ | |
23 | + return "[mpx]"; | |
24 | +} | |
25 | + | |
26 | +static struct vm_operations_struct mpx_vma_ops = { | |
27 | + .name = mpx_mapping_name, | |
28 | +}; | |
29 | + | |
30 | +static int is_mpx_vma(struct vm_area_struct *vma) | |
31 | +{ | |
32 | + return (vma->vm_ops == &mpx_vma_ops); | |
33 | +} | |
34 | + | |
35 | +/* | |
36 | + * This is really a simplified "vm_mmap". It only handles MPX | |
37 | + * bounds tables (the bounds directory is user-allocated). | |
38 | + * | |
39 | + * Later on, we use the vma->vm_ops to uniquely identify these | |
40 | + * VMAs. | |
41 | + */ | |
42 | +static unsigned long mpx_mmap(unsigned long len) | |
43 | +{ | |
44 | + unsigned long ret; | |
45 | + unsigned long addr, pgoff; | |
46 | + struct mm_struct *mm = current->mm; | |
47 | + vm_flags_t vm_flags; | |
48 | + struct vm_area_struct *vma; | |
49 | + | |
50 | + /* Only bounds table and bounds directory can be allocated here */ | |
51 | + if (len != MPX_BD_SIZE_BYTES && len != MPX_BT_SIZE_BYTES) | |
52 | + return -EINVAL; | |
53 | + | |
54 | + down_write(&mm->mmap_sem); | |
55 | + | |
56 | + /* Too many mappings? */ | |
57 | + if (mm->map_count > sysctl_max_map_count) { | |
58 | + ret = -ENOMEM; | |
59 | + goto out; | |
60 | + } | |
61 | + | |
62 | + /* Obtain the address to map to. We verify (or select) it and ensure | |
63 | + * that it represents a valid section of the address space. | |
64 | + */ | |
65 | + addr = get_unmapped_area(NULL, 0, len, 0, MAP_ANONYMOUS | MAP_PRIVATE); | |
66 | + if (addr & ~PAGE_MASK) { | |
67 | + ret = addr; | |
68 | + goto out; | |
69 | + } | |
70 | + | |
71 | + vm_flags = VM_READ | VM_WRITE | VM_MPX | | |
72 | + mm->def_flags | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC; | |
73 | + | |
74 | + /* Set pgoff according to addr for anon_vma */ | |
75 | + pgoff = addr >> PAGE_SHIFT; | |
76 | + | |
77 | + ret = mmap_region(NULL, addr, len, vm_flags, pgoff); | |
78 | + if (IS_ERR_VALUE(ret)) | |
79 | + goto out; | |
80 | + | |
81 | + vma = find_vma(mm, ret); | |
82 | + if (!vma) { | |
83 | + ret = -ENOMEM; | |
84 | + goto out; | |
85 | + } | |
86 | + vma->vm_ops = &mpx_vma_ops; | |
87 | + | |
88 | + if (vm_flags & VM_LOCKED) { | |
89 | + up_write(&mm->mmap_sem); | |
90 | + mm_populate(ret, len); | |
91 | + return ret; | |
92 | + } | |
93 | + | |
94 | +out: | |
95 | + up_write(&mm->mmap_sem); | |
96 | + return ret; | |
97 | +} | |
98 | + | |
99 | +enum reg_type { | |
100 | + REG_TYPE_RM = 0, | |
101 | + REG_TYPE_INDEX, | |
102 | + REG_TYPE_BASE, | |
103 | +}; | |
104 | + | |
105 | +static int get_reg_offset(struct insn *insn, struct pt_regs *regs, | |
106 | + enum reg_type type) | |
107 | +{ | |
108 | + int regno = 0; | |
109 | + | |
110 | + static const int regoff[] = { | |
111 | + offsetof(struct pt_regs, ax), | |
112 | + offsetof(struct pt_regs, cx), | |
113 | + offsetof(struct pt_regs, dx), | |
114 | + offsetof(struct pt_regs, bx), | |
115 | + offsetof(struct pt_regs, sp), | |
116 | + offsetof(struct pt_regs, bp), | |
117 | + offsetof(struct pt_regs, si), | |
118 | + offsetof(struct pt_regs, di), | |
119 | +#ifdef CONFIG_X86_64 | |
120 | + offsetof(struct pt_regs, r8), | |
121 | + offsetof(struct pt_regs, r9), | |
122 | + offsetof(struct pt_regs, r10), | |
123 | + offsetof(struct pt_regs, r11), | |
124 | + offsetof(struct pt_regs, r12), | |
125 | + offsetof(struct pt_regs, r13), | |
126 | + offsetof(struct pt_regs, r14), | |
127 | + offsetof(struct pt_regs, r15), | |
128 | +#endif | |
129 | + }; | |
130 | + int nr_registers = ARRAY_SIZE(regoff); | |
131 | + /* | |
132 | + * Don't possibly decode a 32-bit instruction as | |
133 | + * reading a 64-bit-only register. | |
134 | + */ | |
135 | + if (IS_ENABLED(CONFIG_X86_64) && !insn->x86_64) | |
136 | + nr_registers -= 8; | |
137 | + | |
138 | + switch (type) { | |
139 | + case REG_TYPE_RM: | |
140 | + regno = X86_MODRM_RM(insn->modrm.value); | |
141 | + if (X86_REX_B(insn->rex_prefix.value) == 1) | |
142 | + regno += 8; | |
143 | + break; | |
144 | + | |
145 | + case REG_TYPE_INDEX: | |
146 | + regno = X86_SIB_INDEX(insn->sib.value); | |
147 | + if (X86_REX_X(insn->rex_prefix.value) == 1) | |
148 | + regno += 8; | |
149 | + break; | |
150 | + | |
151 | + case REG_TYPE_BASE: | |
152 | + regno = X86_SIB_BASE(insn->sib.value); | |
153 | + if (X86_REX_B(insn->rex_prefix.value) == 1) | |
154 | + regno += 8; | |
155 | + break; | |
156 | + | |
157 | + default: | |
158 | + pr_err("invalid register type"); | |
159 | + BUG(); | |
160 | + break; | |
161 | + } | |
162 | + | |
163 | + if (regno > nr_registers) { | |
164 | + WARN_ONCE(1, "decoded an instruction with an invalid register"); | |
165 | + return -EINVAL; | |
166 | + } | |
167 | + return regoff[regno]; | |
168 | +} | |
169 | + | |
170 | +/* | |
171 | + * return the address being referenced by the instruction | |
172 | + * for rm=3 returning the content of the rm reg | |
173 | + * for rm!=3 calculates the address using SIB and Disp | |
174 | + */ | |
175 | +static void __user *mpx_get_addr_ref(struct insn *insn, struct pt_regs *regs) | |
176 | +{ | |
177 | + unsigned long addr, base, indx; | |
178 | + int addr_offset, base_offset, indx_offset; | |
179 | + insn_byte_t sib; | |
180 | + | |
181 | + insn_get_modrm(insn); | |
182 | + insn_get_sib(insn); | |
183 | + sib = insn->sib.value; | |
184 | + | |
185 | + if (X86_MODRM_MOD(insn->modrm.value) == 3) { | |
186 | + addr_offset = get_reg_offset(insn, regs, REG_TYPE_RM); | |
187 | + if (addr_offset < 0) | |
188 | + goto out_err; | |
189 | + addr = regs_get_register(regs, addr_offset); | |
190 | + } else { | |
191 | + if (insn->sib.nbytes) { | |
192 | + base_offset = get_reg_offset(insn, regs, REG_TYPE_BASE); | |
193 | + if (base_offset < 0) | |
194 | + goto out_err; | |
195 | + | |
196 | + indx_offset = get_reg_offset(insn, regs, REG_TYPE_INDEX); | |
197 | + if (indx_offset < 0) | |
198 | + goto out_err; | |
199 | + | |
200 | + base = regs_get_register(regs, base_offset); | |
201 | + indx = regs_get_register(regs, indx_offset); | |
202 | + addr = base + indx * (1 << X86_SIB_SCALE(sib)); | |
203 | + } else { | |
204 | + addr_offset = get_reg_offset(insn, regs, REG_TYPE_RM); | |
205 | + if (addr_offset < 0) | |
206 | + goto out_err; | |
207 | + addr = regs_get_register(regs, addr_offset); | |
208 | + } | |
209 | + addr += insn->displacement.value; | |
210 | + } | |
211 | + return (void __user *)addr; | |
212 | +out_err: | |
213 | + return (void __user *)-1; | |
214 | +} | |
215 | + | |
216 | +static int mpx_insn_decode(struct insn *insn, | |
217 | + struct pt_regs *regs) | |
218 | +{ | |
219 | + unsigned char buf[MAX_INSN_SIZE]; | |
220 | + int x86_64 = !test_thread_flag(TIF_IA32); | |
221 | + int not_copied; | |
222 | + int nr_copied; | |
223 | + | |
224 | + not_copied = copy_from_user(buf, (void __user *)regs->ip, sizeof(buf)); | |
225 | + nr_copied = sizeof(buf) - not_copied; | |
226 | + /* | |
227 | + * The decoder _should_ fail nicely if we pass it a short buffer. | |
228 | + * But, let's not depend on that implementation detail. If we | |
229 | + * did not get anything, just error out now. | |
230 | + */ | |
231 | + if (!nr_copied) | |
232 | + return -EFAULT; | |
233 | + insn_init(insn, buf, nr_copied, x86_64); | |
234 | + insn_get_length(insn); | |
235 | + /* | |
236 | + * copy_from_user() tries to get as many bytes as we could see in | |
237 | + * the largest possible instruction. If the instruction we are | |
238 | + * after is shorter than that _and_ we attempt to copy from | |
239 | + * something unreadable, we might get a short read. This is OK | |
240 | + * as long as the read did not stop in the middle of the | |
241 | + * instruction. Check to see if we got a partial instruction. | |
242 | + */ | |
243 | + if (nr_copied < insn->length) | |
244 | + return -EFAULT; | |
245 | + | |
246 | + insn_get_opcode(insn); | |
247 | + /* | |
248 | + * We only _really_ need to decode bndcl/bndcn/bndcu | |
249 | + * Error out on anything else. | |
250 | + */ | |
251 | + if (insn->opcode.bytes[0] != 0x0f) | |
252 | + goto bad_opcode; | |
253 | + if ((insn->opcode.bytes[1] != 0x1a) && | |
254 | + (insn->opcode.bytes[1] != 0x1b)) | |
255 | + goto bad_opcode; | |
256 | + | |
257 | + return 0; | |
258 | +bad_opcode: | |
259 | + return -EINVAL; | |
260 | +} | |
261 | + | |
262 | +/* | |
263 | + * If a bounds overflow occurs then a #BR is generated. This | |
264 | + * function decodes MPX instructions to get violation address | |
265 | + * and set this address into extended struct siginfo. | |
266 | + * | |
267 | + * Note that this is not a super precise way of doing this. | |
268 | + * Userspace could have, by the time we get here, written | |
269 | + * anything it wants into the instructions. We cannot | |
270 | + * trust anything about it. They might not be valid | |
271 | + * instructions or might encode invalid registers, etc... | |
272 | + * | |
273 | + * The caller is expected to kfree() the returned siginfo_t. | |
274 | + */ | |
275 | +siginfo_t *mpx_generate_siginfo(struct pt_regs *regs, | |
276 | + struct xsave_struct *xsave_buf) | |
277 | +{ | |
278 | + struct bndreg *bndregs, *bndreg; | |
279 | + siginfo_t *info = NULL; | |
280 | + struct insn insn; | |
281 | + uint8_t bndregno; | |
282 | + int err; | |
283 | + | |
284 | + err = mpx_insn_decode(&insn, regs); | |
285 | + if (err) | |
286 | + goto err_out; | |
287 | + | |
288 | + /* | |
289 | + * We know at this point that we are only dealing with | |
290 | + * MPX instructions. | |
291 | + */ | |
292 | + insn_get_modrm(&insn); | |
293 | + bndregno = X86_MODRM_REG(insn.modrm.value); | |
294 | + if (bndregno > 3) { | |
295 | + err = -EINVAL; | |
296 | + goto err_out; | |
297 | + } | |
298 | + /* get the bndregs _area_ of the xsave structure */ | |
299 | + bndregs = get_xsave_addr(xsave_buf, XSTATE_BNDREGS); | |
300 | + if (!bndregs) { | |
301 | + err = -EINVAL; | |
302 | + goto err_out; | |
303 | + } | |
304 | + /* now go select the individual register in the set of 4 */ | |
305 | + bndreg = &bndregs[bndregno]; | |
306 | + | |
307 | + info = kzalloc(sizeof(*info), GFP_KERNEL); | |
308 | + if (!info) { | |
309 | + err = -ENOMEM; | |
310 | + goto err_out; | |
311 | + } | |
312 | + /* | |
313 | + * The registers are always 64-bit, but the upper 32 | |
314 | + * bits are ignored in 32-bit mode. Also, note that the | |
315 | + * upper bounds are architecturally represented in 1's | |
316 | + * complement form. | |
317 | + * | |
318 | + * The 'unsigned long' cast is because the compiler | |
319 | + * complains when casting from integers to different-size | |
320 | + * pointers. | |
321 | + */ | |
322 | + info->si_lower = (void __user *)(unsigned long)bndreg->lower_bound; | |
323 | + info->si_upper = (void __user *)(unsigned long)~bndreg->upper_bound; | |
324 | + info->si_addr_lsb = 0; | |
325 | + info->si_signo = SIGSEGV; | |
326 | + info->si_errno = 0; | |
327 | + info->si_code = SEGV_BNDERR; | |
328 | + info->si_addr = mpx_get_addr_ref(&insn, regs); | |
329 | + /* | |
330 | + * We were not able to extract an address from the instruction, | |
331 | + * probably because there was something invalid in it. | |
332 | + */ | |
333 | + if (info->si_addr == (void *)-1) { | |
334 | + err = -EINVAL; | |
335 | + goto err_out; | |
336 | + } | |
337 | + return info; | |
338 | +err_out: | |
339 | + /* info might be NULL, but kfree() handles that */ | |
340 | + kfree(info); | |
341 | + return ERR_PTR(err); | |
342 | +} | |
343 | + | |
344 | +static __user void *task_get_bounds_dir(struct task_struct *tsk) | |
345 | +{ | |
346 | + struct bndcsr *bndcsr; | |
347 | + | |
348 | + if (!cpu_feature_enabled(X86_FEATURE_MPX)) | |
349 | + return MPX_INVALID_BOUNDS_DIR; | |
350 | + | |
351 | + /* | |
352 | + * The bounds directory pointer is stored in a register | |
353 | + * only accessible if we first do an xsave. | |
354 | + */ | |
355 | + fpu_save_init(&tsk->thread.fpu); | |
356 | + bndcsr = get_xsave_addr(&tsk->thread.fpu.state->xsave, XSTATE_BNDCSR); | |
357 | + if (!bndcsr) | |
358 | + return MPX_INVALID_BOUNDS_DIR; | |
359 | + | |
360 | + /* | |
361 | + * Make sure the register looks valid by checking the | |
362 | + * enable bit. | |
363 | + */ | |
364 | + if (!(bndcsr->bndcfgu & MPX_BNDCFG_ENABLE_FLAG)) | |
365 | + return MPX_INVALID_BOUNDS_DIR; | |
366 | + | |
367 | + /* | |
368 | + * Lastly, mask off the low bits used for configuration | |
369 | + * flags, and return the address of the bounds table. | |
370 | + */ | |
371 | + return (void __user *)(unsigned long) | |
372 | + (bndcsr->bndcfgu & MPX_BNDCFG_ADDR_MASK); | |
373 | +} | |
374 | + | |
375 | +int mpx_enable_management(struct task_struct *tsk) | |
376 | +{ | |
377 | + void __user *bd_base = MPX_INVALID_BOUNDS_DIR; | |
378 | + struct mm_struct *mm = tsk->mm; | |
379 | + int ret = 0; | |
380 | + | |
381 | + /* | |
382 | + * The runtime in userspace is responsible for allocation of | |
383 | + * the bounds directory. Then, it will save the base of the bounds | |
384 | + * directory into XSAVE/XRSTOR Save Area and enable MPX through | |
385 | + * XRSTOR instruction. | |
386 | + * | |
387 | + * fpu_xsave() is expected to be very expensive. Storing the bounds | |
388 | + * directory here means that we do not have to do xsave in the unmap | |
389 | + * path; we can just use mm->bd_addr instead. | |
390 | + */ | |
391 | + bd_base = task_get_bounds_dir(tsk); | |
392 | + down_write(&mm->mmap_sem); | |
393 | + mm->bd_addr = bd_base; | |
394 | + if (mm->bd_addr == MPX_INVALID_BOUNDS_DIR) | |
395 | + ret = -ENXIO; | |
396 | + | |
397 | + up_write(&mm->mmap_sem); | |
398 | + return ret; | |
399 | +} | |
400 | + | |
401 | +int mpx_disable_management(struct task_struct *tsk) | |
402 | +{ | |
403 | + struct mm_struct *mm = current->mm; | |
404 | + | |
405 | + if (!cpu_feature_enabled(X86_FEATURE_MPX)) | |
406 | + return -ENXIO; | |
407 | + | |
408 | + down_write(&mm->mmap_sem); | |
409 | + mm->bd_addr = MPX_INVALID_BOUNDS_DIR; | |
410 | + up_write(&mm->mmap_sem); | |
411 | + return 0; | |
412 | +} | |
413 | + | |
414 | +/* | |
415 | + * With 32-bit mode, MPX_BT_SIZE_BYTES is 4MB, and the size of each | |
416 | + * bounds table is 16KB. With 64-bit mode, MPX_BT_SIZE_BYTES is 2GB, | |
417 | + * and the size of each bounds table is 4MB. | |
418 | + */ | |
419 | +static int allocate_bt(long __user *bd_entry) | |
420 | +{ | |
421 | + unsigned long expected_old_val = 0; | |
422 | + unsigned long actual_old_val = 0; | |
423 | + unsigned long bt_addr; | |
424 | + int ret = 0; | |
425 | + | |
426 | + /* | |
427 | + * Carve the virtual space out of userspace for the new | |
428 | + * bounds table: | |
429 | + */ | |
430 | + bt_addr = mpx_mmap(MPX_BT_SIZE_BYTES); | |
431 | + if (IS_ERR((void *)bt_addr)) | |
432 | + return PTR_ERR((void *)bt_addr); | |
433 | + /* | |
434 | + * Set the valid flag (kinda like _PAGE_PRESENT in a pte) | |
435 | + */ | |
436 | + bt_addr = bt_addr | MPX_BD_ENTRY_VALID_FLAG; | |
437 | + | |
438 | + /* | |
439 | +	 * Go poke the address of the new bounds table into the | |
440 | + * bounds directory entry out in userspace memory. Note: | |
441 | + * we may race with another CPU instantiating the same table. | |
442 | + * In that case the cmpxchg will see an unexpected | |
443 | + * 'actual_old_val'. | |
444 | + * | |
445 | + * This can fault, but that's OK because we do not hold | |
446 | +	 * mmap_sem at this point, unlike some other parts of | |
447 | +	 * the MPX code that have to pagefault_disable(). | |
448 | + */ | |
449 | + ret = user_atomic_cmpxchg_inatomic(&actual_old_val, bd_entry, | |
450 | + expected_old_val, bt_addr); | |
451 | + if (ret) | |
452 | + goto out_unmap; | |
453 | + | |
454 | + /* | |
455 | + * The user_atomic_cmpxchg_inatomic() will only return nonzero | |
456 | + * for faults, *not* if the cmpxchg itself fails. Now we must | |
457 | + * verify that the cmpxchg itself completed successfully. | |
458 | + */ | |
459 | + /* | |
460 | +	 * We expected an empty bd_entry, but instead found an | |
461 | +	 * apparently valid entry. Assume we raced with another | |
462 | +	 * thread to instantiate this table and declare success. | |
463 | + */ | |
464 | + if (actual_old_val & MPX_BD_ENTRY_VALID_FLAG) { | |
465 | + ret = 0; | |
466 | + goto out_unmap; | |
467 | + } | |
468 | + /* | |
469 | + * We found a non-empty bd_entry but it did not have the | |
470 | + * VALID_FLAG set. Return an error which will result in | |
471 | + * a SEGV since this probably means that somebody scribbled | |
472 | + * some invalid data in to a bounds table. | |
473 | + */ | |
474 | + if (expected_old_val != actual_old_val) { | |
475 | + ret = -EINVAL; | |
476 | + goto out_unmap; | |
477 | + } | |
478 | + return 0; | |
479 | +out_unmap: | |
480 | + vm_munmap(bt_addr & MPX_BT_ADDR_MASK, MPX_BT_SIZE_BYTES); | |
481 | + return ret; | |
482 | +} | |
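
To make the geometry concrete: assuming the 64-bit layout above (a 2GB directory of 8-byte entries, 4MB tables of 32-byte entries), a pointer stored at linear address 'addr' is tracked via directory index addr[47:20] and table index addr[19:3]. A sketch of that math; the kernel's actual helpers are the MPX_GET_BD_ENTRY_OFFSET()/MPX_GET_BT_ENTRY_OFFSET() macros in asm/mpx.h:

	/* Sketch, 64-bit only: byte offsets of the entries covering 'addr'. */
	static unsigned long bd_entry_offset(unsigned long addr)
	{
		return ((addr >> 20) & ((1UL << 28) - 1)) * 8;	/* 2^28 x 8B = 2GB */
	}

	static unsigned long bt_entry_offset(unsigned long addr)
	{
		return ((addr >> 3) & ((1UL << 17) - 1)) * 32;	/* 2^17 x 32B = 4MB */
	}
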
483 | + | |
484 | +/* | |
485 | + * When a BNDSTX instruction attempts to save bounds to a bounds | |
486 | + * table, it will first attempt to look up the table in the | |
487 | + * first-level bounds directory. If it does not find a table in | |
488 | + * the directory, a #BR is generated and we get here in order to | |
489 | + * allocate a new table. | |
490 | + * | |
491 | + * With 32-bit mode, the size of the BD is 4MB, and the size of each | |
492 | + * bounds table is 16KB. With 64-bit mode, the size of the BD is 2GB, | |
493 | + * and the size of each bounds table is 4MB. | |
494 | + */ | |
495 | +static int do_mpx_bt_fault(struct xsave_struct *xsave_buf) | |
496 | +{ | |
497 | + unsigned long bd_entry, bd_base; | |
498 | + struct bndcsr *bndcsr; | |
499 | + | |
500 | + bndcsr = get_xsave_addr(xsave_buf, XSTATE_BNDCSR); | |
501 | + if (!bndcsr) | |
502 | + return -EINVAL; | |
503 | + /* | |
504 | + * Mask off the preserve and enable bits | |
505 | + */ | |
506 | + bd_base = bndcsr->bndcfgu & MPX_BNDCFG_ADDR_MASK; | |
507 | + /* | |
508 | + * The hardware provides the address of the missing or invalid | |
509 | + * entry via BNDSTATUS, so we don't have to go look it up. | |
510 | + */ | |
511 | + bd_entry = bndcsr->bndstatus & MPX_BNDSTA_ADDR_MASK; | |
512 | + /* | |
513 | + * Make sure the directory entry is within where we think | |
514 | + * the directory is. | |
515 | + */ | |
516 | + if ((bd_entry < bd_base) || | |
517 | + (bd_entry >= bd_base + MPX_BD_SIZE_BYTES)) | |
518 | + return -EINVAL; | |
519 | + | |
520 | + return allocate_bt((long __user *)bd_entry); | |
521 | +} | |
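
The BNDSTATUS masking above assumes the documented register layout: the low two bits carry an error code (2 means a bounds-directory entry with no table behind it) and the remaining bits carry that entry's address. A tiny sketch of the check, with a literal mask standing in for MPX_BNDSTA_ERROR_CODE:

	/* Illustrative only: did the #BR fire because a table is missing? */
	static int bndstatus_is_missing_table(unsigned long bndstatus)
	{
		return (bndstatus & 0x3UL) == 2;	/* error code 2 */
	}
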
522 | + | |
523 | +int mpx_handle_bd_fault(struct xsave_struct *xsave_buf) | |
524 | +{ | |
525 | + /* | |
526 | + * Userspace never asked us to manage the bounds tables, | |
527 | + * so refuse to help. | |
528 | + */ | |
529 | + if (!kernel_managing_mpx_tables(current->mm)) | |
530 | + return -EINVAL; | |
531 | + | |
532 | + if (do_mpx_bt_fault(xsave_buf)) { | |
533 | + force_sig(SIGSEGV, current); | |
534 | + /* | |
535 | + * The force_sig() is essentially "handling" this | |
536 | + * exception, so we do not pass up the error | |
537 | + * from do_mpx_bt_fault(). | |
538 | + */ | |
539 | + } | |
540 | + return 0; | |
541 | +} | |
542 | + | |
543 | +/* | |
544 | + * A thin wrapper around get_user_pages(). Returns 0 if the | |
545 | + * fault was resolved or -errno if not. | |
546 | + */ | |
547 | +static int mpx_resolve_fault(long __user *addr, int write) | |
548 | +{ | |
549 | + long gup_ret; | |
550 | + int nr_pages = 1; | |
551 | + int force = 0; | |
552 | + | |
553 | + gup_ret = get_user_pages(current, current->mm, (unsigned long)addr, | |
554 | + nr_pages, write, force, NULL, NULL); | |
555 | + /* | |
556 | + * get_user_pages() returns number of pages gotten. | |
557 | + * 0 means we failed to fault in and get anything, | |
558 | + * probably because 'addr' is bad. | |
559 | + */ | |
560 | + if (!gup_ret) | |
561 | + return -EFAULT; | |
562 | + /* Other error, return it */ | |
563 | + if (gup_ret < 0) | |
564 | + return gup_ret; | |
565 | + /* must have gup'd a page and gup_ret>0, success */ | |
566 | + return 0; | |
567 | +} | |
568 | + | |
569 | +/* | |
570 | + * Get the base of the bounds table pointed to by a specific | |
571 | + * bounds directory entry. | |
572 | + */ | |
573 | +static int get_bt_addr(struct mm_struct *mm, | |
574 | + long __user *bd_entry, unsigned long *bt_addr) | |
575 | +{ | |
576 | + int ret; | |
577 | + int valid_bit; | |
578 | + | |
579 | + if (!access_ok(VERIFY_READ, (bd_entry), sizeof(*bd_entry))) | |
580 | + return -EFAULT; | |
581 | + | |
582 | + while (1) { | |
583 | + int need_write = 0; | |
584 | + | |
585 | + pagefault_disable(); | |
586 | + ret = get_user(*bt_addr, bd_entry); | |
587 | + pagefault_enable(); | |
588 | + if (!ret) | |
589 | + break; | |
590 | + if (ret == -EFAULT) | |
591 | + ret = mpx_resolve_fault(bd_entry, need_write); | |
592 | + /* | |
593 | + * If we could not resolve the fault, consider it | |
594 | + * userspace's fault and error out. | |
595 | + */ | |
596 | + if (ret) | |
597 | + return ret; | |
598 | + } | |
599 | + | |
600 | + valid_bit = *bt_addr & MPX_BD_ENTRY_VALID_FLAG; | |
601 | + *bt_addr &= MPX_BT_ADDR_MASK; | |
602 | + | |
603 | + /* | |
604 | + * When the kernel is managing bounds tables, a bounds directory | |
605 | + * entry will either have a valid address (plus the valid bit) | |
606 | + * *OR* be completely empty. If we see a !valid entry *and* some | |
607 | + * data in the address field, we know something is wrong. This | |
608 | + * -EINVAL return will cause a SIGSEGV. | |
609 | + */ | |
610 | + if (!valid_bit && *bt_addr) | |
611 | + return -EINVAL; | |
612 | + /* | |
613 | +	 * Do we have a completely zeroed bt entry? That is OK. It | |
614 | + * just means there was no bounds table for this memory. Make | |
615 | + * sure to distinguish this from -EINVAL, which will cause | |
616 | + * a SEGV. | |
617 | + */ | |
618 | + if (!valid_bit) | |
619 | + return -ENOENT; | |
620 | + | |
621 | + return 0; | |
622 | +} | |
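
In other words, a kernel-managed directory entry is simply 'bounds table base | valid bit'. A hedged sketch of the encoding that get_bt_addr() undoes, with bit 0 standing in for MPX_BD_ENTRY_VALID_FLAG:

	/* Illustrative only: pack/unpack a bounds directory entry. */
	static unsigned long bd_entry_pack(unsigned long bt_base)
	{
		return bt_base | 0x1UL;		/* set the valid bit */
	}

	static unsigned long bd_entry_unpack(unsigned long bde, int *valid)
	{
		*valid = bde & 0x1UL;
		return bde & ~0x1UL;		/* table base address */
	}
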
623 | + | |
624 | +/* | |
625 | + * Free the backing physical pages of bounds table 'bt_addr'. | |
626 | + * Assume start...end is within that bounds table. | |
627 | + */ | |
628 | +static int zap_bt_entries(struct mm_struct *mm, | |
629 | + unsigned long bt_addr, | |
630 | + unsigned long start, unsigned long end) | |
631 | +{ | |
632 | + struct vm_area_struct *vma; | |
633 | + unsigned long addr, len; | |
634 | + | |
635 | + /* | |
636 | + * Find the first overlapping vma. If vma->vm_start > start, there | |
637 | + * will be a hole in the bounds table. This -EINVAL return will | |
638 | + * cause a SIGSEGV. | |
639 | + */ | |
640 | + vma = find_vma(mm, start); | |
641 | + if (!vma || vma->vm_start > start) | |
642 | + return -EINVAL; | |
643 | + | |
644 | + /* | |
645 | +	 * A NUMA policy on a VM_MPX VMA could cause this bounds table to | |
646 | + * be split. So we need to look across the entire 'start -> end' | |
647 | + * range of this bounds table, find all of the VM_MPX VMAs, and | |
648 | + * zap only those. | |
649 | + */ | |
650 | + addr = start; | |
651 | + while (vma && vma->vm_start < end) { | |
652 | + /* | |
653 | + * We followed a bounds directory entry down | |
654 | + * here. If we find a non-MPX VMA, that's bad, | |
655 | + * so stop immediately and return an error. This | |
656 | + * probably results in a SIGSEGV. | |
657 | + */ | |
658 | + if (!is_mpx_vma(vma)) | |
659 | + return -EINVAL; | |
660 | + | |
661 | + len = min(vma->vm_end, end) - addr; | |
662 | + zap_page_range(vma, addr, len, NULL); | |
663 | + | |
664 | +		vma = vma->vm_next; | |
665 | +		addr = vma ? vma->vm_start : end;	/* vma may be NULL here */ | |
666 | + } | |
667 | + | |
668 | + return 0; | |
669 | +} | |
670 | + | |
671 | +static int unmap_single_bt(struct mm_struct *mm, | |
672 | + long __user *bd_entry, unsigned long bt_addr) | |
673 | +{ | |
674 | + unsigned long expected_old_val = bt_addr | MPX_BD_ENTRY_VALID_FLAG; | |
675 | + unsigned long actual_old_val = 0; | |
676 | + int ret; | |
677 | + | |
678 | + while (1) { | |
679 | + int need_write = 1; | |
680 | + | |
681 | + pagefault_disable(); | |
682 | + ret = user_atomic_cmpxchg_inatomic(&actual_old_val, bd_entry, | |
683 | + expected_old_val, 0); | |
684 | + pagefault_enable(); | |
685 | + if (!ret) | |
686 | + break; | |
687 | + if (ret == -EFAULT) | |
688 | + ret = mpx_resolve_fault(bd_entry, need_write); | |
689 | + /* | |
690 | + * If we could not resolve the fault, consider it | |
691 | + * userspace's fault and error out. | |
692 | + */ | |
693 | + if (ret) | |
694 | + return ret; | |
695 | + } | |
696 | + /* | |
697 | + * The cmpxchg was performed, check the results. | |
698 | + */ | |
699 | + if (actual_old_val != expected_old_val) { | |
700 | + /* | |
701 | + * Someone else raced with us to unmap the table. | |
702 | + * There was no bounds table pointed to by the | |
703 | + * directory, so declare success. Somebody freed | |
704 | + * it. | |
705 | + */ | |
706 | + if (!actual_old_val) | |
707 | + return 0; | |
708 | + /* | |
709 | + * Something messed with the bounds directory | |
710 | + * entry. We hold mmap_sem for read or write | |
711 | + * here, so it could not be a _new_ bounds table | |
712 | + * that someone just allocated. Something is | |
713 | + * wrong, so pass up the error and SIGSEGV. | |
714 | + */ | |
715 | + return -EINVAL; | |
716 | + } | |
717 | + | |
718 | + /* | |
719 | + * Note, we are likely being called under do_munmap() already. To | |
720 | +	 * avoid recursion, do_munmap() checks whether the region it is | |
721 | +	 * unmapping is itself a bounds table, via the VM_MPX flag. | |
722 | + */ | |
723 | + return do_munmap(mm, bt_addr, MPX_BT_SIZE_BYTES); | |
724 | +} | |
725 | + | |
726 | +/* | |
727 | + * If the bounds table pointed to by bounds directory entry 'bd_entry' | |
728 | + * is not shared, unmap the whole bounds table. Otherwise, only free | |
729 | + * the backing physical pages of the bounds table entries covered | |
730 | + * by the virtual address region start...end. | |
731 | + */ | |
732 | +static int unmap_shared_bt(struct mm_struct *mm, | |
733 | + long __user *bd_entry, unsigned long start, | |
734 | + unsigned long end, bool prev_shared, bool next_shared) | |
735 | +{ | |
736 | + unsigned long bt_addr; | |
737 | + int ret; | |
738 | + | |
739 | + ret = get_bt_addr(mm, bd_entry, &bt_addr); | |
740 | + /* | |
741 | + * We could see an "error" ret for not-present bounds | |
742 | + * tables (not really an error), or actual errors, but | |
743 | + * stop unmapping either way. | |
744 | + */ | |
745 | + if (ret) | |
746 | + return ret; | |
747 | + | |
748 | + if (prev_shared && next_shared) | |
749 | + ret = zap_bt_entries(mm, bt_addr, | |
750 | + bt_addr+MPX_GET_BT_ENTRY_OFFSET(start), | |
751 | + bt_addr+MPX_GET_BT_ENTRY_OFFSET(end)); | |
752 | + else if (prev_shared) | |
753 | + ret = zap_bt_entries(mm, bt_addr, | |
754 | + bt_addr+MPX_GET_BT_ENTRY_OFFSET(start), | |
755 | + bt_addr+MPX_BT_SIZE_BYTES); | |
756 | + else if (next_shared) | |
757 | + ret = zap_bt_entries(mm, bt_addr, bt_addr, | |
758 | + bt_addr+MPX_GET_BT_ENTRY_OFFSET(end)); | |
759 | + else | |
760 | + ret = unmap_single_bt(mm, bd_entry, bt_addr); | |
761 | + | |
762 | + return ret; | |
763 | +} | |
764 | + | |
765 | +/* | |
766 | + * A virtual address region being munmap()ed might share bounds tables | |
767 | + * with adjacent VMAs. We only need to free the backing physical | |
768 | + * memory of the shared bounds table entries covered by this virtual | |
769 | + * address region. | |
770 | + */ | |
771 | +static int unmap_edge_bts(struct mm_struct *mm, | |
772 | + unsigned long start, unsigned long end) | |
773 | +{ | |
774 | + int ret; | |
775 | + long __user *bde_start, *bde_end; | |
776 | + struct vm_area_struct *prev, *next; | |
777 | + bool prev_shared = false, next_shared = false; | |
778 | + | |
779 | + bde_start = mm->bd_addr + MPX_GET_BD_ENTRY_OFFSET(start); | |
780 | + bde_end = mm->bd_addr + MPX_GET_BD_ENTRY_OFFSET(end-1); | |
781 | + | |
782 | + /* | |
783 | + * Check whether bde_start and bde_end are shared with adjacent | |
784 | + * VMAs. | |
785 | + * | |
786 | +	 * We already unlinked the VMAs from the mm's rbtree so 'start' | |
787 | + * is guaranteed to be in a hole. This gets us the first VMA | |
788 | + * before the hole in to 'prev' and the next VMA after the hole | |
789 | + * in to 'next'. | |
790 | + */ | |
791 | + next = find_vma_prev(mm, start, &prev); | |
792 | + if (prev && (mm->bd_addr + MPX_GET_BD_ENTRY_OFFSET(prev->vm_end-1)) | |
793 | + == bde_start) | |
794 | + prev_shared = true; | |
795 | + if (next && (mm->bd_addr + MPX_GET_BD_ENTRY_OFFSET(next->vm_start)) | |
796 | + == bde_end) | |
797 | + next_shared = true; | |
798 | + | |
799 | + /* | |
800 | + * This virtual address region being munmap()ed is only | |
801 | + * covered by one bounds table. | |
802 | + * | |
803 | + * In this case, if this table is also shared with adjacent | |
804 | + * VMAs, only part of the backing physical memory of the bounds | |
805 | +	 * table needs to be freed. Otherwise the whole bounds table | |
806 | +	 * needs to be unmapped. | |
807 | + */ | |
808 | + if (bde_start == bde_end) { | |
809 | + return unmap_shared_bt(mm, bde_start, start, end, | |
810 | + prev_shared, next_shared); | |
811 | + } | |
812 | + | |
813 | + /* | |
814 | +	 * If more than one bounds table is covered by the virtual | |
815 | +	 * address region being munmap()ed, we need to separately check | |
816 | + * whether bde_start and bde_end are shared with adjacent VMAs. | |
817 | + */ | |
818 | + ret = unmap_shared_bt(mm, bde_start, start, end, prev_shared, false); | |
819 | + if (ret) | |
820 | + return ret; | |
821 | + ret = unmap_shared_bt(mm, bde_end, start, end, false, next_shared); | |
822 | + if (ret) | |
823 | + return ret; | |
824 | + | |
825 | + return 0; | |
826 | +} | |
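
A worked example, assuming the 64-bit geometry where one bounds table tracks pointers stored in one 1MB-aligned slice of the address space: munmap()ing [1MB+4KB, 3MB) yields bde_start for the slice at 1MB and bde_end for the slice at 2MB. If the VMA just below the hole ends inside the 1MB slice, bde_start's table is shared, so only its entries covering [1MB+4KB, 2MB) are zapped; the table for the 2MB slice is not shared on either side (the next VMA starts at or above 3MB), so it is unmapped outright.
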
827 | + | |
828 | +static int mpx_unmap_tables(struct mm_struct *mm, | |
829 | + unsigned long start, unsigned long end) | |
830 | +{ | |
831 | + int ret; | |
832 | + long __user *bd_entry, *bde_start, *bde_end; | |
833 | + unsigned long bt_addr; | |
834 | + | |
835 | + /* | |
836 | + * "Edge" bounds tables are those which are being used by the region | |
837 | + * (start -> end), but that may be shared with adjacent areas. If they | |
838 | + * turn out to be completely unshared, they will be freed. If they are | |
839 | + * shared, we will free the backing store (like an MADV_DONTNEED) for | |
840 | + * areas used by this region. | |
841 | + */ | |
842 | + ret = unmap_edge_bts(mm, start, end); | |
843 | + switch (ret) { | |
844 | + /* non-present tables are OK */ | |
845 | + case 0: | |
846 | + case -ENOENT: | |
847 | + /* Success, or no tables to unmap */ | |
848 | + break; | |
849 | + case -EINVAL: | |
850 | + case -EFAULT: | |
851 | + default: | |
852 | + return ret; | |
853 | + } | |
854 | + | |
855 | + /* | |
856 | +	 * Only unmap the bounds tables that are: | |
857 | +	 * 1. fully covered | |
858 | +	 * 2. not at the edges of the mapping, even if fully aligned | |
859 | + */ | |
860 | + bde_start = mm->bd_addr + MPX_GET_BD_ENTRY_OFFSET(start); | |
861 | + bde_end = mm->bd_addr + MPX_GET_BD_ENTRY_OFFSET(end-1); | |
862 | + for (bd_entry = bde_start + 1; bd_entry < bde_end; bd_entry++) { | |
863 | + ret = get_bt_addr(mm, bd_entry, &bt_addr); | |
864 | + switch (ret) { | |
865 | + case 0: | |
866 | + break; | |
867 | + case -ENOENT: | |
868 | + /* No table here, try the next one */ | |
869 | + continue; | |
870 | + case -EINVAL: | |
871 | + case -EFAULT: | |
872 | + default: | |
873 | + /* | |
874 | + * Note: we are being strict here. | |
875 | + * Any time we run in to an issue | |
876 | + * unmapping tables, we stop and | |
877 | + * SIGSEGV. | |
878 | + */ | |
879 | + return ret; | |
880 | + } | |
881 | + | |
882 | + ret = unmap_single_bt(mm, bd_entry, bt_addr); | |
883 | + if (ret) | |
884 | + return ret; | |
885 | + } | |
886 | + | |
887 | + return 0; | |
888 | +} | |
889 | + | |
890 | +/* | |
891 | + * Free unused bounds tables covered in a virtual address region being | |
892 | + * munmap()ed. Assume end > start. | |
893 | + * | |
894 | + * This function will be called by do_munmap(), and the VMAs covering | |
895 | + * the virtual address region start...end have already been split if | |
896 | + * necessary, and the 'vma' is the first vma in this range (start -> end). | |
897 | + */ | |
898 | +void mpx_notify_unmap(struct mm_struct *mm, struct vm_area_struct *vma, | |
899 | + unsigned long start, unsigned long end) | |
900 | +{ | |
901 | + int ret; | |
902 | + | |
903 | + /* | |
904 | + * Refuse to do anything unless userspace has asked | |
905 | + * the kernel to help manage the bounds tables, | |
906 | +	 * the kernel to help manage the bounds tables. | |
907 | + if (!kernel_managing_mpx_tables(current->mm)) | |
908 | + return; | |
909 | + /* | |
910 | + * This will look across the entire 'start -> end' range, | |
911 | + * and find all of the non-VM_MPX VMAs. | |
912 | + * | |
913 | + * To avoid recursion, if a VM_MPX vma is found in the range | |
914 | +	 * (start->end), we stop and do nothing further. This | |
915 | + * recursion represents having bounds tables for bounds tables, | |
916 | + * which should not occur normally. Being strict about it here | |
917 | + * helps ensure that we do not have an exploitable stack overflow. | |
918 | + */ | |
919 | + do { | |
920 | + if (vma->vm_flags & VM_MPX) | |
921 | + return; | |
922 | + vma = vma->vm_next; | |
923 | + } while (vma && vma->vm_start < end); | |
924 | + | |
925 | + ret = mpx_unmap_tables(mm, start, end); | |
926 | + if (ret) | |
927 | + force_sig(SIGSEGV, current); | |
928 | +} |
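
For context, munmap() reaches mpx_notify_unmap() through the new arch_unmap() hook added earlier in this series; the x86 version is essentially the following simplified sketch of the hook wired up in arch/x86/include/asm/mmu_context.h:

	static inline void arch_unmap(struct mm_struct *mm,
				      struct vm_area_struct *vma,
				      unsigned long start, unsigned long end)
	{
		mpx_notify_unmap(mm, vma, start, end);
	}
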
arch/x86/tools/insn_sanity.c
arch/x86/tools/test_get_len.c
fs/exec.c
fs/proc/task_mmu.c
... | ... | @@ -552,6 +552,9 @@ |
552 | 552 | [ilog2(VM_GROWSDOWN)] = "gd", |
553 | 553 | [ilog2(VM_PFNMAP)] = "pf", |
554 | 554 | [ilog2(VM_DENYWRITE)] = "dw", |
555 | +#ifdef CONFIG_X86_INTEL_MPX | |
556 | + [ilog2(VM_MPX)] = "mp", | |
557 | +#endif | |
555 | 558 | [ilog2(VM_LOCKED)] = "lo", |
556 | 559 | [ilog2(VM_IO)] = "io", |
557 | 560 | [ilog2(VM_SEQ_READ)] = "sr", |
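
With the "mp" key in place, bounds-table VMAs can be spotted from userspace. An illustrative (not captured) VmFlags line from /proc/<pid>/smaps for such a VMA might read:

	VmFlags: rd wr mr mw me ac mp
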
include/asm-generic/mm_hooks.h
1 | 1 | /* |
2 | - * Define generic no-op hooks for arch_dup_mmap and arch_exit_mmap, to | |
3 | - * be included in asm-FOO/mmu_context.h for any arch FOO which doesn't | |
4 | - * need to hook these. | |
2 | + * Define generic no-op hooks for arch_dup_mmap, arch_exit_mmap | |
3 | + * and arch_unmap to be included in asm-FOO/mmu_context.h for any | |
4 | + * arch FOO which doesn't need to hook these. | |
5 | 5 | */ |
6 | 6 | #ifndef _ASM_GENERIC_MM_HOOKS_H |
7 | 7 | #define _ASM_GENERIC_MM_HOOKS_H |
... | ... | @@ -12,6 +12,17 @@ |
12 | 12 | } |
13 | 13 | |
14 | 14 | static inline void arch_exit_mmap(struct mm_struct *mm) |
15 | +{ | |
16 | +} | |
17 | + | |
18 | +static inline void arch_unmap(struct mm_struct *mm, | |
19 | + struct vm_area_struct *vma, | |
20 | + unsigned long start, unsigned long end) | |
21 | +{ | |
22 | +} | |
23 | + | |
24 | +static inline void arch_bprm_mm_init(struct mm_struct *mm, | |
25 | + struct vm_area_struct *vma) | |
15 | 26 | { |
16 | 27 | } |
17 | 28 |
include/linux/mm.h
... | ... | @@ -128,6 +128,7 @@ |
128 | 128 | #define VM_HUGETLB 0x00400000 /* Huge TLB Page VM */ |
129 | 129 | #define VM_NONLINEAR 0x00800000 /* Is non-linear (remap_file_pages) */ |
130 | 130 | #define VM_ARCH_1 0x01000000 /* Architecture-specific flag */ |
131 | +#define VM_ARCH_2 0x02000000 | |
131 | 132 | #define VM_DONTDUMP 0x04000000 /* Do not include in the core dump */ |
132 | 133 | |
133 | 134 | #ifdef CONFIG_MEM_SOFT_DIRTY |
... | ... | @@ -153,6 +154,11 @@ |
153 | 154 | # define VM_GROWSUP VM_ARCH_1 |
154 | 155 | #elif !defined(CONFIG_MMU) |
155 | 156 | # define VM_MAPPED_COPY VM_ARCH_1 /* T if mapped copy of data (nommu mmap) */ |
157 | +#endif | |
158 | + | |
159 | +#if defined(CONFIG_X86) | |
160 | +/* MPX specific bounds table or bounds directory */ | |
161 | +# define VM_MPX VM_ARCH_2 | |
156 | 162 | #endif |
157 | 163 | |
158 | 164 | #ifndef VM_GROWSUP |
include/linux/mm_types.h
... | ... | @@ -454,6 +454,10 @@ |
454 | 454 | bool tlb_flush_pending; |
455 | 455 | #endif |
456 | 456 | struct uprobes_state uprobes_state; |
457 | +#ifdef CONFIG_X86_INTEL_MPX | |
458 | + /* address of the bounds directory */ | |
459 | + void __user *bd_addr; | |
460 | +#endif | |
457 | 461 | }; |
458 | 462 | |
459 | 463 | static inline void mm_init_cpumask(struct mm_struct *mm) |
include/uapi/asm-generic/siginfo.h
... | ... | @@ -91,6 +91,10 @@ |
91 | 91 | int _trapno; /* TRAP # which caused the signal */ |
92 | 92 | #endif |
93 | 93 | short _addr_lsb; /* LSB of the reported address */ |
94 | + struct { | |
95 | + void __user *_lower; | |
96 | + void __user *_upper; | |
97 | + } _addr_bnd; | |
94 | 98 | } _sigfault; |
95 | 99 | |
96 | 100 | /* SIGPOLL */ |
... | ... | @@ -131,6 +135,8 @@ |
131 | 135 | #define si_trapno _sifields._sigfault._trapno |
132 | 136 | #endif |
133 | 137 | #define si_addr_lsb _sifields._sigfault._addr_lsb |
138 | +#define si_lower _sifields._sigfault._addr_bnd._lower | |
139 | +#define si_upper _sifields._sigfault._addr_bnd._upper | |
134 | 140 | #define si_band _sifields._sigpoll._band |
135 | 141 | #define si_fd _sifields._sigpoll._fd |
136 | 142 | #ifdef __ARCH_SIGSYS |
... | ... | @@ -199,7 +205,8 @@ |
199 | 205 | */ |
200 | 206 | #define SEGV_MAPERR (__SI_FAULT|1) /* address not mapped to object */ |
201 | 207 | #define SEGV_ACCERR (__SI_FAULT|2) /* invalid permissions for mapped object */ |
202 | -#define NSIGSEGV 2 | |
208 | +#define SEGV_BNDERR (__SI_FAULT|3) /* failed address bound checks */ | |
209 | +#define NSIGSEGV 3 | |
203 | 210 | |
204 | 211 | /* |
205 | 212 | * SIGBUS si_codes |
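
A hedged userspace sketch of consuming the new fields (it assumes libc headers that already expose SEGV_BNDERR and the si_lower/si_upper accessors from this patch; printf() in a signal handler is not async-signal-safe and is used for illustration only):

	#include <signal.h>
	#include <stdio.h>
	#include <stdlib.h>

	static void bnd_handler(int sig, siginfo_t *si, void *uctx)
	{
		if (si->si_code == SEGV_BNDERR)
			printf("bound violation: %p not in [%p, %p]\n",
			       si->si_addr, si->si_lower, si->si_upper);
		_Exit(1);
	}

	int main(void)
	{
		struct sigaction sa = { 0 };

		sa.sa_sigaction = bnd_handler;
		sa.sa_flags = SA_SIGINFO;
		sigaction(SIGSEGV, &sa, NULL);
		/* ... run MPX-instrumented code here ... */
		return 0;
	}
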
include/uapi/linux/prctl.h
... | ... | @@ -179,5 +179,11 @@ |
179 | 179 | #define PR_SET_THP_DISABLE 41 |
180 | 180 | #define PR_GET_THP_DISABLE 42 |
181 | 181 | |
182 | +/* | |
183 | + * Tell the kernel to start/stop helping userspace manage bounds tables. | |
184 | + */ | |
185 | +#define PR_MPX_ENABLE_MANAGEMENT 43 | |
186 | +#define PR_MPX_DISABLE_MANAGEMENT 44 | |
187 | + | |
182 | 188 | #endif /* _LINUX_PRCTL_H */ |
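
A minimal calling sketch for the two new options (the fallback defines are only for userspace headers that predate this patch; on kernels without MPX support the calls fail, so check the return value):

	#include <stdio.h>
	#include <sys/prctl.h>

	#ifndef PR_MPX_ENABLE_MANAGEMENT
	# define PR_MPX_ENABLE_MANAGEMENT	43
	# define PR_MPX_DISABLE_MANAGEMENT	44
	#endif

	int main(void)
	{
		if (prctl(PR_MPX_ENABLE_MANAGEMENT, 0, 0, 0, 0))
			perror("PR_MPX_ENABLE_MANAGEMENT");	/* ENXIO without MPX */

		/* ... MPX-instrumented work ... */

		if (prctl(PR_MPX_DISABLE_MANAGEMENT, 0, 0, 0, 0))
			perror("PR_MPX_DISABLE_MANAGEMENT");
		return 0;
	}
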
kernel/signal.c
... | ... | @@ -2756,6 +2756,10 @@ |
2756 | 2756 | if (from->si_code == BUS_MCEERR_AR || from->si_code == BUS_MCEERR_AO) |
2757 | 2757 | err |= __put_user(from->si_addr_lsb, &to->si_addr_lsb); |
2758 | 2758 | #endif |
2759 | +#ifdef SEGV_BNDERR | |
2760 | + err |= __put_user(from->si_lower, &to->si_lower); | |
2761 | + err |= __put_user(from->si_upper, &to->si_upper); | |
2762 | +#endif | |
2759 | 2763 | break; |
2760 | 2764 | case __SI_CHLD: |
2761 | 2765 | err |= __put_user(from->si_pid, &to->si_pid); |
kernel/sys.c
... | ... | @@ -91,6 +91,12 @@ |
91 | 91 | #ifndef SET_TSC_CTL |
92 | 92 | # define SET_TSC_CTL(a) (-EINVAL) |
93 | 93 | #endif |
94 | +#ifndef MPX_ENABLE_MANAGEMENT | |
95 | +# define MPX_ENABLE_MANAGEMENT(a) (-EINVAL) | |
96 | +#endif | |
97 | +#ifndef MPX_DISABLE_MANAGEMENT | |
98 | +# define MPX_DISABLE_MANAGEMENT(a) (-EINVAL) | |
99 | +#endif | |
94 | 100 | |
95 | 101 | /* |
96 | 102 | * this is where the system-wide overflow UID and GID are defined, for |
... | ... | @@ -2202,6 +2208,12 @@ |
2202 | 2208 | else |
2203 | 2209 | me->mm->def_flags &= ~VM_NOHUGEPAGE; |
2204 | 2210 | up_write(&me->mm->mmap_sem); |
2211 | + break; | |
2212 | + case PR_MPX_ENABLE_MANAGEMENT: | |
2213 | + error = MPX_ENABLE_MANAGEMENT(me); | |
2214 | + break; | |
2215 | + case PR_MPX_DISABLE_MANAGEMENT: | |
2216 | + error = MPX_DISABLE_MANAGEMENT(me); | |
2205 | 2217 | break; |
2206 | 2218 | default: |
2207 | 2219 | error = -EINVAL; |
mm/mmap.c