Commit 39c715b71740c4a78ba4769fb54826929bac03cb
Committed by: Linus Torvalds
1 parent: 84929801e1
Exists in master and in 4 other branches
[PATCH] smp_processor_id() cleanup
This patch implements a number of smp_processor_id() cleanup ideas that Arjan van de Ven and I came up with.

The previous __smp_processor_id()/_smp_processor_id()/smp_processor_id() API spaghetti was hard to follow both on the implementation side and on the usage side. Some of the complexity arose from picking wrong names; some of it comes from the fact that not all architectures defined __smp_processor_id().

In the new code, there are two externally visible symbols:

- smp_processor_id(): debug variant.

- raw_smp_processor_id(): nondebug variant. Replaces all existing uses of _smp_processor_id() and __smp_processor_id(). Defined by every SMP architecture in include/asm-*/smp.h.

There is one new internal symbol, dependent on DEBUG_PREEMPT:

- debug_smp_processor_id(): internal debug variant, mapped to smp_processor_id().

Also, I moved debug_smp_processor_id() from lib/kernel_lock.c into a new lib/smp_processor_id.c file. All related comments got updated and/or clarified.

I have build/boot tested the following 8 .config combinations on x86:

  {SMP,UP} x {PREEMPT,!PREEMPT} x {DEBUG_PREEMPT,!DEBUG_PREEMPT}

I have also build/boot tested x64 on UP/PREEMPT/DEBUG_PREEMPT. (Other architectures are untested, but should work just fine.)

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Arjan van de Ven <arjan@infradead.org>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
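To make the layering concrete, here is a minimal sketch of how the two symbols are intended to stack. This is illustrative only: the exact macro bodies live in include/asm-*/smp.h and include/linux/smp.h, and the i386-style raw definition shown below is an assumed example, not a quote of the patch.

/* Per-architecture nondebug accessor, e.g. on an i386-like configuration: */
#define raw_smp_processor_id()	(current_thread_info()->cpu)

/* Generic layer: the debug variant only exists under CONFIG_DEBUG_PREEMPT. */
#ifdef CONFIG_DEBUG_PREEMPT
extern unsigned int debug_smp_processor_id(void);	/* lib/smp_processor_id.c */
# define smp_processor_id() debug_smp_processor_id()
#else
# define smp_processor_id() raw_smp_processor_id()
#endif

Callers that deliberately skip the preemption-safety check (such as the die() path in the diff below, which previously used _smp_processor_id()) switch to raw_smp_processor_id(); everything else keeps calling smp_processor_id() and gets the DEBUG_PREEMPT checking for free.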
Showing 37 changed files with 119 additions and 125 deletions. Inline diff:
- arch/i386/kernel/traps.c
- arch/i386/lib/delay.c
- arch/ppc/lib/locks.c
- arch/ppc64/kernel/idle.c
- arch/sh/lib/delay.c
- arch/sparc64/lib/delay.c
- arch/x86_64/lib/delay.c
- drivers/acpi/processor_idle.c
- drivers/input/gameport/gameport.c
- drivers/oprofile/buffer_sync.c
- fs/xfs/linux-2.6/xfs_linux.h
- include/asm-alpha/smp.h
- include/asm-arm/smp.h
- include/asm-i386/smp.h
- include/asm-ia64/smp.h
- include/asm-m32r/smp.h
- include/asm-mips/smp.h
- include/asm-parisc/smp.h
- include/asm-ppc/smp.h
- include/asm-ppc64/smp.h
- include/asm-s390/smp.h
- include/asm-sh/smp.h
- include/asm-sparc/smp.h
- include/asm-sparc64/smp.h
- include/asm-um/smp.h
- include/asm-x86_64/smp.h
- include/linux/mmzone.h
- include/linux/smp.h
- include/net/route.h
- include/net/snmp.h
- kernel/module.c
- kernel/power/smp.c
- kernel/sched.c
- kernel/stop_machine.c
- lib/Makefile
- lib/kernel_lock.c
- lib/smp_processor_id.c
arch/i386/kernel/traps.c
@@ -296,57 +296,57 @@
 void die(const char * str, struct pt_regs * regs, long err)
 {
 	static struct {
 		spinlock_t lock;
 		u32 lock_owner;
 		int lock_owner_depth;
 	} die = {
 		.lock = SPIN_LOCK_UNLOCKED,
 		.lock_owner = -1,
 		.lock_owner_depth = 0
 	};
 	static int die_counter;
 
-	if (die.lock_owner != _smp_processor_id()) {
+	if (die.lock_owner != raw_smp_processor_id()) {
 		console_verbose();
 		spin_lock_irq(&die.lock);
 		die.lock_owner = smp_processor_id();
 		die.lock_owner_depth = 0;
 		bust_spinlocks(1);
 	}
 
 	if (++die.lock_owner_depth < 3) {
 		int nl = 0;
 		handle_BUG(regs);
 		printk(KERN_ALERT "%s: %04lx [#%d]\n", str, err & 0xffff, ++die_counter);
 #ifdef CONFIG_PREEMPT
 		printk("PREEMPT ");
 		nl = 1;
 #endif
 #ifdef CONFIG_SMP
 		printk("SMP ");
 		nl = 1;
 #endif
 #ifdef CONFIG_DEBUG_PAGEALLOC
 		printk("DEBUG_PAGEALLOC");
 		nl = 1;
 #endif
 		if (nl)
 			printk("\n");
 		notify_die(DIE_OOPS, (char *)str, regs, err, 255, SIGSEGV);
 		show_registers(regs);
 	} else
 		printk(KERN_ERR "Recursive die() failure, output suppressed\n");
 
 	bust_spinlocks(0);
 	die.lock_owner = -1;
 	spin_unlock_irq(&die.lock);
 	if (in_interrupt())
 		panic("Fatal exception in interrupt");
 
 	if (panic_on_oops) {
 		printk(KERN_EMERG "Fatal exception: panic in 5 seconds\n");
 		ssleep(5);
 		panic("Fatal exception");
 	}
 	do_exit(SIGSEGV);
 }
899 | switch16_ptr = (unsigned long *)(stk + sizeof(struct pt_regs)); | 899 | switch16_ptr = (unsigned long *)(stk + sizeof(struct pt_regs)); |
900 | regs = (struct pt_regs *)stk; | 900 | regs = (struct pt_regs *)stk; |
901 | /* now the switch32 on 16bit stack */ | 901 | /* now the switch32 on 16bit stack */ |
902 | stack_bot = (unsigned long)&per_cpu(cpu_16bit_stack, cpu); | 902 | stack_bot = (unsigned long)&per_cpu(cpu_16bit_stack, cpu); |
903 | stack_top = stack_bot + CPU_16BIT_STACK_SIZE; | 903 | stack_top = stack_bot + CPU_16BIT_STACK_SIZE; |
904 | switch32_ptr = (unsigned long *)(stack_top - 8); | 904 | switch32_ptr = (unsigned long *)(stack_top - 8); |
905 | iret_frame16_off = CPU_16BIT_STACK_SIZE - 8 - 20; | 905 | iret_frame16_off = CPU_16BIT_STACK_SIZE - 8 - 20; |
906 | /* copy iret frame on 16bit stack */ | 906 | /* copy iret frame on 16bit stack */ |
907 | memcpy((void *)(stack_bot + iret_frame16_off), &regs->eip, 20); | 907 | memcpy((void *)(stack_bot + iret_frame16_off), &regs->eip, 20); |
908 | /* fill in the switch pointers */ | 908 | /* fill in the switch pointers */ |
909 | switch16_ptr[0] = (regs->esp & 0xffff0000) | iret_frame16_off; | 909 | switch16_ptr[0] = (regs->esp & 0xffff0000) | iret_frame16_off; |
910 | switch16_ptr[1] = __ESPFIX_SS; | 910 | switch16_ptr[1] = __ESPFIX_SS; |
911 | switch32_ptr[0] = (unsigned long)stk + sizeof(struct pt_regs) + | 911 | switch32_ptr[0] = (unsigned long)stk + sizeof(struct pt_regs) + |
912 | 8 - CPU_16BIT_STACK_SIZE; | 912 | 8 - CPU_16BIT_STACK_SIZE; |
913 | switch32_ptr[1] = __KERNEL_DS; | 913 | switch32_ptr[1] = __KERNEL_DS; |
914 | } | 914 | } |
915 | 915 | ||
916 | fastcall unsigned char * fixup_x86_bogus_stack(unsigned short sp) | 916 | fastcall unsigned char * fixup_x86_bogus_stack(unsigned short sp) |
917 | { | 917 | { |
918 | unsigned long *switch32_ptr; | 918 | unsigned long *switch32_ptr; |
919 | unsigned char *stack16, *stack32; | 919 | unsigned char *stack16, *stack32; |
920 | unsigned long stack_top, stack_bot; | 920 | unsigned long stack_top, stack_bot; |
921 | int len; | 921 | int len; |
922 | int cpu = smp_processor_id(); | 922 | int cpu = smp_processor_id(); |
923 | stack_bot = (unsigned long)&per_cpu(cpu_16bit_stack, cpu); | 923 | stack_bot = (unsigned long)&per_cpu(cpu_16bit_stack, cpu); |
924 | stack_top = stack_bot + CPU_16BIT_STACK_SIZE; | 924 | stack_top = stack_bot + CPU_16BIT_STACK_SIZE; |
925 | switch32_ptr = (unsigned long *)(stack_top - 8); | 925 | switch32_ptr = (unsigned long *)(stack_top - 8); |
926 | /* copy the data from 16bit stack to 32bit stack */ | 926 | /* copy the data from 16bit stack to 32bit stack */ |
927 | len = CPU_16BIT_STACK_SIZE - 8 - sp; | 927 | len = CPU_16BIT_STACK_SIZE - 8 - sp; |
928 | stack16 = (unsigned char *)(stack_bot + sp); | 928 | stack16 = (unsigned char *)(stack_bot + sp); |
929 | stack32 = (unsigned char *) | 929 | stack32 = (unsigned char *) |
930 | (switch32_ptr[0] + CPU_16BIT_STACK_SIZE - 8 - len); | 930 | (switch32_ptr[0] + CPU_16BIT_STACK_SIZE - 8 - len); |
931 | memcpy(stack32, stack16, len); | 931 | memcpy(stack32, stack16, len); |
932 | return stack32; | 932 | return stack32; |
933 | } | 933 | } |
934 | 934 | ||
935 | /* | 935 | /* |
936 | * 'math_state_restore()' saves the current math information in the | 936 | * 'math_state_restore()' saves the current math information in the |
937 | * old math state array, and gets the new ones from the current task | 937 | * old math state array, and gets the new ones from the current task |
938 | * | 938 | * |
939 | * Careful.. There are problems with IBM-designed IRQ13 behaviour. | 939 | * Careful.. There are problems with IBM-designed IRQ13 behaviour. |
940 | * Don't touch unless you *really* know how it works. | 940 | * Don't touch unless you *really* know how it works. |
941 | * | 941 | * |
942 | * Must be called with kernel preemption disabled (in this case, | 942 | * Must be called with kernel preemption disabled (in this case, |
943 | * local interrupts are disabled at the call-site in entry.S). | 943 | * local interrupts are disabled at the call-site in entry.S). |
944 | */ | 944 | */ |
945 | asmlinkage void math_state_restore(struct pt_regs regs) | 945 | asmlinkage void math_state_restore(struct pt_regs regs) |
946 | { | 946 | { |
947 | struct thread_info *thread = current_thread_info(); | 947 | struct thread_info *thread = current_thread_info(); |
948 | struct task_struct *tsk = thread->task; | 948 | struct task_struct *tsk = thread->task; |
949 | 949 | ||
950 | clts(); /* Allow maths ops (or we recurse) */ | 950 | clts(); /* Allow maths ops (or we recurse) */ |
951 | if (!tsk_used_math(tsk)) | 951 | if (!tsk_used_math(tsk)) |
952 | init_fpu(tsk); | 952 | init_fpu(tsk); |
953 | restore_fpu(tsk); | 953 | restore_fpu(tsk); |
954 | thread->status |= TS_USEDFPU; /* So we fnsave on switch_to() */ | 954 | thread->status |= TS_USEDFPU; /* So we fnsave on switch_to() */ |
955 | } | 955 | } |
956 | 956 | ||
957 | #ifndef CONFIG_MATH_EMULATION | 957 | #ifndef CONFIG_MATH_EMULATION |
958 | 958 | ||
959 | asmlinkage void math_emulate(long arg) | 959 | asmlinkage void math_emulate(long arg) |
960 | { | 960 | { |
961 | printk("math-emulation not enabled and no coprocessor found.\n"); | 961 | printk("math-emulation not enabled and no coprocessor found.\n"); |
962 | printk("killing %s.\n",current->comm); | 962 | printk("killing %s.\n",current->comm); |
963 | force_sig(SIGFPE,current); | 963 | force_sig(SIGFPE,current); |
964 | schedule(); | 964 | schedule(); |
965 | } | 965 | } |
966 | 966 | ||
967 | #endif /* CONFIG_MATH_EMULATION */ | 967 | #endif /* CONFIG_MATH_EMULATION */ |
968 | 968 | ||
969 | #ifdef CONFIG_X86_F00F_BUG | 969 | #ifdef CONFIG_X86_F00F_BUG |
970 | void __init trap_init_f00f_bug(void) | 970 | void __init trap_init_f00f_bug(void) |
971 | { | 971 | { |
972 | __set_fixmap(FIX_F00F_IDT, __pa(&idt_table), PAGE_KERNEL_RO); | 972 | __set_fixmap(FIX_F00F_IDT, __pa(&idt_table), PAGE_KERNEL_RO); |
973 | 973 | ||
974 | /* | 974 | /* |
975 | * Update the IDT descriptor and reload the IDT so that | 975 | * Update the IDT descriptor and reload the IDT so that |
976 | * it uses the read-only mapped virtual address. | 976 | * it uses the read-only mapped virtual address. |
977 | */ | 977 | */ |
978 | idt_descr.address = fix_to_virt(FIX_F00F_IDT); | 978 | idt_descr.address = fix_to_virt(FIX_F00F_IDT); |
979 | __asm__ __volatile__("lidt %0" : : "m" (idt_descr)); | 979 | __asm__ __volatile__("lidt %0" : : "m" (idt_descr)); |
980 | } | 980 | } |
981 | #endif | 981 | #endif |
982 | 982 | ||
983 | #define _set_gate(gate_addr,type,dpl,addr,seg) \ | 983 | #define _set_gate(gate_addr,type,dpl,addr,seg) \ |
984 | do { \ | 984 | do { \ |
985 | int __d0, __d1; \ | 985 | int __d0, __d1; \ |
986 | __asm__ __volatile__ ("movw %%dx,%%ax\n\t" \ | 986 | __asm__ __volatile__ ("movw %%dx,%%ax\n\t" \ |
987 | "movw %4,%%dx\n\t" \ | 987 | "movw %4,%%dx\n\t" \ |
988 | "movl %%eax,%0\n\t" \ | 988 | "movl %%eax,%0\n\t" \ |
989 | "movl %%edx,%1" \ | 989 | "movl %%edx,%1" \ |
990 | :"=m" (*((long *) (gate_addr))), \ | 990 | :"=m" (*((long *) (gate_addr))), \ |
991 | "=m" (*(1+(long *) (gate_addr))), "=&a" (__d0), "=&d" (__d1) \ | 991 | "=m" (*(1+(long *) (gate_addr))), "=&a" (__d0), "=&d" (__d1) \ |
992 | :"i" ((short) (0x8000+(dpl<<13)+(type<<8))), \ | 992 | :"i" ((short) (0x8000+(dpl<<13)+(type<<8))), \ |
993 | "3" ((char *) (addr)),"2" ((seg) << 16)); \ | 993 | "3" ((char *) (addr)),"2" ((seg) << 16)); \ |
994 | } while (0) | 994 | } while (0) |
995 | 995 | ||
996 | 996 | ||
997 | /* | 997 | /* |
998 | * This needs to use 'idt_table' rather than 'idt', and | 998 | * This needs to use 'idt_table' rather than 'idt', and |
999 | * thus use the _nonmapped_ version of the IDT, as the | 999 | * thus use the _nonmapped_ version of the IDT, as the |
1000 | * Pentium F0 0F bugfix can have resulted in the mapped | 1000 | * Pentium F0 0F bugfix can have resulted in the mapped |
1001 | * IDT being write-protected. | 1001 | * IDT being write-protected. |
1002 | */ | 1002 | */ |
1003 | void set_intr_gate(unsigned int n, void *addr) | 1003 | void set_intr_gate(unsigned int n, void *addr) |
1004 | { | 1004 | { |
1005 | _set_gate(idt_table+n,14,0,addr,__KERNEL_CS); | 1005 | _set_gate(idt_table+n,14,0,addr,__KERNEL_CS); |
1006 | } | 1006 | } |
1007 | 1007 | ||
1008 | /* | 1008 | /* |
1009 | * This routine sets up an interrupt gate at directory privilege level 3. | 1009 | * This routine sets up an interrupt gate at directory privilege level 3. |
1010 | */ | 1010 | */ |
1011 | static inline void set_system_intr_gate(unsigned int n, void *addr) | 1011 | static inline void set_system_intr_gate(unsigned int n, void *addr) |
1012 | { | 1012 | { |
1013 | _set_gate(idt_table+n, 14, 3, addr, __KERNEL_CS); | 1013 | _set_gate(idt_table+n, 14, 3, addr, __KERNEL_CS); |
1014 | } | 1014 | } |
1015 | 1015 | ||
1016 | static void __init set_trap_gate(unsigned int n, void *addr) | 1016 | static void __init set_trap_gate(unsigned int n, void *addr) |
1017 | { | 1017 | { |
1018 | _set_gate(idt_table+n,15,0,addr,__KERNEL_CS); | 1018 | _set_gate(idt_table+n,15,0,addr,__KERNEL_CS); |
1019 | } | 1019 | } |
1020 | 1020 | ||
1021 | static void __init set_system_gate(unsigned int n, void *addr) | 1021 | static void __init set_system_gate(unsigned int n, void *addr) |
1022 | { | 1022 | { |
1023 | _set_gate(idt_table+n,15,3,addr,__KERNEL_CS); | 1023 | _set_gate(idt_table+n,15,3,addr,__KERNEL_CS); |
1024 | } | 1024 | } |
1025 | 1025 | ||
1026 | static void __init set_task_gate(unsigned int n, unsigned int gdt_entry) | 1026 | static void __init set_task_gate(unsigned int n, unsigned int gdt_entry) |
1027 | { | 1027 | { |
1028 | _set_gate(idt_table+n,5,0,0,(gdt_entry<<3)); | 1028 | _set_gate(idt_table+n,5,0,0,(gdt_entry<<3)); |
1029 | } | 1029 | } |
1030 | 1030 | ||
1031 | 1031 | ||
1032 | void __init trap_init(void) | 1032 | void __init trap_init(void) |
1033 | { | 1033 | { |
1034 | #ifdef CONFIG_EISA | 1034 | #ifdef CONFIG_EISA |
1035 | void __iomem *p = ioremap(0x0FFFD9, 4); | 1035 | void __iomem *p = ioremap(0x0FFFD9, 4); |
1036 | if (readl(p) == 'E'+('I'<<8)+('S'<<16)+('A'<<24)) { | 1036 | if (readl(p) == 'E'+('I'<<8)+('S'<<16)+('A'<<24)) { |
1037 | EISA_bus = 1; | 1037 | EISA_bus = 1; |
1038 | } | 1038 | } |
1039 | iounmap(p); | 1039 | iounmap(p); |
1040 | #endif | 1040 | #endif |
1041 | 1041 | ||
1042 | #ifdef CONFIG_X86_LOCAL_APIC | 1042 | #ifdef CONFIG_X86_LOCAL_APIC |
1043 | init_apic_mappings(); | 1043 | init_apic_mappings(); |
1044 | #endif | 1044 | #endif |
1045 | 1045 | ||
1046 | set_trap_gate(0,&divide_error); | 1046 | set_trap_gate(0,&divide_error); |
1047 | set_intr_gate(1,&debug); | 1047 | set_intr_gate(1,&debug); |
1048 | set_intr_gate(2,&nmi); | 1048 | set_intr_gate(2,&nmi); |
1049 | set_system_intr_gate(3, &int3); /* int3-5 can be called from all */ | 1049 | set_system_intr_gate(3, &int3); /* int3-5 can be called from all */ |
1050 | set_system_gate(4,&overflow); | 1050 | set_system_gate(4,&overflow); |
1051 | set_system_gate(5,&bounds); | 1051 | set_system_gate(5,&bounds); |
1052 | set_trap_gate(6,&invalid_op); | 1052 | set_trap_gate(6,&invalid_op); |
1053 | set_trap_gate(7,&device_not_available); | 1053 | set_trap_gate(7,&device_not_available); |
1054 | set_task_gate(8,GDT_ENTRY_DOUBLEFAULT_TSS); | 1054 | set_task_gate(8,GDT_ENTRY_DOUBLEFAULT_TSS); |
1055 | set_trap_gate(9,&coprocessor_segment_overrun); | 1055 | set_trap_gate(9,&coprocessor_segment_overrun); |
1056 | set_trap_gate(10,&invalid_TSS); | 1056 | set_trap_gate(10,&invalid_TSS); |
1057 | set_trap_gate(11,&segment_not_present); | 1057 | set_trap_gate(11,&segment_not_present); |
1058 | set_trap_gate(12,&stack_segment); | 1058 | set_trap_gate(12,&stack_segment); |
1059 | set_trap_gate(13,&general_protection); | 1059 | set_trap_gate(13,&general_protection); |
1060 | set_intr_gate(14,&page_fault); | 1060 | set_intr_gate(14,&page_fault); |
1061 | set_trap_gate(15,&spurious_interrupt_bug); | 1061 | set_trap_gate(15,&spurious_interrupt_bug); |
1062 | set_trap_gate(16,&coprocessor_error); | 1062 | set_trap_gate(16,&coprocessor_error); |
1063 | set_trap_gate(17,&alignment_check); | 1063 | set_trap_gate(17,&alignment_check); |
1064 | #ifdef CONFIG_X86_MCE | 1064 | #ifdef CONFIG_X86_MCE |
1065 | set_trap_gate(18,&machine_check); | 1065 | set_trap_gate(18,&machine_check); |
1066 | #endif | 1066 | #endif |
1067 | set_trap_gate(19,&simd_coprocessor_error); | 1067 | set_trap_gate(19,&simd_coprocessor_error); |
1068 | 1068 | ||
1069 | set_system_gate(SYSCALL_VECTOR,&system_call); | 1069 | set_system_gate(SYSCALL_VECTOR,&system_call); |
1070 | 1070 | ||
1071 | /* | 1071 | /* |
1072 | * Should be a barrier for any external CPU state. | 1072 | * Should be a barrier for any external CPU state. |
1073 | */ | 1073 | */ |
1074 | cpu_init(); | 1074 | cpu_init(); |
1075 | 1075 | ||
1076 | trap_init_hook(); | 1076 | trap_init_hook(); |
1077 | } | 1077 | } |
1078 | 1078 | ||
1079 | static int __init kstack_setup(char *s) | 1079 | static int __init kstack_setup(char *s) |
1080 | { | 1080 | { |
1081 | kstack_depth_to_print = simple_strtoul(s, NULL, 0); | 1081 | kstack_depth_to_print = simple_strtoul(s, NULL, 0); |
1082 | return 0; | 1082 | return 0; |
1083 | } | 1083 | } |
1084 | __setup("kstack=", kstack_setup); | 1084 | __setup("kstack=", kstack_setup); |
1085 | 1085 |
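
Note: the simd_math_error() code above decodes which unmasked SSE exception fired by shifting the MXCSR mask bits (0x1f80) down onto the status bits (0x3f). A minimal user-space sketch of that decoding follows; mxcsr_to_name() and the sample value are invented for illustration and are not part of the patch.

/* Hypothetical user-space sketch of the MXCSR decoding in
 * simd_math_error(): mask bits live at 0x1f80, status bits at 0x3f;
 * an exception is "unmasked" when its status bit is set and the
 * corresponding mask bit is clear. */
#include <stdio.h>

static const char *mxcsr_to_name(unsigned short mxcsr)
{
	unsigned int unmasked = ~((mxcsr & 0x1f80) >> 7) & (mxcsr & 0x3f);

	if (unmasked & 0x001)
		return "invalid operation";	/* FPE_FLTINV */
	if (unmasked & 0x012)
		return "underflow/denormal";	/* FPE_FLTUND */
	if (unmasked & 0x004)
		return "divide by zero";	/* FPE_FLTDIV */
	if (unmasked & 0x008)
		return "overflow";		/* FPE_FLTOVF */
	if (unmasked & 0x020)
		return "inexact result";	/* FPE_FLTRES */
	return "none";
}

int main(void)
{
	/* Example value: divide-by-zero flag set, its mask bit clear,
	 * all other exceptions masked. */
	printf("%s\n", mxcsr_to_name(0x1d84));
	return 0;
}

With the example value 0x1d84 the sketch prints "divide by zero", matching the FPE_FLTDIV case in the kernel switch above.
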
arch/i386/lib/delay.c
1 | /* | 1 | /* |
2 | * Precise Delay Loops for i386 | 2 | * Precise Delay Loops for i386 |
3 | * | 3 | * |
4 | * Copyright (C) 1993 Linus Torvalds | 4 | * Copyright (C) 1993 Linus Torvalds |
5 | * Copyright (C) 1997 Martin Mares <mj@atrey.karlin.mff.cuni.cz> | 5 | * Copyright (C) 1997 Martin Mares <mj@atrey.karlin.mff.cuni.cz> |
6 | * | 6 | * |
7 | * The __delay function must _NOT_ be inlined as its execution time | 7 | * The __delay function must _NOT_ be inlined as its execution time |
8 | * depends wildly on alignment on many x86 processors. The additional | 8 | * depends wildly on alignment on many x86 processors. The additional |
9 | * jump magic is needed to get the timing stable on all the CPU's | 9 | * jump magic is needed to get the timing stable on all the CPU's |
10 | * we have to worry about. | 10 | * we have to worry about. |
11 | */ | 11 | */ |
12 | 12 | ||
13 | #include <linux/config.h> | 13 | #include <linux/config.h> |
14 | #include <linux/sched.h> | 14 | #include <linux/sched.h> |
15 | #include <linux/delay.h> | 15 | #include <linux/delay.h> |
16 | #include <asm/processor.h> | 16 | #include <asm/processor.h> |
17 | #include <asm/delay.h> | 17 | #include <asm/delay.h> |
18 | #include <asm/timer.h> | 18 | #include <asm/timer.h> |
19 | 19 | ||
20 | #ifdef CONFIG_SMP | 20 | #ifdef CONFIG_SMP |
21 | #include <asm/smp.h> | 21 | #include <asm/smp.h> |
22 | #endif | 22 | #endif |
23 | 23 | ||
24 | extern struct timer_opts* timer; | 24 | extern struct timer_opts* timer; |
25 | 25 | ||
26 | void __delay(unsigned long loops) | 26 | void __delay(unsigned long loops) |
27 | { | 27 | { |
28 | cur_timer->delay(loops); | 28 | cur_timer->delay(loops); |
29 | } | 29 | } |
30 | 30 | ||
31 | inline void __const_udelay(unsigned long xloops) | 31 | inline void __const_udelay(unsigned long xloops) |
32 | { | 32 | { |
33 | int d0; | 33 | int d0; |
34 | xloops *= 4; | 34 | xloops *= 4; |
35 | __asm__("mull %0" | 35 | __asm__("mull %0" |
36 | :"=d" (xloops), "=&a" (d0) | 36 | :"=d" (xloops), "=&a" (d0) |
37 | :"1" (xloops),"0" (cpu_data[_smp_processor_id()].loops_per_jiffy * (HZ/4))); | 37 | :"1" (xloops),"0" (cpu_data[raw_smp_processor_id()].loops_per_jiffy * (HZ/4))); |
38 | __delay(++xloops); | 38 | __delay(++xloops); |
39 | } | 39 | } |
40 | 40 | ||
41 | void __udelay(unsigned long usecs) | 41 | void __udelay(unsigned long usecs) |
42 | { | 42 | { |
43 | __const_udelay(usecs * 0x000010c7); /* 2**32 / 1000000 (rounded up) */ | 43 | __const_udelay(usecs * 0x000010c7); /* 2**32 / 1000000 (rounded up) */ |
44 | } | 44 | } |
45 | 45 | ||
46 | void __ndelay(unsigned long nsecs) | 46 | void __ndelay(unsigned long nsecs) |
47 | { | 47 | { |
48 | __const_udelay(nsecs * 0x00005); /* 2**32 / 1000000000 (rounded up) */ | 48 | __const_udelay(nsecs * 0x00005); /* 2**32 / 1000000000 (rounded up) */ |
49 | } | 49 | } |
50 | 50 |
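
Note: the one-line change above is the pattern repeated throughout the patch: _smp_processor_id() becomes raw_smp_processor_id() at call sites where a stale CPU number is harmless, so the DEBUG_PREEMPT check behind smp_processor_id() would only produce false positives. A rough sketch of the resulting calling convention; the two helper functions and the per-CPU counter are invented for illustration.

/* Sketch of the calling convention the patch settles on; the helpers
 * and 'some_counter' are illustrative, not code from the tree. */
#include <linux/smp.h>
#include <linux/percpu.h>
#include <asm/processor.h>	/* cpu_data[], loops_per_jiffy */

static DEFINE_PER_CPU(unsigned long, some_counter);

/* The result must refer to the CPU we keep running on, so pin the
 * task with get_cpu() (implies preempt_disable()) and rely on the
 * checked smp_processor_id() semantics inside the pinned region. */
static void touch_this_cpu_state(void)
{
	int cpu = get_cpu();

	per_cpu(some_counter, cpu)++;
	put_cpu();		/* re-enable preemption */
}

/* A stale CPU number is harmless here (any CPU's calibration value
 * will do), so the raw variant is used and no DEBUG_PREEMPT warning
 * is generated in preemptible context. */
static unsigned long sample_loops_per_jiffy(void)
{
	return cpu_data[raw_smp_processor_id()].loops_per_jiffy;
}

__const_udelay() falls into the second category: if the task migrates mid-calculation it merely uses the previous CPU's loops_per_jiffy, which is close enough for a busy-wait.
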
arch/ppc/lib/locks.c
1 | /* | 1 | /* |
2 | * Locks for smp ppc | 2 | * Locks for smp ppc |
3 | * | 3 | * |
4 | * Written by Cort Dougan (cort@cs.nmt.edu) | 4 | * Written by Cort Dougan (cort@cs.nmt.edu) |
5 | */ | 5 | */ |
6 | 6 | ||
7 | #include <linux/config.h> | 7 | #include <linux/config.h> |
8 | #include <linux/sched.h> | 8 | #include <linux/sched.h> |
9 | #include <linux/spinlock.h> | 9 | #include <linux/spinlock.h> |
10 | #include <linux/module.h> | 10 | #include <linux/module.h> |
11 | #include <asm/ppc_asm.h> | 11 | #include <asm/ppc_asm.h> |
12 | #include <asm/smp.h> | 12 | #include <asm/smp.h> |
13 | 13 | ||
14 | #ifdef CONFIG_DEBUG_SPINLOCK | 14 | #ifdef CONFIG_DEBUG_SPINLOCK |
15 | 15 | ||
16 | #undef INIT_STUCK | 16 | #undef INIT_STUCK |
17 | #define INIT_STUCK 200000000 /*0xffffffff*/ | 17 | #define INIT_STUCK 200000000 /*0xffffffff*/ |
18 | 18 | ||
19 | /* | 19 | /* |
20 | * Try to acquire a spinlock. | 20 | * Try to acquire a spinlock. |
21 | * Only does the stwcx. if the load returned 0 - the Programming | 21 | * Only does the stwcx. if the load returned 0 - the Programming |
22 | * Environments Manual suggests not doing unnecessary stcwx.'s | 22 | * Environments Manual suggests not doing unnecessary stcwx.'s |
23 | * since they may inhibit forward progress by other CPUs in getting | 23 | * since they may inhibit forward progress by other CPUs in getting |
24 | * a lock. | 24 | * a lock. |
25 | */ | 25 | */ |
26 | static inline unsigned long __spin_trylock(volatile unsigned long *lock) | 26 | static inline unsigned long __spin_trylock(volatile unsigned long *lock) |
27 | { | 27 | { |
28 | unsigned long ret; | 28 | unsigned long ret; |
29 | 29 | ||
30 | __asm__ __volatile__ ("\n\ | 30 | __asm__ __volatile__ ("\n\ |
31 | 1: lwarx %0,0,%1\n\ | 31 | 1: lwarx %0,0,%1\n\ |
32 | cmpwi 0,%0,0\n\ | 32 | cmpwi 0,%0,0\n\ |
33 | bne 2f\n" | 33 | bne 2f\n" |
34 | PPC405_ERR77(0,%1) | 34 | PPC405_ERR77(0,%1) |
35 | " stwcx. %2,0,%1\n\ | 35 | " stwcx. %2,0,%1\n\ |
36 | bne- 1b\n\ | 36 | bne- 1b\n\ |
37 | isync\n\ | 37 | isync\n\ |
38 | 2:" | 38 | 2:" |
39 | : "=&r"(ret) | 39 | : "=&r"(ret) |
40 | : "r"(lock), "r"(1) | 40 | : "r"(lock), "r"(1) |
41 | : "cr0", "memory"); | 41 | : "cr0", "memory"); |
42 | 42 | ||
43 | return ret; | 43 | return ret; |
44 | } | 44 | } |
45 | 45 | ||
46 | void _raw_spin_lock(spinlock_t *lock) | 46 | void _raw_spin_lock(spinlock_t *lock) |
47 | { | 47 | { |
48 | int cpu = smp_processor_id(); | 48 | int cpu = smp_processor_id(); |
49 | unsigned int stuck = INIT_STUCK; | 49 | unsigned int stuck = INIT_STUCK; |
50 | while (__spin_trylock(&lock->lock)) { | 50 | while (__spin_trylock(&lock->lock)) { |
51 | while ((unsigned volatile long)lock->lock != 0) { | 51 | while ((unsigned volatile long)lock->lock != 0) { |
52 | if (!--stuck) { | 52 | if (!--stuck) { |
53 | printk("_spin_lock(%p) CPU#%d NIP %p" | 53 | printk("_spin_lock(%p) CPU#%d NIP %p" |
54 | " holder: cpu %ld pc %08lX\n", | 54 | " holder: cpu %ld pc %08lX\n", |
55 | lock, cpu, __builtin_return_address(0), | 55 | lock, cpu, __builtin_return_address(0), |
56 | lock->owner_cpu,lock->owner_pc); | 56 | lock->owner_cpu,lock->owner_pc); |
57 | stuck = INIT_STUCK; | 57 | stuck = INIT_STUCK; |
58 | /* steal the lock */ | 58 | /* steal the lock */ |
59 | /*xchg_u32((void *)&lock->lock,0);*/ | 59 | /*xchg_u32((void *)&lock->lock,0);*/ |
60 | } | 60 | } |
61 | } | 61 | } |
62 | } | 62 | } |
63 | lock->owner_pc = (unsigned long)__builtin_return_address(0); | 63 | lock->owner_pc = (unsigned long)__builtin_return_address(0); |
64 | lock->owner_cpu = cpu; | 64 | lock->owner_cpu = cpu; |
65 | } | 65 | } |
66 | EXPORT_SYMBOL(_raw_spin_lock); | 66 | EXPORT_SYMBOL(_raw_spin_lock); |
67 | 67 | ||
68 | int _raw_spin_trylock(spinlock_t *lock) | 68 | int _raw_spin_trylock(spinlock_t *lock) |
69 | { | 69 | { |
70 | if (__spin_trylock(&lock->lock)) | 70 | if (__spin_trylock(&lock->lock)) |
71 | return 0; | 71 | return 0; |
72 | lock->owner_cpu = smp_processor_id(); | 72 | lock->owner_cpu = smp_processor_id(); |
73 | lock->owner_pc = (unsigned long)__builtin_return_address(0); | 73 | lock->owner_pc = (unsigned long)__builtin_return_address(0); |
74 | return 1; | 74 | return 1; |
75 | } | 75 | } |
76 | EXPORT_SYMBOL(_raw_spin_trylock); | 76 | EXPORT_SYMBOL(_raw_spin_trylock); |
77 | 77 | ||
78 | void _raw_spin_unlock(spinlock_t *lp) | 78 | void _raw_spin_unlock(spinlock_t *lp) |
79 | { | 79 | { |
80 | if ( !lp->lock ) | 80 | if ( !lp->lock ) |
81 | printk("_spin_unlock(%p): no lock cpu %d curr PC %p %s/%d\n", | 81 | printk("_spin_unlock(%p): no lock cpu %d curr PC %p %s/%d\n", |
82 | lp, smp_processor_id(), __builtin_return_address(0), | 82 | lp, smp_processor_id(), __builtin_return_address(0), |
83 | current->comm, current->pid); | 83 | current->comm, current->pid); |
84 | if ( lp->owner_cpu != smp_processor_id() ) | 84 | if ( lp->owner_cpu != smp_processor_id() ) |
85 | printk("_spin_unlock(%p): cpu %d trying clear of cpu %d pc %lx val %lx\n", | 85 | printk("_spin_unlock(%p): cpu %d trying clear of cpu %d pc %lx val %lx\n", |
86 | lp, smp_processor_id(), (int)lp->owner_cpu, | 86 | lp, smp_processor_id(), (int)lp->owner_cpu, |
87 | lp->owner_pc,lp->lock); | 87 | lp->owner_pc,lp->lock); |
88 | lp->owner_pc = lp->owner_cpu = 0; | 88 | lp->owner_pc = lp->owner_cpu = 0; |
89 | wmb(); | 89 | wmb(); |
90 | lp->lock = 0; | 90 | lp->lock = 0; |
91 | } | 91 | } |
92 | EXPORT_SYMBOL(_raw_spin_unlock); | 92 | EXPORT_SYMBOL(_raw_spin_unlock); |
93 | 93 | ||
94 | /* | 94 | /* |
95 | * For rwlocks, zero is unlocked, -1 is write-locked, | 95 | * For rwlocks, zero is unlocked, -1 is write-locked, |
96 | * positive is read-locked. | 96 | * positive is read-locked. |
97 | */ | 97 | */ |
98 | static __inline__ int __read_trylock(rwlock_t *rw) | 98 | static __inline__ int __read_trylock(rwlock_t *rw) |
99 | { | 99 | { |
100 | signed int tmp; | 100 | signed int tmp; |
101 | 101 | ||
102 | __asm__ __volatile__( | 102 | __asm__ __volatile__( |
103 | "2: lwarx %0,0,%1 # __read_trylock\n\ | 103 | "2: lwarx %0,0,%1 # __read_trylock\n\ |
104 | addic. %0,%0,1\n\ | 104 | addic. %0,%0,1\n\ |
105 | ble- 1f\n" | 105 | ble- 1f\n" |
106 | PPC405_ERR77(0,%1) | 106 | PPC405_ERR77(0,%1) |
107 | " stwcx. %0,0,%1\n\ | 107 | " stwcx. %0,0,%1\n\ |
108 | bne- 2b\n\ | 108 | bne- 2b\n\ |
109 | isync\n\ | 109 | isync\n\ |
110 | 1:" | 110 | 1:" |
111 | : "=&r"(tmp) | 111 | : "=&r"(tmp) |
112 | : "r"(&rw->lock) | 112 | : "r"(&rw->lock) |
113 | : "cr0", "memory"); | 113 | : "cr0", "memory"); |
114 | 114 | ||
115 | return tmp; | 115 | return tmp; |
116 | } | 116 | } |
117 | 117 | ||
118 | int _raw_read_trylock(rwlock_t *rw) | 118 | int _raw_read_trylock(rwlock_t *rw) |
119 | { | 119 | { |
120 | return __read_trylock(rw) > 0; | 120 | return __read_trylock(rw) > 0; |
121 | } | 121 | } |
122 | EXPORT_SYMBOL(_raw_read_trylock); | 122 | EXPORT_SYMBOL(_raw_read_trylock); |
123 | 123 | ||
124 | void _raw_read_lock(rwlock_t *rw) | 124 | void _raw_read_lock(rwlock_t *rw) |
125 | { | 125 | { |
126 | unsigned int stuck; | 126 | unsigned int stuck; |
127 | 127 | ||
128 | while (__read_trylock(rw) <= 0) { | 128 | while (__read_trylock(rw) <= 0) { |
129 | stuck = INIT_STUCK; | 129 | stuck = INIT_STUCK; |
130 | while (!read_can_lock(rw)) { | 130 | while (!read_can_lock(rw)) { |
131 | if (--stuck == 0) { | 131 | if (--stuck == 0) { |
132 | printk("_read_lock(%p) CPU#%d lock %d\n", | 132 | printk("_read_lock(%p) CPU#%d lock %d\n", |
133 | rw, _smp_processor_id(), rw->lock); | 133 | rw, raw_smp_processor_id(), rw->lock); |
134 | stuck = INIT_STUCK; | 134 | stuck = INIT_STUCK; |
135 | } | 135 | } |
136 | } | 136 | } |
137 | } | 137 | } |
138 | } | 138 | } |
139 | EXPORT_SYMBOL(_raw_read_lock); | 139 | EXPORT_SYMBOL(_raw_read_lock); |
140 | 140 | ||
141 | void _raw_read_unlock(rwlock_t *rw) | 141 | void _raw_read_unlock(rwlock_t *rw) |
142 | { | 142 | { |
143 | if ( rw->lock == 0 ) | 143 | if ( rw->lock == 0 ) |
144 | printk("_read_unlock(): %s/%d (nip %08lX) lock %d\n", | 144 | printk("_read_unlock(): %s/%d (nip %08lX) lock %d\n", |
145 | current->comm,current->pid,current->thread.regs->nip, | 145 | current->comm,current->pid,current->thread.regs->nip, |
146 | rw->lock); | 146 | rw->lock); |
147 | wmb(); | 147 | wmb(); |
148 | atomic_dec((atomic_t *) &(rw)->lock); | 148 | atomic_dec((atomic_t *) &(rw)->lock); |
149 | } | 149 | } |
150 | EXPORT_SYMBOL(_raw_read_unlock); | 150 | EXPORT_SYMBOL(_raw_read_unlock); |
151 | 151 | ||
152 | void _raw_write_lock(rwlock_t *rw) | 152 | void _raw_write_lock(rwlock_t *rw) |
153 | { | 153 | { |
154 | unsigned int stuck; | 154 | unsigned int stuck; |
155 | 155 | ||
156 | while (cmpxchg(&rw->lock, 0, -1) != 0) { | 156 | while (cmpxchg(&rw->lock, 0, -1) != 0) { |
157 | stuck = INIT_STUCK; | 157 | stuck = INIT_STUCK; |
158 | while (!write_can_lock(rw)) { | 158 | while (!write_can_lock(rw)) { |
159 | if (--stuck == 0) { | 159 | if (--stuck == 0) { |
160 | printk("write_lock(%p) CPU#%d lock %d)\n", | 160 | printk("write_lock(%p) CPU#%d lock %d)\n", |
161 | rw, _smp_processor_id(), rw->lock); | 161 | rw, raw_smp_processor_id(), rw->lock); |
162 | stuck = INIT_STUCK; | 162 | stuck = INIT_STUCK; |
163 | } | 163 | } |
164 | } | 164 | } |
165 | } | 165 | } |
166 | wmb(); | 166 | wmb(); |
167 | } | 167 | } |
168 | EXPORT_SYMBOL(_raw_write_lock); | 168 | EXPORT_SYMBOL(_raw_write_lock); |
169 | 169 | ||
170 | int _raw_write_trylock(rwlock_t *rw) | 170 | int _raw_write_trylock(rwlock_t *rw) |
171 | { | 171 | { |
172 | if (cmpxchg(&rw->lock, 0, -1) != 0) | 172 | if (cmpxchg(&rw->lock, 0, -1) != 0) |
173 | return 0; | 173 | return 0; |
174 | wmb(); | 174 | wmb(); |
175 | return 1; | 175 | return 1; |
176 | } | 176 | } |
177 | EXPORT_SYMBOL(_raw_write_trylock); | 177 | EXPORT_SYMBOL(_raw_write_trylock); |
178 | 178 | ||
179 | void _raw_write_unlock(rwlock_t *rw) | 179 | void _raw_write_unlock(rwlock_t *rw) |
180 | { | 180 | { |
181 | if (rw->lock >= 0) | 181 | if (rw->lock >= 0) |
182 | printk("_write_lock(): %s/%d (nip %08lX) lock %d\n", | 182 | printk("_write_lock(): %s/%d (nip %08lX) lock %d\n", |
183 | current->comm,current->pid,current->thread.regs->nip, | 183 | current->comm,current->pid,current->thread.regs->nip, |
184 | rw->lock); | 184 | rw->lock); |
185 | wmb(); | 185 | wmb(); |
186 | rw->lock = 0; | 186 | rw->lock = 0; |
187 | } | 187 | } |
188 | EXPORT_SYMBOL(_raw_write_unlock); | 188 | EXPORT_SYMBOL(_raw_write_unlock); |
189 | 189 | ||
190 | #endif | 190 | #endif |
191 | 191 |
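
Note: the CONFIG_DEBUG_SPINLOCK code above wraps each spin in a countdown, so a lock that stays contended for INIT_STUCK iterations gets reported together with its apparent owner; the raw CPU id is sufficient for those printk()s because they are diagnostic only. A user-space sketch of that complain-while-spinning shape, using C11 atomics; the structure and names are invented.

/* Generic shape of the "complain while spinning" debug lock above,
 * as a user-space sketch; the threshold and message are illustrative. */
#include <stdatomic.h>
#include <stdio.h>

#define INIT_STUCK 200000000

struct dbg_lock {
	atomic_int lock;	/* 0 = free, 1 = held */
	int owner;		/* last owner, for the complaint */
};

static void dbg_lock_acquire(struct dbg_lock *l, int self)
{
	unsigned int stuck = INIT_STUCK;
	int expected = 0;

	while (!atomic_compare_exchange_weak(&l->lock, &expected, 1)) {
		expected = 0;	/* CAS failure overwrites 'expected' */
		if (--stuck == 0) {
			/* Same idea as the printk in _raw_spin_lock():
			 * report the apparent owner and keep waiting. */
			fprintf(stderr, "lock %p stuck, holder %d\n",
				(void *)l, l->owner);
			stuck = INIT_STUCK;
		}
	}
	l->owner = self;
}

static void dbg_lock_release(struct dbg_lock *l)
{
	atomic_store(&l->lock, 0);
}

As in the kernel code, the counter is simply reset after each complaint, so a genuinely stuck lock keeps reporting instead of hanging silently.
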
arch/ppc64/kernel/idle.c
1 | /* | 1 | /* |
2 | * Idle daemon for PowerPC. Idle daemon will handle any action | 2 | * Idle daemon for PowerPC. Idle daemon will handle any action |
3 | * that needs to be taken when the system becomes idle. | 3 | * that needs to be taken when the system becomes idle. |
4 | * | 4 | * |
5 | * Originally Written by Cort Dougan (cort@cs.nmt.edu) | 5 | * Originally Written by Cort Dougan (cort@cs.nmt.edu) |
6 | * | 6 | * |
7 | * iSeries supported added by Mike Corrigan <mikejc@us.ibm.com> | 7 | * iSeries supported added by Mike Corrigan <mikejc@us.ibm.com> |
8 | * | 8 | * |
9 | * Additional shared processor, SMT, and firmware support | 9 | * Additional shared processor, SMT, and firmware support |
10 | * Copyright (c) 2003 Dave Engebretsen <engebret@us.ibm.com> | 10 | * Copyright (c) 2003 Dave Engebretsen <engebret@us.ibm.com> |
11 | * | 11 | * |
12 | * This program is free software; you can redistribute it and/or | 12 | * This program is free software; you can redistribute it and/or |
13 | * modify it under the terms of the GNU General Public License | 13 | * modify it under the terms of the GNU General Public License |
14 | * as published by the Free Software Foundation; either version | 14 | * as published by the Free Software Foundation; either version |
15 | * 2 of the License, or (at your option) any later version. | 15 | * 2 of the License, or (at your option) any later version. |
16 | */ | 16 | */ |
17 | 17 | ||
18 | #include <linux/config.h> | 18 | #include <linux/config.h> |
19 | #include <linux/sched.h> | 19 | #include <linux/sched.h> |
20 | #include <linux/kernel.h> | 20 | #include <linux/kernel.h> |
21 | #include <linux/smp.h> | 21 | #include <linux/smp.h> |
22 | #include <linux/cpu.h> | 22 | #include <linux/cpu.h> |
23 | #include <linux/module.h> | 23 | #include <linux/module.h> |
24 | #include <linux/sysctl.h> | 24 | #include <linux/sysctl.h> |
25 | #include <linux/smp.h> | 25 | #include <linux/smp.h> |
26 | 26 | ||
27 | #include <asm/system.h> | 27 | #include <asm/system.h> |
28 | #include <asm/processor.h> | 28 | #include <asm/processor.h> |
29 | #include <asm/mmu.h> | 29 | #include <asm/mmu.h> |
30 | #include <asm/cputable.h> | 30 | #include <asm/cputable.h> |
31 | #include <asm/time.h> | 31 | #include <asm/time.h> |
32 | #include <asm/iSeries/HvCall.h> | 32 | #include <asm/iSeries/HvCall.h> |
33 | #include <asm/iSeries/ItLpQueue.h> | 33 | #include <asm/iSeries/ItLpQueue.h> |
34 | #include <asm/plpar_wrappers.h> | 34 | #include <asm/plpar_wrappers.h> |
35 | #include <asm/systemcfg.h> | 35 | #include <asm/systemcfg.h> |
36 | 36 | ||
37 | extern void power4_idle(void); | 37 | extern void power4_idle(void); |
38 | 38 | ||
39 | static int (*idle_loop)(void); | 39 | static int (*idle_loop)(void); |
40 | 40 | ||
41 | #ifdef CONFIG_PPC_ISERIES | 41 | #ifdef CONFIG_PPC_ISERIES |
42 | static unsigned long maxYieldTime = 0; | 42 | static unsigned long maxYieldTime = 0; |
43 | static unsigned long minYieldTime = 0xffffffffffffffffUL; | 43 | static unsigned long minYieldTime = 0xffffffffffffffffUL; |
44 | 44 | ||
45 | static void yield_shared_processor(void) | 45 | static void yield_shared_processor(void) |
46 | { | 46 | { |
47 | unsigned long tb; | 47 | unsigned long tb; |
48 | unsigned long yieldTime; | 48 | unsigned long yieldTime; |
49 | 49 | ||
50 | HvCall_setEnabledInterrupts(HvCall_MaskIPI | | 50 | HvCall_setEnabledInterrupts(HvCall_MaskIPI | |
51 | HvCall_MaskLpEvent | | 51 | HvCall_MaskLpEvent | |
52 | HvCall_MaskLpProd | | 52 | HvCall_MaskLpProd | |
53 | HvCall_MaskTimeout); | 53 | HvCall_MaskTimeout); |
54 | 54 | ||
55 | tb = get_tb(); | 55 | tb = get_tb(); |
56 | /* Compute future tb value when yield should expire */ | 56 | /* Compute future tb value when yield should expire */ |
57 | HvCall_yieldProcessor(HvCall_YieldTimed, tb+tb_ticks_per_jiffy); | 57 | HvCall_yieldProcessor(HvCall_YieldTimed, tb+tb_ticks_per_jiffy); |
58 | 58 | ||
59 | yieldTime = get_tb() - tb; | 59 | yieldTime = get_tb() - tb; |
60 | if (yieldTime > maxYieldTime) | 60 | if (yieldTime > maxYieldTime) |
61 | maxYieldTime = yieldTime; | 61 | maxYieldTime = yieldTime; |
62 | 62 | ||
63 | if (yieldTime < minYieldTime) | 63 | if (yieldTime < minYieldTime) |
64 | minYieldTime = yieldTime; | 64 | minYieldTime = yieldTime; |
65 | 65 | ||
66 | /* | 66 | /* |
67 | * The decrementer stops during the yield. Force a fake decrementer | 67 | * The decrementer stops during the yield. Force a fake decrementer |
68 | * here and let the timer_interrupt code sort out the actual time. | 68 | * here and let the timer_interrupt code sort out the actual time. |
69 | */ | 69 | */ |
70 | get_paca()->lppaca.int_dword.fields.decr_int = 1; | 70 | get_paca()->lppaca.int_dword.fields.decr_int = 1; |
71 | process_iSeries_events(); | 71 | process_iSeries_events(); |
72 | } | 72 | } |
73 | 73 | ||
74 | static int iSeries_idle(void) | 74 | static int iSeries_idle(void) |
75 | { | 75 | { |
76 | struct paca_struct *lpaca; | 76 | struct paca_struct *lpaca; |
77 | long oldval; | 77 | long oldval; |
78 | 78 | ||
79 | /* ensure iSeries run light will be out when idle */ | 79 | /* ensure iSeries run light will be out when idle */ |
80 | ppc64_runlatch_off(); | 80 | ppc64_runlatch_off(); |
81 | 81 | ||
82 | lpaca = get_paca(); | 82 | lpaca = get_paca(); |
83 | 83 | ||
84 | while (1) { | 84 | while (1) { |
85 | if (lpaca->lppaca.shared_proc) { | 85 | if (lpaca->lppaca.shared_proc) { |
86 | if (ItLpQueue_isLpIntPending(lpaca->lpqueue_ptr)) | 86 | if (ItLpQueue_isLpIntPending(lpaca->lpqueue_ptr)) |
87 | process_iSeries_events(); | 87 | process_iSeries_events(); |
88 | if (!need_resched()) | 88 | if (!need_resched()) |
89 | yield_shared_processor(); | 89 | yield_shared_processor(); |
90 | } else { | 90 | } else { |
91 | oldval = test_and_clear_thread_flag(TIF_NEED_RESCHED); | 91 | oldval = test_and_clear_thread_flag(TIF_NEED_RESCHED); |
92 | 92 | ||
93 | if (!oldval) { | 93 | if (!oldval) { |
94 | set_thread_flag(TIF_POLLING_NRFLAG); | 94 | set_thread_flag(TIF_POLLING_NRFLAG); |
95 | 95 | ||
96 | while (!need_resched()) { | 96 | while (!need_resched()) { |
97 | HMT_medium(); | 97 | HMT_medium(); |
98 | if (ItLpQueue_isLpIntPending(lpaca->lpqueue_ptr)) | 98 | if (ItLpQueue_isLpIntPending(lpaca->lpqueue_ptr)) |
99 | process_iSeries_events(); | 99 | process_iSeries_events(); |
100 | HMT_low(); | 100 | HMT_low(); |
101 | } | 101 | } |
102 | 102 | ||
103 | HMT_medium(); | 103 | HMT_medium(); |
104 | clear_thread_flag(TIF_POLLING_NRFLAG); | 104 | clear_thread_flag(TIF_POLLING_NRFLAG); |
105 | } else { | 105 | } else { |
106 | set_need_resched(); | 106 | set_need_resched(); |
107 | } | 107 | } |
108 | } | 108 | } |
109 | 109 | ||
110 | ppc64_runlatch_on(); | 110 | ppc64_runlatch_on(); |
111 | schedule(); | 111 | schedule(); |
112 | ppc64_runlatch_off(); | 112 | ppc64_runlatch_off(); |
113 | } | 113 | } |
114 | 114 | ||
115 | return 0; | 115 | return 0; |
116 | } | 116 | } |
117 | 117 | ||
118 | #else | 118 | #else |
119 | 119 | ||
120 | static int default_idle(void) | 120 | static int default_idle(void) |
121 | { | 121 | { |
122 | long oldval; | 122 | long oldval; |
123 | unsigned int cpu = smp_processor_id(); | 123 | unsigned int cpu = smp_processor_id(); |
124 | 124 | ||
125 | while (1) { | 125 | while (1) { |
126 | oldval = test_and_clear_thread_flag(TIF_NEED_RESCHED); | 126 | oldval = test_and_clear_thread_flag(TIF_NEED_RESCHED); |
127 | 127 | ||
128 | if (!oldval) { | 128 | if (!oldval) { |
129 | set_thread_flag(TIF_POLLING_NRFLAG); | 129 | set_thread_flag(TIF_POLLING_NRFLAG); |
130 | 130 | ||
131 | while (!need_resched() && !cpu_is_offline(cpu)) { | 131 | while (!need_resched() && !cpu_is_offline(cpu)) { |
132 | barrier(); | 132 | barrier(); |
133 | /* | 133 | /* |
134 | * Go into low thread priority and possibly | 134 | * Go into low thread priority and possibly |
135 | * low power mode. | 135 | * low power mode. |
136 | */ | 136 | */ |
137 | HMT_low(); | 137 | HMT_low(); |
138 | HMT_very_low(); | 138 | HMT_very_low(); |
139 | } | 139 | } |
140 | 140 | ||
141 | HMT_medium(); | 141 | HMT_medium(); |
142 | clear_thread_flag(TIF_POLLING_NRFLAG); | 142 | clear_thread_flag(TIF_POLLING_NRFLAG); |
143 | } else { | 143 | } else { |
144 | set_need_resched(); | 144 | set_need_resched(); |
145 | } | 145 | } |
146 | 146 | ||
147 | schedule(); | 147 | schedule(); |
148 | if (cpu_is_offline(cpu) && system_state == SYSTEM_RUNNING) | 148 | if (cpu_is_offline(cpu) && system_state == SYSTEM_RUNNING) |
149 | cpu_die(); | 149 | cpu_die(); |
150 | } | 150 | } |
151 | 151 | ||
152 | return 0; | 152 | return 0; |
153 | } | 153 | } |
154 | 154 | ||
155 | #ifdef CONFIG_PPC_PSERIES | 155 | #ifdef CONFIG_PPC_PSERIES |
156 | 156 | ||
157 | DECLARE_PER_CPU(unsigned long, smt_snooze_delay); | 157 | DECLARE_PER_CPU(unsigned long, smt_snooze_delay); |
158 | 158 | ||
159 | int dedicated_idle(void) | 159 | int dedicated_idle(void) |
160 | { | 160 | { |
161 | long oldval; | 161 | long oldval; |
162 | struct paca_struct *lpaca = get_paca(), *ppaca; | 162 | struct paca_struct *lpaca = get_paca(), *ppaca; |
163 | unsigned long start_snooze; | 163 | unsigned long start_snooze; |
164 | unsigned long *smt_snooze_delay = &__get_cpu_var(smt_snooze_delay); | 164 | unsigned long *smt_snooze_delay = &__get_cpu_var(smt_snooze_delay); |
165 | unsigned int cpu = smp_processor_id(); | 165 | unsigned int cpu = smp_processor_id(); |
166 | 166 | ||
167 | ppaca = &paca[cpu ^ 1]; | 167 | ppaca = &paca[cpu ^ 1]; |
168 | 168 | ||
169 | while (1) { | 169 | while (1) { |
170 | /* | 170 | /* |
171 | * Indicate to the HV that we are idle. Now would be | 171 | * Indicate to the HV that we are idle. Now would be |
172 | * a good time to find other work to dispatch. | 172 | * a good time to find other work to dispatch. |
173 | */ | 173 | */ |
174 | lpaca->lppaca.idle = 1; | 174 | lpaca->lppaca.idle = 1; |
175 | 175 | ||
176 | oldval = test_and_clear_thread_flag(TIF_NEED_RESCHED); | 176 | oldval = test_and_clear_thread_flag(TIF_NEED_RESCHED); |
177 | if (!oldval) { | 177 | if (!oldval) { |
178 | set_thread_flag(TIF_POLLING_NRFLAG); | 178 | set_thread_flag(TIF_POLLING_NRFLAG); |
179 | start_snooze = __get_tb() + | 179 | start_snooze = __get_tb() + |
180 | *smt_snooze_delay * tb_ticks_per_usec; | 180 | *smt_snooze_delay * tb_ticks_per_usec; |
181 | while (!need_resched() && !cpu_is_offline(cpu)) { | 181 | while (!need_resched() && !cpu_is_offline(cpu)) { |
182 | /* | 182 | /* |
183 | * Go into low thread priority and possibly | 183 | * Go into low thread priority and possibly |
184 | * low power mode. | 184 | * low power mode. |
185 | */ | 185 | */ |
186 | HMT_low(); | 186 | HMT_low(); |
187 | HMT_very_low(); | 187 | HMT_very_low(); |
188 | 188 | ||
189 | if (*smt_snooze_delay == 0 || | 189 | if (*smt_snooze_delay == 0 || |
190 | __get_tb() < start_snooze) | 190 | __get_tb() < start_snooze) |
191 | continue; | 191 | continue; |
192 | 192 | ||
193 | HMT_medium(); | 193 | HMT_medium(); |
194 | 194 | ||
195 | if (!(ppaca->lppaca.idle)) { | 195 | if (!(ppaca->lppaca.idle)) { |
196 | local_irq_disable(); | 196 | local_irq_disable(); |
197 | 197 | ||
198 | /* | 198 | /* |
199 | * We are about to sleep the thread | 199 | * We are about to sleep the thread |
200 | * and so wont be polling any | 200 | * and so wont be polling any |
201 | * more. | 201 | * more. |
202 | */ | 202 | */ |
203 | clear_thread_flag(TIF_POLLING_NRFLAG); | 203 | clear_thread_flag(TIF_POLLING_NRFLAG); |
204 | 204 | ||
205 | /* | 205 | /* |
206 | * SMT dynamic mode. Cede will result | 206 | * SMT dynamic mode. Cede will result |
207 | * in this thread going dormant, if the | 207 | * in this thread going dormant, if the |
208 | * partner thread is still doing work. | 208 | * partner thread is still doing work. |
209 | * Thread wakes up if partner goes idle, | 209 | * Thread wakes up if partner goes idle, |
210 | * an interrupt is presented, or a prod | 210 | * an interrupt is presented, or a prod |
211 | * occurs. Returning from the cede | 211 | * occurs. Returning from the cede |
212 | * enables external interrupts. | 212 | * enables external interrupts. |
213 | */ | 213 | */ |
214 | if (!need_resched()) | 214 | if (!need_resched()) |
215 | cede_processor(); | 215 | cede_processor(); |
216 | else | 216 | else |
217 | local_irq_enable(); | 217 | local_irq_enable(); |
218 | } else { | 218 | } else { |
219 | /* | 219 | /* |
220 | * Give the HV an opportunity at the | 220 | * Give the HV an opportunity at the |
221 | * processor, since we are not doing | 221 | * processor, since we are not doing |
222 | * any work. | 222 | * any work. |
223 | */ | 223 | */ |
224 | poll_pending(); | 224 | poll_pending(); |
225 | } | 225 | } |
226 | } | 226 | } |
227 | 227 | ||
228 | clear_thread_flag(TIF_POLLING_NRFLAG); | 228 | clear_thread_flag(TIF_POLLING_NRFLAG); |
229 | } else { | 229 | } else { |
230 | set_need_resched(); | 230 | set_need_resched(); |
231 | } | 231 | } |
232 | 232 | ||
233 | HMT_medium(); | 233 | HMT_medium(); |
234 | lpaca->lppaca.idle = 0; | 234 | lpaca->lppaca.idle = 0; |
235 | schedule(); | 235 | schedule(); |
236 | if (cpu_is_offline(cpu) && system_state == SYSTEM_RUNNING) | 236 | if (cpu_is_offline(cpu) && system_state == SYSTEM_RUNNING) |
237 | cpu_die(); | 237 | cpu_die(); |
238 | } | 238 | } |
239 | return 0; | 239 | return 0; |
240 | } | 240 | } |
241 | 241 | ||
242 | static int shared_idle(void) | 242 | static int shared_idle(void) |
243 | { | 243 | { |
244 | struct paca_struct *lpaca = get_paca(); | 244 | struct paca_struct *lpaca = get_paca(); |
245 | unsigned int cpu = smp_processor_id(); | 245 | unsigned int cpu = smp_processor_id(); |
246 | 246 | ||
247 | while (1) { | 247 | while (1) { |
248 | /* | 248 | /* |
249 | * Indicate to the HV that we are idle. Now would be | 249 | * Indicate to the HV that we are idle. Now would be |
250 | * a good time to find other work to dispatch. | 250 | * a good time to find other work to dispatch. |
251 | */ | 251 | */ |
252 | lpaca->lppaca.idle = 1; | 252 | lpaca->lppaca.idle = 1; |
253 | 253 | ||
254 | while (!need_resched() && !cpu_is_offline(cpu)) { | 254 | while (!need_resched() && !cpu_is_offline(cpu)) { |
255 | local_irq_disable(); | 255 | local_irq_disable(); |
256 | 256 | ||
257 | /* | 257 | /* |
258 | * Yield the processor to the hypervisor. We return if | 258 | * Yield the processor to the hypervisor. We return if |
259 | * an external interrupt occurs (which are driven prior | 259 | * an external interrupt occurs (which are driven prior |
260 | * to returning here) or if a prod occurs from another | 260 | * to returning here) or if a prod occurs from another |
261 | * processor. When returning here, external interrupts | 261 | * processor. When returning here, external interrupts |
262 | * are enabled. | 262 | * are enabled. |
263 | * | 263 | * |
264 | * Check need_resched() again with interrupts disabled | 264 | * Check need_resched() again with interrupts disabled |
265 | * to avoid a race. | 265 | * to avoid a race. |
266 | */ | 266 | */ |
267 | if (!need_resched()) | 267 | if (!need_resched()) |
268 | cede_processor(); | 268 | cede_processor(); |
269 | else | 269 | else |
270 | local_irq_enable(); | 270 | local_irq_enable(); |
271 | } | 271 | } |
272 | 272 | ||
273 | HMT_medium(); | 273 | HMT_medium(); |
274 | lpaca->lppaca.idle = 0; | 274 | lpaca->lppaca.idle = 0; |
275 | schedule(); | 275 | schedule(); |
276 | if (cpu_is_offline(smp_processor_id()) && | 276 | if (cpu_is_offline(smp_processor_id()) && |
277 | system_state == SYSTEM_RUNNING) | 277 | system_state == SYSTEM_RUNNING) |
278 | cpu_die(); | 278 | cpu_die(); |
279 | } | 279 | } |
280 | 280 | ||
281 | return 0; | 281 | return 0; |
282 | } | 282 | } |
283 | 283 | ||
284 | #endif /* CONFIG_PPC_PSERIES */ | 284 | #endif /* CONFIG_PPC_PSERIES */ |
285 | 285 | ||
286 | static int native_idle(void) | 286 | static int native_idle(void) |
287 | { | 287 | { |
288 | while(1) { | 288 | while(1) { |
289 | /* check CPU type here */ | 289 | /* check CPU type here */ |
290 | if (!need_resched()) | 290 | if (!need_resched()) |
291 | power4_idle(); | 291 | power4_idle(); |
292 | if (need_resched()) | 292 | if (need_resched()) |
293 | schedule(); | 293 | schedule(); |
294 | 294 | ||
295 | if (cpu_is_offline(_smp_processor_id()) && | 295 | if (cpu_is_offline(raw_smp_processor_id()) && |
296 | system_state == SYSTEM_RUNNING) | 296 | system_state == SYSTEM_RUNNING) |
297 | cpu_die(); | 297 | cpu_die(); |
298 | } | 298 | } |
299 | return 0; | 299 | return 0; |
300 | } | 300 | } |
301 | 301 | ||
302 | #endif /* CONFIG_PPC_ISERIES */ | 302 | #endif /* CONFIG_PPC_ISERIES */ |
303 | 303 | ||
304 | void cpu_idle(void) | 304 | void cpu_idle(void) |
305 | { | 305 | { |
306 | idle_loop(); | 306 | idle_loop(); |
307 | } | 307 | } |
308 | 308 | ||
309 | int powersave_nap; | 309 | int powersave_nap; |
310 | 310 | ||
311 | #ifdef CONFIG_SYSCTL | 311 | #ifdef CONFIG_SYSCTL |
312 | /* | 312 | /* |
313 | * Register the sysctl to set/clear powersave_nap. | 313 | * Register the sysctl to set/clear powersave_nap. |
314 | */ | 314 | */ |
315 | static ctl_table powersave_nap_ctl_table[]={ | 315 | static ctl_table powersave_nap_ctl_table[]={ |
316 | { | 316 | { |
317 | .ctl_name = KERN_PPC_POWERSAVE_NAP, | 317 | .ctl_name = KERN_PPC_POWERSAVE_NAP, |
318 | .procname = "powersave-nap", | 318 | .procname = "powersave-nap", |
319 | .data = &powersave_nap, | 319 | .data = &powersave_nap, |
320 | .maxlen = sizeof(int), | 320 | .maxlen = sizeof(int), |
321 | .mode = 0644, | 321 | .mode = 0644, |
322 | .proc_handler = &proc_dointvec, | 322 | .proc_handler = &proc_dointvec, |
323 | }, | 323 | }, |
324 | { 0, }, | 324 | { 0, }, |
325 | }; | 325 | }; |
326 | static ctl_table powersave_nap_sysctl_root[] = { | 326 | static ctl_table powersave_nap_sysctl_root[] = { |
327 | { 1, "kernel", NULL, 0, 0755, powersave_nap_ctl_table, }, | 327 | { 1, "kernel", NULL, 0, 0755, powersave_nap_ctl_table, }, |
328 | { 0,}, | 328 | { 0,}, |
329 | }; | 329 | }; |
330 | 330 | ||
331 | static int __init | 331 | static int __init |
332 | register_powersave_nap_sysctl(void) | 332 | register_powersave_nap_sysctl(void) |
333 | { | 333 | { |
334 | register_sysctl_table(powersave_nap_sysctl_root, 0); | 334 | register_sysctl_table(powersave_nap_sysctl_root, 0); |
335 | 335 | ||
336 | return 0; | 336 | return 0; |
337 | } | 337 | } |
338 | __initcall(register_powersave_nap_sysctl); | 338 | __initcall(register_powersave_nap_sysctl); |
339 | #endif | 339 | #endif |
340 | 340 | ||
341 | int idle_setup(void) | 341 | int idle_setup(void) |
342 | { | 342 | { |
343 | /* | 343 | /* |
344 | * Move that junk to each platform specific file, eventually define | 344 | * Move that junk to each platform specific file, eventually define |
345 | * a pSeries_idle for shared processor stuff | 345 | * a pSeries_idle for shared processor stuff |
346 | */ | 346 | */ |
347 | #ifdef CONFIG_PPC_ISERIES | 347 | #ifdef CONFIG_PPC_ISERIES |
348 | idle_loop = iSeries_idle; | 348 | idle_loop = iSeries_idle; |
349 | return 1; | 349 | return 1; |
350 | #else | 350 | #else |
351 | idle_loop = default_idle; | 351 | idle_loop = default_idle; |
352 | #endif | 352 | #endif |
353 | #ifdef CONFIG_PPC_PSERIES | 353 | #ifdef CONFIG_PPC_PSERIES |
354 | if (systemcfg->platform & PLATFORM_PSERIES) { | 354 | if (systemcfg->platform & PLATFORM_PSERIES) { |
355 | if (cur_cpu_spec->firmware_features & FW_FEATURE_SPLPAR) { | 355 | if (cur_cpu_spec->firmware_features & FW_FEATURE_SPLPAR) { |
356 | if (get_paca()->lppaca.shared_proc) { | 356 | if (get_paca()->lppaca.shared_proc) { |
357 | printk(KERN_INFO "Using shared processor idle loop\n"); | 357 | printk(KERN_INFO "Using shared processor idle loop\n"); |
358 | idle_loop = shared_idle; | 358 | idle_loop = shared_idle; |
359 | } else { | 359 | } else { |
360 | printk(KERN_INFO "Using dedicated idle loop\n"); | 360 | printk(KERN_INFO "Using dedicated idle loop\n"); |
361 | idle_loop = dedicated_idle; | 361 | idle_loop = dedicated_idle; |
362 | } | 362 | } |
363 | } else { | 363 | } else { |
364 | printk(KERN_INFO "Using default idle loop\n"); | 364 | printk(KERN_INFO "Using default idle loop\n"); |
365 | idle_loop = default_idle; | 365 | idle_loop = default_idle; |
366 | } | 366 | } |
367 | } | 367 | } |
368 | #endif /* CONFIG_PPC_PSERIES */ | 368 | #endif /* CONFIG_PPC_PSERIES */ |
369 | #ifndef CONFIG_PPC_ISERIES | 369 | #ifndef CONFIG_PPC_ISERIES |
370 | if (systemcfg->platform == PLATFORM_POWERMAC || | 370 | if (systemcfg->platform == PLATFORM_POWERMAC || |
371 | systemcfg->platform == PLATFORM_MAPLE) { | 371 | systemcfg->platform == PLATFORM_MAPLE) { |
372 | printk(KERN_INFO "Using native/NAP idle loop\n"); | 372 | printk(KERN_INFO "Using native/NAP idle loop\n"); |
373 | idle_loop = native_idle; | 373 | idle_loop = native_idle; |
374 | } | 374 | } |
375 | #endif /* CONFIG_PPC_ISERIES */ | 375 | #endif /* CONFIG_PPC_ISERIES */ |
376 | 376 | ||
377 | return 1; | 377 | return 1; |
378 | } | 378 | } |
379 | 379 |
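
Note: default_idle(), dedicated_idle() and shared_idle() above all share one skeleton: atomically consume TIF_NEED_RESCHED, advertise TIF_POLLING_NRFLAG so remote CPUs can skip the reschedule IPI, wait for work (by polling, snoozing or ceding to the hypervisor), then schedule() and, if the CPU has been unplugged, call cpu_die(). A condensed sketch of that skeleton; the function name is invented and the platform low-power hooks are reduced to cpu_relax().

/* Condensed sketch of the idle-loop skeleton shared by the ppc64
 * idle variants above; generic_idle_body() is an invented name. */
#include <linux/sched.h>
#include <linux/cpu.h>
#include <linux/smp.h>

static int generic_idle_body(void)
{
	unsigned int cpu = smp_processor_id();

	while (1) {
		long oldval = test_and_clear_thread_flag(TIF_NEED_RESCHED);

		if (!oldval) {
			/* Tell remote CPUs they can skip the reschedule
			 * IPI: we are polling need_resched() ourselves. */
			set_thread_flag(TIF_POLLING_NRFLAG);

			while (!need_resched() && !cpu_is_offline(cpu))
				cpu_relax();	/* low-power hook goes here */

			clear_thread_flag(TIF_POLLING_NRFLAG);
		} else {
			/* The flag was consumed above; put the request
			 * back so schedule() sees it. */
			set_need_resched();
		}

		schedule();
		if (cpu_is_offline(cpu) && system_state == SYSTEM_RUNNING)
			cpu_die();	/* arch hotplug hook, as in idle.c */
	}
	return 0;
}
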
arch/sh/lib/delay.c
1 | /* | 1 | /* |
2 | * Precise Delay Loops for SuperH | 2 | * Precise Delay Loops for SuperH |
3 | * | 3 | * |
4 | * Copyright (C) 1999 Niibe Yutaka & Kaz Kojima | 4 | * Copyright (C) 1999 Niibe Yutaka & Kaz Kojima |
5 | */ | 5 | */ |
6 | 6 | ||
7 | #include <linux/sched.h> | 7 | #include <linux/sched.h> |
8 | #include <linux/delay.h> | 8 | #include <linux/delay.h> |
9 | 9 | ||
10 | void __delay(unsigned long loops) | 10 | void __delay(unsigned long loops) |
11 | { | 11 | { |
12 | __asm__ __volatile__( | 12 | __asm__ __volatile__( |
13 | "tst %0, %0\n\t" | 13 | "tst %0, %0\n\t" |
14 | "1:\t" | 14 | "1:\t" |
15 | "bf/s 1b\n\t" | 15 | "bf/s 1b\n\t" |
16 | " dt %0" | 16 | " dt %0" |
17 | : "=r" (loops) | 17 | : "=r" (loops) |
18 | : "0" (loops) | 18 | : "0" (loops) |
19 | : "t"); | 19 | : "t"); |
20 | } | 20 | } |
21 | 21 | ||
22 | inline void __const_udelay(unsigned long xloops) | 22 | inline void __const_udelay(unsigned long xloops) |
23 | { | 23 | { |
24 | __asm__("dmulu.l %0, %2\n\t" | 24 | __asm__("dmulu.l %0, %2\n\t" |
25 | "sts mach, %0" | 25 | "sts mach, %0" |
26 | : "=r" (xloops) | 26 | : "=r" (xloops) |
27 | : "0" (xloops), "r" (cpu_data[_smp_processor_id()].loops_per_jiffy) | 27 | : "0" (xloops), "r" (cpu_data[raw_smp_processor_id()].loops_per_jiffy) |
28 | : "macl", "mach"); | 28 | : "macl", "mach"); |
29 | __delay(xloops * HZ); | 29 | __delay(xloops * HZ); |
30 | } | 30 | } |
31 | 31 | ||
32 | void __udelay(unsigned long usecs) | 32 | void __udelay(unsigned long usecs) |
33 | { | 33 | { |
34 | __const_udelay(usecs * 0x000010c6); /* 2**32 / 1000000 */ | 34 | __const_udelay(usecs * 0x000010c6); /* 2**32 / 1000000 */ |
35 | } | 35 | } |
36 | 36 | ||
37 | void __ndelay(unsigned long nsecs) | 37 | void __ndelay(unsigned long nsecs) |
38 | { | 38 | { |
39 | __const_udelay(nsecs * 0x00000005); | 39 | __const_udelay(nsecs * 0x00000005); |
40 | } | 40 | } |
41 | 41 | ||
42 | 42 |
arch/sparc64/lib/delay.c
1 | /* delay.c: Delay loops for sparc64 | 1 | /* delay.c: Delay loops for sparc64 |
2 | * | 2 | * |
3 | * Copyright (C) 2004 David S. Miller <davem@redhat.com> | 3 | * Copyright (C) 2004 David S. Miller <davem@redhat.com> |
4 | * | 4 | * |
5 | * Based heavily upon x86 variant which is: | 5 | * Based heavily upon x86 variant which is: |
6 | * Copyright (C) 1993 Linus Torvalds | 6 | * Copyright (C) 1993 Linus Torvalds |
7 | * Copyright (C) 1997 Martin Mares <mj@atrey.karlin.mff.cuni.cz> | 7 | * Copyright (C) 1997 Martin Mares <mj@atrey.karlin.mff.cuni.cz> |
8 | */ | 8 | */ |
9 | 9 | ||
10 | #include <linux/delay.h> | 10 | #include <linux/delay.h> |
11 | 11 | ||
12 | void __delay(unsigned long loops) | 12 | void __delay(unsigned long loops) |
13 | { | 13 | { |
14 | __asm__ __volatile__( | 14 | __asm__ __volatile__( |
15 | " b,pt %%xcc, 1f\n" | 15 | " b,pt %%xcc, 1f\n" |
16 | " cmp %0, 0\n" | 16 | " cmp %0, 0\n" |
17 | " .align 32\n" | 17 | " .align 32\n" |
18 | "1:\n" | 18 | "1:\n" |
19 | " bne,pt %%xcc, 1b\n" | 19 | " bne,pt %%xcc, 1b\n" |
20 | " subcc %0, 1, %0\n" | 20 | " subcc %0, 1, %0\n" |
21 | : "=&r" (loops) | 21 | : "=&r" (loops) |
22 | : "0" (loops) | 22 | : "0" (loops) |
23 | : "cc"); | 23 | : "cc"); |
24 | } | 24 | } |
25 | 25 | ||
26 | /* We used to multiply by HZ after shifting down by 32 bits | 26 | /* We used to multiply by HZ after shifting down by 32 bits |
27 | * but that runs into problems for higher values of HZ and | 27 | * but that runs into problems for higher values of HZ and |
28 | * slow cpus. | 28 | * slow cpus. |
29 | */ | 29 | */ |
30 | void __const_udelay(unsigned long n) | 30 | void __const_udelay(unsigned long n) |
31 | { | 31 | { |
32 | n *= 4; | 32 | n *= 4; |
33 | 33 | ||
34 | n *= (cpu_data(_smp_processor_id()).udelay_val * (HZ/4)); | 34 | n *= (cpu_data(raw_smp_processor_id()).udelay_val * (HZ/4)); |
35 | n >>= 32; | 35 | n >>= 32; |
36 | 36 | ||
37 | __delay(n + 1); | 37 | __delay(n + 1); |
38 | } | 38 | } |
39 | 39 | ||
40 | void __udelay(unsigned long n) | 40 | void __udelay(unsigned long n) |
41 | { | 41 | { |
42 | __const_udelay(n * 0x10c7UL); | 42 | __const_udelay(n * 0x10c7UL); |
43 | } | 43 | } |
44 | 44 | ||
45 | 45 | ||
46 | void __ndelay(unsigned long n) | 46 | void __ndelay(unsigned long n) |
47 | { | 47 | { |
48 | __const_udelay(n * 0x5UL); | 48 | __const_udelay(n * 0x5UL); |
49 | } | 49 | } |
50 | 50 |
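
Note: the comment above about multiplying by HZ before rather than after the 32-bit shift is a fixed-point precision point. __udelay() pre-scales microseconds by 0x10c7 (roughly 2^32/10^6), and __const_udelay() then multiplies by udelay_val*(HZ/4)*4 before shifting down 32 bits, so the fractional bits survive until the final shift; shifting first would truncate them and scale the truncation error up by HZ, which is presumably the problem the comment alludes to for high HZ and slow CPUs. A small stand-alone check of that arithmetic; the loops-per-jiffy value and HZ below are example numbers only.

/* Worked check of the udelay() fixed-point scaling used above.
 * The calibration value and HZ are examples, not measured values. */
#include <stdio.h>
#include <stdint.h>

int main(void)
{
	uint64_t usecs = 10;
	uint64_t lpj   = 4997120;	/* example loops-per-jiffy */
	uint64_t hz    = 1000;

	/* Exact target: usecs * lpj * hz / 10^6 loops. */
	double exact = (double)usecs * lpj * hz / 1000000.0;

	/* What __udelay()/__const_udelay() compute: scale by
	 * 0x10c7 ~= 2^32/10^6 first, multiply, shift down 32 last. */
	uint64_t n = usecs * 0x10c7UL;
	n *= 4;
	n *= lpj * (hz / 4);
	n >>= 32;

	printf("exact %.1f loops, fixed-point %llu loops\n",
	       exact, (unsigned long long)(n + 1));
	return 0;
}

With these inputs the fixed-point path yields 49972 loops against an exact 49971.2, a deliberate slight overshoot from the rounded-up constant.
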
arch/x86_64/lib/delay.c
1 | /* | 1 | /* |
2 | * Precise Delay Loops for x86-64 | 2 | * Precise Delay Loops for x86-64 |
3 | * | 3 | * |
4 | * Copyright (C) 1993 Linus Torvalds | 4 | * Copyright (C) 1993 Linus Torvalds |
5 | * Copyright (C) 1997 Martin Mares <mj@atrey.karlin.mff.cuni.cz> | 5 | * Copyright (C) 1997 Martin Mares <mj@atrey.karlin.mff.cuni.cz> |
6 | * | 6 | * |
7 | * The __delay function must _NOT_ be inlined as its execution time | 7 | * The __delay function must _NOT_ be inlined as its execution time |
8 | * depends wildly on alignment on many x86 processors. | 8 | * depends wildly on alignment on many x86 processors. |
9 | */ | 9 | */ |
10 | 10 | ||
11 | #include <linux/config.h> | 11 | #include <linux/config.h> |
12 | #include <linux/sched.h> | 12 | #include <linux/sched.h> |
13 | #include <linux/delay.h> | 13 | #include <linux/delay.h> |
14 | #include <asm/delay.h> | 14 | #include <asm/delay.h> |
15 | 15 | ||
16 | #ifdef CONFIG_SMP | 16 | #ifdef CONFIG_SMP |
17 | #include <asm/smp.h> | 17 | #include <asm/smp.h> |
18 | #endif | 18 | #endif |
19 | 19 | ||
20 | int x86_udelay_tsc = 0; /* Delay via TSC */ | 20 | int x86_udelay_tsc = 0; /* Delay via TSC */ |
21 | 21 | ||
22 | void __delay(unsigned long loops) | 22 | void __delay(unsigned long loops) |
23 | { | 23 | { |
24 | unsigned bclock, now; | 24 | unsigned bclock, now; |
25 | 25 | ||
26 | rdtscl(bclock); | 26 | rdtscl(bclock); |
27 | do | 27 | do |
28 | { | 28 | { |
29 | rep_nop(); | 29 | rep_nop(); |
30 | rdtscl(now); | 30 | rdtscl(now); |
31 | } | 31 | } |
32 | while((now-bclock) < loops); | 32 | while((now-bclock) < loops); |
33 | } | 33 | } |
34 | 34 | ||
35 | inline void __const_udelay(unsigned long xloops) | 35 | inline void __const_udelay(unsigned long xloops) |
36 | { | 36 | { |
37 | __delay(((xloops * cpu_data[_smp_processor_id()].loops_per_jiffy) >> 32) * HZ); | 37 | __delay(((xloops * cpu_data[raw_smp_processor_id()].loops_per_jiffy) >> 32) * HZ); |
38 | } | 38 | } |
39 | 39 | ||
40 | void __udelay(unsigned long usecs) | 40 | void __udelay(unsigned long usecs) |
41 | { | 41 | { |
42 | __const_udelay(usecs * 0x000010c6); /* 2**32 / 1000000 */ | 42 | __const_udelay(usecs * 0x000010c6); /* 2**32 / 1000000 */ |
43 | } | 43 | } |
44 | 44 | ||
45 | void __ndelay(unsigned long nsecs) | 45 | void __ndelay(unsigned long nsecs) |
46 | { | 46 | { |
47 | __const_udelay(nsecs * 0x00005); /* 2**32 / 1000000000 (rounded up) */ | 47 | __const_udelay(nsecs * 0x00005); /* 2**32 / 1000000000 (rounded up) */ |
48 | } | 48 | } |
49 | 49 |
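The constants in __udelay() and __ndelay() above are 2^32-scaled reciprocals, so the single >>32 inside __const_udelay() converts the fixed-point product straight back into delay loops. A quick stand-alone check of those two constants (nothing assumed beyond what the comments above already state):

#include <stdio.h>

int main(void)
{
	/* microseconds -> xloops scale: 2^32 / 1,000,000 */
	printf("%#llx\n", (1ULL << 32) / 1000000ULL);                     /* 0x10c6 */

	/* nanoseconds -> xloops scale: 2^32 / 1,000,000,000, rounded up */
	printf("%#llx\n", ((1ULL << 32) + 999999999ULL) / 1000000000ULL); /* 0x5 */
	return 0;
}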
drivers/acpi/processor_idle.c
1 | /* | 1 | /* |
2 | * processor_idle - idle state submodule to the ACPI processor driver | 2 | * processor_idle - idle state submodule to the ACPI processor driver |
3 | * | 3 | * |
4 | * Copyright (C) 2001, 2002 Andy Grover <andrew.grover@intel.com> | 4 | * Copyright (C) 2001, 2002 Andy Grover <andrew.grover@intel.com> |
5 | * Copyright (C) 2001, 2002 Paul Diefenbaugh <paul.s.diefenbaugh@intel.com> | 5 | * Copyright (C) 2001, 2002 Paul Diefenbaugh <paul.s.diefenbaugh@intel.com> |
6 | * Copyright (C) 2004 Dominik Brodowski <linux@brodo.de> | 6 | * Copyright (C) 2004 Dominik Brodowski <linux@brodo.de> |
7 | * Copyright (C) 2004 Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com> | 7 | * Copyright (C) 2004 Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com> |
8 | * - Added processor hotplug support | 8 | * - Added processor hotplug support |
9 | * | 9 | * |
10 | * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ | 10 | * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ |
11 | * | 11 | * |
12 | * This program is free software; you can redistribute it and/or modify | 12 | * This program is free software; you can redistribute it and/or modify |
13 | * it under the terms of the GNU General Public License as published by | 13 | * it under the terms of the GNU General Public License as published by |
14 | * the Free Software Foundation; either version 2 of the License, or (at | 14 | * the Free Software Foundation; either version 2 of the License, or (at |
15 | * your option) any later version. | 15 | * your option) any later version. |
16 | * | 16 | * |
17 | * This program is distributed in the hope that it will be useful, but | 17 | * This program is distributed in the hope that it will be useful, but |
18 | * WITHOUT ANY WARRANTY; without even the implied warranty of | 18 | * WITHOUT ANY WARRANTY; without even the implied warranty of |
19 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | 19 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
20 | * General Public License for more details. | 20 | * General Public License for more details. |
21 | * | 21 | * |
22 | * You should have received a copy of the GNU General Public License along | 22 | * You should have received a copy of the GNU General Public License along |
23 | * with this program; if not, write to the Free Software Foundation, Inc., | 23 | * with this program; if not, write to the Free Software Foundation, Inc., |
24 | * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA. | 24 | * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA. |
25 | * | 25 | * |
26 | * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ | 26 | * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ |
27 | */ | 27 | */ |
28 | 28 | ||
29 | #include <linux/kernel.h> | 29 | #include <linux/kernel.h> |
30 | #include <linux/module.h> | 30 | #include <linux/module.h> |
31 | #include <linux/init.h> | 31 | #include <linux/init.h> |
32 | #include <linux/cpufreq.h> | 32 | #include <linux/cpufreq.h> |
33 | #include <linux/proc_fs.h> | 33 | #include <linux/proc_fs.h> |
34 | #include <linux/seq_file.h> | 34 | #include <linux/seq_file.h> |
35 | #include <linux/acpi.h> | 35 | #include <linux/acpi.h> |
36 | #include <linux/dmi.h> | 36 | #include <linux/dmi.h> |
37 | #include <linux/moduleparam.h> | 37 | #include <linux/moduleparam.h> |
38 | 38 | ||
39 | #include <asm/io.h> | 39 | #include <asm/io.h> |
40 | #include <asm/uaccess.h> | 40 | #include <asm/uaccess.h> |
41 | 41 | ||
42 | #include <acpi/acpi_bus.h> | 42 | #include <acpi/acpi_bus.h> |
43 | #include <acpi/processor.h> | 43 | #include <acpi/processor.h> |
44 | 44 | ||
45 | #define ACPI_PROCESSOR_COMPONENT 0x01000000 | 45 | #define ACPI_PROCESSOR_COMPONENT 0x01000000 |
46 | #define ACPI_PROCESSOR_CLASS "processor" | 46 | #define ACPI_PROCESSOR_CLASS "processor" |
47 | #define ACPI_PROCESSOR_DRIVER_NAME "ACPI Processor Driver" | 47 | #define ACPI_PROCESSOR_DRIVER_NAME "ACPI Processor Driver" |
48 | #define _COMPONENT ACPI_PROCESSOR_COMPONENT | 48 | #define _COMPONENT ACPI_PROCESSOR_COMPONENT |
49 | ACPI_MODULE_NAME ("acpi_processor") | 49 | ACPI_MODULE_NAME ("acpi_processor") |
50 | 50 | ||
51 | #define ACPI_PROCESSOR_FILE_POWER "power" | 51 | #define ACPI_PROCESSOR_FILE_POWER "power" |
52 | 52 | ||
53 | #define US_TO_PM_TIMER_TICKS(t) ((t * (PM_TIMER_FREQUENCY/1000)) / 1000) | 53 | #define US_TO_PM_TIMER_TICKS(t) ((t * (PM_TIMER_FREQUENCY/1000)) / 1000) |
54 | #define C2_OVERHEAD 4 /* 1us (3.579 ticks per us) */ | 54 | #define C2_OVERHEAD 4 /* 1us (3.579 ticks per us) */ |
55 | #define C3_OVERHEAD 4 /* 1us (3.579 ticks per us) */ | 55 | #define C3_OVERHEAD 4 /* 1us (3.579 ticks per us) */ |
56 | 56 | ||
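US_TO_PM_TIMER_TICKS() above converts a latency in microseconds into ACPI PM timer ticks; the PM timer nominally runs at 3.579545 MHz, which is where the "3.579 ticks per us" notes on C2_OVERHEAD/C3_OVERHEAD come from. A small sketch of the conversion, taking 3579545 Hz as the assumed PM_TIMER_FREQUENCY:

#include <stdio.h>

#define PM_TIMER_FREQUENCY 3579545 /* assumed ACPI PM timer rate, in Hz */
#define US_TO_PM_TIMER_TICKS(t) ((t * (PM_TIMER_FREQUENCY/1000)) / 1000)

int main(void)
{
	/* e.g. a 100us C2 exit latency is normalized to ~357 timer ticks */
	printf("%d\n", US_TO_PM_TIMER_TICKS(100));
	return 0;
}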
57 | static void (*pm_idle_save)(void); | 57 | static void (*pm_idle_save)(void); |
58 | module_param(max_cstate, uint, 0644); | 58 | module_param(max_cstate, uint, 0644); |
59 | 59 | ||
60 | static unsigned int nocst = 0; | 60 | static unsigned int nocst = 0; |
61 | module_param(nocst, uint, 0000); | 61 | module_param(nocst, uint, 0000); |
62 | 62 | ||
63 | /* | 63 | /* |
64 | * bm_history -- bit-mask with a bit per jiffy of bus-master activity | 64 | * bm_history -- bit-mask with a bit per jiffy of bus-master activity |
65 | * 1000 HZ: 0xFFFFFFFF: 32 jiffies = 32ms | 65 | * 1000 HZ: 0xFFFFFFFF: 32 jiffies = 32ms |
66 | * 800 HZ: 0xFFFFFFFF: 32 jiffies = 40ms | 66 | * 800 HZ: 0xFFFFFFFF: 32 jiffies = 40ms |
67 | * 100 HZ: 0x0000000F: 4 jiffies = 40ms | 67 | * 100 HZ: 0x0000000F: 4 jiffies = 40ms |
68 | * reduce history for more aggressive entry into C3 | 68 | * reduce history for more aggressive entry into C3 |
69 | */ | 69 | */ |
70 | static unsigned int bm_history = (HZ >= 800 ? 0xFFFFFFFF : ((1U << (HZ / 25)) - 1)); | 70 | static unsigned int bm_history = (HZ >= 800 ? 0xFFFFFFFF : ((1U << (HZ / 25)) - 1)); |
71 | module_param(bm_history, uint, 0644); | 71 | module_param(bm_history, uint, 0644); |
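The bm_history default above gives a bus-master history window of roughly 32-40 ms, one bit per jiffy, exactly as the comment's table lists for 1000/800/100 HZ. For an HZ value the table does not cover (HZ=250 here, purely as an illustration), the same expression yields:

#include <stdio.h>

int main(void)
{
	unsigned int hz = 250;   /* illustrative HZ, not one of the values in the table above */
	unsigned int bm_history =
		(hz >= 800) ? 0xFFFFFFFF : ((1U << (hz / 25)) - 1);

	/* 250 HZ: 0x000003ff: 10 jiffies = 40ms */
	printf("%#010x: %u jiffies = %ums\n", bm_history, hz / 25, (hz / 25) * (1000 / hz));
	return 0;
}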
72 | /* -------------------------------------------------------------------------- | 72 | /* -------------------------------------------------------------------------- |
73 | Power Management | 73 | Power Management |
74 | -------------------------------------------------------------------------- */ | 74 | -------------------------------------------------------------------------- */ |
75 | 75 | ||
76 | /* | 76 | /* |
77 | * IBM ThinkPad R40e crashes mysteriously when going into C2 or C3. | 77 | * IBM ThinkPad R40e crashes mysteriously when going into C2 or C3. |
78 | * For now disable this. Probably a bug somewhere else. | 78 | * For now disable this. Probably a bug somewhere else. |
79 | * | 79 | * |
80 | * To skip this limit, boot/load with a large max_cstate limit. | 80 | * To skip this limit, boot/load with a large max_cstate limit. |
81 | */ | 81 | */ |
82 | static int no_c2c3(struct dmi_system_id *id) | 82 | static int no_c2c3(struct dmi_system_id *id) |
83 | { | 83 | { |
84 | if (max_cstate > ACPI_PROCESSOR_MAX_POWER) | 84 | if (max_cstate > ACPI_PROCESSOR_MAX_POWER) |
85 | return 0; | 85 | return 0; |
86 | 86 | ||
87 | printk(KERN_NOTICE PREFIX "%s detected - C2,C3 disabled." | 87 | printk(KERN_NOTICE PREFIX "%s detected - C2,C3 disabled." |
88 | " Override with \"processor.max_cstate=%d\"\n", id->ident, | 88 | " Override with \"processor.max_cstate=%d\"\n", id->ident, |
89 | ACPI_PROCESSOR_MAX_POWER + 1); | 89 | ACPI_PROCESSOR_MAX_POWER + 1); |
90 | 90 | ||
91 | max_cstate = 1; | 91 | max_cstate = 1; |
92 | 92 | ||
93 | return 0; | 93 | return 0; |
94 | } | 94 | } |
95 | 95 | ||
96 | 96 | ||
97 | 97 | ||
98 | 98 | ||
99 | static struct dmi_system_id __initdata processor_power_dmi_table[] = { | 99 | static struct dmi_system_id __initdata processor_power_dmi_table[] = { |
100 | { no_c2c3, "IBM ThinkPad R40e", { | 100 | { no_c2c3, "IBM ThinkPad R40e", { |
101 | DMI_MATCH(DMI_BIOS_VENDOR,"IBM"), | 101 | DMI_MATCH(DMI_BIOS_VENDOR,"IBM"), |
102 | DMI_MATCH(DMI_BIOS_VERSION,"1SET60WW") }}, | 102 | DMI_MATCH(DMI_BIOS_VERSION,"1SET60WW") }}, |
103 | { no_c2c3, "Medion 41700", { | 103 | { no_c2c3, "Medion 41700", { |
104 | DMI_MATCH(DMI_BIOS_VENDOR,"Phoenix Technologies LTD"), | 104 | DMI_MATCH(DMI_BIOS_VENDOR,"Phoenix Technologies LTD"), |
105 | DMI_MATCH(DMI_BIOS_VERSION,"R01-A1J") }}, | 105 | DMI_MATCH(DMI_BIOS_VERSION,"R01-A1J") }}, |
106 | {}, | 106 | {}, |
107 | }; | 107 | }; |
108 | 108 | ||
109 | 109 | ||
110 | static inline u32 | 110 | static inline u32 |
111 | ticks_elapsed ( | 111 | ticks_elapsed ( |
112 | u32 t1, | 112 | u32 t1, |
113 | u32 t2) | 113 | u32 t2) |
114 | { | 114 | { |
115 | if (t2 >= t1) | 115 | if (t2 >= t1) |
116 | return (t2 - t1); | 116 | return (t2 - t1); |
117 | else if (!acpi_fadt.tmr_val_ext) | 117 | else if (!acpi_fadt.tmr_val_ext) |
118 | return (((0x00FFFFFF - t1) + t2) & 0x00FFFFFF); | 118 | return (((0x00FFFFFF - t1) + t2) & 0x00FFFFFF); |
119 | else | 119 | else |
120 | return ((0xFFFFFFFF - t1) + t2); | 120 | return ((0xFFFFFFFF - t1) + t2); |
121 | } | 121 | } |
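ticks_elapsed() above only has to handle the PM timer wrapping: the counter is free-running and 24 bits wide unless acpi_fadt.tmr_val_ext marks it as a full 32 bits. A hand-worked wrap case in the 24-bit branch, with illustrative readings of 0xFFFFF0 before the sleep and 0x000010 after it:

#include <stdio.h>

static unsigned int ticks_elapsed_24(unsigned int t1, unsigned int t2)
{
	/* the 24-bit branch of ticks_elapsed() above (tmr_val_ext clear) */
	if (t2 >= t1)
		return t2 - t1;
	return ((0x00FFFFFF - t1) + t2) & 0x00FFFFFF;
}

int main(void)
{
	/* the timer wrapped once between the two reads */
	printf("%u\n", ticks_elapsed_24(0xFFFFF0, 0x000010)); /* prints 31 */
	return 0;
}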
122 | 122 | ||
123 | 123 | ||
124 | static void | 124 | static void |
125 | acpi_processor_power_activate ( | 125 | acpi_processor_power_activate ( |
126 | struct acpi_processor *pr, | 126 | struct acpi_processor *pr, |
127 | struct acpi_processor_cx *new) | 127 | struct acpi_processor_cx *new) |
128 | { | 128 | { |
129 | struct acpi_processor_cx *old; | 129 | struct acpi_processor_cx *old; |
130 | 130 | ||
131 | if (!pr || !new) | 131 | if (!pr || !new) |
132 | return; | 132 | return; |
133 | 133 | ||
134 | old = pr->power.state; | 134 | old = pr->power.state; |
135 | 135 | ||
136 | if (old) | 136 | if (old) |
137 | old->promotion.count = 0; | 137 | old->promotion.count = 0; |
138 | new->demotion.count = 0; | 138 | new->demotion.count = 0; |
139 | 139 | ||
140 | /* Cleanup from old state. */ | 140 | /* Cleanup from old state. */ |
141 | if (old) { | 141 | if (old) { |
142 | switch (old->type) { | 142 | switch (old->type) { |
143 | case ACPI_STATE_C3: | 143 | case ACPI_STATE_C3: |
144 | /* Disable bus master reload */ | 144 | /* Disable bus master reload */ |
145 | if (new->type != ACPI_STATE_C3) | 145 | if (new->type != ACPI_STATE_C3) |
146 | acpi_set_register(ACPI_BITREG_BUS_MASTER_RLD, 0, ACPI_MTX_DO_NOT_LOCK); | 146 | acpi_set_register(ACPI_BITREG_BUS_MASTER_RLD, 0, ACPI_MTX_DO_NOT_LOCK); |
147 | break; | 147 | break; |
148 | } | 148 | } |
149 | } | 149 | } |
150 | 150 | ||
151 | /* Prepare to use new state. */ | 151 | /* Prepare to use new state. */ |
152 | switch (new->type) { | 152 | switch (new->type) { |
153 | case ACPI_STATE_C3: | 153 | case ACPI_STATE_C3: |
154 | /* Enable bus master reload */ | 154 | /* Enable bus master reload */ |
155 | if (old->type != ACPI_STATE_C3) | 155 | if (old->type != ACPI_STATE_C3) |
156 | acpi_set_register(ACPI_BITREG_BUS_MASTER_RLD, 1, ACPI_MTX_DO_NOT_LOCK); | 156 | acpi_set_register(ACPI_BITREG_BUS_MASTER_RLD, 1, ACPI_MTX_DO_NOT_LOCK); |
157 | break; | 157 | break; |
158 | } | 158 | } |
159 | 159 | ||
160 | pr->power.state = new; | 160 | pr->power.state = new; |
161 | 161 | ||
162 | return; | 162 | return; |
163 | } | 163 | } |
164 | 164 | ||
165 | 165 | ||
166 | static void acpi_processor_idle (void) | 166 | static void acpi_processor_idle (void) |
167 | { | 167 | { |
168 | struct acpi_processor *pr = NULL; | 168 | struct acpi_processor *pr = NULL; |
169 | struct acpi_processor_cx *cx = NULL; | 169 | struct acpi_processor_cx *cx = NULL; |
170 | struct acpi_processor_cx *next_state = NULL; | 170 | struct acpi_processor_cx *next_state = NULL; |
171 | int sleep_ticks = 0; | 171 | int sleep_ticks = 0; |
172 | u32 t1, t2 = 0; | 172 | u32 t1, t2 = 0; |
173 | 173 | ||
174 | pr = processors[_smp_processor_id()]; | 174 | pr = processors[raw_smp_processor_id()]; |
175 | if (!pr) | 175 | if (!pr) |
176 | return; | 176 | return; |
177 | 177 | ||
178 | /* | 178 | /* |
179 | * Interrupts must be disabled during bus mastering calculations and | 179 | * Interrupts must be disabled during bus mastering calculations and |
180 | * for C2/C3 transitions. | 180 | * for C2/C3 transitions. |
181 | */ | 181 | */ |
182 | local_irq_disable(); | 182 | local_irq_disable(); |
183 | 183 | ||
184 | /* | 184 | /* |
185 | * Check whether we truly need to go idle, or should | 185 | * Check whether we truly need to go idle, or should |
186 | * reschedule: | 186 | * reschedule: |
187 | */ | 187 | */ |
188 | if (unlikely(need_resched())) { | 188 | if (unlikely(need_resched())) { |
189 | local_irq_enable(); | 189 | local_irq_enable(); |
190 | return; | 190 | return; |
191 | } | 191 | } |
192 | 192 | ||
193 | cx = pr->power.state; | 193 | cx = pr->power.state; |
194 | if (!cx) | 194 | if (!cx) |
195 | goto easy_out; | 195 | goto easy_out; |
196 | 196 | ||
197 | /* | 197 | /* |
198 | * Check BM Activity | 198 | * Check BM Activity |
199 | * ----------------- | 199 | * ----------------- |
200 | * Check for bus mastering activity (if required), record, and check | 200 | * Check for bus mastering activity (if required), record, and check |
201 | * for demotion. | 201 | * for demotion. |
202 | */ | 202 | */ |
203 | if (pr->flags.bm_check) { | 203 | if (pr->flags.bm_check) { |
204 | u32 bm_status = 0; | 204 | u32 bm_status = 0; |
205 | unsigned long diff = jiffies - pr->power.bm_check_timestamp; | 205 | unsigned long diff = jiffies - pr->power.bm_check_timestamp; |
206 | 206 | ||
207 | if (diff > 32) | 207 | if (diff > 32) |
208 | diff = 32; | 208 | diff = 32; |
209 | 209 | ||
210 | while (diff) { | 210 | while (diff) { |
211 | /* if we didn't get called, assume there was busmaster activity */ | 211 | /* if we didn't get called, assume there was busmaster activity */ |
212 | diff--; | 212 | diff--; |
213 | if (diff) | 213 | if (diff) |
214 | pr->power.bm_activity |= 0x1; | 214 | pr->power.bm_activity |= 0x1; |
215 | pr->power.bm_activity <<= 1; | 215 | pr->power.bm_activity <<= 1; |
216 | } | 216 | } |
217 | 217 | ||
218 | acpi_get_register(ACPI_BITREG_BUS_MASTER_STATUS, | 218 | acpi_get_register(ACPI_BITREG_BUS_MASTER_STATUS, |
219 | &bm_status, ACPI_MTX_DO_NOT_LOCK); | 219 | &bm_status, ACPI_MTX_DO_NOT_LOCK); |
220 | if (bm_status) { | 220 | if (bm_status) { |
221 | pr->power.bm_activity++; | 221 | pr->power.bm_activity++; |
222 | acpi_set_register(ACPI_BITREG_BUS_MASTER_STATUS, | 222 | acpi_set_register(ACPI_BITREG_BUS_MASTER_STATUS, |
223 | 1, ACPI_MTX_DO_NOT_LOCK); | 223 | 1, ACPI_MTX_DO_NOT_LOCK); |
224 | } | 224 | } |
225 | /* | 225 | /* |
226 | * PIIX4 Erratum #18: Note that BM_STS doesn't always reflect | 226 | * PIIX4 Erratum #18: Note that BM_STS doesn't always reflect |
227 | * the true state of bus mastering activity; forcing us to | 227 | * the true state of bus mastering activity; forcing us to |
228 | * manually check the BMIDEA bit of each IDE channel. | 228 | * manually check the BMIDEA bit of each IDE channel. |
229 | */ | 229 | */ |
230 | else if (errata.piix4.bmisx) { | 230 | else if (errata.piix4.bmisx) { |
231 | if ((inb_p(errata.piix4.bmisx + 0x02) & 0x01) | 231 | if ((inb_p(errata.piix4.bmisx + 0x02) & 0x01) |
232 | || (inb_p(errata.piix4.bmisx + 0x0A) & 0x01)) | 232 | || (inb_p(errata.piix4.bmisx + 0x0A) & 0x01)) |
233 | pr->power.bm_activity++; | 233 | pr->power.bm_activity++; |
234 | } | 234 | } |
235 | 235 | ||
236 | pr->power.bm_check_timestamp = jiffies; | 236 | pr->power.bm_check_timestamp = jiffies; |
237 | 237 | ||
238 | /* | 238 | /* |
239 | * Apply bus mastering demotion policy. Automatically demote | 239 | * Apply bus mastering demotion policy. Automatically demote |
240 | * to avoid a faulty transition. Note that the processor | 240 | * to avoid a faulty transition. Note that the processor |
241 | * won't enter a low-power state during this call (to this | 241 | * won't enter a low-power state during this call (to this |
242 | * function) but should upon the next. | 242 | * function) but should upon the next. |
243 | * | 243 | * |
244 | * TBD: A better policy might be to fall back to the demotion | 244 | * TBD: A better policy might be to fall back to the demotion |
245 | * state (use it for this quantum only) instead of | 245 | * state (use it for this quantum only) instead of |
246 | * demoting -- and rely on duration as our sole demotion | 246 | * demoting -- and rely on duration as our sole demotion |
247 | * qualification. This may, however, introduce DMA | 247 | * qualification. This may, however, introduce DMA |
248 | * issues (e.g. floppy DMA transfer overrun/underrun). | 248 | * issues (e.g. floppy DMA transfer overrun/underrun). |
249 | */ | 249 | */ |
250 | if (pr->power.bm_activity & cx->demotion.threshold.bm) { | 250 | if (pr->power.bm_activity & cx->demotion.threshold.bm) { |
251 | local_irq_enable(); | 251 | local_irq_enable(); |
252 | next_state = cx->demotion.state; | 252 | next_state = cx->demotion.state; |
253 | goto end; | 253 | goto end; |
254 | } | 254 | } |
255 | } | 255 | } |
256 | 256 | ||
257 | cx->usage++; | 257 | cx->usage++; |
258 | 258 | ||
259 | /* | 259 | /* |
260 | * Sleep: | 260 | * Sleep: |
261 | * ------ | 261 | * ------ |
262 | * Invoke the current Cx state to put the processor to sleep. | 262 | * Invoke the current Cx state to put the processor to sleep. |
263 | */ | 263 | */ |
264 | switch (cx->type) { | 264 | switch (cx->type) { |
265 | 265 | ||
266 | case ACPI_STATE_C1: | 266 | case ACPI_STATE_C1: |
267 | /* | 267 | /* |
268 | * Invoke C1. | 268 | * Invoke C1. |
269 | * Use the appropriate idle routine, the one that would | 269 | * Use the appropriate idle routine, the one that would |
270 | * be used without acpi C-states. | 270 | * be used without acpi C-states. |
271 | */ | 271 | */ |
272 | if (pm_idle_save) | 272 | if (pm_idle_save) |
273 | pm_idle_save(); | 273 | pm_idle_save(); |
274 | else | 274 | else |
275 | safe_halt(); | 275 | safe_halt(); |
276 | /* | 276 | /* |
277 | * TBD: Can't get time duration while in C1, as resumes | 277 | * TBD: Can't get time duration while in C1, as resumes |
278 | * go to an ISR rather than here. Need to instrument | 278 | * go to an ISR rather than here. Need to instrument |
279 | * base interrupt handler. | 279 | * base interrupt handler. |
280 | */ | 280 | */ |
281 | sleep_ticks = 0xFFFFFFFF; | 281 | sleep_ticks = 0xFFFFFFFF; |
282 | break; | 282 | break; |
283 | 283 | ||
284 | case ACPI_STATE_C2: | 284 | case ACPI_STATE_C2: |
285 | /* Get start time (ticks) */ | 285 | /* Get start time (ticks) */ |
286 | t1 = inl(acpi_fadt.xpm_tmr_blk.address); | 286 | t1 = inl(acpi_fadt.xpm_tmr_blk.address); |
287 | /* Invoke C2 */ | 287 | /* Invoke C2 */ |
288 | inb(cx->address); | 288 | inb(cx->address); |
289 | /* Dummy op - must do something useless after P_LVL2 read */ | 289 | /* Dummy op - must do something useless after P_LVL2 read */ |
290 | t2 = inl(acpi_fadt.xpm_tmr_blk.address); | 290 | t2 = inl(acpi_fadt.xpm_tmr_blk.address); |
291 | /* Get end time (ticks) */ | 291 | /* Get end time (ticks) */ |
292 | t2 = inl(acpi_fadt.xpm_tmr_blk.address); | 292 | t2 = inl(acpi_fadt.xpm_tmr_blk.address); |
293 | /* Re-enable interrupts */ | 293 | /* Re-enable interrupts */ |
294 | local_irq_enable(); | 294 | local_irq_enable(); |
295 | /* Compute time (ticks) that we were actually asleep */ | 295 | /* Compute time (ticks) that we were actually asleep */ |
296 | sleep_ticks = ticks_elapsed(t1, t2) - cx->latency_ticks - C2_OVERHEAD; | 296 | sleep_ticks = ticks_elapsed(t1, t2) - cx->latency_ticks - C2_OVERHEAD; |
297 | break; | 297 | break; |
298 | 298 | ||
299 | case ACPI_STATE_C3: | 299 | case ACPI_STATE_C3: |
300 | /* Disable bus master arbitration */ | 300 | /* Disable bus master arbitration */ |
301 | acpi_set_register(ACPI_BITREG_ARB_DISABLE, 1, ACPI_MTX_DO_NOT_LOCK); | 301 | acpi_set_register(ACPI_BITREG_ARB_DISABLE, 1, ACPI_MTX_DO_NOT_LOCK); |
302 | /* Get start time (ticks) */ | 302 | /* Get start time (ticks) */ |
303 | t1 = inl(acpi_fadt.xpm_tmr_blk.address); | 303 | t1 = inl(acpi_fadt.xpm_tmr_blk.address); |
304 | /* Invoke C3 */ | 304 | /* Invoke C3 */ |
305 | inb(cx->address); | 305 | inb(cx->address); |
306 | /* Dummy op - must do something useless after P_LVL3 read */ | 306 | /* Dummy op - must do something useless after P_LVL3 read */ |
307 | t2 = inl(acpi_fadt.xpm_tmr_blk.address); | 307 | t2 = inl(acpi_fadt.xpm_tmr_blk.address); |
308 | /* Get end time (ticks) */ | 308 | /* Get end time (ticks) */ |
309 | t2 = inl(acpi_fadt.xpm_tmr_blk.address); | 309 | t2 = inl(acpi_fadt.xpm_tmr_blk.address); |
310 | /* Enable bus master arbitration */ | 310 | /* Enable bus master arbitration */ |
311 | acpi_set_register(ACPI_BITREG_ARB_DISABLE, 0, ACPI_MTX_DO_NOT_LOCK); | 311 | acpi_set_register(ACPI_BITREG_ARB_DISABLE, 0, ACPI_MTX_DO_NOT_LOCK); |
312 | /* Re-enable interrupts */ | 312 | /* Re-enable interrupts */ |
313 | local_irq_enable(); | 313 | local_irq_enable(); |
314 | /* Compute time (ticks) that we were actually asleep */ | 314 | /* Compute time (ticks) that we were actually asleep */ |
315 | sleep_ticks = ticks_elapsed(t1, t2) - cx->latency_ticks - C3_OVERHEAD; | 315 | sleep_ticks = ticks_elapsed(t1, t2) - cx->latency_ticks - C3_OVERHEAD; |
316 | break; | 316 | break; |
317 | 317 | ||
318 | default: | 318 | default: |
319 | local_irq_enable(); | 319 | local_irq_enable(); |
320 | return; | 320 | return; |
321 | } | 321 | } |
322 | 322 | ||
323 | next_state = pr->power.state; | 323 | next_state = pr->power.state; |
324 | 324 | ||
325 | /* | 325 | /* |
326 | * Promotion? | 326 | * Promotion? |
327 | * ---------- | 327 | * ---------- |
328 | * Track the number of longs (time asleep is greater than threshold) | 328 | * Track the number of longs (time asleep is greater than threshold) |
329 | * and promote when the count threshold is reached. Note that bus | 329 | * and promote when the count threshold is reached. Note that bus |
330 | * mastering activity may prevent promotions. | 330 | * mastering activity may prevent promotions. |
331 | * Do not promote above max_cstate. | 331 | * Do not promote above max_cstate. |
332 | */ | 332 | */ |
333 | if (cx->promotion.state && | 333 | if (cx->promotion.state && |
334 | ((cx->promotion.state - pr->power.states) <= max_cstate)) { | 334 | ((cx->promotion.state - pr->power.states) <= max_cstate)) { |
335 | if (sleep_ticks > cx->promotion.threshold.ticks) { | 335 | if (sleep_ticks > cx->promotion.threshold.ticks) { |
336 | cx->promotion.count++; | 336 | cx->promotion.count++; |
337 | cx->demotion.count = 0; | 337 | cx->demotion.count = 0; |
338 | if (cx->promotion.count >= cx->promotion.threshold.count) { | 338 | if (cx->promotion.count >= cx->promotion.threshold.count) { |
339 | if (pr->flags.bm_check) { | 339 | if (pr->flags.bm_check) { |
340 | if (!(pr->power.bm_activity & cx->promotion.threshold.bm)) { | 340 | if (!(pr->power.bm_activity & cx->promotion.threshold.bm)) { |
341 | next_state = cx->promotion.state; | 341 | next_state = cx->promotion.state; |
342 | goto end; | 342 | goto end; |
343 | } | 343 | } |
344 | } | 344 | } |
345 | else { | 345 | else { |
346 | next_state = cx->promotion.state; | 346 | next_state = cx->promotion.state; |
347 | goto end; | 347 | goto end; |
348 | } | 348 | } |
349 | } | 349 | } |
350 | } | 350 | } |
351 | } | 351 | } |
352 | 352 | ||
353 | /* | 353 | /* |
354 | * Demotion? | 354 | * Demotion? |
355 | * --------- | 355 | * --------- |
356 | * Track the number of shorts (time asleep is less than time threshold) | 356 | * Track the number of shorts (time asleep is less than time threshold) |
357 | * and demote when the usage threshold is reached. | 357 | * and demote when the usage threshold is reached. |
358 | */ | 358 | */ |
359 | if (cx->demotion.state) { | 359 | if (cx->demotion.state) { |
360 | if (sleep_ticks < cx->demotion.threshold.ticks) { | 360 | if (sleep_ticks < cx->demotion.threshold.ticks) { |
361 | cx->demotion.count++; | 361 | cx->demotion.count++; |
362 | cx->promotion.count = 0; | 362 | cx->promotion.count = 0; |
363 | if (cx->demotion.count >= cx->demotion.threshold.count) { | 363 | if (cx->demotion.count >= cx->demotion.threshold.count) { |
364 | next_state = cx->demotion.state; | 364 | next_state = cx->demotion.state; |
365 | goto end; | 365 | goto end; |
366 | } | 366 | } |
367 | } | 367 | } |
368 | } | 368 | } |
369 | 369 | ||
370 | end: | 370 | end: |
371 | /* | 371 | /* |
372 | * Demote if current state exceeds max_cstate | 372 | * Demote if current state exceeds max_cstate |
373 | */ | 373 | */ |
374 | if ((pr->power.state - pr->power.states) > max_cstate) { | 374 | if ((pr->power.state - pr->power.states) > max_cstate) { |
375 | if (cx->demotion.state) | 375 | if (cx->demotion.state) |
376 | next_state = cx->demotion.state; | 376 | next_state = cx->demotion.state; |
377 | } | 377 | } |
378 | 378 | ||
379 | /* | 379 | /* |
380 | * New Cx State? | 380 | * New Cx State? |
381 | * ------------- | 381 | * ------------- |
382 | * If we're going to start using a new Cx state we must clean up | 382 | * If we're going to start using a new Cx state we must clean up |
383 | * from the previous and prepare to use the new. | 383 | * from the previous and prepare to use the new. |
384 | */ | 384 | */ |
385 | if (next_state != pr->power.state) | 385 | if (next_state != pr->power.state) |
386 | acpi_processor_power_activate(pr, next_state); | 386 | acpi_processor_power_activate(pr, next_state); |
387 | 387 | ||
388 | return; | 388 | return; |
389 | 389 | ||
390 | easy_out: | 390 | easy_out: |
391 | /* do C1 instead of busy loop */ | 391 | /* do C1 instead of busy loop */ |
392 | if (pm_idle_save) | 392 | if (pm_idle_save) |
393 | pm_idle_save(); | 393 | pm_idle_save(); |
394 | else | 394 | else |
395 | safe_halt(); | 395 | safe_halt(); |
396 | return; | 396 | return; |
397 | } | 397 | } |
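The bm_check block near the top of acpi_processor_idle() keeps pr->power.bm_activity as a sliding bit history: one left shift per jiffy since the last idle entry, with any jiffies the idle loop missed pessimistically recorded as bus-master activity, and this check's BM_STS result landing in bit 0. A stand-alone sketch of just that history update (variable names reused for readability; the diff and status values are illustrative):

#include <stdio.h>

int main(void)
{
	unsigned int bm_activity = 0;  /* stands in for pr->power.bm_activity */
	unsigned long diff = 3;        /* assumed jiffies since the last idle entry */
	int bm_status = 1;             /* assume BM_STS was found set this time */

	/* same shift loop as above: jiffies we did not observe count as busy */
	while (diff) {
		diff--;
		if (diff)
			bm_activity |= 0x1;
		bm_activity <<= 1;
	}
	if (bm_status)
		bm_activity++;

	printf("%#x\n", bm_activity);  /* prints 0xd */
	return 0;
}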
398 | 398 | ||
399 | 399 | ||
400 | static int | 400 | static int |
401 | acpi_processor_set_power_policy ( | 401 | acpi_processor_set_power_policy ( |
402 | struct acpi_processor *pr) | 402 | struct acpi_processor *pr) |
403 | { | 403 | { |
404 | unsigned int i; | 404 | unsigned int i; |
405 | unsigned int state_is_set = 0; | 405 | unsigned int state_is_set = 0; |
406 | struct acpi_processor_cx *lower = NULL; | 406 | struct acpi_processor_cx *lower = NULL; |
407 | struct acpi_processor_cx *higher = NULL; | 407 | struct acpi_processor_cx *higher = NULL; |
408 | struct acpi_processor_cx *cx; | 408 | struct acpi_processor_cx *cx; |
409 | 409 | ||
410 | ACPI_FUNCTION_TRACE("acpi_processor_set_power_policy"); | 410 | ACPI_FUNCTION_TRACE("acpi_processor_set_power_policy"); |
411 | 411 | ||
412 | if (!pr) | 412 | if (!pr) |
413 | return_VALUE(-EINVAL); | 413 | return_VALUE(-EINVAL); |
414 | 414 | ||
415 | /* | 415 | /* |
416 | * This function sets the default Cx state policy (OS idle handler). | 416 | * This function sets the default Cx state policy (OS idle handler). |
417 | * Our scheme is to promote quickly to C2 but more conservatively | 417 | * Our scheme is to promote quickly to C2 but more conservatively |
418 | * to C3. We're favoring C2 for its characteristics of low latency | 418 | * to C3. We're favoring C2 for its characteristics of low latency |
419 | * (quick response), good power savings, and ability to allow bus | 419 | * (quick response), good power savings, and ability to allow bus |
420 | * mastering activity. Note that the Cx state policy is completely | 420 | * mastering activity. Note that the Cx state policy is completely |
421 | * customizable and can be altered dynamically. | 421 | * customizable and can be altered dynamically. |
422 | */ | 422 | */ |
423 | 423 | ||
424 | /* startup state */ | 424 | /* startup state */ |
425 | for (i=1; i < ACPI_PROCESSOR_MAX_POWER; i++) { | 425 | for (i=1; i < ACPI_PROCESSOR_MAX_POWER; i++) { |
426 | cx = &pr->power.states[i]; | 426 | cx = &pr->power.states[i]; |
427 | if (!cx->valid) | 427 | if (!cx->valid) |
428 | continue; | 428 | continue; |
429 | 429 | ||
430 | if (!state_is_set) | 430 | if (!state_is_set) |
431 | pr->power.state = cx; | 431 | pr->power.state = cx; |
432 | state_is_set++; | 432 | state_is_set++; |
433 | break; | 433 | break; |
434 | } | 434 | } |
435 | 435 | ||
436 | if (!state_is_set) | 436 | if (!state_is_set) |
437 | return_VALUE(-ENODEV); | 437 | return_VALUE(-ENODEV); |
438 | 438 | ||
439 | /* demotion */ | 439 | /* demotion */ |
440 | for (i=1; i < ACPI_PROCESSOR_MAX_POWER; i++) { | 440 | for (i=1; i < ACPI_PROCESSOR_MAX_POWER; i++) { |
441 | cx = &pr->power.states[i]; | 441 | cx = &pr->power.states[i]; |
442 | if (!cx->valid) | 442 | if (!cx->valid) |
443 | continue; | 443 | continue; |
444 | 444 | ||
445 | if (lower) { | 445 | if (lower) { |
446 | cx->demotion.state = lower; | 446 | cx->demotion.state = lower; |
447 | cx->demotion.threshold.ticks = cx->latency_ticks; | 447 | cx->demotion.threshold.ticks = cx->latency_ticks; |
448 | cx->demotion.threshold.count = 1; | 448 | cx->demotion.threshold.count = 1; |
449 | if (cx->type == ACPI_STATE_C3) | 449 | if (cx->type == ACPI_STATE_C3) |
450 | cx->demotion.threshold.bm = bm_history; | 450 | cx->demotion.threshold.bm = bm_history; |
451 | } | 451 | } |
452 | 452 | ||
453 | lower = cx; | 453 | lower = cx; |
454 | } | 454 | } |
455 | 455 | ||
456 | /* promotion */ | 456 | /* promotion */ |
457 | for (i = (ACPI_PROCESSOR_MAX_POWER - 1); i > 0; i--) { | 457 | for (i = (ACPI_PROCESSOR_MAX_POWER - 1); i > 0; i--) { |
458 | cx = &pr->power.states[i]; | 458 | cx = &pr->power.states[i]; |
459 | if (!cx->valid) | 459 | if (!cx->valid) |
460 | continue; | 460 | continue; |
461 | 461 | ||
462 | if (higher) { | 462 | if (higher) { |
463 | cx->promotion.state = higher; | 463 | cx->promotion.state = higher; |
464 | cx->promotion.threshold.ticks = cx->latency_ticks; | 464 | cx->promotion.threshold.ticks = cx->latency_ticks; |
465 | if (cx->type >= ACPI_STATE_C2) | 465 | if (cx->type >= ACPI_STATE_C2) |
466 | cx->promotion.threshold.count = 4; | 466 | cx->promotion.threshold.count = 4; |
467 | else | 467 | else |
468 | cx->promotion.threshold.count = 10; | 468 | cx->promotion.threshold.count = 10; |
469 | if (higher->type == ACPI_STATE_C3) | 469 | if (higher->type == ACPI_STATE_C3) |
470 | cx->promotion.threshold.bm = bm_history; | 470 | cx->promotion.threshold.bm = bm_history; |
471 | } | 471 | } |
472 | 472 | ||
473 | higher = cx; | 473 | higher = cx; |
474 | } | 474 | } |
475 | 475 | ||
476 | return_VALUE(0); | 476 | return_VALUE(0); |
477 | } | 477 | } |
478 | 478 | ||
479 | 479 | ||
480 | static int acpi_processor_get_power_info_fadt (struct acpi_processor *pr) | 480 | static int acpi_processor_get_power_info_fadt (struct acpi_processor *pr) |
481 | { | 481 | { |
482 | int i; | 482 | int i; |
483 | 483 | ||
484 | ACPI_FUNCTION_TRACE("acpi_processor_get_power_info_fadt"); | 484 | ACPI_FUNCTION_TRACE("acpi_processor_get_power_info_fadt"); |
485 | 485 | ||
486 | if (!pr) | 486 | if (!pr) |
487 | return_VALUE(-EINVAL); | 487 | return_VALUE(-EINVAL); |
488 | 488 | ||
489 | if (!pr->pblk) | 489 | if (!pr->pblk) |
490 | return_VALUE(-ENODEV); | 490 | return_VALUE(-ENODEV); |
491 | 491 | ||
492 | for (i = 0; i < ACPI_PROCESSOR_MAX_POWER; i++) | 492 | for (i = 0; i < ACPI_PROCESSOR_MAX_POWER; i++) |
493 | memset(pr->power.states, 0, sizeof(struct acpi_processor_cx)); | 493 | memset(pr->power.states, 0, sizeof(struct acpi_processor_cx)); |
494 | 494 | ||
495 | /* if info is obtained from pblk/fadt, type equals state */ | 495 | /* if info is obtained from pblk/fadt, type equals state */ |
496 | pr->power.states[ACPI_STATE_C1].type = ACPI_STATE_C1; | 496 | pr->power.states[ACPI_STATE_C1].type = ACPI_STATE_C1; |
497 | pr->power.states[ACPI_STATE_C2].type = ACPI_STATE_C2; | 497 | pr->power.states[ACPI_STATE_C2].type = ACPI_STATE_C2; |
498 | pr->power.states[ACPI_STATE_C3].type = ACPI_STATE_C3; | 498 | pr->power.states[ACPI_STATE_C3].type = ACPI_STATE_C3; |
499 | 499 | ||
500 | /* the C0 state only exists as a filler in our array, | 500 | /* the C0 state only exists as a filler in our array, |
501 | * and all processors need to support C1 */ | 501 | * and all processors need to support C1 */ |
502 | pr->power.states[ACPI_STATE_C0].valid = 1; | 502 | pr->power.states[ACPI_STATE_C0].valid = 1; |
503 | pr->power.states[ACPI_STATE_C1].valid = 1; | 503 | pr->power.states[ACPI_STATE_C1].valid = 1; |
504 | 504 | ||
505 | /* determine C2 and C3 address from pblk */ | 505 | /* determine C2 and C3 address from pblk */ |
506 | pr->power.states[ACPI_STATE_C2].address = pr->pblk + 4; | 506 | pr->power.states[ACPI_STATE_C2].address = pr->pblk + 4; |
507 | pr->power.states[ACPI_STATE_C3].address = pr->pblk + 5; | 507 | pr->power.states[ACPI_STATE_C3].address = pr->pblk + 5; |
508 | 508 | ||
509 | /* determine latencies from FADT */ | 509 | /* determine latencies from FADT */ |
510 | pr->power.states[ACPI_STATE_C2].latency = acpi_fadt.plvl2_lat; | 510 | pr->power.states[ACPI_STATE_C2].latency = acpi_fadt.plvl2_lat; |
511 | pr->power.states[ACPI_STATE_C3].latency = acpi_fadt.plvl3_lat; | 511 | pr->power.states[ACPI_STATE_C3].latency = acpi_fadt.plvl3_lat; |
512 | 512 | ||
513 | ACPI_DEBUG_PRINT((ACPI_DB_INFO, | 513 | ACPI_DEBUG_PRINT((ACPI_DB_INFO, |
514 | "lvl2[0x%08x] lvl3[0x%08x]\n", | 514 | "lvl2[0x%08x] lvl3[0x%08x]\n", |
515 | pr->power.states[ACPI_STATE_C2].address, | 515 | pr->power.states[ACPI_STATE_C2].address, |
516 | pr->power.states[ACPI_STATE_C3].address)); | 516 | pr->power.states[ACPI_STATE_C3].address)); |
517 | 517 | ||
518 | return_VALUE(0); | 518 | return_VALUE(0); |
519 | } | 519 | } |
520 | 520 | ||
521 | 521 | ||
522 | static int acpi_processor_get_power_info_cst (struct acpi_processor *pr) | 522 | static int acpi_processor_get_power_info_cst (struct acpi_processor *pr) |
523 | { | 523 | { |
524 | acpi_status status = 0; | 524 | acpi_status status = 0; |
525 | acpi_integer count; | 525 | acpi_integer count; |
526 | int i; | 526 | int i; |
527 | struct acpi_buffer buffer = {ACPI_ALLOCATE_BUFFER, NULL}; | 527 | struct acpi_buffer buffer = {ACPI_ALLOCATE_BUFFER, NULL}; |
528 | union acpi_object *cst; | 528 | union acpi_object *cst; |
529 | 529 | ||
530 | ACPI_FUNCTION_TRACE("acpi_processor_get_power_info_cst"); | 530 | ACPI_FUNCTION_TRACE("acpi_processor_get_power_info_cst"); |
531 | 531 | ||
532 | if (errata.smp) | 532 | if (errata.smp) |
533 | return_VALUE(-ENODEV); | 533 | return_VALUE(-ENODEV); |
534 | 534 | ||
535 | if (nocst) | 535 | if (nocst) |
536 | return_VALUE(-ENODEV); | 536 | return_VALUE(-ENODEV); |
537 | 537 | ||
538 | pr->power.count = 0; | 538 | pr->power.count = 0; |
539 | for (i = 0; i < ACPI_PROCESSOR_MAX_POWER; i++) | 539 | for (i = 0; i < ACPI_PROCESSOR_MAX_POWER; i++) |
540 | memset(pr->power.states, 0, sizeof(struct acpi_processor_cx)); | 540 | memset(pr->power.states, 0, sizeof(struct acpi_processor_cx)); |
541 | 541 | ||
542 | status = acpi_evaluate_object(pr->handle, "_CST", NULL, &buffer); | 542 | status = acpi_evaluate_object(pr->handle, "_CST", NULL, &buffer); |
543 | if (ACPI_FAILURE(status)) { | 543 | if (ACPI_FAILURE(status)) { |
544 | ACPI_DEBUG_PRINT((ACPI_DB_INFO, "No _CST, giving up\n")); | 544 | ACPI_DEBUG_PRINT((ACPI_DB_INFO, "No _CST, giving up\n")); |
545 | return_VALUE(-ENODEV); | 545 | return_VALUE(-ENODEV); |
546 | } | 546 | } |
547 | 547 | ||
548 | cst = (union acpi_object *) buffer.pointer; | 548 | cst = (union acpi_object *) buffer.pointer; |
549 | 549 | ||
550 | /* There must be at least 2 elements */ | 550 | /* There must be at least 2 elements */ |
551 | if (!cst || (cst->type != ACPI_TYPE_PACKAGE) || cst->package.count < 2) { | 551 | if (!cst || (cst->type != ACPI_TYPE_PACKAGE) || cst->package.count < 2) { |
552 | ACPI_DEBUG_PRINT((ACPI_DB_ERROR, "not enough elements in _CST\n")); | 552 | ACPI_DEBUG_PRINT((ACPI_DB_ERROR, "not enough elements in _CST\n")); |
553 | status = -EFAULT; | 553 | status = -EFAULT; |
554 | goto end; | 554 | goto end; |
555 | } | 555 | } |
556 | 556 | ||
557 | count = cst->package.elements[0].integer.value; | 557 | count = cst->package.elements[0].integer.value; |
558 | 558 | ||
559 | /* Validate number of power states. */ | 559 | /* Validate number of power states. */ |
560 | if (count < 1 || count != cst->package.count - 1) { | 560 | if (count < 1 || count != cst->package.count - 1) { |
561 | ACPI_DEBUG_PRINT((ACPI_DB_ERROR, "count given by _CST is not valid\n")); | 561 | ACPI_DEBUG_PRINT((ACPI_DB_ERROR, "count given by _CST is not valid\n")); |
562 | status = -EFAULT; | 562 | status = -EFAULT; |
563 | goto end; | 563 | goto end; |
564 | } | 564 | } |
565 | 565 | ||
566 | /* We support up to ACPI_PROCESSOR_MAX_POWER. */ | 566 | /* We support up to ACPI_PROCESSOR_MAX_POWER. */ |
567 | if (count > ACPI_PROCESSOR_MAX_POWER) { | 567 | if (count > ACPI_PROCESSOR_MAX_POWER) { |
568 | printk(KERN_WARNING "Limiting number of power states to max (%d)\n", ACPI_PROCESSOR_MAX_POWER); | 568 | printk(KERN_WARNING "Limiting number of power states to max (%d)\n", ACPI_PROCESSOR_MAX_POWER); |
569 | printk(KERN_WARNING "Please increase ACPI_PROCESSOR_MAX_POWER if needed.\n"); | 569 | printk(KERN_WARNING "Please increase ACPI_PROCESSOR_MAX_POWER if needed.\n"); |
570 | count = ACPI_PROCESSOR_MAX_POWER; | 570 | count = ACPI_PROCESSOR_MAX_POWER; |
571 | } | 571 | } |
572 | 572 | ||
573 | /* Tell driver that at least _CST is supported. */ | 573 | /* Tell driver that at least _CST is supported. */ |
574 | pr->flags.has_cst = 1; | 574 | pr->flags.has_cst = 1; |
575 | 575 | ||
576 | for (i = 1; i <= count; i++) { | 576 | for (i = 1; i <= count; i++) { |
577 | union acpi_object *element; | 577 | union acpi_object *element; |
578 | union acpi_object *obj; | 578 | union acpi_object *obj; |
579 | struct acpi_power_register *reg; | 579 | struct acpi_power_register *reg; |
580 | struct acpi_processor_cx cx; | 580 | struct acpi_processor_cx cx; |
581 | 581 | ||
582 | memset(&cx, 0, sizeof(cx)); | 582 | memset(&cx, 0, sizeof(cx)); |
583 | 583 | ||
584 | element = (union acpi_object *) &(cst->package.elements[i]); | 584 | element = (union acpi_object *) &(cst->package.elements[i]); |
585 | if (element->type != ACPI_TYPE_PACKAGE) | 585 | if (element->type != ACPI_TYPE_PACKAGE) |
586 | continue; | 586 | continue; |
587 | 587 | ||
588 | if (element->package.count != 4) | 588 | if (element->package.count != 4) |
589 | continue; | 589 | continue; |
590 | 590 | ||
591 | obj = (union acpi_object *) &(element->package.elements[0]); | 591 | obj = (union acpi_object *) &(element->package.elements[0]); |
592 | 592 | ||
593 | if (obj->type != ACPI_TYPE_BUFFER) | 593 | if (obj->type != ACPI_TYPE_BUFFER) |
594 | continue; | 594 | continue; |
595 | 595 | ||
596 | reg = (struct acpi_power_register *) obj->buffer.pointer; | 596 | reg = (struct acpi_power_register *) obj->buffer.pointer; |
597 | 597 | ||
598 | if (reg->space_id != ACPI_ADR_SPACE_SYSTEM_IO && | 598 | if (reg->space_id != ACPI_ADR_SPACE_SYSTEM_IO && |
599 | (reg->space_id != ACPI_ADR_SPACE_FIXED_HARDWARE)) | 599 | (reg->space_id != ACPI_ADR_SPACE_FIXED_HARDWARE)) |
600 | continue; | 600 | continue; |
601 | 601 | ||
602 | cx.address = (reg->space_id == ACPI_ADR_SPACE_FIXED_HARDWARE) ? | 602 | cx.address = (reg->space_id == ACPI_ADR_SPACE_FIXED_HARDWARE) ? |
603 | 0 : reg->address; | 603 | 0 : reg->address; |
604 | 604 | ||
605 | /* There should be an easy way to extract an integer... */ | 605 | /* There should be an easy way to extract an integer... */ |
606 | obj = (union acpi_object *) &(element->package.elements[1]); | 606 | obj = (union acpi_object *) &(element->package.elements[1]); |
607 | if (obj->type != ACPI_TYPE_INTEGER) | 607 | if (obj->type != ACPI_TYPE_INTEGER) |
608 | continue; | 608 | continue; |
609 | 609 | ||
610 | cx.type = obj->integer.value; | 610 | cx.type = obj->integer.value; |
611 | 611 | ||
612 | if ((cx.type != ACPI_STATE_C1) && | 612 | if ((cx.type != ACPI_STATE_C1) && |
613 | (reg->space_id != ACPI_ADR_SPACE_SYSTEM_IO)) | 613 | (reg->space_id != ACPI_ADR_SPACE_SYSTEM_IO)) |
614 | continue; | 614 | continue; |
615 | 615 | ||
616 | if ((cx.type < ACPI_STATE_C1) || | 616 | if ((cx.type < ACPI_STATE_C1) || |
617 | (cx.type > ACPI_STATE_C3)) | 617 | (cx.type > ACPI_STATE_C3)) |
618 | continue; | 618 | continue; |
619 | 619 | ||
620 | obj = (union acpi_object *) &(element->package.elements[2]); | 620 | obj = (union acpi_object *) &(element->package.elements[2]); |
621 | if (obj->type != ACPI_TYPE_INTEGER) | 621 | if (obj->type != ACPI_TYPE_INTEGER) |
622 | continue; | 622 | continue; |
623 | 623 | ||
624 | cx.latency = obj->integer.value; | 624 | cx.latency = obj->integer.value; |
625 | 625 | ||
626 | obj = (union acpi_object *) &(element->package.elements[3]); | 626 | obj = (union acpi_object *) &(element->package.elements[3]); |
627 | if (obj->type != ACPI_TYPE_INTEGER) | 627 | if (obj->type != ACPI_TYPE_INTEGER) |
628 | continue; | 628 | continue; |
629 | 629 | ||
630 | cx.power = obj->integer.value; | 630 | cx.power = obj->integer.value; |
631 | 631 | ||
632 | (pr->power.count)++; | 632 | (pr->power.count)++; |
633 | memcpy(&(pr->power.states[pr->power.count]), &cx, sizeof(cx)); | 633 | memcpy(&(pr->power.states[pr->power.count]), &cx, sizeof(cx)); |
634 | } | 634 | } |
635 | 635 | ||
636 | ACPI_DEBUG_PRINT((ACPI_DB_INFO, "Found %d power states\n", pr->power.count)); | 636 | ACPI_DEBUG_PRINT((ACPI_DB_INFO, "Found %d power states\n", pr->power.count)); |
637 | 637 | ||
638 | /* Validate number of power states discovered */ | 638 | /* Validate number of power states discovered */ |
639 | if (pr->power.count < 2) | 639 | if (pr->power.count < 2) |
640 | status = -ENODEV; | 640 | status = -ENODEV; |
641 | 641 | ||
642 | end: | 642 | end: |
643 | acpi_os_free(buffer.pointer); | 643 | acpi_os_free(buffer.pointer); |
644 | 644 | ||
645 | return_VALUE(status); | 645 | return_VALUE(status); |
646 | } | 646 | } |
647 | 647 | ||
648 | 648 | ||
649 | static void acpi_processor_power_verify_c2(struct acpi_processor_cx *cx) | 649 | static void acpi_processor_power_verify_c2(struct acpi_processor_cx *cx) |
650 | { | 650 | { |
651 | ACPI_FUNCTION_TRACE("acpi_processor_get_power_verify_c2"); | 651 | ACPI_FUNCTION_TRACE("acpi_processor_get_power_verify_c2"); |
652 | 652 | ||
653 | if (!cx->address) | 653 | if (!cx->address) |
654 | return_VOID; | 654 | return_VOID; |
655 | 655 | ||
656 | /* | 656 | /* |
657 | * C2 latency must be less than or equal to 100 | 657 | * C2 latency must be less than or equal to 100 |
658 | * microseconds. | 658 | * microseconds. |
659 | */ | 659 | */ |
660 | else if (cx->latency > ACPI_PROCESSOR_MAX_C2_LATENCY) { | 660 | else if (cx->latency > ACPI_PROCESSOR_MAX_C2_LATENCY) { |
661 | ACPI_DEBUG_PRINT((ACPI_DB_INFO, | 661 | ACPI_DEBUG_PRINT((ACPI_DB_INFO, |
662 | "latency too large [%d]\n", | 662 | "latency too large [%d]\n", |
663 | cx->latency)); | 663 | cx->latency)); |
664 | return_VOID; | 664 | return_VOID; |
665 | } | 665 | } |
666 | 666 | ||
667 | /* We're (currently) only supporting C2 on UP */ | 667 | /* We're (currently) only supporting C2 on UP */ |
668 | else if (errata.smp) { | 668 | else if (errata.smp) { |
669 | ACPI_DEBUG_PRINT((ACPI_DB_INFO, | 669 | ACPI_DEBUG_PRINT((ACPI_DB_INFO, |
670 | "C2 not supported in SMP mode\n")); | 670 | "C2 not supported in SMP mode\n")); |
671 | return_VOID; | 671 | return_VOID; |
672 | } | 672 | } |
673 | 673 | ||
674 | /* | 674 | /* |
675 | * Otherwise we've met all of our C2 requirements. | 675 | * Otherwise we've met all of our C2 requirements. |
676 | * Normalize the C2 latency to expedite policy | 676 | * Normalize the C2 latency to expedite policy |
677 | */ | 677 | */ |
678 | cx->valid = 1; | 678 | cx->valid = 1; |
679 | cx->latency_ticks = US_TO_PM_TIMER_TICKS(cx->latency); | 679 | cx->latency_ticks = US_TO_PM_TIMER_TICKS(cx->latency); |
680 | 680 | ||
681 | return_VOID; | 681 | return_VOID; |
682 | } | 682 | } |
683 | 683 | ||
684 | 684 | ||
685 | static void acpi_processor_power_verify_c3( | 685 | static void acpi_processor_power_verify_c3( |
686 | struct acpi_processor *pr, | 686 | struct acpi_processor *pr, |
687 | struct acpi_processor_cx *cx) | 687 | struct acpi_processor_cx *cx) |
688 | { | 688 | { |
689 | ACPI_FUNCTION_TRACE("acpi_processor_get_power_verify_c3"); | 689 | ACPI_FUNCTION_TRACE("acpi_processor_get_power_verify_c3"); |
690 | 690 | ||
691 | if (!cx->address) | 691 | if (!cx->address) |
692 | return_VOID; | 692 | return_VOID; |
693 | 693 | ||
694 | /* | 694 | /* |
695 | * C3 latency must be less than or equal to 1000 | 695 | * C3 latency must be less than or equal to 1000 |
696 | * microseconds. | 696 | * microseconds. |
697 | */ | 697 | */ |
698 | else if (cx->latency > ACPI_PROCESSOR_MAX_C3_LATENCY) { | 698 | else if (cx->latency > ACPI_PROCESSOR_MAX_C3_LATENCY) { |
699 | ACPI_DEBUG_PRINT((ACPI_DB_INFO, | 699 | ACPI_DEBUG_PRINT((ACPI_DB_INFO, |
700 | "latency too large [%d]\n", | 700 | "latency too large [%d]\n", |
701 | cx->latency)); | 701 | cx->latency)); |
702 | return_VOID; | 702 | return_VOID; |
703 | } | 703 | } |
704 | 704 | ||
705 | /* bus mastering control is necessary */ | 705 | /* bus mastering control is necessary */ |
706 | else if (!pr->flags.bm_control) { | 706 | else if (!pr->flags.bm_control) { |
707 | ACPI_DEBUG_PRINT((ACPI_DB_INFO, | 707 | ACPI_DEBUG_PRINT((ACPI_DB_INFO, |
708 | "C3 support requires bus mastering control\n")); | 708 | "C3 support requires bus mastering control\n")); |
709 | return_VOID; | 709 | return_VOID; |
710 | } | 710 | } |
711 | 711 | ||
712 | /* We're (currently) only supporting C3 on UP */ | 712 | /* We're (currently) only supporting C3 on UP */ |
713 | else if (errata.smp) { | 713 | else if (errata.smp) { |
714 | ACPI_DEBUG_PRINT((ACPI_DB_INFO, | 714 | ACPI_DEBUG_PRINT((ACPI_DB_INFO, |
715 | "C3 not supported in SMP mode\n")); | 715 | "C3 not supported in SMP mode\n")); |
716 | return_VOID; | 716 | return_VOID; |
717 | } | 717 | } |
718 | 718 | ||
719 | /* | 719 | /* |
720 | * PIIX4 Erratum #18: We don't support C3 when Type-F (fast) | 720 | * PIIX4 Erratum #18: We don't support C3 when Type-F (fast) |
721 | * DMA transfers are used by any ISA device to avoid livelock. | 721 | * DMA transfers are used by any ISA device to avoid livelock. |
722 | * Note that we could disable Type-F DMA (as recommended by | 722 | * Note that we could disable Type-F DMA (as recommended by |
723 | * the erratum), but this is known to disrupt certain ISA | 723 | * the erratum), but this is known to disrupt certain ISA |
724 | * devices thus we take the conservative approach. | 724 | * devices thus we take the conservative approach. |
725 | */ | 725 | */ |
726 | else if (errata.piix4.fdma) { | 726 | else if (errata.piix4.fdma) { |
727 | ACPI_DEBUG_PRINT((ACPI_DB_INFO, | 727 | ACPI_DEBUG_PRINT((ACPI_DB_INFO, |
728 | "C3 not supported on PIIX4 with Type-F DMA\n")); | 728 | "C3 not supported on PIIX4 with Type-F DMA\n")); |
729 | return_VOID; | 729 | return_VOID; |
730 | } | 730 | } |
731 | 731 | ||
732 | /* | 732 | /* |
733 | * Otherwise we've met all of our C3 requirements. | 733 | * Otherwise we've met all of our C3 requirements. |
734 | * Normalize the C3 latency to expedite policy. Enable | 734 | * Normalize the C3 latency to expedite policy. Enable |
735 | * checking of bus mastering status (bm_check) so we can | 735 | * checking of bus mastering status (bm_check) so we can |
736 | * use this in our C3 policy | 736 | * use this in our C3 policy |
737 | */ | 737 | */ |
738 | cx->valid = 1; | 738 | cx->valid = 1; |
739 | cx->latency_ticks = US_TO_PM_TIMER_TICKS(cx->latency); | 739 | cx->latency_ticks = US_TO_PM_TIMER_TICKS(cx->latency); |
740 | pr->flags.bm_check = 1; | 740 | pr->flags.bm_check = 1; |
741 | 741 | ||
742 | return_VOID; | 742 | return_VOID; |
743 | } | 743 | } |
744 | 744 | ||
745 | 745 | ||
746 | static int acpi_processor_power_verify(struct acpi_processor *pr) | 746 | static int acpi_processor_power_verify(struct acpi_processor *pr) |
747 | { | 747 | { |
748 | unsigned int i; | 748 | unsigned int i; |
749 | unsigned int working = 0; | 749 | unsigned int working = 0; |
750 | 750 | ||
751 | for (i=1; i < ACPI_PROCESSOR_MAX_POWER; i++) { | 751 | for (i=1; i < ACPI_PROCESSOR_MAX_POWER; i++) { |
752 | struct acpi_processor_cx *cx = &pr->power.states[i]; | 752 | struct acpi_processor_cx *cx = &pr->power.states[i]; |
753 | 753 | ||
754 | switch (cx->type) { | 754 | switch (cx->type) { |
755 | case ACPI_STATE_C1: | 755 | case ACPI_STATE_C1: |
756 | cx->valid = 1; | 756 | cx->valid = 1; |
757 | break; | 757 | break; |
758 | 758 | ||
759 | case ACPI_STATE_C2: | 759 | case ACPI_STATE_C2: |
760 | acpi_processor_power_verify_c2(cx); | 760 | acpi_processor_power_verify_c2(cx); |
761 | break; | 761 | break; |
762 | 762 | ||
763 | case ACPI_STATE_C3: | 763 | case ACPI_STATE_C3: |
764 | acpi_processor_power_verify_c3(pr, cx); | 764 | acpi_processor_power_verify_c3(pr, cx); |
765 | break; | 765 | break; |
766 | } | 766 | } |
767 | 767 | ||
768 | if (cx->valid) | 768 | if (cx->valid) |
769 | working++; | 769 | working++; |
770 | } | 770 | } |
771 | 771 | ||
772 | return (working); | 772 | return (working); |
773 | } | 773 | } |
774 | 774 | ||
775 | static int acpi_processor_get_power_info ( | 775 | static int acpi_processor_get_power_info ( |
776 | struct acpi_processor *pr) | 776 | struct acpi_processor *pr) |
777 | { | 777 | { |
778 | unsigned int i; | 778 | unsigned int i; |
779 | int result; | 779 | int result; |
780 | 780 | ||
781 | ACPI_FUNCTION_TRACE("acpi_processor_get_power_info"); | 781 | ACPI_FUNCTION_TRACE("acpi_processor_get_power_info"); |
782 | 782 | ||
783 | /* NOTE: the idle thread may not be running while calling | 783 | /* NOTE: the idle thread may not be running while calling |
784 | * this function */ | 784 | * this function */ |
785 | 785 | ||
786 | result = acpi_processor_get_power_info_cst(pr); | 786 | result = acpi_processor_get_power_info_cst(pr); |
787 | if ((result) || (acpi_processor_power_verify(pr) < 2)) { | 787 | if ((result) || (acpi_processor_power_verify(pr) < 2)) { |
788 | result = acpi_processor_get_power_info_fadt(pr); | 788 | result = acpi_processor_get_power_info_fadt(pr); |
789 | if (result) | 789 | if (result) |
790 | return_VALUE(result); | 790 | return_VALUE(result); |
791 | 791 | ||
792 | if (acpi_processor_power_verify(pr) < 2) | 792 | if (acpi_processor_power_verify(pr) < 2) |
793 | return_VALUE(-ENODEV); | 793 | return_VALUE(-ENODEV); |
794 | } | 794 | } |
795 | 795 | ||
796 | /* | 796 | /* |
797 | * Set Default Policy | 797 | * Set Default Policy |
798 | * ------------------ | 798 | * ------------------ |
799 | * Now that we know which states are supported, set the default | 799 | * Now that we know which states are supported, set the default |
800 | * policy. Note that this policy can be changed dynamically | 800 | * policy. Note that this policy can be changed dynamically |
801 | * (e.g. encourage deeper sleeps to conserve battery life when | 801 | * (e.g. encourage deeper sleeps to conserve battery life when |
802 | * not on AC). | 802 | * not on AC). |
803 | */ | 803 | */ |
804 | result = acpi_processor_set_power_policy(pr); | 804 | result = acpi_processor_set_power_policy(pr); |
805 | if (result) | 805 | if (result) |
806 | return_VALUE(result); | 806 | return_VALUE(result); |
807 | 807 | ||
808 | /* | 808 | /* |
809 | * if one state of type C2 or C3 is available, mark this | 809 | * if one state of type C2 or C3 is available, mark this |
810 | * CPU as being "idle manageable" | 810 | * CPU as being "idle manageable" |
811 | */ | 811 | */ |
812 | for (i = 1; i < ACPI_PROCESSOR_MAX_POWER; i++) { | 812 | for (i = 1; i < ACPI_PROCESSOR_MAX_POWER; i++) { |
813 | if (pr->power.states[i].valid) | 813 | if (pr->power.states[i].valid) |
814 | pr->power.count = i; | 814 | pr->power.count = i; |
815 | if ((pr->power.states[i].valid) && | 815 | if ((pr->power.states[i].valid) && |
816 | (pr->power.states[i].type >= ACPI_STATE_C2)) | 816 | (pr->power.states[i].type >= ACPI_STATE_C2)) |
817 | pr->flags.power = 1; | 817 | pr->flags.power = 1; |
818 | } | 818 | } |
819 | 819 | ||
820 | return_VALUE(0); | 820 | return_VALUE(0); |
821 | } | 821 | } |
822 | 822 | ||
823 | int acpi_processor_cst_has_changed (struct acpi_processor *pr) | 823 | int acpi_processor_cst_has_changed (struct acpi_processor *pr) |
824 | { | 824 | { |
825 | int result = 0; | 825 | int result = 0; |
826 | 826 | ||
827 | ACPI_FUNCTION_TRACE("acpi_processor_cst_has_changed"); | 827 | ACPI_FUNCTION_TRACE("acpi_processor_cst_has_changed"); |
828 | 828 | ||
829 | if (!pr) | 829 | if (!pr) |
830 | return_VALUE(-EINVAL); | 830 | return_VALUE(-EINVAL); |
831 | 831 | ||
832 | if (errata.smp || nocst) { | 832 | if (errata.smp || nocst) { |
833 | return_VALUE(-ENODEV); | 833 | return_VALUE(-ENODEV); |
834 | } | 834 | } |
835 | 835 | ||
836 | if (!pr->flags.power_setup_done) | 836 | if (!pr->flags.power_setup_done) |
837 | return_VALUE(-ENODEV); | 837 | return_VALUE(-ENODEV); |
838 | 838 | ||
839 | /* Fall back to the default idle loop */ | 839 | /* Fall back to the default idle loop */ |
840 | pm_idle = pm_idle_save; | 840 | pm_idle = pm_idle_save; |
841 | synchronize_sched(); /* Relies on interrupts forcing exit from idle. */ | 841 | synchronize_sched(); /* Relies on interrupts forcing exit from idle. */ |
842 | 842 | ||
843 | pr->flags.power = 0; | 843 | pr->flags.power = 0; |
844 | result = acpi_processor_get_power_info(pr); | 844 | result = acpi_processor_get_power_info(pr); |
845 | if ((pr->flags.power == 1) && (pr->flags.power_setup_done)) | 845 | if ((pr->flags.power == 1) && (pr->flags.power_setup_done)) |
846 | pm_idle = acpi_processor_idle; | 846 | pm_idle = acpi_processor_idle; |
847 | 847 | ||
848 | return_VALUE(result); | 848 | return_VALUE(result); |
849 | } | 849 | } |
850 | 850 | ||
851 | /* proc interface */ | 851 | /* proc interface */ |
852 | 852 | ||
853 | static int acpi_processor_power_seq_show(struct seq_file *seq, void *offset) | 853 | static int acpi_processor_power_seq_show(struct seq_file *seq, void *offset) |
854 | { | 854 | { |
855 | struct acpi_processor *pr = (struct acpi_processor *)seq->private; | 855 | struct acpi_processor *pr = (struct acpi_processor *)seq->private; |
856 | unsigned int i; | 856 | unsigned int i; |
857 | 857 | ||
858 | ACPI_FUNCTION_TRACE("acpi_processor_power_seq_show"); | 858 | ACPI_FUNCTION_TRACE("acpi_processor_power_seq_show"); |
859 | 859 | ||
860 | if (!pr) | 860 | if (!pr) |
861 | goto end; | 861 | goto end; |
862 | 862 | ||
863 | seq_printf(seq, "active state: C%zd\n" | 863 | seq_printf(seq, "active state: C%zd\n" |
864 | "max_cstate: C%d\n" | 864 | "max_cstate: C%d\n" |
865 | "bus master activity: %08x\n", | 865 | "bus master activity: %08x\n", |
866 | pr->power.state ? pr->power.state - pr->power.states : 0, | 866 | pr->power.state ? pr->power.state - pr->power.states : 0, |
867 | max_cstate, | 867 | max_cstate, |
868 | (unsigned)pr->power.bm_activity); | 868 | (unsigned)pr->power.bm_activity); |
869 | 869 | ||
870 | seq_puts(seq, "states:\n"); | 870 | seq_puts(seq, "states:\n"); |
871 | 871 | ||
872 | for (i = 1; i <= pr->power.count; i++) { | 872 | for (i = 1; i <= pr->power.count; i++) { |
873 | seq_printf(seq, " %cC%d: ", | 873 | seq_printf(seq, " %cC%d: ", |
874 | (&pr->power.states[i] == pr->power.state?'*':' '), i); | 874 | (&pr->power.states[i] == pr->power.state?'*':' '), i); |
875 | 875 | ||
876 | if (!pr->power.states[i].valid) { | 876 | if (!pr->power.states[i].valid) { |
877 | seq_puts(seq, "<not supported>\n"); | 877 | seq_puts(seq, "<not supported>\n"); |
878 | continue; | 878 | continue; |
879 | } | 879 | } |
880 | 880 | ||
881 | switch (pr->power.states[i].type) { | 881 | switch (pr->power.states[i].type) { |
882 | case ACPI_STATE_C1: | 882 | case ACPI_STATE_C1: |
883 | seq_printf(seq, "type[C1] "); | 883 | seq_printf(seq, "type[C1] "); |
884 | break; | 884 | break; |
885 | case ACPI_STATE_C2: | 885 | case ACPI_STATE_C2: |
886 | seq_printf(seq, "type[C2] "); | 886 | seq_printf(seq, "type[C2] "); |
887 | break; | 887 | break; |
888 | case ACPI_STATE_C3: | 888 | case ACPI_STATE_C3: |
889 | seq_printf(seq, "type[C3] "); | 889 | seq_printf(seq, "type[C3] "); |
890 | break; | 890 | break; |
891 | default: | 891 | default: |
892 | seq_printf(seq, "type[--] "); | 892 | seq_printf(seq, "type[--] "); |
893 | break; | 893 | break; |
894 | } | 894 | } |
895 | 895 | ||
896 | if (pr->power.states[i].promotion.state) | 896 | if (pr->power.states[i].promotion.state) |
897 | seq_printf(seq, "promotion[C%zd] ", | 897 | seq_printf(seq, "promotion[C%zd] ", |
898 | (pr->power.states[i].promotion.state - | 898 | (pr->power.states[i].promotion.state - |
899 | pr->power.states)); | 899 | pr->power.states)); |
900 | else | 900 | else |
901 | seq_puts(seq, "promotion[--] "); | 901 | seq_puts(seq, "promotion[--] "); |
902 | 902 | ||
903 | if (pr->power.states[i].demotion.state) | 903 | if (pr->power.states[i].demotion.state) |
904 | seq_printf(seq, "demotion[C%zd] ", | 904 | seq_printf(seq, "demotion[C%zd] ", |
905 | (pr->power.states[i].demotion.state - | 905 | (pr->power.states[i].demotion.state - |
906 | pr->power.states)); | 906 | pr->power.states)); |
907 | else | 907 | else |
908 | seq_puts(seq, "demotion[--] "); | 908 | seq_puts(seq, "demotion[--] "); |
909 | 909 | ||
910 | seq_printf(seq, "latency[%03d] usage[%08d]\n", | 910 | seq_printf(seq, "latency[%03d] usage[%08d]\n", |
911 | pr->power.states[i].latency, | 911 | pr->power.states[i].latency, |
912 | pr->power.states[i].usage); | 912 | pr->power.states[i].usage); |
913 | } | 913 | } |
914 | 914 | ||
915 | end: | 915 | end: |
916 | return_VALUE(0); | 916 | return_VALUE(0); |
917 | } | 917 | } |
918 | 918 | ||
919 | static int acpi_processor_power_open_fs(struct inode *inode, struct file *file) | 919 | static int acpi_processor_power_open_fs(struct inode *inode, struct file *file) |
920 | { | 920 | { |
921 | return single_open(file, acpi_processor_power_seq_show, | 921 | return single_open(file, acpi_processor_power_seq_show, |
922 | PDE(inode)->data); | 922 | PDE(inode)->data); |
923 | } | 923 | } |
924 | 924 | ||
925 | static struct file_operations acpi_processor_power_fops = { | 925 | static struct file_operations acpi_processor_power_fops = { |
926 | .open = acpi_processor_power_open_fs, | 926 | .open = acpi_processor_power_open_fs, |
927 | .read = seq_read, | 927 | .read = seq_read, |
928 | .llseek = seq_lseek, | 928 | .llseek = seq_lseek, |
929 | .release = single_release, | 929 | .release = single_release, |
930 | }; | 930 | }; |
931 | 931 | ||
932 | 932 | ||
933 | int acpi_processor_power_init(struct acpi_processor *pr, struct acpi_device *device) | 933 | int acpi_processor_power_init(struct acpi_processor *pr, struct acpi_device *device) |
934 | { | 934 | { |
935 | acpi_status status = 0; | 935 | acpi_status status = 0; |
936 | static int first_run = 0; | 936 | static int first_run = 0; |
937 | struct proc_dir_entry *entry = NULL; | 937 | struct proc_dir_entry *entry = NULL; |
938 | unsigned int i; | 938 | unsigned int i; |
939 | 939 | ||
940 | ACPI_FUNCTION_TRACE("acpi_processor_power_init"); | 940 | ACPI_FUNCTION_TRACE("acpi_processor_power_init"); |
941 | 941 | ||
942 | if (!first_run) { | 942 | if (!first_run) { |
943 | dmi_check_system(processor_power_dmi_table); | 943 | dmi_check_system(processor_power_dmi_table); |
944 | if (max_cstate < ACPI_C_STATES_MAX) | 944 | if (max_cstate < ACPI_C_STATES_MAX) |
945 | printk(KERN_NOTICE "ACPI: processor limited to max C-state %d\n", max_cstate); | 945 | printk(KERN_NOTICE "ACPI: processor limited to max C-state %d\n", max_cstate); |
946 | first_run++; | 946 | first_run++; |
947 | } | 947 | } |
948 | 948 | ||
949 | if (!errata.smp && (pr->id == 0) && acpi_fadt.cst_cnt && !nocst) { | 949 | if (!errata.smp && (pr->id == 0) && acpi_fadt.cst_cnt && !nocst) { |
950 | status = acpi_os_write_port(acpi_fadt.smi_cmd, acpi_fadt.cst_cnt, 8); | 950 | status = acpi_os_write_port(acpi_fadt.smi_cmd, acpi_fadt.cst_cnt, 8); |
951 | if (ACPI_FAILURE(status)) { | 951 | if (ACPI_FAILURE(status)) { |
952 | ACPI_DEBUG_PRINT((ACPI_DB_ERROR, | 952 | ACPI_DEBUG_PRINT((ACPI_DB_ERROR, |
953 | "Notifying BIOS of _CST ability failed\n")); | 953 | "Notifying BIOS of _CST ability failed\n")); |
954 | } | 954 | } |
955 | } | 955 | } |
956 | 956 | ||
957 | acpi_processor_get_power_info(pr); | 957 | acpi_processor_get_power_info(pr); |
958 | 958 | ||
959 | /* | 959 | /* |
960 | * Install the idle handler if processor power management is supported. | 960 | * Install the idle handler if processor power management is supported. |
961 | * Note that the previously set idle handler will be used on | 961 | * Note that the previously set idle handler will be used on |
962 | * platforms that only support C1. | 962 | * platforms that only support C1. |
963 | */ | 963 | */ |
964 | if ((pr->flags.power) && (!boot_option_idle_override)) { | 964 | if ((pr->flags.power) && (!boot_option_idle_override)) { |
965 | printk(KERN_INFO PREFIX "CPU%d (power states:", pr->id); | 965 | printk(KERN_INFO PREFIX "CPU%d (power states:", pr->id); |
966 | for (i = 1; i <= pr->power.count; i++) | 966 | for (i = 1; i <= pr->power.count; i++) |
967 | if (pr->power.states[i].valid) | 967 | if (pr->power.states[i].valid) |
968 | printk(" C%d[C%d]", i, pr->power.states[i].type); | 968 | printk(" C%d[C%d]", i, pr->power.states[i].type); |
969 | printk(")\n"); | 969 | printk(")\n"); |
970 | 970 | ||
971 | if (pr->id == 0) { | 971 | if (pr->id == 0) { |
972 | pm_idle_save = pm_idle; | 972 | pm_idle_save = pm_idle; |
973 | pm_idle = acpi_processor_idle; | 973 | pm_idle = acpi_processor_idle; |
974 | } | 974 | } |
975 | } | 975 | } |
976 | 976 | ||
977 | /* 'power' [R] */ | 977 | /* 'power' [R] */ |
978 | entry = create_proc_entry(ACPI_PROCESSOR_FILE_POWER, | 978 | entry = create_proc_entry(ACPI_PROCESSOR_FILE_POWER, |
979 | S_IRUGO, acpi_device_dir(device)); | 979 | S_IRUGO, acpi_device_dir(device)); |
980 | if (!entry) | 980 | if (!entry) |
981 | ACPI_DEBUG_PRINT((ACPI_DB_ERROR, | 981 | ACPI_DEBUG_PRINT((ACPI_DB_ERROR, |
982 | "Unable to create '%s' fs entry\n", | 982 | "Unable to create '%s' fs entry\n", |
983 | ACPI_PROCESSOR_FILE_POWER)); | 983 | ACPI_PROCESSOR_FILE_POWER)); |
984 | else { | 984 | else { |
985 | entry->proc_fops = &acpi_processor_power_fops; | 985 | entry->proc_fops = &acpi_processor_power_fops; |
986 | entry->data = acpi_driver_data(device); | 986 | entry->data = acpi_driver_data(device); |
987 | entry->owner = THIS_MODULE; | 987 | entry->owner = THIS_MODULE; |
988 | } | 988 | } |
989 | 989 | ||
990 | pr->flags.power_setup_done = 1; | 990 | pr->flags.power_setup_done = 1; |
991 | 991 | ||
992 | return_VALUE(0); | 992 | return_VALUE(0); |
993 | } | 993 | } |
994 | 994 | ||
995 | int acpi_processor_power_exit(struct acpi_processor *pr, struct acpi_device *device) | 995 | int acpi_processor_power_exit(struct acpi_processor *pr, struct acpi_device *device) |
996 | { | 996 | { |
997 | ACPI_FUNCTION_TRACE("acpi_processor_power_exit"); | 997 | ACPI_FUNCTION_TRACE("acpi_processor_power_exit"); |
998 | 998 | ||
999 | pr->flags.power_setup_done = 0; | 999 | pr->flags.power_setup_done = 0; |
1000 | 1000 | ||
1001 | if (acpi_device_dir(device)) | 1001 | if (acpi_device_dir(device)) |
1002 | remove_proc_entry(ACPI_PROCESSOR_FILE_POWER,acpi_device_dir(device)); | 1002 | remove_proc_entry(ACPI_PROCESSOR_FILE_POWER,acpi_device_dir(device)); |
1003 | 1003 | ||
1004 | /* Unregister the idle handler when processor #0 is removed. */ | 1004 | /* Unregister the idle handler when processor #0 is removed. */ |
1005 | if (pr->id == 0) { | 1005 | if (pr->id == 0) { |
1006 | pm_idle = pm_idle_save; | 1006 | pm_idle = pm_idle_save; |
1007 | 1007 | ||
1008 | /* | 1008 | /* |
1009 | * We are about to unload the current idle thread pm callback | 1009 | * We are about to unload the current idle thread pm callback |
1010 | * (pm_idle). Wait for all processors to update cached/local | 1010 | * (pm_idle). Wait for all processors to update cached/local |
1011 | * copies of pm_idle before proceeding. | 1011 | * copies of pm_idle before proceeding. |
1012 | */ | 1012 | */ |
1013 | cpu_idle_wait(); | 1013 | cpu_idle_wait(); |
1014 | } | 1014 | } |
1015 | 1015 | ||
1016 | return_VALUE(0); | 1016 | return_VALUE(0); |
1017 | } | 1017 | } |
1018 | 1018 |
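Nothing in this processor_idle.c hunk changes an smp_processor_id() call; it is context for the pm_idle handoff that the comments above describe. A minimal sketch of that handoff, assuming only the pm_idle pointer and cpu_idle_wait() already used in the code (the helper names and my_idle_handler are hypothetical, for illustration only):

    /*
     * Sketch of the idle-handler swap pattern (not the driver's code).
     * Install: remember the old handler, then publish the new one; each
     * CPU picks it up the next time it passes through the idle loop.
     * Teardown: restore the old handler, then wait until every CPU has
     * dropped its cached copy before the new handler may disappear.
     */
    extern void (*pm_idle)(void);       /* global idle hook, as above */
    extern void cpu_idle_wait(void);    /* as used in the hunk above */

    static void (*saved_idle)(void);    /* plays the role of pm_idle_save */

    static void install_idle_handler(void (*my_idle_handler)(void))
    {
            saved_idle = pm_idle;
            pm_idle = my_idle_handler;
    }

    static void remove_idle_handler(void)
    {
            pm_idle = saved_idle;
            cpu_idle_wait();            /* all CPUs now run saved_idle */
    }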
drivers/input/gameport/gameport.c
1 | /* | 1 | /* |
2 | * Generic gameport layer | 2 | * Generic gameport layer |
3 | * | 3 | * |
4 | * Copyright (c) 1999-2002 Vojtech Pavlik | 4 | * Copyright (c) 1999-2002 Vojtech Pavlik |
5 | * Copyright (c) 2005 Dmitry Torokhov | 5 | * Copyright (c) 2005 Dmitry Torokhov |
6 | */ | 6 | */ |
7 | 7 | ||
8 | /* | 8 | /* |
9 | * This program is free software; you can redistribute it and/or modify it | 9 | * This program is free software; you can redistribute it and/or modify it |
10 | * under the terms of the GNU General Public License version 2 as published by | 10 | * under the terms of the GNU General Public License version 2 as published by |
11 | * the Free Software Foundation. | 11 | * the Free Software Foundation. |
12 | */ | 12 | */ |
13 | 13 | ||
14 | #include <linux/stddef.h> | 14 | #include <linux/stddef.h> |
15 | #include <linux/module.h> | 15 | #include <linux/module.h> |
16 | #include <linux/ioport.h> | 16 | #include <linux/ioport.h> |
17 | #include <linux/init.h> | 17 | #include <linux/init.h> |
18 | #include <linux/gameport.h> | 18 | #include <linux/gameport.h> |
19 | #include <linux/wait.h> | 19 | #include <linux/wait.h> |
20 | #include <linux/completion.h> | 20 | #include <linux/completion.h> |
21 | #include <linux/sched.h> | 21 | #include <linux/sched.h> |
22 | #include <linux/smp_lock.h> | 22 | #include <linux/smp_lock.h> |
23 | #include <linux/slab.h> | 23 | #include <linux/slab.h> |
24 | #include <linux/delay.h> | 24 | #include <linux/delay.h> |
25 | 25 | ||
26 | /*#include <asm/io.h>*/ | 26 | /*#include <asm/io.h>*/ |
27 | 27 | ||
28 | MODULE_AUTHOR("Vojtech Pavlik <vojtech@ucw.cz>"); | 28 | MODULE_AUTHOR("Vojtech Pavlik <vojtech@ucw.cz>"); |
29 | MODULE_DESCRIPTION("Generic gameport layer"); | 29 | MODULE_DESCRIPTION("Generic gameport layer"); |
30 | MODULE_LICENSE("GPL"); | 30 | MODULE_LICENSE("GPL"); |
31 | 31 | ||
32 | EXPORT_SYMBOL(__gameport_register_port); | 32 | EXPORT_SYMBOL(__gameport_register_port); |
33 | EXPORT_SYMBOL(gameport_unregister_port); | 33 | EXPORT_SYMBOL(gameport_unregister_port); |
34 | EXPORT_SYMBOL(__gameport_register_driver); | 34 | EXPORT_SYMBOL(__gameport_register_driver); |
35 | EXPORT_SYMBOL(gameport_unregister_driver); | 35 | EXPORT_SYMBOL(gameport_unregister_driver); |
36 | EXPORT_SYMBOL(gameport_open); | 36 | EXPORT_SYMBOL(gameport_open); |
37 | EXPORT_SYMBOL(gameport_close); | 37 | EXPORT_SYMBOL(gameport_close); |
38 | EXPORT_SYMBOL(gameport_rescan); | 38 | EXPORT_SYMBOL(gameport_rescan); |
39 | EXPORT_SYMBOL(gameport_cooked_read); | 39 | EXPORT_SYMBOL(gameport_cooked_read); |
40 | EXPORT_SYMBOL(gameport_set_name); | 40 | EXPORT_SYMBOL(gameport_set_name); |
41 | EXPORT_SYMBOL(gameport_set_phys); | 41 | EXPORT_SYMBOL(gameport_set_phys); |
42 | EXPORT_SYMBOL(gameport_start_polling); | 42 | EXPORT_SYMBOL(gameport_start_polling); |
43 | EXPORT_SYMBOL(gameport_stop_polling); | 43 | EXPORT_SYMBOL(gameport_stop_polling); |
44 | 44 | ||
45 | /* | 45 | /* |
46 | * gameport_sem protects the entire gameport subsystem and is taken | 46 | * gameport_sem protects the entire gameport subsystem and is taken |
47 | * every time a gameport port or driver is registered or unregistered. | 47 | * every time a gameport port or driver is registered or unregistered. |
48 | */ | 48 | */ |
49 | static DECLARE_MUTEX(gameport_sem); | 49 | static DECLARE_MUTEX(gameport_sem); |
50 | 50 | ||
51 | static LIST_HEAD(gameport_list); | 51 | static LIST_HEAD(gameport_list); |
52 | 52 | ||
53 | static struct bus_type gameport_bus = { | 53 | static struct bus_type gameport_bus = { |
54 | .name = "gameport", | 54 | .name = "gameport", |
55 | }; | 55 | }; |
56 | 56 | ||
57 | static void gameport_add_port(struct gameport *gameport); | 57 | static void gameport_add_port(struct gameport *gameport); |
58 | static void gameport_destroy_port(struct gameport *gameport); | 58 | static void gameport_destroy_port(struct gameport *gameport); |
59 | static void gameport_reconnect_port(struct gameport *gameport); | 59 | static void gameport_reconnect_port(struct gameport *gameport); |
60 | static void gameport_disconnect_port(struct gameport *gameport); | 60 | static void gameport_disconnect_port(struct gameport *gameport); |
61 | 61 | ||
62 | #if defined(__i386__) | 62 | #if defined(__i386__) |
63 | 63 | ||
64 | #define DELTA(x,y) ((y)-(x)+((y)<(x)?1193182/HZ:0)) | 64 | #define DELTA(x,y) ((y)-(x)+((y)<(x)?1193182/HZ:0)) |
65 | #define GET_TIME(x) do { x = get_time_pit(); } while (0) | 65 | #define GET_TIME(x) do { x = get_time_pit(); } while (0) |
66 | 66 | ||
67 | static unsigned int get_time_pit(void) | 67 | static unsigned int get_time_pit(void) |
68 | { | 68 | { |
69 | extern spinlock_t i8253_lock; | 69 | extern spinlock_t i8253_lock; |
70 | unsigned long flags; | 70 | unsigned long flags; |
71 | unsigned int count; | 71 | unsigned int count; |
72 | 72 | ||
73 | spin_lock_irqsave(&i8253_lock, flags); | 73 | spin_lock_irqsave(&i8253_lock, flags); |
74 | outb_p(0x00, 0x43); | 74 | outb_p(0x00, 0x43); |
75 | count = inb_p(0x40); | 75 | count = inb_p(0x40); |
76 | count |= inb_p(0x40) << 8; | 76 | count |= inb_p(0x40) << 8; |
77 | spin_unlock_irqrestore(&i8253_lock, flags); | 77 | spin_unlock_irqrestore(&i8253_lock, flags); |
78 | 78 | ||
79 | return count; | 79 | return count; |
80 | } | 80 | } |
81 | 81 | ||
82 | #endif | 82 | #endif |
83 | 83 | ||
84 | 84 | ||
85 | 85 | ||
86 | /* | 86 | /* |
87 | * gameport_measure_speed() measures the gameport i/o speed. | 87 | * gameport_measure_speed() measures the gameport i/o speed. |
88 | */ | 88 | */ |
89 | 89 | ||
90 | static int gameport_measure_speed(struct gameport *gameport) | 90 | static int gameport_measure_speed(struct gameport *gameport) |
91 | { | 91 | { |
92 | #if defined(__i386__) | 92 | #if defined(__i386__) |
93 | 93 | ||
94 | unsigned int i, t, t1, t2, t3, tx; | 94 | unsigned int i, t, t1, t2, t3, tx; |
95 | unsigned long flags; | 95 | unsigned long flags; |
96 | 96 | ||
97 | if (gameport_open(gameport, NULL, GAMEPORT_MODE_RAW)) | 97 | if (gameport_open(gameport, NULL, GAMEPORT_MODE_RAW)) |
98 | return 0; | 98 | return 0; |
99 | 99 | ||
100 | tx = 1 << 30; | 100 | tx = 1 << 30; |
101 | 101 | ||
102 | for(i = 0; i < 50; i++) { | 102 | for(i = 0; i < 50; i++) { |
103 | local_irq_save(flags); | 103 | local_irq_save(flags); |
104 | GET_TIME(t1); | 104 | GET_TIME(t1); |
105 | for (t = 0; t < 50; t++) gameport_read(gameport); | 105 | for (t = 0; t < 50; t++) gameport_read(gameport); |
106 | GET_TIME(t2); | 106 | GET_TIME(t2); |
107 | GET_TIME(t3); | 107 | GET_TIME(t3); |
108 | local_irq_restore(flags); | 108 | local_irq_restore(flags); |
109 | udelay(i * 10); | 109 | udelay(i * 10); |
110 | if ((t = DELTA(t2,t1) - DELTA(t3,t2)) < tx) tx = t; | 110 | if ((t = DELTA(t2,t1) - DELTA(t3,t2)) < tx) tx = t; |
111 | } | 111 | } |
112 | 112 | ||
113 | gameport_close(gameport); | 113 | gameport_close(gameport); |
114 | return 59659 / (tx < 1 ? 1 : tx); | 114 | return 59659 / (tx < 1 ? 1 : tx); |
115 | 115 | ||
116 | #elif defined (__x86_64__) | 116 | #elif defined (__x86_64__) |
117 | 117 | ||
118 | unsigned int i, t; | 118 | unsigned int i, t; |
119 | unsigned long tx, t1, t2, flags; | 119 | unsigned long tx, t1, t2, flags; |
120 | 120 | ||
121 | if (gameport_open(gameport, NULL, GAMEPORT_MODE_RAW)) | 121 | if (gameport_open(gameport, NULL, GAMEPORT_MODE_RAW)) |
122 | return 0; | 122 | return 0; |
123 | 123 | ||
124 | tx = 1 << 30; | 124 | tx = 1 << 30; |
125 | 125 | ||
126 | for(i = 0; i < 50; i++) { | 126 | for(i = 0; i < 50; i++) { |
127 | local_irq_save(flags); | 127 | local_irq_save(flags); |
128 | rdtscl(t1); | 128 | rdtscl(t1); |
129 | for (t = 0; t < 50; t++) gameport_read(gameport); | 129 | for (t = 0; t < 50; t++) gameport_read(gameport); |
130 | rdtscl(t2); | 130 | rdtscl(t2); |
131 | local_irq_restore(flags); | 131 | local_irq_restore(flags); |
132 | udelay(i * 10); | 132 | udelay(i * 10); |
133 | if (t2 - t1 < tx) tx = t2 - t1; | 133 | if (t2 - t1 < tx) tx = t2 - t1; |
134 | } | 134 | } |
135 | 135 | ||
136 | gameport_close(gameport); | 136 | gameport_close(gameport); |
137 | return (cpu_data[_smp_processor_id()].loops_per_jiffy * (unsigned long)HZ / (1000 / 50)) / (tx < 1 ? 1 : tx); | 137 | return (cpu_data[raw_smp_processor_id()].loops_per_jiffy * (unsigned long)HZ / (1000 / 50)) / (tx < 1 ? 1 : tx); |
138 | 138 | ||
139 | #else | 139 | #else |
140 | 140 | ||
141 | unsigned int j, t = 0; | 141 | unsigned int j, t = 0; |
142 | 142 | ||
143 | if (gameport_open(gameport, NULL, GAMEPORT_MODE_RAW)) | 143 | if (gameport_open(gameport, NULL, GAMEPORT_MODE_RAW)) |
144 | return 0; | 144 | return 0; |
145 | 145 | ||
146 | j = jiffies; while (j == jiffies); | 146 | j = jiffies; while (j == jiffies); |
147 | j = jiffies; while (j == jiffies) { t++; gameport_read(gameport); } | 147 | j = jiffies; while (j == jiffies) { t++; gameport_read(gameport); } |
148 | 148 | ||
149 | gameport_close(gameport); | 149 | gameport_close(gameport); |
150 | return t * HZ / 1000; | 150 | return t * HZ / 1000; |
151 | 151 | ||
152 | #endif | 152 | #endif |
153 | } | 153 | } |
154 | 154 | ||
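The only functional change in this file is in the x86_64 branch of gameport_measure_speed() above, where the calibration loop now indexes cpu_data[] with raw_smp_processor_id(). The raw accessor is acceptable there because the loop runs once at port registration, and a value read on the "wrong" CPU only skews a rough speed estimate. For contrast, a hedged sketch of a preemption-safe read (the helper name is invented; get_cpu()/put_cpu() are the usual primitives when a stale CPU number would matter):

    #include <linux/smp.h>              /* get_cpu(), put_cpu() */
    #include <asm/processor.h>          /* cpu_data[] on x86_64 */

    /*
     * Sketch only: get_cpu() disables preemption, so the task cannot
     * migrate between reading its CPU number and using it to index
     * the per-CPU cpu_data[] array.
     */
    static unsigned long sample_loops_per_jiffy(void)
    {
            unsigned long lpj;
            int cpu = get_cpu();                /* preemption off */

            lpj = cpu_data[cpu].loops_per_jiffy;
            put_cpu();                          /* preemption back on */

            return lpj;
    }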
155 | void gameport_start_polling(struct gameport *gameport) | 155 | void gameport_start_polling(struct gameport *gameport) |
156 | { | 156 | { |
157 | spin_lock(&gameport->timer_lock); | 157 | spin_lock(&gameport->timer_lock); |
158 | 158 | ||
159 | if (!gameport->poll_cnt++) { | 159 | if (!gameport->poll_cnt++) { |
160 | BUG_ON(!gameport->poll_handler); | 160 | BUG_ON(!gameport->poll_handler); |
161 | BUG_ON(!gameport->poll_interval); | 161 | BUG_ON(!gameport->poll_interval); |
162 | mod_timer(&gameport->poll_timer, jiffies + msecs_to_jiffies(gameport->poll_interval)); | 162 | mod_timer(&gameport->poll_timer, jiffies + msecs_to_jiffies(gameport->poll_interval)); |
163 | } | 163 | } |
164 | 164 | ||
165 | spin_unlock(&gameport->timer_lock); | 165 | spin_unlock(&gameport->timer_lock); |
166 | } | 166 | } |
167 | 167 | ||
168 | void gameport_stop_polling(struct gameport *gameport) | 168 | void gameport_stop_polling(struct gameport *gameport) |
169 | { | 169 | { |
170 | spin_lock(&gameport->timer_lock); | 170 | spin_lock(&gameport->timer_lock); |
171 | 171 | ||
172 | if (!--gameport->poll_cnt) | 172 | if (!--gameport->poll_cnt) |
173 | del_timer(&gameport->poll_timer); | 173 | del_timer(&gameport->poll_timer); |
174 | 174 | ||
175 | spin_unlock(&gameport->timer_lock); | 175 | spin_unlock(&gameport->timer_lock); |
176 | } | 176 | } |
177 | 177 | ||
178 | static void gameport_run_poll_handler(unsigned long d) | 178 | static void gameport_run_poll_handler(unsigned long d) |
179 | { | 179 | { |
180 | struct gameport *gameport = (struct gameport *)d; | 180 | struct gameport *gameport = (struct gameport *)d; |
181 | 181 | ||
182 | gameport->poll_handler(gameport); | 182 | gameport->poll_handler(gameport); |
183 | if (gameport->poll_cnt) | 183 | if (gameport->poll_cnt) |
184 | mod_timer(&gameport->poll_timer, jiffies + msecs_to_jiffies(gameport->poll_interval)); | 184 | mod_timer(&gameport->poll_timer, jiffies + msecs_to_jiffies(gameport->poll_interval)); |
185 | } | 185 | } |
186 | 186 | ||
187 | /* | 187 | /* |
188 | * Basic gameport -> driver core mappings | 188 | * Basic gameport -> driver core mappings |
189 | */ | 189 | */ |
190 | 190 | ||
191 | static void gameport_bind_driver(struct gameport *gameport, struct gameport_driver *drv) | 191 | static void gameport_bind_driver(struct gameport *gameport, struct gameport_driver *drv) |
192 | { | 192 | { |
193 | down_write(&gameport_bus.subsys.rwsem); | 193 | down_write(&gameport_bus.subsys.rwsem); |
194 | 194 | ||
195 | gameport->dev.driver = &drv->driver; | 195 | gameport->dev.driver = &drv->driver; |
196 | if (drv->connect(gameport, drv)) { | 196 | if (drv->connect(gameport, drv)) { |
197 | gameport->dev.driver = NULL; | 197 | gameport->dev.driver = NULL; |
198 | goto out; | 198 | goto out; |
199 | } | 199 | } |
200 | device_bind_driver(&gameport->dev); | 200 | device_bind_driver(&gameport->dev); |
201 | out: | 201 | out: |
202 | up_write(&gameport_bus.subsys.rwsem); | 202 | up_write(&gameport_bus.subsys.rwsem); |
203 | } | 203 | } |
204 | 204 | ||
205 | static void gameport_release_driver(struct gameport *gameport) | 205 | static void gameport_release_driver(struct gameport *gameport) |
206 | { | 206 | { |
207 | down_write(&gameport_bus.subsys.rwsem); | 207 | down_write(&gameport_bus.subsys.rwsem); |
208 | device_release_driver(&gameport->dev); | 208 | device_release_driver(&gameport->dev); |
209 | up_write(&gameport_bus.subsys.rwsem); | 209 | up_write(&gameport_bus.subsys.rwsem); |
210 | } | 210 | } |
211 | 211 | ||
212 | static void gameport_find_driver(struct gameport *gameport) | 212 | static void gameport_find_driver(struct gameport *gameport) |
213 | { | 213 | { |
214 | down_write(&gameport_bus.subsys.rwsem); | 214 | down_write(&gameport_bus.subsys.rwsem); |
215 | device_attach(&gameport->dev); | 215 | device_attach(&gameport->dev); |
216 | up_write(&gameport_bus.subsys.rwsem); | 216 | up_write(&gameport_bus.subsys.rwsem); |
217 | } | 217 | } |
218 | 218 | ||
219 | 219 | ||
220 | /* | 220 | /* |
221 | * Gameport event processing. | 221 | * Gameport event processing. |
222 | */ | 222 | */ |
223 | 223 | ||
224 | enum gameport_event_type { | 224 | enum gameport_event_type { |
225 | GAMEPORT_RESCAN, | 225 | GAMEPORT_RESCAN, |
226 | GAMEPORT_RECONNECT, | 226 | GAMEPORT_RECONNECT, |
227 | GAMEPORT_REGISTER_PORT, | 227 | GAMEPORT_REGISTER_PORT, |
228 | GAMEPORT_REGISTER_DRIVER, | 228 | GAMEPORT_REGISTER_DRIVER, |
229 | }; | 229 | }; |
230 | 230 | ||
231 | struct gameport_event { | 231 | struct gameport_event { |
232 | enum gameport_event_type type; | 232 | enum gameport_event_type type; |
233 | void *object; | 233 | void *object; |
234 | struct module *owner; | 234 | struct module *owner; |
235 | struct list_head node; | 235 | struct list_head node; |
236 | }; | 236 | }; |
237 | 237 | ||
238 | static DEFINE_SPINLOCK(gameport_event_lock); /* protects gameport_event_list */ | 238 | static DEFINE_SPINLOCK(gameport_event_lock); /* protects gameport_event_list */ |
239 | static LIST_HEAD(gameport_event_list); | 239 | static LIST_HEAD(gameport_event_list); |
240 | static DECLARE_WAIT_QUEUE_HEAD(gameport_wait); | 240 | static DECLARE_WAIT_QUEUE_HEAD(gameport_wait); |
241 | static DECLARE_COMPLETION(gameport_exited); | 241 | static DECLARE_COMPLETION(gameport_exited); |
242 | static int gameport_pid; | 242 | static int gameport_pid; |
243 | 243 | ||
244 | static void gameport_queue_event(void *object, struct module *owner, | 244 | static void gameport_queue_event(void *object, struct module *owner, |
245 | enum gameport_event_type event_type) | 245 | enum gameport_event_type event_type) |
246 | { | 246 | { |
247 | unsigned long flags; | 247 | unsigned long flags; |
248 | struct gameport_event *event; | 248 | struct gameport_event *event; |
249 | 249 | ||
250 | spin_lock_irqsave(&gameport_event_lock, flags); | 250 | spin_lock_irqsave(&gameport_event_lock, flags); |
251 | 251 | ||
252 | /* | 252 | /* |
253 | * Scan the event list for other events for the same gameport port, | 253 | * Scan the event list for other events for the same gameport port, |
254 | * starting with the most recent one. If the event is the same we | 254 | * starting with the most recent one. If the event is the same we |
255 | * do not need to add a new one. If the event is of a different type we | 255 | * do not need to add a new one. If the event is of a different type we |
256 | * need to add this event and should not look further because | 256 | * need to add this event and should not look further because |
257 | * we need to preserve the sequence of distinct events. | 257 | * we need to preserve the sequence of distinct events. |
258 | */ | 258 | */ |
259 | list_for_each_entry_reverse(event, &gameport_event_list, node) { | 259 | list_for_each_entry_reverse(event, &gameport_event_list, node) { |
260 | if (event->object == object) { | 260 | if (event->object == object) { |
261 | if (event->type == event_type) | 261 | if (event->type == event_type) |
262 | goto out; | 262 | goto out; |
263 | break; | 263 | break; |
264 | } | 264 | } |
265 | } | 265 | } |
266 | 266 | ||
267 | if ((event = kmalloc(sizeof(struct gameport_event), GFP_ATOMIC))) { | 267 | if ((event = kmalloc(sizeof(struct gameport_event), GFP_ATOMIC))) { |
268 | if (!try_module_get(owner)) { | 268 | if (!try_module_get(owner)) { |
269 | printk(KERN_WARNING "gameport: Can't get module reference, dropping event %d\n", event_type); | 269 | printk(KERN_WARNING "gameport: Can't get module reference, dropping event %d\n", event_type); |
270 | goto out; | 270 | goto out; |
271 | } | 271 | } |
272 | 272 | ||
273 | event->type = event_type; | 273 | event->type = event_type; |
274 | event->object = object; | 274 | event->object = object; |
275 | event->owner = owner; | 275 | event->owner = owner; |
276 | 276 | ||
277 | list_add_tail(&event->node, &gameport_event_list); | 277 | list_add_tail(&event->node, &gameport_event_list); |
278 | wake_up(&gameport_wait); | 278 | wake_up(&gameport_wait); |
279 | } else { | 279 | } else { |
280 | printk(KERN_ERR "gameport: Not enough memory to queue event %d\n", event_type); | 280 | printk(KERN_ERR "gameport: Not enough memory to queue event %d\n", event_type); |
281 | } | 281 | } |
282 | out: | 282 | out: |
283 | spin_unlock_irqrestore(&gameport_event_lock, flags); | 283 | spin_unlock_irqrestore(&gameport_event_lock, flags); |
284 | } | 284 | } |
285 | 285 | ||
286 | static void gameport_free_event(struct gameport_event *event) | 286 | static void gameport_free_event(struct gameport_event *event) |
287 | { | 287 | { |
288 | module_put(event->owner); | 288 | module_put(event->owner); |
289 | kfree(event); | 289 | kfree(event); |
290 | } | 290 | } |
291 | 291 | ||
292 | static void gameport_remove_duplicate_events(struct gameport_event *event) | 292 | static void gameport_remove_duplicate_events(struct gameport_event *event) |
293 | { | 293 | { |
294 | struct list_head *node, *next; | 294 | struct list_head *node, *next; |
295 | struct gameport_event *e; | 295 | struct gameport_event *e; |
296 | unsigned long flags; | 296 | unsigned long flags; |
297 | 297 | ||
298 | spin_lock_irqsave(&gameport_event_lock, flags); | 298 | spin_lock_irqsave(&gameport_event_lock, flags); |
299 | 299 | ||
300 | list_for_each_safe(node, next, &gameport_event_list) { | 300 | list_for_each_safe(node, next, &gameport_event_list) { |
301 | e = list_entry(node, struct gameport_event, node); | 301 | e = list_entry(node, struct gameport_event, node); |
302 | if (event->object == e->object) { | 302 | if (event->object == e->object) { |
303 | /* | 303 | /* |
304 | * If this event is of a different type we should not | 304 | * If this event is of a different type we should not |
305 | * look further - we only suppress duplicate events | 305 | * look further - we only suppress duplicate events |
306 | * that were sent back-to-back. | 306 | * that were sent back-to-back. |
307 | */ | 307 | */ |
308 | if (event->type != e->type) | 308 | if (event->type != e->type) |
309 | break; | 309 | break; |
310 | 310 | ||
311 | list_del_init(node); | 311 | list_del_init(node); |
312 | gameport_free_event(e); | 312 | gameport_free_event(e); |
313 | } | 313 | } |
314 | } | 314 | } |
315 | 315 | ||
316 | spin_unlock_irqrestore(&gameport_event_lock, flags); | 316 | spin_unlock_irqrestore(&gameport_event_lock, flags); |
317 | } | 317 | } |
318 | 318 | ||
319 | 319 | ||
320 | static struct gameport_event *gameport_get_event(void) | 320 | static struct gameport_event *gameport_get_event(void) |
321 | { | 321 | { |
322 | struct gameport_event *event; | 322 | struct gameport_event *event; |
323 | struct list_head *node; | 323 | struct list_head *node; |
324 | unsigned long flags; | 324 | unsigned long flags; |
325 | 325 | ||
326 | spin_lock_irqsave(&gameport_event_lock, flags); | 326 | spin_lock_irqsave(&gameport_event_lock, flags); |
327 | 327 | ||
328 | if (list_empty(&gameport_event_list)) { | 328 | if (list_empty(&gameport_event_list)) { |
329 | spin_unlock_irqrestore(&gameport_event_lock, flags); | 329 | spin_unlock_irqrestore(&gameport_event_lock, flags); |
330 | return NULL; | 330 | return NULL; |
331 | } | 331 | } |
332 | 332 | ||
333 | node = gameport_event_list.next; | 333 | node = gameport_event_list.next; |
334 | event = list_entry(node, struct gameport_event, node); | 334 | event = list_entry(node, struct gameport_event, node); |
335 | list_del_init(node); | 335 | list_del_init(node); |
336 | 336 | ||
337 | spin_unlock_irqrestore(&gameport_event_lock, flags); | 337 | spin_unlock_irqrestore(&gameport_event_lock, flags); |
338 | 338 | ||
339 | return event; | 339 | return event; |
340 | } | 340 | } |
341 | 341 | ||
342 | static void gameport_handle_events(void) | 342 | static void gameport_handle_events(void) |
343 | { | 343 | { |
344 | struct gameport_event *event; | 344 | struct gameport_event *event; |
345 | struct gameport_driver *gameport_drv; | 345 | struct gameport_driver *gameport_drv; |
346 | 346 | ||
347 | down(&gameport_sem); | 347 | down(&gameport_sem); |
348 | 348 | ||
349 | while ((event = gameport_get_event())) { | 349 | while ((event = gameport_get_event())) { |
350 | 350 | ||
351 | switch (event->type) { | 351 | switch (event->type) { |
352 | case GAMEPORT_REGISTER_PORT: | 352 | case GAMEPORT_REGISTER_PORT: |
353 | gameport_add_port(event->object); | 353 | gameport_add_port(event->object); |
354 | break; | 354 | break; |
355 | 355 | ||
356 | case GAMEPORT_RECONNECT: | 356 | case GAMEPORT_RECONNECT: |
357 | gameport_reconnect_port(event->object); | 357 | gameport_reconnect_port(event->object); |
358 | break; | 358 | break; |
359 | 359 | ||
360 | case GAMEPORT_RESCAN: | 360 | case GAMEPORT_RESCAN: |
361 | gameport_disconnect_port(event->object); | 361 | gameport_disconnect_port(event->object); |
362 | gameport_find_driver(event->object); | 362 | gameport_find_driver(event->object); |
363 | break; | 363 | break; |
364 | 364 | ||
365 | case GAMEPORT_REGISTER_DRIVER: | 365 | case GAMEPORT_REGISTER_DRIVER: |
366 | gameport_drv = event->object; | 366 | gameport_drv = event->object; |
367 | driver_register(&gameport_drv->driver); | 367 | driver_register(&gameport_drv->driver); |
368 | break; | 368 | break; |
369 | 369 | ||
370 | default: | 370 | default: |
371 | break; | 371 | break; |
372 | } | 372 | } |
373 | 373 | ||
374 | gameport_remove_duplicate_events(event); | 374 | gameport_remove_duplicate_events(event); |
375 | gameport_free_event(event); | 375 | gameport_free_event(event); |
376 | } | 376 | } |
377 | 377 | ||
378 | up(&gameport_sem); | 378 | up(&gameport_sem); |
379 | } | 379 | } |
380 | 380 | ||
381 | /* | 381 | /* |
382 | * Remove all events that have been submitted for a given gameport port. | 382 | * Remove all events that have been submitted for a given gameport port. |
383 | */ | 383 | */ |
384 | static void gameport_remove_pending_events(struct gameport *gameport) | 384 | static void gameport_remove_pending_events(struct gameport *gameport) |
385 | { | 385 | { |
386 | struct list_head *node, *next; | 386 | struct list_head *node, *next; |
387 | struct gameport_event *event; | 387 | struct gameport_event *event; |
388 | unsigned long flags; | 388 | unsigned long flags; |
389 | 389 | ||
390 | spin_lock_irqsave(&gameport_event_lock, flags); | 390 | spin_lock_irqsave(&gameport_event_lock, flags); |
391 | 391 | ||
392 | list_for_each_safe(node, next, &gameport_event_list) { | 392 | list_for_each_safe(node, next, &gameport_event_list) { |
393 | event = list_entry(node, struct gameport_event, node); | 393 | event = list_entry(node, struct gameport_event, node); |
394 | if (event->object == gameport) { | 394 | if (event->object == gameport) { |
395 | list_del_init(node); | 395 | list_del_init(node); |
396 | gameport_free_event(event); | 396 | gameport_free_event(event); |
397 | } | 397 | } |
398 | } | 398 | } |
399 | 399 | ||
400 | spin_unlock_irqrestore(&gameport_event_lock, flags); | 400 | spin_unlock_irqrestore(&gameport_event_lock, flags); |
401 | } | 401 | } |
402 | 402 | ||
403 | /* | 403 | /* |
404 | * Destroy child gameport port (if any) that has not been fully registered yet. | 404 | * Destroy child gameport port (if any) that has not been fully registered yet. |
405 | * | 405 | * |
406 | * Note that we rely on the fact that a port can have only one child and therefore | 406 | * Note that we rely on the fact that a port can have only one child and therefore |
407 | * only one child registration request can be pending. Additionally, children | 407 | * only one child registration request can be pending. Additionally, children |
408 | * are registered by the driver's connect() handler so there can't be a grandchild | 408 | * are registered by the driver's connect() handler so there can't be a grandchild |
409 | * pending registration together with a child. | 409 | * pending registration together with a child. |
410 | */ | 410 | */ |
411 | static struct gameport *gameport_get_pending_child(struct gameport *parent) | 411 | static struct gameport *gameport_get_pending_child(struct gameport *parent) |
412 | { | 412 | { |
413 | struct gameport_event *event; | 413 | struct gameport_event *event; |
414 | struct gameport *gameport, *child = NULL; | 414 | struct gameport *gameport, *child = NULL; |
415 | unsigned long flags; | 415 | unsigned long flags; |
416 | 416 | ||
417 | spin_lock_irqsave(&gameport_event_lock, flags); | 417 | spin_lock_irqsave(&gameport_event_lock, flags); |
418 | 418 | ||
419 | list_for_each_entry(event, &gameport_event_list, node) { | 419 | list_for_each_entry(event, &gameport_event_list, node) { |
420 | if (event->type == GAMEPORT_REGISTER_PORT) { | 420 | if (event->type == GAMEPORT_REGISTER_PORT) { |
421 | gameport = event->object; | 421 | gameport = event->object; |
422 | if (gameport->parent == parent) { | 422 | if (gameport->parent == parent) { |
423 | child = gameport; | 423 | child = gameport; |
424 | break; | 424 | break; |
425 | } | 425 | } |
426 | } | 426 | } |
427 | } | 427 | } |
428 | 428 | ||
429 | spin_unlock_irqrestore(&gameport_event_lock, flags); | 429 | spin_unlock_irqrestore(&gameport_event_lock, flags); |
430 | return child; | 430 | return child; |
431 | } | 431 | } |
432 | 432 | ||
433 | static int gameport_thread(void *nothing) | 433 | static int gameport_thread(void *nothing) |
434 | { | 434 | { |
435 | lock_kernel(); | 435 | lock_kernel(); |
436 | daemonize("kgameportd"); | 436 | daemonize("kgameportd"); |
437 | allow_signal(SIGTERM); | 437 | allow_signal(SIGTERM); |
438 | 438 | ||
439 | do { | 439 | do { |
440 | gameport_handle_events(); | 440 | gameport_handle_events(); |
441 | wait_event_interruptible(gameport_wait, !list_empty(&gameport_event_list)); | 441 | wait_event_interruptible(gameport_wait, !list_empty(&gameport_event_list)); |
442 | try_to_freeze(PF_FREEZE); | 442 | try_to_freeze(PF_FREEZE); |
443 | } while (!signal_pending(current)); | 443 | } while (!signal_pending(current)); |
444 | 444 | ||
445 | printk(KERN_DEBUG "gameport: kgameportd exiting\n"); | 445 | printk(KERN_DEBUG "gameport: kgameportd exiting\n"); |
446 | 446 | ||
447 | unlock_kernel(); | 447 | unlock_kernel(); |
448 | complete_and_exit(&gameport_exited, 0); | 448 | complete_and_exit(&gameport_exited, 0); |
449 | } | 449 | } |
450 | 450 | ||
451 | 451 | ||
452 | /* | 452 | /* |
453 | * Gameport port operations | 453 | * Gameport port operations |
454 | */ | 454 | */ |
455 | 455 | ||
456 | static ssize_t gameport_show_description(struct device *dev, struct device_attribute *attr, char *buf) | 456 | static ssize_t gameport_show_description(struct device *dev, struct device_attribute *attr, char *buf) |
457 | { | 457 | { |
458 | struct gameport *gameport = to_gameport_port(dev); | 458 | struct gameport *gameport = to_gameport_port(dev); |
459 | return sprintf(buf, "%s\n", gameport->name); | 459 | return sprintf(buf, "%s\n", gameport->name); |
460 | } | 460 | } |
461 | 461 | ||
462 | static ssize_t gameport_rebind_driver(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) | 462 | static ssize_t gameport_rebind_driver(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) |
463 | { | 463 | { |
464 | struct gameport *gameport = to_gameport_port(dev); | 464 | struct gameport *gameport = to_gameport_port(dev); |
465 | struct device_driver *drv; | 465 | struct device_driver *drv; |
466 | int retval; | 466 | int retval; |
467 | 467 | ||
468 | retval = down_interruptible(&gameport_sem); | 468 | retval = down_interruptible(&gameport_sem); |
469 | if (retval) | 469 | if (retval) |
470 | return retval; | 470 | return retval; |
471 | 471 | ||
472 | retval = count; | 472 | retval = count; |
473 | if (!strncmp(buf, "none", count)) { | 473 | if (!strncmp(buf, "none", count)) { |
474 | gameport_disconnect_port(gameport); | 474 | gameport_disconnect_port(gameport); |
475 | } else if (!strncmp(buf, "reconnect", count)) { | 475 | } else if (!strncmp(buf, "reconnect", count)) { |
476 | gameport_reconnect_port(gameport); | 476 | gameport_reconnect_port(gameport); |
477 | } else if (!strncmp(buf, "rescan", count)) { | 477 | } else if (!strncmp(buf, "rescan", count)) { |
478 | gameport_disconnect_port(gameport); | 478 | gameport_disconnect_port(gameport); |
479 | gameport_find_driver(gameport); | 479 | gameport_find_driver(gameport); |
480 | } else if ((drv = driver_find(buf, &gameport_bus)) != NULL) { | 480 | } else if ((drv = driver_find(buf, &gameport_bus)) != NULL) { |
481 | gameport_disconnect_port(gameport); | 481 | gameport_disconnect_port(gameport); |
482 | gameport_bind_driver(gameport, to_gameport_driver(drv)); | 482 | gameport_bind_driver(gameport, to_gameport_driver(drv)); |
483 | put_driver(drv); | 483 | put_driver(drv); |
484 | } else { | 484 | } else { |
485 | retval = -EINVAL; | 485 | retval = -EINVAL; |
486 | } | 486 | } |
487 | 487 | ||
488 | up(&gameport_sem); | 488 | up(&gameport_sem); |
489 | 489 | ||
490 | return retval; | 490 | return retval; |
491 | } | 491 | } |
492 | 492 | ||
493 | static struct device_attribute gameport_device_attrs[] = { | 493 | static struct device_attribute gameport_device_attrs[] = { |
494 | __ATTR(description, S_IRUGO, gameport_show_description, NULL), | 494 | __ATTR(description, S_IRUGO, gameport_show_description, NULL), |
495 | __ATTR(drvctl, S_IWUSR, NULL, gameport_rebind_driver), | 495 | __ATTR(drvctl, S_IWUSR, NULL, gameport_rebind_driver), |
496 | __ATTR_NULL | 496 | __ATTR_NULL |
497 | }; | 497 | }; |
498 | 498 | ||
499 | static void gameport_release_port(struct device *dev) | 499 | static void gameport_release_port(struct device *dev) |
500 | { | 500 | { |
501 | struct gameport *gameport = to_gameport_port(dev); | 501 | struct gameport *gameport = to_gameport_port(dev); |
502 | 502 | ||
503 | kfree(gameport); | 503 | kfree(gameport); |
504 | module_put(THIS_MODULE); | 504 | module_put(THIS_MODULE); |
505 | } | 505 | } |
506 | 506 | ||
507 | void gameport_set_phys(struct gameport *gameport, const char *fmt, ...) | 507 | void gameport_set_phys(struct gameport *gameport, const char *fmt, ...) |
508 | { | 508 | { |
509 | va_list args; | 509 | va_list args; |
510 | 510 | ||
511 | va_start(args, fmt); | 511 | va_start(args, fmt); |
512 | vsnprintf(gameport->phys, sizeof(gameport->phys), fmt, args); | 512 | vsnprintf(gameport->phys, sizeof(gameport->phys), fmt, args); |
513 | va_end(args); | 513 | va_end(args); |
514 | } | 514 | } |
515 | 515 | ||
516 | /* | 516 | /* |
517 | * Prepare gameport port for registration. | 517 | * Prepare gameport port for registration. |
518 | */ | 518 | */ |
519 | static void gameport_init_port(struct gameport *gameport) | 519 | static void gameport_init_port(struct gameport *gameport) |
520 | { | 520 | { |
521 | static atomic_t gameport_no = ATOMIC_INIT(0); | 521 | static atomic_t gameport_no = ATOMIC_INIT(0); |
522 | 522 | ||
523 | __module_get(THIS_MODULE); | 523 | __module_get(THIS_MODULE); |
524 | 524 | ||
525 | init_MUTEX(&gameport->drv_sem); | 525 | init_MUTEX(&gameport->drv_sem); |
526 | device_initialize(&gameport->dev); | 526 | device_initialize(&gameport->dev); |
527 | snprintf(gameport->dev.bus_id, sizeof(gameport->dev.bus_id), | 527 | snprintf(gameport->dev.bus_id, sizeof(gameport->dev.bus_id), |
528 | "gameport%lu", (unsigned long)atomic_inc_return(&gameport_no) - 1); | 528 | "gameport%lu", (unsigned long)atomic_inc_return(&gameport_no) - 1); |
529 | gameport->dev.bus = &gameport_bus; | 529 | gameport->dev.bus = &gameport_bus; |
530 | gameport->dev.release = gameport_release_port; | 530 | gameport->dev.release = gameport_release_port; |
531 | if (gameport->parent) | 531 | if (gameport->parent) |
532 | gameport->dev.parent = &gameport->parent->dev; | 532 | gameport->dev.parent = &gameport->parent->dev; |
533 | 533 | ||
534 | spin_lock_init(&gameport->timer_lock); | 534 | spin_lock_init(&gameport->timer_lock); |
535 | init_timer(&gameport->poll_timer); | 535 | init_timer(&gameport->poll_timer); |
536 | gameport->poll_timer.function = gameport_run_poll_handler; | 536 | gameport->poll_timer.function = gameport_run_poll_handler; |
537 | gameport->poll_timer.data = (unsigned long)gameport; | 537 | gameport->poll_timer.data = (unsigned long)gameport; |
538 | } | 538 | } |
539 | 539 | ||
540 | /* | 540 | /* |
541 | * Complete gameport port registration. | 541 | * Complete gameport port registration. |
542 | * The driver core will attempt to find an appropriate driver for the port. | 542 | * The driver core will attempt to find an appropriate driver for the port. |
543 | */ | 543 | */ |
544 | static void gameport_add_port(struct gameport *gameport) | 544 | static void gameport_add_port(struct gameport *gameport) |
545 | { | 545 | { |
546 | if (gameport->parent) | 546 | if (gameport->parent) |
547 | gameport->parent->child = gameport; | 547 | gameport->parent->child = gameport; |
548 | 548 | ||
549 | gameport->speed = gameport_measure_speed(gameport); | 549 | gameport->speed = gameport_measure_speed(gameport); |
550 | 550 | ||
551 | list_add_tail(&gameport->node, &gameport_list); | 551 | list_add_tail(&gameport->node, &gameport_list); |
552 | 552 | ||
553 | if (gameport->io) | 553 | if (gameport->io) |
554 | printk(KERN_INFO "gameport: %s is %s, io %#x, speed %dkHz\n", | 554 | printk(KERN_INFO "gameport: %s is %s, io %#x, speed %dkHz\n", |
555 | gameport->name, gameport->phys, gameport->io, gameport->speed); | 555 | gameport->name, gameport->phys, gameport->io, gameport->speed); |
556 | else | 556 | else |
557 | printk(KERN_INFO "gameport: %s is %s, speed %dkHz\n", | 557 | printk(KERN_INFO "gameport: %s is %s, speed %dkHz\n", |
558 | gameport->name, gameport->phys, gameport->speed); | 558 | gameport->name, gameport->phys, gameport->speed); |
559 | 559 | ||
560 | device_add(&gameport->dev); | 560 | device_add(&gameport->dev); |
561 | gameport->registered = 1; | 561 | gameport->registered = 1; |
562 | } | 562 | } |
563 | 563 | ||
564 | /* | 564 | /* |
565 | * gameport_destroy_port() completes deregistration process and removes | 565 | * gameport_destroy_port() completes deregistration process and removes |
566 | * port from the system | 566 | * port from the system |
567 | */ | 567 | */ |
568 | static void gameport_destroy_port(struct gameport *gameport) | 568 | static void gameport_destroy_port(struct gameport *gameport) |
569 | { | 569 | { |
570 | struct gameport *child; | 570 | struct gameport *child; |
571 | 571 | ||
572 | child = gameport_get_pending_child(gameport); | 572 | child = gameport_get_pending_child(gameport); |
573 | if (child) { | 573 | if (child) { |
574 | gameport_remove_pending_events(child); | 574 | gameport_remove_pending_events(child); |
575 | put_device(&child->dev); | 575 | put_device(&child->dev); |
576 | } | 576 | } |
577 | 577 | ||
578 | if (gameport->parent) { | 578 | if (gameport->parent) { |
579 | gameport->parent->child = NULL; | 579 | gameport->parent->child = NULL; |
580 | gameport->parent = NULL; | 580 | gameport->parent = NULL; |
581 | } | 581 | } |
582 | 582 | ||
583 | if (gameport->registered) { | 583 | if (gameport->registered) { |
584 | device_del(&gameport->dev); | 584 | device_del(&gameport->dev); |
585 | list_del_init(&gameport->node); | 585 | list_del_init(&gameport->node); |
586 | gameport->registered = 0; | 586 | gameport->registered = 0; |
587 | } | 587 | } |
588 | 588 | ||
589 | gameport_remove_pending_events(gameport); | 589 | gameport_remove_pending_events(gameport); |
590 | put_device(&gameport->dev); | 590 | put_device(&gameport->dev); |
591 | } | 591 | } |
592 | 592 | ||
593 | /* | 593 | /* |
594 | * Reconnect gameport port and all its children (re-initialize attached devices) | 594 | * Reconnect gameport port and all its children (re-initialize attached devices) |
595 | */ | 595 | */ |
596 | static void gameport_reconnect_port(struct gameport *gameport) | 596 | static void gameport_reconnect_port(struct gameport *gameport) |
597 | { | 597 | { |
598 | do { | 598 | do { |
599 | if (!gameport->drv || !gameport->drv->reconnect || gameport->drv->reconnect(gameport)) { | 599 | if (!gameport->drv || !gameport->drv->reconnect || gameport->drv->reconnect(gameport)) { |
600 | gameport_disconnect_port(gameport); | 600 | gameport_disconnect_port(gameport); |
601 | gameport_find_driver(gameport); | 601 | gameport_find_driver(gameport); |
602 | /* Ok, old children are now gone, we are done */ | 602 | /* Ok, old children are now gone, we are done */ |
603 | break; | 603 | break; |
604 | } | 604 | } |
605 | gameport = gameport->child; | 605 | gameport = gameport->child; |
606 | } while (gameport); | 606 | } while (gameport); |
607 | } | 607 | } |
608 | 608 | ||
609 | /* | 609 | /* |
610 | * gameport_disconnect_port() unbinds a port from its driver. As a side effect | 610 | * gameport_disconnect_port() unbinds a port from its driver. As a side effect |
611 | * all child ports are unbound and destroyed. | 611 | * all child ports are unbound and destroyed. |
612 | */ | 612 | */ |
613 | static void gameport_disconnect_port(struct gameport *gameport) | 613 | static void gameport_disconnect_port(struct gameport *gameport) |
614 | { | 614 | { |
615 | struct gameport *s, *parent; | 615 | struct gameport *s, *parent; |
616 | 616 | ||
617 | if (gameport->child) { | 617 | if (gameport->child) { |
618 | /* | 618 | /* |
619 | * Child ports should be disconnected and destroyed | 619 | * Child ports should be disconnected and destroyed |
620 | * first, starting with the leaf one, since we don't want | 620 | * first, starting with the leaf one, since we don't want |
621 | * to do recursion | 621 | * to do recursion |
622 | */ | 622 | */ |
623 | for (s = gameport; s->child; s = s->child) | 623 | for (s = gameport; s->child; s = s->child) |
624 | /* empty */; | 624 | /* empty */; |
625 | 625 | ||
626 | do { | 626 | do { |
627 | parent = s->parent; | 627 | parent = s->parent; |
628 | 628 | ||
629 | gameport_release_driver(s); | 629 | gameport_release_driver(s); |
630 | gameport_destroy_port(s); | 630 | gameport_destroy_port(s); |
631 | } while ((s = parent) != gameport); | 631 | } while ((s = parent) != gameport); |
632 | } | 632 | } |
633 | 633 | ||
634 | /* | 634 | /* |
635 | * Ok, no children left, now disconnect this port | 635 | * Ok, no children left, now disconnect this port |
636 | */ | 636 | */ |
637 | gameport_release_driver(gameport); | 637 | gameport_release_driver(gameport); |
638 | } | 638 | } |
639 | 639 | ||
640 | void gameport_rescan(struct gameport *gameport) | 640 | void gameport_rescan(struct gameport *gameport) |
641 | { | 641 | { |
642 | gameport_queue_event(gameport, NULL, GAMEPORT_RESCAN); | 642 | gameport_queue_event(gameport, NULL, GAMEPORT_RESCAN); |
643 | } | 643 | } |
644 | 644 | ||
645 | void gameport_reconnect(struct gameport *gameport) | 645 | void gameport_reconnect(struct gameport *gameport) |
646 | { | 646 | { |
647 | gameport_queue_event(gameport, NULL, GAMEPORT_RECONNECT); | 647 | gameport_queue_event(gameport, NULL, GAMEPORT_RECONNECT); |
648 | } | 648 | } |
649 | 649 | ||
650 | /* | 650 | /* |
651 | * Submits register request to kgameportd for subsequent execution. | 651 | * Submits register request to kgameportd for subsequent execution. |
652 | * Note that port registration is always asynchronous. | 652 | * Note that port registration is always asynchronous. |
653 | */ | 653 | */ |
654 | void __gameport_register_port(struct gameport *gameport, struct module *owner) | 654 | void __gameport_register_port(struct gameport *gameport, struct module *owner) |
655 | { | 655 | { |
656 | gameport_init_port(gameport); | 656 | gameport_init_port(gameport); |
657 | gameport_queue_event(gameport, owner, GAMEPORT_REGISTER_PORT); | 657 | gameport_queue_event(gameport, owner, GAMEPORT_REGISTER_PORT); |
658 | } | 658 | } |
659 | 659 | ||
660 | /* | 660 | /* |
661 | * Synchronously unregisters gameport port. | 661 | * Synchronously unregisters gameport port. |
662 | */ | 662 | */ |
663 | void gameport_unregister_port(struct gameport *gameport) | 663 | void gameport_unregister_port(struct gameport *gameport) |
664 | { | 664 | { |
665 | down(&gameport_sem); | 665 | down(&gameport_sem); |
666 | gameport_disconnect_port(gameport); | 666 | gameport_disconnect_port(gameport); |
667 | gameport_destroy_port(gameport); | 667 | gameport_destroy_port(gameport); |
668 | up(&gameport_sem); | 668 | up(&gameport_sem); |
669 | } | 669 | } |
670 | 670 | ||
671 | 671 | ||
672 | /* | 672 | /* |
673 | * Gameport driver operations | 673 | * Gameport driver operations |
674 | */ | 674 | */ |
675 | 675 | ||
676 | static ssize_t gameport_driver_show_description(struct device_driver *drv, char *buf) | 676 | static ssize_t gameport_driver_show_description(struct device_driver *drv, char *buf) |
677 | { | 677 | { |
678 | struct gameport_driver *driver = to_gameport_driver(drv); | 678 | struct gameport_driver *driver = to_gameport_driver(drv); |
679 | return sprintf(buf, "%s\n", driver->description ? driver->description : "(none)"); | 679 | return sprintf(buf, "%s\n", driver->description ? driver->description : "(none)"); |
680 | } | 680 | } |
681 | 681 | ||
682 | static struct driver_attribute gameport_driver_attrs[] = { | 682 | static struct driver_attribute gameport_driver_attrs[] = { |
683 | __ATTR(description, S_IRUGO, gameport_driver_show_description, NULL), | 683 | __ATTR(description, S_IRUGO, gameport_driver_show_description, NULL), |
684 | __ATTR_NULL | 684 | __ATTR_NULL |
685 | }; | 685 | }; |
686 | 686 | ||
687 | static int gameport_driver_probe(struct device *dev) | 687 | static int gameport_driver_probe(struct device *dev) |
688 | { | 688 | { |
689 | struct gameport *gameport = to_gameport_port(dev); | 689 | struct gameport *gameport = to_gameport_port(dev); |
690 | struct gameport_driver *drv = to_gameport_driver(dev->driver); | 690 | struct gameport_driver *drv = to_gameport_driver(dev->driver); |
691 | 691 | ||
692 | drv->connect(gameport, drv); | 692 | drv->connect(gameport, drv); |
693 | return gameport->drv ? 0 : -ENODEV; | 693 | return gameport->drv ? 0 : -ENODEV; |
694 | } | 694 | } |
695 | 695 | ||
696 | static int gameport_driver_remove(struct device *dev) | 696 | static int gameport_driver_remove(struct device *dev) |
697 | { | 697 | { |
698 | struct gameport *gameport = to_gameport_port(dev); | 698 | struct gameport *gameport = to_gameport_port(dev); |
699 | struct gameport_driver *drv = to_gameport_driver(dev->driver); | 699 | struct gameport_driver *drv = to_gameport_driver(dev->driver); |
700 | 700 | ||
701 | drv->disconnect(gameport); | 701 | drv->disconnect(gameport); |
702 | return 0; | 702 | return 0; |
703 | } | 703 | } |
704 | 704 | ||
705 | void __gameport_register_driver(struct gameport_driver *drv, struct module *owner) | 705 | void __gameport_register_driver(struct gameport_driver *drv, struct module *owner) |
706 | { | 706 | { |
707 | drv->driver.bus = &gameport_bus; | 707 | drv->driver.bus = &gameport_bus; |
708 | drv->driver.probe = gameport_driver_probe; | 708 | drv->driver.probe = gameport_driver_probe; |
709 | drv->driver.remove = gameport_driver_remove; | 709 | drv->driver.remove = gameport_driver_remove; |
710 | gameport_queue_event(drv, owner, GAMEPORT_REGISTER_DRIVER); | 710 | gameport_queue_event(drv, owner, GAMEPORT_REGISTER_DRIVER); |
711 | } | 711 | } |
712 | 712 | ||
713 | void gameport_unregister_driver(struct gameport_driver *drv) | 713 | void gameport_unregister_driver(struct gameport_driver *drv) |
714 | { | 714 | { |
715 | struct gameport *gameport; | 715 | struct gameport *gameport; |
716 | 716 | ||
717 | down(&gameport_sem); | 717 | down(&gameport_sem); |
718 | drv->ignore = 1; /* so gameport_find_driver ignores it */ | 718 | drv->ignore = 1; /* so gameport_find_driver ignores it */ |
719 | 719 | ||
720 | start_over: | 720 | start_over: |
721 | list_for_each_entry(gameport, &gameport_list, node) { | 721 | list_for_each_entry(gameport, &gameport_list, node) { |
722 | if (gameport->drv == drv) { | 722 | if (gameport->drv == drv) { |
723 | gameport_disconnect_port(gameport); | 723 | gameport_disconnect_port(gameport); |
724 | gameport_find_driver(gameport); | 724 | gameport_find_driver(gameport); |
725 | /* we could've deleted some ports, restart */ | 725 | /* we could've deleted some ports, restart */ |
726 | goto start_over; | 726 | goto start_over; |
727 | } | 727 | } |
728 | } | 728 | } |
729 | 729 | ||
730 | driver_unregister(&drv->driver); | 730 | driver_unregister(&drv->driver); |
731 | up(&gameport_sem); | 731 | up(&gameport_sem); |
732 | } | 732 | } |
733 | 733 | ||
734 | static int gameport_bus_match(struct device *dev, struct device_driver *drv) | 734 | static int gameport_bus_match(struct device *dev, struct device_driver *drv) |
735 | { | 735 | { |
736 | struct gameport_driver *gameport_drv = to_gameport_driver(drv); | 736 | struct gameport_driver *gameport_drv = to_gameport_driver(drv); |
737 | 737 | ||
738 | return !gameport_drv->ignore; | 738 | return !gameport_drv->ignore; |
739 | } | 739 | } |
740 | 740 | ||
741 | static void gameport_set_drv(struct gameport *gameport, struct gameport_driver *drv) | 741 | static void gameport_set_drv(struct gameport *gameport, struct gameport_driver *drv) |
742 | { | 742 | { |
743 | down(&gameport->drv_sem); | 743 | down(&gameport->drv_sem); |
744 | gameport->drv = drv; | 744 | gameport->drv = drv; |
745 | up(&gameport->drv_sem); | 745 | up(&gameport->drv_sem); |
746 | } | 746 | } |
747 | 747 | ||
748 | int gameport_open(struct gameport *gameport, struct gameport_driver *drv, int mode) | 748 | int gameport_open(struct gameport *gameport, struct gameport_driver *drv, int mode) |
749 | { | 749 | { |
750 | 750 | ||
751 | if (gameport->open) { | 751 | if (gameport->open) { |
752 | if (gameport->open(gameport, mode)) { | 752 | if (gameport->open(gameport, mode)) { |
753 | return -1; | 753 | return -1; |
754 | } | 754 | } |
755 | } else { | 755 | } else { |
756 | if (mode != GAMEPORT_MODE_RAW) | 756 | if (mode != GAMEPORT_MODE_RAW) |
757 | return -1; | 757 | return -1; |
758 | } | 758 | } |
759 | 759 | ||
760 | gameport_set_drv(gameport, drv); | 760 | gameport_set_drv(gameport, drv); |
761 | return 0; | 761 | return 0; |
762 | } | 762 | } |
763 | 763 | ||
764 | void gameport_close(struct gameport *gameport) | 764 | void gameport_close(struct gameport *gameport) |
765 | { | 765 | { |
766 | del_timer_sync(&gameport->poll_timer); | 766 | del_timer_sync(&gameport->poll_timer); |
767 | gameport->poll_handler = NULL; | 767 | gameport->poll_handler = NULL; |
768 | gameport->poll_interval = 0; | 768 | gameport->poll_interval = 0; |
769 | gameport_set_drv(gameport, NULL); | 769 | gameport_set_drv(gameport, NULL); |
770 | if (gameport->close) | 770 | if (gameport->close) |
771 | gameport->close(gameport); | 771 | gameport->close(gameport); |
772 | } | 772 | } |
773 | 773 | ||
774 | static int __init gameport_init(void) | 774 | static int __init gameport_init(void) |
775 | { | 775 | { |
776 | if (!(gameport_pid = kernel_thread(gameport_thread, NULL, CLONE_KERNEL))) { | 776 | if (!(gameport_pid = kernel_thread(gameport_thread, NULL, CLONE_KERNEL))) { |
777 | printk(KERN_ERR "gameport: Failed to start kgameportd\n"); | 777 | printk(KERN_ERR "gameport: Failed to start kgameportd\n"); |
778 | return -1; | 778 | return -1; |
779 | } | 779 | } |
780 | 780 | ||
781 | gameport_bus.dev_attrs = gameport_device_attrs; | 781 | gameport_bus.dev_attrs = gameport_device_attrs; |
782 | gameport_bus.drv_attrs = gameport_driver_attrs; | 782 | gameport_bus.drv_attrs = gameport_driver_attrs; |
783 | gameport_bus.match = gameport_bus_match; | 783 | gameport_bus.match = gameport_bus_match; |
784 | bus_register(&gameport_bus); | 784 | bus_register(&gameport_bus); |
785 | 785 | ||
786 | return 0; | 786 | return 0; |
787 | } | 787 | } |
788 | 788 | ||
789 | static void __exit gameport_exit(void) | 789 | static void __exit gameport_exit(void) |
790 | { | 790 | { |
791 | bus_unregister(&gameport_bus); | 791 | bus_unregister(&gameport_bus); |
792 | kill_proc(gameport_pid, SIGTERM, 1); | 792 | kill_proc(gameport_pid, SIGTERM, 1); |
793 | wait_for_completion(&gameport_exited); | 793 | wait_for_completion(&gameport_exited); |
794 | } | 794 | } |
795 | 795 | ||
796 | module_init(gameport_init); | 796 | module_init(gameport_init); |
797 | module_exit(gameport_exit); | 797 | module_exit(gameport_exit); |
798 | 798 |
drivers/oprofile/buffer_sync.c
1 | /** | 1 | /** |
2 | * @file buffer_sync.c | 2 | * @file buffer_sync.c |
3 | * | 3 | * |
4 | * @remark Copyright 2002 OProfile authors | 4 | * @remark Copyright 2002 OProfile authors |
5 | * @remark Read the file COPYING | 5 | * @remark Read the file COPYING |
6 | * | 6 | * |
7 | * @author John Levon <levon@movementarian.org> | 7 | * @author John Levon <levon@movementarian.org> |
8 | * | 8 | * |
9 | * This is the core of the buffer management. Each | 9 | * This is the core of the buffer management. Each |
10 | * CPU buffer is processed and entered into the | 10 | * CPU buffer is processed and entered into the |
11 | * global event buffer. Such processing is necessary | 11 | * global event buffer. Such processing is necessary |
12 | * in several circumstances, mentioned below. | 12 | * in several circumstances, mentioned below. |
13 | * | 13 | * |
14 | * The processing does the job of converting the | 14 | * The processing does the job of converting the |
15 | * transitory EIP value into a persistent dentry/offset | 15 | * transitory EIP value into a persistent dentry/offset |
16 | * value that the profiler can record at its leisure. | 16 | * value that the profiler can record at its leisure. |
17 | * | 17 | * |
18 | * See fs/dcookies.c for a description of the dentry/offset | 18 | * See fs/dcookies.c for a description of the dentry/offset |
19 | * objects. | 19 | * objects. |
20 | */ | 20 | */ |
21 | 21 | ||
22 | #include <linux/mm.h> | 22 | #include <linux/mm.h> |
23 | #include <linux/workqueue.h> | 23 | #include <linux/workqueue.h> |
24 | #include <linux/notifier.h> | 24 | #include <linux/notifier.h> |
25 | #include <linux/dcookies.h> | 25 | #include <linux/dcookies.h> |
26 | #include <linux/profile.h> | 26 | #include <linux/profile.h> |
27 | #include <linux/module.h> | 27 | #include <linux/module.h> |
28 | #include <linux/fs.h> | 28 | #include <linux/fs.h> |
29 | 29 | ||
30 | #include "oprofile_stats.h" | 30 | #include "oprofile_stats.h" |
31 | #include "event_buffer.h" | 31 | #include "event_buffer.h" |
32 | #include "cpu_buffer.h" | 32 | #include "cpu_buffer.h" |
33 | #include "buffer_sync.h" | 33 | #include "buffer_sync.h" |
34 | 34 | ||
35 | static LIST_HEAD(dying_tasks); | 35 | static LIST_HEAD(dying_tasks); |
36 | static LIST_HEAD(dead_tasks); | 36 | static LIST_HEAD(dead_tasks); |
37 | static cpumask_t marked_cpus = CPU_MASK_NONE; | 37 | static cpumask_t marked_cpus = CPU_MASK_NONE; |
38 | static DEFINE_SPINLOCK(task_mortuary); | 38 | static DEFINE_SPINLOCK(task_mortuary); |
39 | static void process_task_mortuary(void); | 39 | static void process_task_mortuary(void); |
40 | 40 | ||
41 | 41 | ||
42 | /* Take ownership of the task struct and place it on the | 42 | /* Take ownership of the task struct and place it on the |
43 | * list for processing. Only after two full buffer syncs | 43 | * list for processing. Only after two full buffer syncs |
44 | * does the task eventually get freed, because by then | 44 | * does the task eventually get freed, because by then |
45 | * we are sure we will not reference it again. | 45 | * we are sure we will not reference it again. |
46 | */ | 46 | */ |
47 | static int task_free_notify(struct notifier_block * self, unsigned long val, void * data) | 47 | static int task_free_notify(struct notifier_block * self, unsigned long val, void * data) |
48 | { | 48 | { |
49 | struct task_struct * task = data; | 49 | struct task_struct * task = data; |
50 | spin_lock(&task_mortuary); | 50 | spin_lock(&task_mortuary); |
51 | list_add(&task->tasks, &dying_tasks); | 51 | list_add(&task->tasks, &dying_tasks); |
52 | spin_unlock(&task_mortuary); | 52 | spin_unlock(&task_mortuary); |
53 | return NOTIFY_OK; | 53 | return NOTIFY_OK; |
54 | } | 54 | } |
55 | 55 | ||
56 | 56 | ||
57 | /* The task is on its way out. A sync of the buffer means we can catch | 57 | /* The task is on its way out. A sync of the buffer means we can catch |
58 | * any remaining samples for this task. | 58 | * any remaining samples for this task. |
59 | */ | 59 | */ |
60 | static int task_exit_notify(struct notifier_block * self, unsigned long val, void * data) | 60 | static int task_exit_notify(struct notifier_block * self, unsigned long val, void * data) |
61 | { | 61 | { |
62 | /* To avoid latency problems, we only process the current CPU, | 62 | /* To avoid latency problems, we only process the current CPU, |
63 | * hoping that most samples for the task are on this CPU | 63 | * hoping that most samples for the task are on this CPU |
64 | */ | 64 | */ |
65 | sync_buffer(_smp_processor_id()); | 65 | sync_buffer(raw_smp_processor_id()); |
66 | return 0; | 66 | return 0; |
67 | } | 67 | } |
68 | 68 | ||
69 | 69 | ||
70 | /* The task is about to try a do_munmap(). We peek at what it's going to | 70 | /* The task is about to try a do_munmap(). We peek at what it's going to |
71 | * do, and if it's an executable region, process the samples first, so | 71 | * do, and if it's an executable region, process the samples first, so |
72 | * we don't lose any. This does not have to be exact, it's a QoI issue | 72 | * we don't lose any. This does not have to be exact, it's a QoI issue |
73 | * only. | 73 | * only. |
74 | */ | 74 | */ |
75 | static int munmap_notify(struct notifier_block * self, unsigned long val, void * data) | 75 | static int munmap_notify(struct notifier_block * self, unsigned long val, void * data) |
76 | { | 76 | { |
77 | unsigned long addr = (unsigned long)data; | 77 | unsigned long addr = (unsigned long)data; |
78 | struct mm_struct * mm = current->mm; | 78 | struct mm_struct * mm = current->mm; |
79 | struct vm_area_struct * mpnt; | 79 | struct vm_area_struct * mpnt; |
80 | 80 | ||
81 | down_read(&mm->mmap_sem); | 81 | down_read(&mm->mmap_sem); |
82 | 82 | ||
83 | mpnt = find_vma(mm, addr); | 83 | mpnt = find_vma(mm, addr); |
84 | if (mpnt && mpnt->vm_file && (mpnt->vm_flags & VM_EXEC)) { | 84 | if (mpnt && mpnt->vm_file && (mpnt->vm_flags & VM_EXEC)) { |
85 | up_read(&mm->mmap_sem); | 85 | up_read(&mm->mmap_sem); |
86 | /* To avoid latency problems, we only process the current CPU, | 86 | /* To avoid latency problems, we only process the current CPU, |
87 | * hoping that most samples for the task are on this CPU | 87 | * hoping that most samples for the task are on this CPU |
88 | */ | 88 | */ |
89 | sync_buffer(_smp_processor_id()); | 89 | sync_buffer(raw_smp_processor_id()); |
90 | return 0; | 90 | return 0; |
91 | } | 91 | } |
92 | 92 | ||
93 | up_read(&mm->mmap_sem); | 93 | up_read(&mm->mmap_sem); |
94 | return 0; | 94 | return 0; |
95 | } | 95 | } |
96 | 96 | ||
97 | 97 | ||
98 | /* We need to be told about new modules so we don't attribute to a previously | 98 | /* We need to be told about new modules so we don't attribute to a previously |
99 | * loaded module, or drop the samples on the floor. | 99 | * loaded module, or drop the samples on the floor. |
100 | */ | 100 | */ |
101 | static int module_load_notify(struct notifier_block * self, unsigned long val, void * data) | 101 | static int module_load_notify(struct notifier_block * self, unsigned long val, void * data) |
102 | { | 102 | { |
103 | #ifdef CONFIG_MODULES | 103 | #ifdef CONFIG_MODULES |
104 | if (val != MODULE_STATE_COMING) | 104 | if (val != MODULE_STATE_COMING) |
105 | return 0; | 105 | return 0; |
106 | 106 | ||
107 | /* FIXME: should we process all CPU buffers ? */ | 107 | /* FIXME: should we process all CPU buffers ? */ |
108 | down(&buffer_sem); | 108 | down(&buffer_sem); |
109 | add_event_entry(ESCAPE_CODE); | 109 | add_event_entry(ESCAPE_CODE); |
110 | add_event_entry(MODULE_LOADED_CODE); | 110 | add_event_entry(MODULE_LOADED_CODE); |
111 | up(&buffer_sem); | 111 | up(&buffer_sem); |
112 | #endif | 112 | #endif |
113 | return 0; | 113 | return 0; |
114 | } | 114 | } |
115 | 115 | ||
116 | 116 | ||
117 | static struct notifier_block task_free_nb = { | 117 | static struct notifier_block task_free_nb = { |
118 | .notifier_call = task_free_notify, | 118 | .notifier_call = task_free_notify, |
119 | }; | 119 | }; |
120 | 120 | ||
121 | static struct notifier_block task_exit_nb = { | 121 | static struct notifier_block task_exit_nb = { |
122 | .notifier_call = task_exit_notify, | 122 | .notifier_call = task_exit_notify, |
123 | }; | 123 | }; |
124 | 124 | ||
125 | static struct notifier_block munmap_nb = { | 125 | static struct notifier_block munmap_nb = { |
126 | .notifier_call = munmap_notify, | 126 | .notifier_call = munmap_notify, |
127 | }; | 127 | }; |
128 | 128 | ||
129 | static struct notifier_block module_load_nb = { | 129 | static struct notifier_block module_load_nb = { |
130 | .notifier_call = module_load_notify, | 130 | .notifier_call = module_load_notify, |
131 | }; | 131 | }; |
132 | 132 | ||
133 | 133 | ||
134 | static void end_sync(void) | 134 | static void end_sync(void) |
135 | { | 135 | { |
136 | end_cpu_work(); | 136 | end_cpu_work(); |
137 | /* make sure we don't leak task structs */ | 137 | /* make sure we don't leak task structs */ |
138 | process_task_mortuary(); | 138 | process_task_mortuary(); |
139 | process_task_mortuary(); | 139 | process_task_mortuary(); |
140 | } | 140 | } |
141 | 141 | ||
142 | 142 | ||
143 | int sync_start(void) | 143 | int sync_start(void) |
144 | { | 144 | { |
145 | int err; | 145 | int err; |
146 | 146 | ||
147 | start_cpu_work(); | 147 | start_cpu_work(); |
148 | 148 | ||
149 | err = task_handoff_register(&task_free_nb); | 149 | err = task_handoff_register(&task_free_nb); |
150 | if (err) | 150 | if (err) |
151 | goto out1; | 151 | goto out1; |
152 | err = profile_event_register(PROFILE_TASK_EXIT, &task_exit_nb); | 152 | err = profile_event_register(PROFILE_TASK_EXIT, &task_exit_nb); |
153 | if (err) | 153 | if (err) |
154 | goto out2; | 154 | goto out2; |
155 | err = profile_event_register(PROFILE_MUNMAP, &munmap_nb); | 155 | err = profile_event_register(PROFILE_MUNMAP, &munmap_nb); |
156 | if (err) | 156 | if (err) |
157 | goto out3; | 157 | goto out3; |
158 | err = register_module_notifier(&module_load_nb); | 158 | err = register_module_notifier(&module_load_nb); |
159 | if (err) | 159 | if (err) |
160 | goto out4; | 160 | goto out4; |
161 | 161 | ||
162 | out: | 162 | out: |
163 | return err; | 163 | return err; |
164 | out4: | 164 | out4: |
165 | profile_event_unregister(PROFILE_MUNMAP, &munmap_nb); | 165 | profile_event_unregister(PROFILE_MUNMAP, &munmap_nb); |
166 | out3: | 166 | out3: |
167 | profile_event_unregister(PROFILE_TASK_EXIT, &task_exit_nb); | 167 | profile_event_unregister(PROFILE_TASK_EXIT, &task_exit_nb); |
168 | out2: | 168 | out2: |
169 | task_handoff_unregister(&task_free_nb); | 169 | task_handoff_unregister(&task_free_nb); |
170 | out1: | 170 | out1: |
171 | end_sync(); | 171 | end_sync(); |
172 | goto out; | 172 | goto out; |
173 | } | 173 | } |
174 | 174 | ||
175 | 175 | ||
176 | void sync_stop(void) | 176 | void sync_stop(void) |
177 | { | 177 | { |
178 | unregister_module_notifier(&module_load_nb); | 178 | unregister_module_notifier(&module_load_nb); |
179 | profile_event_unregister(PROFILE_MUNMAP, &munmap_nb); | 179 | profile_event_unregister(PROFILE_MUNMAP, &munmap_nb); |
180 | profile_event_unregister(PROFILE_TASK_EXIT, &task_exit_nb); | 180 | profile_event_unregister(PROFILE_TASK_EXIT, &task_exit_nb); |
181 | task_handoff_unregister(&task_free_nb); | 181 | task_handoff_unregister(&task_free_nb); |
182 | end_sync(); | 182 | end_sync(); |
183 | } | 183 | } |
184 | 184 | ||
185 | 185 | ||
186 | /* Optimisation. We can manage without taking the dcookie sem | 186 | /* Optimisation. We can manage without taking the dcookie sem |
187 | * because we cannot reach this code without at least one | 187 | * because we cannot reach this code without at least one |
188 | * dcookie user still being registered (namely, the reader | 188 | * dcookie user still being registered (namely, the reader |
189 | * of the event buffer). */ | 189 | * of the event buffer). */ |
190 | static inline unsigned long fast_get_dcookie(struct dentry * dentry, | 190 | static inline unsigned long fast_get_dcookie(struct dentry * dentry, |
191 | struct vfsmount * vfsmnt) | 191 | struct vfsmount * vfsmnt) |
192 | { | 192 | { |
193 | unsigned long cookie; | 193 | unsigned long cookie; |
194 | 194 | ||
195 | if (dentry->d_cookie) | 195 | if (dentry->d_cookie) |
196 | return (unsigned long)dentry; | 196 | return (unsigned long)dentry; |
197 | get_dcookie(dentry, vfsmnt, &cookie); | 197 | get_dcookie(dentry, vfsmnt, &cookie); |
198 | return cookie; | 198 | return cookie; |
199 | } | 199 | } |
200 | 200 | ||
201 | 201 | ||
202 | /* Look up the dcookie for the task's first VM_EXECUTABLE mapping, | 202 | /* Look up the dcookie for the task's first VM_EXECUTABLE mapping, |
203 | * which corresponds loosely to "application name". This is | 203 | * which corresponds loosely to "application name". This is |
204 | * not strictly necessary but allows oprofile to associate | 204 | * not strictly necessary but allows oprofile to associate |
205 | * shared-library samples with particular applications | 205 | * shared-library samples with particular applications |
206 | */ | 206 | */ |
207 | static unsigned long get_exec_dcookie(struct mm_struct * mm) | 207 | static unsigned long get_exec_dcookie(struct mm_struct * mm) |
208 | { | 208 | { |
209 | unsigned long cookie = 0; | 209 | unsigned long cookie = 0; |
210 | struct vm_area_struct * vma; | 210 | struct vm_area_struct * vma; |
211 | 211 | ||
212 | if (!mm) | 212 | if (!mm) |
213 | goto out; | 213 | goto out; |
214 | 214 | ||
215 | for (vma = mm->mmap; vma; vma = vma->vm_next) { | 215 | for (vma = mm->mmap; vma; vma = vma->vm_next) { |
216 | if (!vma->vm_file) | 216 | if (!vma->vm_file) |
217 | continue; | 217 | continue; |
218 | if (!(vma->vm_flags & VM_EXECUTABLE)) | 218 | if (!(vma->vm_flags & VM_EXECUTABLE)) |
219 | continue; | 219 | continue; |
220 | cookie = fast_get_dcookie(vma->vm_file->f_dentry, | 220 | cookie = fast_get_dcookie(vma->vm_file->f_dentry, |
221 | vma->vm_file->f_vfsmnt); | 221 | vma->vm_file->f_vfsmnt); |
222 | break; | 222 | break; |
223 | } | 223 | } |
224 | 224 | ||
225 | out: | 225 | out: |
226 | return cookie; | 226 | return cookie; |
227 | } | 227 | } |
228 | 228 | ||
229 | 229 | ||
230 | /* Convert the EIP value of a sample into a persistent dentry/offset | 230 | /* Convert the EIP value of a sample into a persistent dentry/offset |
231 | * pair that can then be added to the global event buffer. We make | 231 | * pair that can then be added to the global event buffer. We make |
232 | * sure to do this lookup before a mm->mmap modification happens so | 232 | * sure to do this lookup before a mm->mmap modification happens so |
233 | * we don't lose track. | 233 | * we don't lose track. |
234 | */ | 234 | */ |
235 | static unsigned long lookup_dcookie(struct mm_struct * mm, unsigned long addr, off_t * offset) | 235 | static unsigned long lookup_dcookie(struct mm_struct * mm, unsigned long addr, off_t * offset) |
236 | { | 236 | { |
237 | unsigned long cookie = 0; | 237 | unsigned long cookie = 0; |
238 | struct vm_area_struct * vma; | 238 | struct vm_area_struct * vma; |
239 | 239 | ||
240 | for (vma = find_vma(mm, addr); vma; vma = vma->vm_next) { | 240 | for (vma = find_vma(mm, addr); vma; vma = vma->vm_next) { |
241 | 241 | ||
242 | if (!vma->vm_file) | 242 | if (!vma->vm_file) |
243 | continue; | 243 | continue; |
244 | 244 | ||
245 | if (addr < vma->vm_start || addr >= vma->vm_end) | 245 | if (addr < vma->vm_start || addr >= vma->vm_end) |
246 | continue; | 246 | continue; |
247 | 247 | ||
248 | cookie = fast_get_dcookie(vma->vm_file->f_dentry, | 248 | cookie = fast_get_dcookie(vma->vm_file->f_dentry, |
249 | vma->vm_file->f_vfsmnt); | 249 | vma->vm_file->f_vfsmnt); |
250 | *offset = (vma->vm_pgoff << PAGE_SHIFT) + addr - vma->vm_start; | 250 | *offset = (vma->vm_pgoff << PAGE_SHIFT) + addr - vma->vm_start; |
251 | break; | 251 | break; |
252 | } | 252 | } |
253 | 253 | ||
254 | return cookie; | 254 | return cookie; |
255 | } | 255 | } |
256 | 256 | ||
257 | 257 | ||
258 | static unsigned long last_cookie = ~0UL; | 258 | static unsigned long last_cookie = ~0UL; |
259 | 259 | ||
260 | static void add_cpu_switch(int i) | 260 | static void add_cpu_switch(int i) |
261 | { | 261 | { |
262 | add_event_entry(ESCAPE_CODE); | 262 | add_event_entry(ESCAPE_CODE); |
263 | add_event_entry(CPU_SWITCH_CODE); | 263 | add_event_entry(CPU_SWITCH_CODE); |
264 | add_event_entry(i); | 264 | add_event_entry(i); |
265 | last_cookie = ~0UL; | 265 | last_cookie = ~0UL; |
266 | } | 266 | } |
267 | 267 | ||
268 | static void add_kernel_ctx_switch(unsigned int in_kernel) | 268 | static void add_kernel_ctx_switch(unsigned int in_kernel) |
269 | { | 269 | { |
270 | add_event_entry(ESCAPE_CODE); | 270 | add_event_entry(ESCAPE_CODE); |
271 | if (in_kernel) | 271 | if (in_kernel) |
272 | add_event_entry(KERNEL_ENTER_SWITCH_CODE); | 272 | add_event_entry(KERNEL_ENTER_SWITCH_CODE); |
273 | else | 273 | else |
274 | add_event_entry(KERNEL_EXIT_SWITCH_CODE); | 274 | add_event_entry(KERNEL_EXIT_SWITCH_CODE); |
275 | } | 275 | } |
276 | 276 | ||
277 | static void | 277 | static void |
278 | add_user_ctx_switch(struct task_struct const * task, unsigned long cookie) | 278 | add_user_ctx_switch(struct task_struct const * task, unsigned long cookie) |
279 | { | 279 | { |
280 | add_event_entry(ESCAPE_CODE); | 280 | add_event_entry(ESCAPE_CODE); |
281 | add_event_entry(CTX_SWITCH_CODE); | 281 | add_event_entry(CTX_SWITCH_CODE); |
282 | add_event_entry(task->pid); | 282 | add_event_entry(task->pid); |
283 | add_event_entry(cookie); | 283 | add_event_entry(cookie); |
284 | /* Another code for daemon back-compat */ | 284 | /* Another code for daemon back-compat */ |
285 | add_event_entry(ESCAPE_CODE); | 285 | add_event_entry(ESCAPE_CODE); |
286 | add_event_entry(CTX_TGID_CODE); | 286 | add_event_entry(CTX_TGID_CODE); |
287 | add_event_entry(task->tgid); | 287 | add_event_entry(task->tgid); |
288 | } | 288 | } |
289 | 289 | ||
290 | 290 | ||
291 | static void add_cookie_switch(unsigned long cookie) | 291 | static void add_cookie_switch(unsigned long cookie) |
292 | { | 292 | { |
293 | add_event_entry(ESCAPE_CODE); | 293 | add_event_entry(ESCAPE_CODE); |
294 | add_event_entry(COOKIE_SWITCH_CODE); | 294 | add_event_entry(COOKIE_SWITCH_CODE); |
295 | add_event_entry(cookie); | 295 | add_event_entry(cookie); |
296 | } | 296 | } |
297 | 297 | ||
298 | 298 | ||
299 | static void add_trace_begin(void) | 299 | static void add_trace_begin(void) |
300 | { | 300 | { |
301 | add_event_entry(ESCAPE_CODE); | 301 | add_event_entry(ESCAPE_CODE); |
302 | add_event_entry(TRACE_BEGIN_CODE); | 302 | add_event_entry(TRACE_BEGIN_CODE); |
303 | } | 303 | } |
304 | 304 | ||
305 | 305 | ||
306 | static void add_sample_entry(unsigned long offset, unsigned long event) | 306 | static void add_sample_entry(unsigned long offset, unsigned long event) |
307 | { | 307 | { |
308 | add_event_entry(offset); | 308 | add_event_entry(offset); |
309 | add_event_entry(event); | 309 | add_event_entry(event); |
310 | } | 310 | } |
311 | 311 | ||
312 | 312 | ||
313 | static int add_us_sample(struct mm_struct * mm, struct op_sample * s) | 313 | static int add_us_sample(struct mm_struct * mm, struct op_sample * s) |
314 | { | 314 | { |
315 | unsigned long cookie; | 315 | unsigned long cookie; |
316 | off_t offset; | 316 | off_t offset; |
317 | 317 | ||
318 | cookie = lookup_dcookie(mm, s->eip, &offset); | 318 | cookie = lookup_dcookie(mm, s->eip, &offset); |
319 | 319 | ||
320 | if (!cookie) { | 320 | if (!cookie) { |
321 | atomic_inc(&oprofile_stats.sample_lost_no_mapping); | 321 | atomic_inc(&oprofile_stats.sample_lost_no_mapping); |
322 | return 0; | 322 | return 0; |
323 | } | 323 | } |
324 | 324 | ||
325 | if (cookie != last_cookie) { | 325 | if (cookie != last_cookie) { |
326 | add_cookie_switch(cookie); | 326 | add_cookie_switch(cookie); |
327 | last_cookie = cookie; | 327 | last_cookie = cookie; |
328 | } | 328 | } |
329 | 329 | ||
330 | add_sample_entry(offset, s->event); | 330 | add_sample_entry(offset, s->event); |
331 | 331 | ||
332 | return 1; | 332 | return 1; |
333 | } | 333 | } |
334 | 334 | ||
335 | 335 | ||
336 | /* Add a sample to the global event buffer. If possible the | 336 | /* Add a sample to the global event buffer. If possible the |
337 | * sample is converted into a persistent dentry/offset pair | 337 | * sample is converted into a persistent dentry/offset pair |
338 | * for later lookup from userspace. | 338 | * for later lookup from userspace. |
339 | */ | 339 | */ |
340 | static int | 340 | static int |
341 | add_sample(struct mm_struct * mm, struct op_sample * s, int in_kernel) | 341 | add_sample(struct mm_struct * mm, struct op_sample * s, int in_kernel) |
342 | { | 342 | { |
343 | if (in_kernel) { | 343 | if (in_kernel) { |
344 | add_sample_entry(s->eip, s->event); | 344 | add_sample_entry(s->eip, s->event); |
345 | return 1; | 345 | return 1; |
346 | } else if (mm) { | 346 | } else if (mm) { |
347 | return add_us_sample(mm, s); | 347 | return add_us_sample(mm, s); |
348 | } else { | 348 | } else { |
349 | atomic_inc(&oprofile_stats.sample_lost_no_mm); | 349 | atomic_inc(&oprofile_stats.sample_lost_no_mm); |
350 | } | 350 | } |
351 | return 0; | 351 | return 0; |
352 | } | 352 | } |
353 | 353 | ||
354 | 354 | ||
355 | static void release_mm(struct mm_struct * mm) | 355 | static void release_mm(struct mm_struct * mm) |
356 | { | 356 | { |
357 | if (!mm) | 357 | if (!mm) |
358 | return; | 358 | return; |
359 | up_read(&mm->mmap_sem); | 359 | up_read(&mm->mmap_sem); |
360 | mmput(mm); | 360 | mmput(mm); |
361 | } | 361 | } |
362 | 362 | ||
363 | 363 | ||
364 | static struct mm_struct * take_tasks_mm(struct task_struct * task) | 364 | static struct mm_struct * take_tasks_mm(struct task_struct * task) |
365 | { | 365 | { |
366 | struct mm_struct * mm = get_task_mm(task); | 366 | struct mm_struct * mm = get_task_mm(task); |
367 | if (mm) | 367 | if (mm) |
368 | down_read(&mm->mmap_sem); | 368 | down_read(&mm->mmap_sem); |
369 | return mm; | 369 | return mm; |
370 | } | 370 | } |
371 | 371 | ||
372 | 372 | ||
373 | static inline int is_code(unsigned long val) | 373 | static inline int is_code(unsigned long val) |
374 | { | 374 | { |
375 | return val == ESCAPE_CODE; | 375 | return val == ESCAPE_CODE; |
376 | } | 376 | } |
377 | 377 | ||
378 | 378 | ||
379 | /* "acquire" as many cpu buffer slots as we can */ | 379 | /* "acquire" as many cpu buffer slots as we can */ |
380 | static unsigned long get_slots(struct oprofile_cpu_buffer * b) | 380 | static unsigned long get_slots(struct oprofile_cpu_buffer * b) |
381 | { | 381 | { |
382 | unsigned long head = b->head_pos; | 382 | unsigned long head = b->head_pos; |
383 | unsigned long tail = b->tail_pos; | 383 | unsigned long tail = b->tail_pos; |
384 | 384 | ||
385 | /* | 385 | /* |
386 | * Subtle. This resets the persistent last_task | 386 | * Subtle. This resets the persistent last_task |
387 | * and in_kernel values used for switching notes. | 387 | * and in_kernel values used for switching notes. |
388 | * BUT, there is a small window between reading | 388 | * BUT, there is a small window between reading |
389 | * head_pos, and this call, that means samples | 389 | * head_pos, and this call, that means samples |
390 | * can appear at the new head position, but not | 390 | * can appear at the new head position, but not |
391 | * be prefixed with the notes for switching | 391 | * be prefixed with the notes for switching |
392 | * kernel mode or a task switch. This small hole | 392 | * kernel mode or a task switch. This small hole |
393 | * can lead to mis-attribution or samples where | 393 | * can lead to mis-attribution or samples where |
394 | * we don't know if it's in the kernel or not, | 394 | * we don't know if it's in the kernel or not, |
395 | * at the start of an event buffer. | 395 | * at the start of an event buffer. |
396 | */ | 396 | */ |
397 | cpu_buffer_reset(b); | 397 | cpu_buffer_reset(b); |
398 | 398 | ||
399 | if (head >= tail) | 399 | if (head >= tail) |
400 | return head - tail; | 400 | return head - tail; |
401 | 401 | ||
402 | return head + (b->buffer_size - tail); | 402 | return head + (b->buffer_size - tail); |
403 | } | 403 | } |
404 | 404 | ||
405 | 405 | ||
406 | static void increment_tail(struct oprofile_cpu_buffer * b) | 406 | static void increment_tail(struct oprofile_cpu_buffer * b) |
407 | { | 407 | { |
408 | unsigned long new_tail = b->tail_pos + 1; | 408 | unsigned long new_tail = b->tail_pos + 1; |
409 | 409 | ||
410 | rmb(); | 410 | rmb(); |
411 | 411 | ||
412 | if (new_tail < b->buffer_size) | 412 | if (new_tail < b->buffer_size) |
413 | b->tail_pos = new_tail; | 413 | b->tail_pos = new_tail; |
414 | else | 414 | else |
415 | b->tail_pos = 0; | 415 | b->tail_pos = 0; |
416 | } | 416 | } |
417 | 417 | ||
418 | 418 | ||
419 | /* Move tasks along towards death. Any tasks on dead_tasks | 419 | /* Move tasks along towards death. Any tasks on dead_tasks |
420 | * will definitely have no remaining references in any | 420 | * will definitely have no remaining references in any |
421 | * CPU buffers at this point, because we use two lists, | 421 | * CPU buffers at this point, because we use two lists, |
422 | * and to have reached the list, it must have gone through | 422 | * and to have reached the list, it must have gone through |
423 | * one full sync already. | 423 | * one full sync already. |
424 | */ | 424 | */ |
425 | static void process_task_mortuary(void) | 425 | static void process_task_mortuary(void) |
426 | { | 426 | { |
427 | struct list_head * pos; | 427 | struct list_head * pos; |
428 | struct list_head * pos2; | 428 | struct list_head * pos2; |
429 | struct task_struct * task; | 429 | struct task_struct * task; |
430 | 430 | ||
431 | spin_lock(&task_mortuary); | 431 | spin_lock(&task_mortuary); |
432 | 432 | ||
433 | list_for_each_safe(pos, pos2, &dead_tasks) { | 433 | list_for_each_safe(pos, pos2, &dead_tasks) { |
434 | task = list_entry(pos, struct task_struct, tasks); | 434 | task = list_entry(pos, struct task_struct, tasks); |
435 | list_del(&task->tasks); | 435 | list_del(&task->tasks); |
436 | free_task(task); | 436 | free_task(task); |
437 | } | 437 | } |
438 | 438 | ||
439 | list_for_each_safe(pos, pos2, &dying_tasks) { | 439 | list_for_each_safe(pos, pos2, &dying_tasks) { |
440 | task = list_entry(pos, struct task_struct, tasks); | 440 | task = list_entry(pos, struct task_struct, tasks); |
441 | list_del(&task->tasks); | 441 | list_del(&task->tasks); |
442 | list_add_tail(&task->tasks, &dead_tasks); | 442 | list_add_tail(&task->tasks, &dead_tasks); |
443 | } | 443 | } |
444 | 444 | ||
445 | spin_unlock(&task_mortuary); | 445 | spin_unlock(&task_mortuary); |
446 | } | 446 | } |
447 | 447 | ||
448 | 448 | ||
449 | static void mark_done(int cpu) | 449 | static void mark_done(int cpu) |
450 | { | 450 | { |
451 | int i; | 451 | int i; |
452 | 452 | ||
453 | cpu_set(cpu, marked_cpus); | 453 | cpu_set(cpu, marked_cpus); |
454 | 454 | ||
455 | for_each_online_cpu(i) { | 455 | for_each_online_cpu(i) { |
456 | if (!cpu_isset(i, marked_cpus)) | 456 | if (!cpu_isset(i, marked_cpus)) |
457 | return; | 457 | return; |
458 | } | 458 | } |
459 | 459 | ||
460 | /* All CPUs have been processed at least once, | 460 | /* All CPUs have been processed at least once, |
461 | * we can process the mortuary once | 461 | * we can process the mortuary once |
462 | */ | 462 | */ |
463 | process_task_mortuary(); | 463 | process_task_mortuary(); |
464 | 464 | ||
465 | cpus_clear(marked_cpus); | 465 | cpus_clear(marked_cpus); |
466 | } | 466 | } |
467 | 467 | ||
468 | 468 | ||
469 | /* FIXME: this is not sufficient if we implement syscall barrier backtrace | 469 | /* FIXME: this is not sufficient if we implement syscall barrier backtrace |
470 | * traversal, the code switch to sb_sample_start at first kernel enter/exit | 470 | * traversal, the code switch to sb_sample_start at first kernel enter/exit |
471 | * switch so we need a fifth state and some special handling in sync_buffer() | 471 | * switch so we need a fifth state and some special handling in sync_buffer() |
472 | */ | 472 | */ |
473 | typedef enum { | 473 | typedef enum { |
474 | sb_bt_ignore = -2, | 474 | sb_bt_ignore = -2, |
475 | sb_buffer_start, | 475 | sb_buffer_start, |
476 | sb_bt_start, | 476 | sb_bt_start, |
477 | sb_sample_start, | 477 | sb_sample_start, |
478 | } sync_buffer_state; | 478 | } sync_buffer_state; |
479 | 479 | ||
480 | /* Sync one of the CPU's buffers into the global event buffer. | 480 | /* Sync one of the CPU's buffers into the global event buffer. |
481 | * Here we need to go through each batch of samples punctuated | 481 | * Here we need to go through each batch of samples punctuated |
482 | * by context switch notes, taking the task's mmap_sem and doing | 482 | * by context switch notes, taking the task's mmap_sem and doing |
483 | * lookup in task->mm->mmap to convert EIP into dcookie/offset | 483 | * lookup in task->mm->mmap to convert EIP into dcookie/offset |
484 | * value. | 484 | * value. |
485 | */ | 485 | */ |
486 | void sync_buffer(int cpu) | 486 | void sync_buffer(int cpu) |
487 | { | 487 | { |
488 | struct oprofile_cpu_buffer * cpu_buf = &cpu_buffer[cpu]; | 488 | struct oprofile_cpu_buffer * cpu_buf = &cpu_buffer[cpu]; |
489 | struct mm_struct *mm = NULL; | 489 | struct mm_struct *mm = NULL; |
490 | struct task_struct * new; | 490 | struct task_struct * new; |
491 | unsigned long cookie = 0; | 491 | unsigned long cookie = 0; |
492 | int in_kernel = 1; | 492 | int in_kernel = 1; |
493 | unsigned int i; | 493 | unsigned int i; |
494 | sync_buffer_state state = sb_buffer_start; | 494 | sync_buffer_state state = sb_buffer_start; |
495 | unsigned long available; | 495 | unsigned long available; |
496 | 496 | ||
497 | down(&buffer_sem); | 497 | down(&buffer_sem); |
498 | 498 | ||
499 | add_cpu_switch(cpu); | 499 | add_cpu_switch(cpu); |
500 | 500 | ||
501 | /* Remember, only we can modify tail_pos */ | 501 | /* Remember, only we can modify tail_pos */ |
502 | 502 | ||
503 | available = get_slots(cpu_buf); | 503 | available = get_slots(cpu_buf); |
504 | 504 | ||
505 | for (i = 0; i < available; ++i) { | 505 | for (i = 0; i < available; ++i) { |
506 | struct op_sample * s = &cpu_buf->buffer[cpu_buf->tail_pos]; | 506 | struct op_sample * s = &cpu_buf->buffer[cpu_buf->tail_pos]; |
507 | 507 | ||
508 | if (is_code(s->eip)) { | 508 | if (is_code(s->eip)) { |
509 | if (s->event <= CPU_IS_KERNEL) { | 509 | if (s->event <= CPU_IS_KERNEL) { |
510 | /* kernel/userspace switch */ | 510 | /* kernel/userspace switch */ |
511 | in_kernel = s->event; | 511 | in_kernel = s->event; |
512 | if (state == sb_buffer_start) | 512 | if (state == sb_buffer_start) |
513 | state = sb_sample_start; | 513 | state = sb_sample_start; |
514 | add_kernel_ctx_switch(s->event); | 514 | add_kernel_ctx_switch(s->event); |
515 | } else if (s->event == CPU_TRACE_BEGIN) { | 515 | } else if (s->event == CPU_TRACE_BEGIN) { |
516 | state = sb_bt_start; | 516 | state = sb_bt_start; |
517 | add_trace_begin(); | 517 | add_trace_begin(); |
518 | } else { | 518 | } else { |
519 | struct mm_struct * oldmm = mm; | 519 | struct mm_struct * oldmm = mm; |
520 | 520 | ||
521 | /* userspace context switch */ | 521 | /* userspace context switch */ |
522 | new = (struct task_struct *)s->event; | 522 | new = (struct task_struct *)s->event; |
523 | 523 | ||
524 | release_mm(oldmm); | 524 | release_mm(oldmm); |
525 | mm = take_tasks_mm(new); | 525 | mm = take_tasks_mm(new); |
526 | if (mm != oldmm) | 526 | if (mm != oldmm) |
527 | cookie = get_exec_dcookie(mm); | 527 | cookie = get_exec_dcookie(mm); |
528 | add_user_ctx_switch(new, cookie); | 528 | add_user_ctx_switch(new, cookie); |
529 | } | 529 | } |
530 | } else { | 530 | } else { |
531 | if (state >= sb_bt_start && | 531 | if (state >= sb_bt_start && |
532 | !add_sample(mm, s, in_kernel)) { | 532 | !add_sample(mm, s, in_kernel)) { |
533 | if (state == sb_bt_start) { | 533 | if (state == sb_bt_start) { |
534 | state = sb_bt_ignore; | 534 | state = sb_bt_ignore; |
535 | atomic_inc(&oprofile_stats.bt_lost_no_mapping); | 535 | atomic_inc(&oprofile_stats.bt_lost_no_mapping); |
536 | } | 536 | } |
537 | } | 537 | } |
538 | } | 538 | } |
539 | 539 | ||
540 | increment_tail(cpu_buf); | 540 | increment_tail(cpu_buf); |
541 | } | 541 | } |
542 | release_mm(mm); | 542 | release_mm(mm); |
543 | 543 | ||
544 | mark_done(cpu); | 544 | mark_done(cpu); |
545 | 545 | ||
546 | up(&buffer_sem); | 546 | up(&buffer_sem); |
547 | } | 547 | } |
548 | 548 |
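
Not part of the diff: a minimal sketch of the convention the two converted call sites above rely on. smp_processor_id() is the strict, debug-checked form and expects the caller to be pinned to a CPU (for example inside a get_cpu()/put_cpu() pair), while raw_smp_processor_id() skips that check. In task_exit_notify() and munmap_notify() the CPU number is only a hint for which buffer to drain first, and sync_buffer() takes buffer_sem and may sleep, so the raw form is the right fit there. The helper below is hypothetical; it assumes nothing from this file beyond sync_buffer().

#include <linux/smp.h>
#include "buffer_sync.h"        /* declares sync_buffer(int cpu) */

static void smp_id_usage_sketch(void)   /* hypothetical, for illustration only */
{
        int cpu;

        /* Strict variant: valid only while the task cannot migrate. */
        cpu = get_cpu();        /* disables preemption, returns current CPU */
        (void)cpu;              /* ... strictly per-CPU work would go here ... */
        put_cpu();              /* re-enables preemption */

        /*
         * Relaxed variant, as in the hunks above: the CPU number is just a
         * hint (we may migrate right after reading it), and sync_buffer()
         * can sleep, so it must not run with preemption disabled anyway.
         */
        sync_buffer(raw_smp_processor_id());
}
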
fs/xfs/linux-2.6/xfs_linux.h
1 | /* | 1 | /* |
2 | * Copyright (c) 2000-2004 Silicon Graphics, Inc. All Rights Reserved. | 2 | * Copyright (c) 2000-2004 Silicon Graphics, Inc. All Rights Reserved. |
3 | * | 3 | * |
4 | * This program is free software; you can redistribute it and/or modify it | 4 | * This program is free software; you can redistribute it and/or modify it |
5 | * under the terms of version 2 of the GNU General Public License as | 5 | * under the terms of version 2 of the GNU General Public License as |
6 | * published by the Free Software Foundation. | 6 | * published by the Free Software Foundation. |
7 | * | 7 | * |
8 | * This program is distributed in the hope that it would be useful, but | 8 | * This program is distributed in the hope that it would be useful, but |
9 | * WITHOUT ANY WARRANTY; without even the implied warranty of | 9 | * WITHOUT ANY WARRANTY; without even the implied warranty of |
10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. | 10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. |
11 | * | 11 | * |
12 | * Further, this software is distributed without any warranty that it is | 12 | * Further, this software is distributed without any warranty that it is |
13 | * free of the rightful claim of any third person regarding infringement | 13 | * free of the rightful claim of any third person regarding infringement |
14 | * or the like. Any license provided herein, whether implied or | 14 | * or the like. Any license provided herein, whether implied or |
15 | * otherwise, applies only to this software file. Patent licenses, if | 15 | * otherwise, applies only to this software file. Patent licenses, if |
16 | * any, provided herein do not apply to combinations of this program with | 16 | * any, provided herein do not apply to combinations of this program with |
17 | * other software, or any other product whatsoever. | 17 | * other software, or any other product whatsoever. |
18 | * | 18 | * |
19 | * You should have received a copy of the GNU General Public License along | 19 | * You should have received a copy of the GNU General Public License along |
20 | * with this program; if not, write the Free Software Foundation, Inc., 59 | 20 | * with this program; if not, write the Free Software Foundation, Inc., 59 |
21 | * Temple Place - Suite 330, Boston MA 02111-1307, USA. | 21 | * Temple Place - Suite 330, Boston MA 02111-1307, USA. |
22 | * | 22 | * |
23 | * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, | 23 | * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, |
24 | * Mountain View, CA 94043, or: | 24 | * Mountain View, CA 94043, or: |
25 | * | 25 | * |
26 | * http://www.sgi.com | 26 | * http://www.sgi.com |
27 | * | 27 | * |
28 | * For further information regarding this notice, see: | 28 | * For further information regarding this notice, see: |
29 | * | 29 | * |
30 | * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ | 30 | * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ |
31 | */ | 31 | */ |
32 | #ifndef __XFS_LINUX__ | 32 | #ifndef __XFS_LINUX__ |
33 | #define __XFS_LINUX__ | 33 | #define __XFS_LINUX__ |
34 | 34 | ||
35 | #include <linux/types.h> | 35 | #include <linux/types.h> |
36 | #include <linux/config.h> | 36 | #include <linux/config.h> |
37 | 37 | ||
38 | /* | 38 | /* |
39 | * Some types are conditional depending on the target system. | 39 | * Some types are conditional depending on the target system. |
40 | * XFS_BIG_BLKNOS needs block layer disk addresses to be 64 bits. | 40 | * XFS_BIG_BLKNOS needs block layer disk addresses to be 64 bits. |
41 | * XFS_BIG_INUMS needs the VFS inode number to be 64 bits, as well | 41 | * XFS_BIG_INUMS needs the VFS inode number to be 64 bits, as well |
42 | * as requiring XFS_BIG_BLKNOS to be set. | 42 | * as requiring XFS_BIG_BLKNOS to be set. |
43 | */ | 43 | */ |
44 | #if defined(CONFIG_LBD) || (BITS_PER_LONG == 64) | 44 | #if defined(CONFIG_LBD) || (BITS_PER_LONG == 64) |
45 | # define XFS_BIG_BLKNOS 1 | 45 | # define XFS_BIG_BLKNOS 1 |
46 | # if BITS_PER_LONG == 64 | 46 | # if BITS_PER_LONG == 64 |
47 | # define XFS_BIG_INUMS 1 | 47 | # define XFS_BIG_INUMS 1 |
48 | # else | 48 | # else |
49 | # define XFS_BIG_INUMS 0 | 49 | # define XFS_BIG_INUMS 0 |
50 | # endif | 50 | # endif |
51 | #else | 51 | #else |
52 | # define XFS_BIG_BLKNOS 0 | 52 | # define XFS_BIG_BLKNOS 0 |
53 | # define XFS_BIG_INUMS 0 | 53 | # define XFS_BIG_INUMS 0 |
54 | #endif | 54 | #endif |
55 | 55 | ||
56 | #include <xfs_types.h> | 56 | #include <xfs_types.h> |
57 | #include <xfs_arch.h> | 57 | #include <xfs_arch.h> |
58 | 58 | ||
59 | #include <kmem.h> | 59 | #include <kmem.h> |
60 | #include <mrlock.h> | 60 | #include <mrlock.h> |
61 | #include <spin.h> | 61 | #include <spin.h> |
62 | #include <sv.h> | 62 | #include <sv.h> |
63 | #include <mutex.h> | 63 | #include <mutex.h> |
64 | #include <sema.h> | 64 | #include <sema.h> |
65 | #include <time.h> | 65 | #include <time.h> |
66 | 66 | ||
67 | #include <support/qsort.h> | 67 | #include <support/qsort.h> |
68 | #include <support/ktrace.h> | 68 | #include <support/ktrace.h> |
69 | #include <support/debug.h> | 69 | #include <support/debug.h> |
70 | #include <support/move.h> | 70 | #include <support/move.h> |
71 | #include <support/uuid.h> | 71 | #include <support/uuid.h> |
72 | 72 | ||
73 | #include <linux/mm.h> | 73 | #include <linux/mm.h> |
74 | #include <linux/kernel.h> | 74 | #include <linux/kernel.h> |
75 | #include <linux/blkdev.h> | 75 | #include <linux/blkdev.h> |
76 | #include <linux/slab.h> | 76 | #include <linux/slab.h> |
77 | #include <linux/module.h> | 77 | #include <linux/module.h> |
78 | #include <linux/file.h> | 78 | #include <linux/file.h> |
79 | #include <linux/swap.h> | 79 | #include <linux/swap.h> |
80 | #include <linux/errno.h> | 80 | #include <linux/errno.h> |
81 | #include <linux/sched.h> | 81 | #include <linux/sched.h> |
82 | #include <linux/bitops.h> | 82 | #include <linux/bitops.h> |
83 | #include <linux/major.h> | 83 | #include <linux/major.h> |
84 | #include <linux/pagemap.h> | 84 | #include <linux/pagemap.h> |
85 | #include <linux/vfs.h> | 85 | #include <linux/vfs.h> |
86 | #include <linux/seq_file.h> | 86 | #include <linux/seq_file.h> |
87 | #include <linux/init.h> | 87 | #include <linux/init.h> |
88 | #include <linux/list.h> | 88 | #include <linux/list.h> |
89 | #include <linux/proc_fs.h> | 89 | #include <linux/proc_fs.h> |
90 | #include <linux/version.h> | 90 | #include <linux/version.h> |
91 | #include <linux/sort.h> | 91 | #include <linux/sort.h> |
92 | 92 | ||
93 | #include <asm/page.h> | 93 | #include <asm/page.h> |
94 | #include <asm/div64.h> | 94 | #include <asm/div64.h> |
95 | #include <asm/param.h> | 95 | #include <asm/param.h> |
96 | #include <asm/uaccess.h> | 96 | #include <asm/uaccess.h> |
97 | #include <asm/byteorder.h> | 97 | #include <asm/byteorder.h> |
98 | #include <asm/unaligned.h> | 98 | #include <asm/unaligned.h> |
99 | 99 | ||
100 | #include <xfs_behavior.h> | 100 | #include <xfs_behavior.h> |
101 | #include <xfs_vfs.h> | 101 | #include <xfs_vfs.h> |
102 | #include <xfs_cred.h> | 102 | #include <xfs_cred.h> |
103 | #include <xfs_vnode.h> | 103 | #include <xfs_vnode.h> |
104 | #include <xfs_stats.h> | 104 | #include <xfs_stats.h> |
105 | #include <xfs_sysctl.h> | 105 | #include <xfs_sysctl.h> |
106 | #include <xfs_iops.h> | 106 | #include <xfs_iops.h> |
107 | #include <xfs_super.h> | 107 | #include <xfs_super.h> |
108 | #include <xfs_globals.h> | 108 | #include <xfs_globals.h> |
109 | #include <xfs_fs_subr.h> | 109 | #include <xfs_fs_subr.h> |
110 | #include <xfs_lrw.h> | 110 | #include <xfs_lrw.h> |
111 | #include <xfs_buf.h> | 111 | #include <xfs_buf.h> |
112 | 112 | ||
113 | /* | 113 | /* |
114 | * Feature macros (disable/enable) | 114 | * Feature macros (disable/enable) |
115 | */ | 115 | */ |
116 | #undef HAVE_REFCACHE /* reference cache not needed for NFS in 2.6 */ | 116 | #undef HAVE_REFCACHE /* reference cache not needed for NFS in 2.6 */ |
117 | #define HAVE_SENDFILE /* sendfile(2) exists in 2.6, but not in 2.4 */ | 117 | #define HAVE_SENDFILE /* sendfile(2) exists in 2.6, but not in 2.4 */ |
118 | 118 | ||
119 | /* | 119 | /* |
120 | * State flag for unwritten extent buffers. | 120 | * State flag for unwritten extent buffers. |
121 | * | 121 | * |
122 | * We need to be able to distinguish between these and delayed | 122 | * We need to be able to distinguish between these and delayed |
123 | * allocate buffers within XFS. The generic IO path code does | 123 | * allocate buffers within XFS. The generic IO path code does |
124 | * not need to distinguish - we use the BH_Delay flag for both | 124 | * not need to distinguish - we use the BH_Delay flag for both |
125 | * delalloc and these ondisk-uninitialised buffers. | 125 | * delalloc and these ondisk-uninitialised buffers. |
126 | */ | 126 | */ |
127 | BUFFER_FNS(PrivateStart, unwritten); | 127 | BUFFER_FNS(PrivateStart, unwritten); |
128 | static inline void set_buffer_unwritten_io(struct buffer_head *bh) | 128 | static inline void set_buffer_unwritten_io(struct buffer_head *bh) |
129 | { | 129 | { |
130 | bh->b_end_io = linvfs_unwritten_done; | 130 | bh->b_end_io = linvfs_unwritten_done; |
131 | } | 131 | } |
132 | 132 | ||
133 | #define restricted_chown xfs_params.restrict_chown.val | 133 | #define restricted_chown xfs_params.restrict_chown.val |
134 | #define irix_sgid_inherit xfs_params.sgid_inherit.val | 134 | #define irix_sgid_inherit xfs_params.sgid_inherit.val |
135 | #define irix_symlink_mode xfs_params.symlink_mode.val | 135 | #define irix_symlink_mode xfs_params.symlink_mode.val |
136 | #define xfs_panic_mask xfs_params.panic_mask.val | 136 | #define xfs_panic_mask xfs_params.panic_mask.val |
137 | #define xfs_error_level xfs_params.error_level.val | 137 | #define xfs_error_level xfs_params.error_level.val |
138 | #define xfs_syncd_centisecs xfs_params.syncd_timer.val | 138 | #define xfs_syncd_centisecs xfs_params.syncd_timer.val |
139 | #define xfs_stats_clear xfs_params.stats_clear.val | 139 | #define xfs_stats_clear xfs_params.stats_clear.val |
140 | #define xfs_inherit_sync xfs_params.inherit_sync.val | 140 | #define xfs_inherit_sync xfs_params.inherit_sync.val |
141 | #define xfs_inherit_nodump xfs_params.inherit_nodump.val | 141 | #define xfs_inherit_nodump xfs_params.inherit_nodump.val |
142 | #define xfs_inherit_noatime xfs_params.inherit_noatim.val | 142 | #define xfs_inherit_noatime xfs_params.inherit_noatim.val |
143 | #define xfs_buf_timer_centisecs xfs_params.xfs_buf_timer.val | 143 | #define xfs_buf_timer_centisecs xfs_params.xfs_buf_timer.val |
144 | #define xfs_buf_age_centisecs xfs_params.xfs_buf_age.val | 144 | #define xfs_buf_age_centisecs xfs_params.xfs_buf_age.val |
145 | #define xfs_inherit_nosymlinks xfs_params.inherit_nosym.val | 145 | #define xfs_inherit_nosymlinks xfs_params.inherit_nosym.val |
146 | #define xfs_rotorstep xfs_params.rotorstep.val | 146 | #define xfs_rotorstep xfs_params.rotorstep.val |
147 | 147 | ||
148 | #ifndef __smp_processor_id | 148 | #ifndef raw_smp_processor_id |
149 | #define __smp_processor_id() smp_processor_id() | 149 | #define raw_smp_processor_id() smp_processor_id() |
150 | #endif | 150 | #endif |
151 | #define current_cpu() __smp_processor_id() | 151 | #define current_cpu() raw_smp_processor_id() |
152 | #define current_pid() (current->pid) | 152 | #define current_pid() (current->pid) |
153 | #define current_fsuid(cred) (current->fsuid) | 153 | #define current_fsuid(cred) (current->fsuid) |
154 | #define current_fsgid(cred) (current->fsgid) | 154 | #define current_fsgid(cred) (current->fsgid) |
155 | 155 | ||
156 | #define NBPP PAGE_SIZE | 156 | #define NBPP PAGE_SIZE |
157 | #define DPPSHFT (PAGE_SHIFT - 9) | 157 | #define DPPSHFT (PAGE_SHIFT - 9) |
158 | #define NDPP (1 << (PAGE_SHIFT - 9)) | 158 | #define NDPP (1 << (PAGE_SHIFT - 9)) |
159 | #define dtop(DD) (((DD) + NDPP - 1) >> DPPSHFT) | 159 | #define dtop(DD) (((DD) + NDPP - 1) >> DPPSHFT) |
160 | #define dtopt(DD) ((DD) >> DPPSHFT) | 160 | #define dtopt(DD) ((DD) >> DPPSHFT) |
161 | #define dpoff(DD) ((DD) & (NDPP-1)) | 161 | #define dpoff(DD) ((DD) & (NDPP-1)) |
162 | 162 | ||
163 | #define NBBY 8 /* number of bits per byte */ | 163 | #define NBBY 8 /* number of bits per byte */ |
164 | #define NBPC PAGE_SIZE /* Number of bytes per click */ | 164 | #define NBPC PAGE_SIZE /* Number of bytes per click */ |
165 | #define BPCSHIFT PAGE_SHIFT /* LOG2(NBPC) if exact */ | 165 | #define BPCSHIFT PAGE_SHIFT /* LOG2(NBPC) if exact */ |
166 | 166 | ||
167 | /* | 167 | /* |
168 | * Size of block device i/o is parameterized here. | 168 | * Size of block device i/o is parameterized here. |
169 | * Currently the system supports page-sized i/o. | 169 | * Currently the system supports page-sized i/o. |
170 | */ | 170 | */ |
171 | #define BLKDEV_IOSHIFT BPCSHIFT | 171 | #define BLKDEV_IOSHIFT BPCSHIFT |
172 | #define BLKDEV_IOSIZE (1<<BLKDEV_IOSHIFT) | 172 | #define BLKDEV_IOSIZE (1<<BLKDEV_IOSHIFT) |
173 | /* number of BB's per block device block */ | 173 | /* number of BB's per block device block */ |
174 | #define BLKDEV_BB BTOBB(BLKDEV_IOSIZE) | 174 | #define BLKDEV_BB BTOBB(BLKDEV_IOSIZE) |
175 | 175 | ||
176 | /* bytes to clicks */ | 176 | /* bytes to clicks */ |
177 | #define btoc(x) (((__psunsigned_t)(x)+(NBPC-1))>>BPCSHIFT) | 177 | #define btoc(x) (((__psunsigned_t)(x)+(NBPC-1))>>BPCSHIFT) |
178 | #define btoct(x) ((__psunsigned_t)(x)>>BPCSHIFT) | 178 | #define btoct(x) ((__psunsigned_t)(x)>>BPCSHIFT) |
179 | #define btoc64(x) (((__uint64_t)(x)+(NBPC-1))>>BPCSHIFT) | 179 | #define btoc64(x) (((__uint64_t)(x)+(NBPC-1))>>BPCSHIFT) |
180 | #define btoct64(x) ((__uint64_t)(x)>>BPCSHIFT) | 180 | #define btoct64(x) ((__uint64_t)(x)>>BPCSHIFT) |
181 | #define io_btoc(x) (((__psunsigned_t)(x)+(IO_NBPC-1))>>IO_BPCSHIFT) | 181 | #define io_btoc(x) (((__psunsigned_t)(x)+(IO_NBPC-1))>>IO_BPCSHIFT) |
182 | #define io_btoct(x) ((__psunsigned_t)(x)>>IO_BPCSHIFT) | 182 | #define io_btoct(x) ((__psunsigned_t)(x)>>IO_BPCSHIFT) |
183 | 183 | ||
184 | /* off_t bytes to clicks */ | 184 | /* off_t bytes to clicks */ |
185 | #define offtoc(x) (((__uint64_t)(x)+(NBPC-1))>>BPCSHIFT) | 185 | #define offtoc(x) (((__uint64_t)(x)+(NBPC-1))>>BPCSHIFT) |
186 | #define offtoct(x) ((xfs_off_t)(x)>>BPCSHIFT) | 186 | #define offtoct(x) ((xfs_off_t)(x)>>BPCSHIFT) |
187 | 187 | ||
188 | /* clicks to off_t bytes */ | 188 | /* clicks to off_t bytes */ |
189 | #define ctooff(x) ((xfs_off_t)(x)<<BPCSHIFT) | 189 | #define ctooff(x) ((xfs_off_t)(x)<<BPCSHIFT) |
190 | 190 | ||
191 | /* clicks to bytes */ | 191 | /* clicks to bytes */ |
192 | #define ctob(x) ((__psunsigned_t)(x)<<BPCSHIFT) | 192 | #define ctob(x) ((__psunsigned_t)(x)<<BPCSHIFT) |
193 | #define btoct(x) ((__psunsigned_t)(x)>>BPCSHIFT) | 193 | #define btoct(x) ((__psunsigned_t)(x)>>BPCSHIFT) |
194 | #define ctob64(x) ((__uint64_t)(x)<<BPCSHIFT) | 194 | #define ctob64(x) ((__uint64_t)(x)<<BPCSHIFT) |
195 | #define io_ctob(x) ((__psunsigned_t)(x)<<IO_BPCSHIFT) | 195 | #define io_ctob(x) ((__psunsigned_t)(x)<<IO_BPCSHIFT) |
196 | 196 | ||
197 | /* bytes to clicks */ | 197 | /* bytes to clicks */ |
198 | #define btoc(x) (((__psunsigned_t)(x)+(NBPC-1))>>BPCSHIFT) | 198 | #define btoc(x) (((__psunsigned_t)(x)+(NBPC-1))>>BPCSHIFT) |
199 | 199 | ||
200 | #ifndef CELL_CAPABLE | 200 | #ifndef CELL_CAPABLE |
201 | #define FSC_NOTIFY_NAME_CHANGED(vp) | 201 | #define FSC_NOTIFY_NAME_CHANGED(vp) |
202 | #endif | 202 | #endif |
203 | 203 | ||
204 | #ifndef ENOATTR | 204 | #ifndef ENOATTR |
205 | #define ENOATTR ENODATA /* Attribute not found */ | 205 | #define ENOATTR ENODATA /* Attribute not found */ |
206 | #endif | 206 | #endif |
207 | 207 | ||
208 | /* Note: EWRONGFS never visible outside the kernel */ | 208 | /* Note: EWRONGFS never visible outside the kernel */ |
209 | #define EWRONGFS EINVAL /* Mount with wrong filesystem type */ | 209 | #define EWRONGFS EINVAL /* Mount with wrong filesystem type */ |
210 | 210 | ||
211 | /* | 211 | /* |
212 | * XXX EFSCORRUPTED needs a real value in errno.h. asm-i386/errno.h won't | 212 | * XXX EFSCORRUPTED needs a real value in errno.h. asm-i386/errno.h won't |
213 | * return codes out of its known range in errno. | 213 | * return codes out of its known range in errno. |
214 | * XXX Also note: needs to be < 1000 and fairly unique on Linux (mustn't | 214 | * XXX Also note: needs to be < 1000 and fairly unique on Linux (mustn't |
215 | * conflict with any code we use already or any code a driver may use) | 215 | * conflict with any code we use already or any code a driver may use) |
216 | * XXX Some options (currently we do #2): | 216 | * XXX Some options (currently we do #2): |
217 | * 1/ New error code ["Filesystem is corrupted", _after_ glibc updated] | 217 | * 1/ New error code ["Filesystem is corrupted", _after_ glibc updated] |
218 | * 2/ 990 ["Unknown error 990"] | 218 | * 2/ 990 ["Unknown error 990"] |
219 | * 3/ EUCLEAN ["Structure needs cleaning"] | 219 | * 3/ EUCLEAN ["Structure needs cleaning"] |
220 | * 4/ Convert EFSCORRUPTED to EIO [just prior to return into userspace] | 220 | * 4/ Convert EFSCORRUPTED to EIO [just prior to return into userspace] |
221 | */ | 221 | */ |
222 | #define EFSCORRUPTED 990 /* Filesystem is corrupted */ | 222 | #define EFSCORRUPTED 990 /* Filesystem is corrupted */ |
223 | 223 | ||
224 | #define SYNCHRONIZE() barrier() | 224 | #define SYNCHRONIZE() barrier() |
225 | #define __return_address __builtin_return_address(0) | 225 | #define __return_address __builtin_return_address(0) |
226 | 226 | ||
227 | /* | 227 | /* |
228 | * IRIX (BSD) quotactl makes use of separate commands for user/group, | 228 | * IRIX (BSD) quotactl makes use of separate commands for user/group, |
229 | * whereas on Linux the syscall encodes this information into the cmd | 229 | * whereas on Linux the syscall encodes this information into the cmd |
230 | * field (see the QCMD macro in quota.h). These macros help keep the | 230 | * field (see the QCMD macro in quota.h). These macros help keep the |
231 | * code portable - they are not visible from the syscall interface. | 231 | * code portable - they are not visible from the syscall interface. |
232 | */ | 232 | */ |
233 | #define Q_XSETGQLIM XQM_CMD(0x8) /* set groups disk limits */ | 233 | #define Q_XSETGQLIM XQM_CMD(0x8) /* set groups disk limits */ |
234 | #define Q_XGETGQUOTA XQM_CMD(0x9) /* get groups disk limits */ | 234 | #define Q_XGETGQUOTA XQM_CMD(0x9) /* get groups disk limits */ |
235 | 235 | ||
236 | /* IRIX uses a dynamic sizing algorithm (ndquot = 200 + numprocs*2) */ | 236 | /* IRIX uses a dynamic sizing algorithm (ndquot = 200 + numprocs*2) */ |
237 | /* we may well need to fine-tune this if it ever becomes an issue. */ | 237 | /* we may well need to fine-tune this if it ever becomes an issue. */ |
238 | #define DQUOT_MAX_HEURISTIC 1024 /* NR_DQUOTS */ | 238 | #define DQUOT_MAX_HEURISTIC 1024 /* NR_DQUOTS */ |
239 | #define ndquot DQUOT_MAX_HEURISTIC | 239 | #define ndquot DQUOT_MAX_HEURISTIC |
240 | 240 | ||
241 | /* IRIX uses the current size of the name cache to guess a good value */ | 241 | /* IRIX uses the current size of the name cache to guess a good value */ |
242 | /* - this isn't the same but is a good enough starting point for now. */ | 242 | /* - this isn't the same but is a good enough starting point for now. */ |
243 | #define DQUOT_HASH_HEURISTIC files_stat.nr_files | 243 | #define DQUOT_HASH_HEURISTIC files_stat.nr_files |
244 | 244 | ||
245 | /* IRIX inodes maintain the project ID also, zero this field on Linux */ | 245 | /* IRIX inodes maintain the project ID also, zero this field on Linux */ |
246 | #define DEFAULT_PROJID 0 | 246 | #define DEFAULT_PROJID 0 |
247 | #define dfltprid DEFAULT_PROJID | 247 | #define dfltprid DEFAULT_PROJID |
248 | 248 | ||
249 | #define MAXPATHLEN 1024 | 249 | #define MAXPATHLEN 1024 |
250 | 250 | ||
251 | #define MIN(a,b) (min(a,b)) | 251 | #define MIN(a,b) (min(a,b)) |
252 | #define MAX(a,b) (max(a,b)) | 252 | #define MAX(a,b) (max(a,b)) |
253 | #define howmany(x, y) (((x)+((y)-1))/(y)) | 253 | #define howmany(x, y) (((x)+((y)-1))/(y)) |
254 | #define roundup(x, y) ((((x)+((y)-1))/(y))*(y)) | 254 | #define roundup(x, y) ((((x)+((y)-1))/(y))*(y)) |
255 | 255 | ||
256 | #define xfs_stack_trace() dump_stack() | 256 | #define xfs_stack_trace() dump_stack() |
257 | 257 | ||
258 | #define xfs_itruncate_data(ip, off) \ | 258 | #define xfs_itruncate_data(ip, off) \ |
259 | (-vmtruncate(LINVFS_GET_IP(XFS_ITOV(ip)), (off))) | 259 | (-vmtruncate(LINVFS_GET_IP(XFS_ITOV(ip)), (off))) |
260 | 260 | ||
261 | 261 | ||
262 | /* Move the kernel do_div definition off to one side */ | 262 | /* Move the kernel do_div definition off to one side */ |
263 | 263 | ||
264 | #if defined __i386__ | 264 | #if defined __i386__ |
265 | /* For ia32 we need to pull some tricks to get past various versions | 265 | /* For ia32 we need to pull some tricks to get past various versions |
266 | * of the compiler which do not like us using do_div in the middle | 266 | * of the compiler which do not like us using do_div in the middle |
267 | * of large functions. | 267 | * of large functions. |
268 | */ | 268 | */ |
269 | static inline __u32 xfs_do_div(void *a, __u32 b, int n) | 269 | static inline __u32 xfs_do_div(void *a, __u32 b, int n) |
270 | { | 270 | { |
271 | __u32 mod; | 271 | __u32 mod; |
272 | 272 | ||
273 | switch (n) { | 273 | switch (n) { |
274 | case 4: | 274 | case 4: |
275 | mod = *(__u32 *)a % b; | 275 | mod = *(__u32 *)a % b; |
276 | *(__u32 *)a = *(__u32 *)a / b; | 276 | *(__u32 *)a = *(__u32 *)a / b; |
277 | return mod; | 277 | return mod; |
278 | case 8: | 278 | case 8: |
279 | { | 279 | { |
280 | unsigned long __upper, __low, __high, __mod; | 280 | unsigned long __upper, __low, __high, __mod; |
281 | __u64 c = *(__u64 *)a; | 281 | __u64 c = *(__u64 *)a; |
282 | __upper = __high = c >> 32; | 282 | __upper = __high = c >> 32; |
283 | __low = c; | 283 | __low = c; |
284 | if (__high) { | 284 | if (__high) { |
285 | __upper = __high % (b); | 285 | __upper = __high % (b); |
286 | __high = __high / (b); | 286 | __high = __high / (b); |
287 | } | 287 | } |
288 | asm("divl %2":"=a" (__low), "=d" (__mod):"rm" (b), "0" (__low), "1" (__upper)); | 288 | asm("divl %2":"=a" (__low), "=d" (__mod):"rm" (b), "0" (__low), "1" (__upper)); |
289 | asm("":"=A" (c):"a" (__low),"d" (__high)); | 289 | asm("":"=A" (c):"a" (__low),"d" (__high)); |
290 | *(__u64 *)a = c; | 290 | *(__u64 *)a = c; |
291 | return __mod; | 291 | return __mod; |
292 | } | 292 | } |
293 | } | 293 | } |
294 | 294 | ||
295 | /* NOTREACHED */ | 295 | /* NOTREACHED */ |
296 | return 0; | 296 | return 0; |
297 | } | 297 | } |
298 | 298 | ||
299 | /* Side effect free 64 bit mod operation */ | 299 | /* Side effect free 64 bit mod operation */ |
300 | static inline __u32 xfs_do_mod(void *a, __u32 b, int n) | 300 | static inline __u32 xfs_do_mod(void *a, __u32 b, int n) |
301 | { | 301 | { |
302 | switch (n) { | 302 | switch (n) { |
303 | case 4: | 303 | case 4: |
304 | return *(__u32 *)a % b; | 304 | return *(__u32 *)a % b; |
305 | case 8: | 305 | case 8: |
306 | { | 306 | { |
307 | unsigned long __upper, __low, __high, __mod; | 307 | unsigned long __upper, __low, __high, __mod; |
308 | __u64 c = *(__u64 *)a; | 308 | __u64 c = *(__u64 *)a; |
309 | __upper = __high = c >> 32; | 309 | __upper = __high = c >> 32; |
310 | __low = c; | 310 | __low = c; |
311 | if (__high) { | 311 | if (__high) { |
312 | __upper = __high % (b); | 312 | __upper = __high % (b); |
313 | __high = __high / (b); | 313 | __high = __high / (b); |
314 | } | 314 | } |
315 | asm("divl %2":"=a" (__low), "=d" (__mod):"rm" (b), "0" (__low), "1" (__upper)); | 315 | asm("divl %2":"=a" (__low), "=d" (__mod):"rm" (b), "0" (__low), "1" (__upper)); |
316 | asm("":"=A" (c):"a" (__low),"d" (__high)); | 316 | asm("":"=A" (c):"a" (__low),"d" (__high)); |
317 | return __mod; | 317 | return __mod; |
318 | } | 318 | } |
319 | } | 319 | } |
320 | 320 | ||
321 | /* NOTREACHED */ | 321 | /* NOTREACHED */ |
322 | return 0; | 322 | return 0; |
323 | } | 323 | } |
324 | #else | 324 | #else |
325 | static inline __u32 xfs_do_div(void *a, __u32 b, int n) | 325 | static inline __u32 xfs_do_div(void *a, __u32 b, int n) |
326 | { | 326 | { |
327 | __u32 mod; | 327 | __u32 mod; |
328 | 328 | ||
329 | switch (n) { | 329 | switch (n) { |
330 | case 4: | 330 | case 4: |
331 | mod = *(__u32 *)a % b; | 331 | mod = *(__u32 *)a % b; |
332 | *(__u32 *)a = *(__u32 *)a / b; | 332 | *(__u32 *)a = *(__u32 *)a / b; |
333 | return mod; | 333 | return mod; |
334 | case 8: | 334 | case 8: |
335 | mod = do_div(*(__u64 *)a, b); | 335 | mod = do_div(*(__u64 *)a, b); |
336 | return mod; | 336 | return mod; |
337 | } | 337 | } |
338 | 338 | ||
339 | /* NOTREACHED */ | 339 | /* NOTREACHED */ |
340 | return 0; | 340 | return 0; |
341 | } | 341 | } |
342 | 342 | ||
343 | /* Side effect free 64 bit mod operation */ | 343 | /* Side effect free 64 bit mod operation */ |
344 | static inline __u32 xfs_do_mod(void *a, __u32 b, int n) | 344 | static inline __u32 xfs_do_mod(void *a, __u32 b, int n) |
345 | { | 345 | { |
346 | switch (n) { | 346 | switch (n) { |
347 | case 4: | 347 | case 4: |
348 | return *(__u32 *)a % b; | 348 | return *(__u32 *)a % b; |
349 | case 8: | 349 | case 8: |
350 | { | 350 | { |
351 | __u64 c = *(__u64 *)a; | 351 | __u64 c = *(__u64 *)a; |
352 | return do_div(c, b); | 352 | return do_div(c, b); |
353 | } | 353 | } |
354 | } | 354 | } |
355 | 355 | ||
356 | /* NOTREACHED */ | 356 | /* NOTREACHED */ |
357 | return 0; | 357 | return 0; |
358 | } | 358 | } |
359 | #endif | 359 | #endif |
360 | 360 | ||
361 | #undef do_div | 361 | #undef do_div |
362 | #define do_div(a, b) xfs_do_div(&(a), (b), sizeof(a)) | 362 | #define do_div(a, b) xfs_do_div(&(a), (b), sizeof(a)) |
363 | #define do_mod(a, b) xfs_do_mod(&(a), (b), sizeof(a)) | 363 | #define do_mod(a, b) xfs_do_mod(&(a), (b), sizeof(a)) |
364 | 364 | ||
365 | static inline __uint64_t roundup_64(__uint64_t x, __uint32_t y) | 365 | static inline __uint64_t roundup_64(__uint64_t x, __uint32_t y) |
366 | { | 366 | { |
367 | x += y - 1; | 367 | x += y - 1; |
368 | do_div(x, y); | 368 | do_div(x, y); |
369 | return(x * y); | 369 | return(x * y); |
370 | } | 370 | } |
371 | 371 | ||
372 | #define qsort(a, n, s, cmp) sort(a, n, s, cmp, NULL) | 372 | #define qsort(a, n, s, cmp) sort(a, n, s, cmp, NULL) |
373 | 373 | ||
374 | #endif /* __XFS_LINUX__ */ | 374 | #endif /* __XFS_LINUX__ */ |
375 | 375 |
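The do_div()/do_mod() wrappers above take the operand's address together with sizeof(), so a single macro serves both 32-bit and 64-bit lvalues: do_div() divides in place and returns the remainder, do_mod() returns the remainder without touching the operand, and roundup_64() builds on do_div() to round up to a multiple of y. Below is a minimal userspace sketch of the generic (non-i386) branch; sketch_do_div and do_div_sketch are illustrative names, not part of this patch, and the program only demonstrates the in-place quotient/remainder behaviour with concrete numbers.

/*
 * Userspace sketch of the generic do_div()/do_mod() behaviour shown above.
 * sketch_do_div() mirrors xfs_do_div(): it picks 32- or 64-bit math from
 * the operand size, divides in place and returns the remainder.
 */
#include <stdio.h>
#include <stdint.h>

static inline uint32_t sketch_do_div(void *a, uint32_t b, int n)
{
	uint32_t mod;

	switch (n) {
	case 4:					/* 32-bit operand */
		mod = *(uint32_t *)a % b;
		*(uint32_t *)a = *(uint32_t *)a / b;
		return mod;
	case 8:					/* 64-bit operand */
		mod = (uint32_t)(*(uint64_t *)a % b);
		*(uint64_t *)a = *(uint64_t *)a / b;
		return mod;
	}
	return 0;
}

#define do_div_sketch(a, b)	sketch_do_div(&(a), (b), sizeof(a))

int main(void)
{
	uint64_t x = 1000003;			/* arbitrary test value */
	uint32_t rem = do_div_sketch(x, 512);

	/* 1000003 == 1953 * 512 + 67, so x is now 1953 and rem is 67 */
	printf("quotient=%llu remainder=%u\n", (unsigned long long)x, rem);
	return 0;
}
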
include/asm-alpha/smp.h
1 | #ifndef __ASM_SMP_H | 1 | #ifndef __ASM_SMP_H |
2 | #define __ASM_SMP_H | 2 | #define __ASM_SMP_H |
3 | 3 | ||
4 | #include <linux/config.h> | 4 | #include <linux/config.h> |
5 | #include <linux/threads.h> | 5 | #include <linux/threads.h> |
6 | #include <linux/cpumask.h> | 6 | #include <linux/cpumask.h> |
7 | #include <linux/bitops.h> | 7 | #include <linux/bitops.h> |
8 | #include <asm/pal.h> | 8 | #include <asm/pal.h> |
9 | 9 | ||
10 | /* HACK: Cabrio WHAMI return value is bogus if more than 8 bits used.. :-( */ | 10 | /* HACK: Cabrio WHAMI return value is bogus if more than 8 bits used.. :-( */ |
11 | 11 | ||
12 | static __inline__ unsigned char | 12 | static __inline__ unsigned char |
13 | __hard_smp_processor_id(void) | 13 | __hard_smp_processor_id(void) |
14 | { | 14 | { |
15 | register unsigned char __r0 __asm__("$0"); | 15 | register unsigned char __r0 __asm__("$0"); |
16 | __asm__ __volatile__( | 16 | __asm__ __volatile__( |
17 | "call_pal %1 #whami" | 17 | "call_pal %1 #whami" |
18 | : "=r"(__r0) | 18 | : "=r"(__r0) |
19 | :"i" (PAL_whami) | 19 | :"i" (PAL_whami) |
20 | : "$1", "$22", "$23", "$24", "$25"); | 20 | : "$1", "$22", "$23", "$24", "$25"); |
21 | return __r0; | 21 | return __r0; |
22 | } | 22 | } |
23 | 23 | ||
24 | #ifdef CONFIG_SMP | 24 | #ifdef CONFIG_SMP |
25 | 25 | ||
26 | #include <asm/irq.h> | 26 | #include <asm/irq.h> |
27 | 27 | ||
28 | struct cpuinfo_alpha { | 28 | struct cpuinfo_alpha { |
29 | unsigned long loops_per_jiffy; | 29 | unsigned long loops_per_jiffy; |
30 | unsigned long last_asn; | 30 | unsigned long last_asn; |
31 | int need_new_asn; | 31 | int need_new_asn; |
32 | int asn_lock; | 32 | int asn_lock; |
33 | unsigned long ipi_count; | 33 | unsigned long ipi_count; |
34 | unsigned long prof_multiplier; | 34 | unsigned long prof_multiplier; |
35 | unsigned long prof_counter; | 35 | unsigned long prof_counter; |
36 | unsigned char mcheck_expected; | 36 | unsigned char mcheck_expected; |
37 | unsigned char mcheck_taken; | 37 | unsigned char mcheck_taken; |
38 | unsigned char mcheck_extra; | 38 | unsigned char mcheck_extra; |
39 | } __attribute__((aligned(64))); | 39 | } __attribute__((aligned(64))); |
40 | 40 | ||
41 | extern struct cpuinfo_alpha cpu_data[NR_CPUS]; | 41 | extern struct cpuinfo_alpha cpu_data[NR_CPUS]; |
42 | 42 | ||
43 | #define PROC_CHANGE_PENALTY 20 | 43 | #define PROC_CHANGE_PENALTY 20 |
44 | 44 | ||
45 | #define hard_smp_processor_id() __hard_smp_processor_id() | 45 | #define hard_smp_processor_id() __hard_smp_processor_id() |
46 | #define smp_processor_id() (current_thread_info()->cpu) | 46 | #define raw_smp_processor_id() (current_thread_info()->cpu) |
47 | 47 | ||
48 | extern cpumask_t cpu_present_mask; | 48 | extern cpumask_t cpu_present_mask; |
49 | extern cpumask_t cpu_online_map; | 49 | extern cpumask_t cpu_online_map; |
50 | extern int smp_num_cpus; | 50 | extern int smp_num_cpus; |
51 | #define cpu_possible_map cpu_present_mask | 51 | #define cpu_possible_map cpu_present_mask |
52 | 52 | ||
53 | int smp_call_function_on_cpu(void (*func) (void *info), void *info,int retry, int wait, cpumask_t cpu); | 53 | int smp_call_function_on_cpu(void (*func) (void *info), void *info,int retry, int wait, cpumask_t cpu); |
54 | 54 | ||
55 | #else /* CONFIG_SMP */ | 55 | #else /* CONFIG_SMP */ |
56 | 56 | ||
57 | #define smp_call_function_on_cpu(func,info,retry,wait,cpu) ({ 0; }) | 57 | #define smp_call_function_on_cpu(func,info,retry,wait,cpu) ({ 0; }) |
58 | 58 | ||
59 | #endif /* CONFIG_SMP */ | 59 | #endif /* CONFIG_SMP */ |
60 | 60 | ||
61 | #define NO_PROC_ID (-1) | 61 | #define NO_PROC_ID (-1) |
62 | 62 | ||
63 | #endif | 63 | #endif |
64 | 64 |
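With every SMP architecture now defining raw_smp_processor_id() the way the alpha header above does, a generic header can choose between the raw accessor and a debug wrapper in a single place. The fragment below is only a rough sketch of that dispatch, not the literal include/linux/smp.h text from this patch:

/*
 * Illustrative sketch: route smp_processor_id() through a debug checker
 * when CONFIG_DEBUG_PREEMPT is set, otherwise fall back to the raw
 * per-architecture definition.
 */
#ifdef CONFIG_DEBUG_PREEMPT
extern unsigned int debug_smp_processor_id(void);
# define smp_processor_id() debug_smp_processor_id()
#else
# define smp_processor_id() raw_smp_processor_id()
#endif
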
include/asm-arm/smp.h
1 | /* | 1 | /* |
2 | * linux/include/asm-arm/smp.h | 2 | * linux/include/asm-arm/smp.h |
3 | * | 3 | * |
4 | * Copyright (C) 2004-2005 ARM Ltd. | 4 | * Copyright (C) 2004-2005 ARM Ltd. |
5 | * | 5 | * |
6 | * This program is free software; you can redistribute it and/or modify | 6 | * This program is free software; you can redistribute it and/or modify |
7 | * it under the terms of the GNU General Public License version 2 as | 7 | * it under the terms of the GNU General Public License version 2 as |
8 | * published by the Free Software Foundation. | 8 | * published by the Free Software Foundation. |
9 | */ | 9 | */ |
10 | #ifndef __ASM_ARM_SMP_H | 10 | #ifndef __ASM_ARM_SMP_H |
11 | #define __ASM_ARM_SMP_H | 11 | #define __ASM_ARM_SMP_H |
12 | 12 | ||
13 | #include <linux/config.h> | 13 | #include <linux/config.h> |
14 | #include <linux/threads.h> | 14 | #include <linux/threads.h> |
15 | #include <linux/cpumask.h> | 15 | #include <linux/cpumask.h> |
16 | #include <linux/thread_info.h> | 16 | #include <linux/thread_info.h> |
17 | 17 | ||
18 | #include <asm/arch/smp.h> | 18 | #include <asm/arch/smp.h> |
19 | 19 | ||
20 | #ifndef CONFIG_SMP | 20 | #ifndef CONFIG_SMP |
21 | # error "<asm-arm/smp.h> included in non-SMP build" | 21 | # error "<asm-arm/smp.h> included in non-SMP build" |
22 | #endif | 22 | #endif |
23 | 23 | ||
24 | #define smp_processor_id() (current_thread_info()->cpu) | 24 | #define raw_smp_processor_id() (current_thread_info()->cpu) |
25 | 25 | ||
26 | extern cpumask_t cpu_present_mask; | 26 | extern cpumask_t cpu_present_mask; |
27 | #define cpu_possible_map cpu_present_mask | 27 | #define cpu_possible_map cpu_present_mask |
28 | 28 | ||
29 | /* | 29 | /* |
30 | * at the moment, there's not a big penalty for changing CPUs | 30 | * at the moment, there's not a big penalty for changing CPUs |
31 | * (the >big< penalty is running SMP in the first place) | 31 | * (the >big< penalty is running SMP in the first place) |
32 | */ | 32 | */ |
33 | #define PROC_CHANGE_PENALTY 15 | 33 | #define PROC_CHANGE_PENALTY 15 |
34 | 34 | ||
35 | struct seq_file; | 35 | struct seq_file; |
36 | 36 | ||
37 | /* | 37 | /* |
38 | * generate IPI list text | 38 | * generate IPI list text |
39 | */ | 39 | */ |
40 | extern void show_ipi_list(struct seq_file *p); | 40 | extern void show_ipi_list(struct seq_file *p); |
41 | 41 | ||
42 | /* | 42 | /* |
43 | * Move global data into per-processor storage. | 43 | * Move global data into per-processor storage. |
44 | */ | 44 | */ |
45 | extern void smp_store_cpu_info(unsigned int cpuid); | 45 | extern void smp_store_cpu_info(unsigned int cpuid); |
46 | 46 | ||
47 | /* | 47 | /* |
48 | * Raise an IPI cross call on CPUs in callmap. | 48 | * Raise an IPI cross call on CPUs in callmap. |
49 | */ | 49 | */ |
50 | extern void smp_cross_call(cpumask_t callmap); | 50 | extern void smp_cross_call(cpumask_t callmap); |
51 | 51 | ||
52 | /* | 52 | /* |
53 | * Boot a secondary CPU, and assign it the specified idle task. | 53 | * Boot a secondary CPU, and assign it the specified idle task. |
54 | * This also gives us the initial stack to use for this CPU. | 54 | * This also gives us the initial stack to use for this CPU. |
55 | */ | 55 | */ |
56 | extern int boot_secondary(unsigned int cpu, struct task_struct *); | 56 | extern int boot_secondary(unsigned int cpu, struct task_struct *); |
57 | 57 | ||
58 | /* | 58 | /* |
59 | * Perform platform specific initialisation of the specified CPU. | 59 | * Perform platform specific initialisation of the specified CPU. |
60 | */ | 60 | */ |
61 | extern void platform_secondary_init(unsigned int cpu); | 61 | extern void platform_secondary_init(unsigned int cpu); |
62 | 62 | ||
63 | /* | 63 | /* |
64 | * Initial data for bringing up a secondary CPU. | 64 | * Initial data for bringing up a secondary CPU. |
65 | */ | 65 | */ |
66 | struct secondary_data { | 66 | struct secondary_data { |
67 | unsigned long pgdir; | 67 | unsigned long pgdir; |
68 | void *stack; | 68 | void *stack; |
69 | }; | 69 | }; |
70 | extern struct secondary_data secondary_data; | 70 | extern struct secondary_data secondary_data; |
71 | 71 | ||
72 | #endif /* ifndef __ASM_ARM_SMP_H */ | 72 | #endif /* ifndef __ASM_ARM_SMP_H */ |
73 | 73 |
include/asm-i386/smp.h
1 | #ifndef __ASM_SMP_H | 1 | #ifndef __ASM_SMP_H |
2 | #define __ASM_SMP_H | 2 | #define __ASM_SMP_H |
3 | 3 | ||
4 | /* | 4 | /* |
5 | * We need the APIC definitions automatically as part of 'smp.h' | 5 | * We need the APIC definitions automatically as part of 'smp.h' |
6 | */ | 6 | */ |
7 | #ifndef __ASSEMBLY__ | 7 | #ifndef __ASSEMBLY__ |
8 | #include <linux/config.h> | 8 | #include <linux/config.h> |
9 | #include <linux/kernel.h> | 9 | #include <linux/kernel.h> |
10 | #include <linux/threads.h> | 10 | #include <linux/threads.h> |
11 | #include <linux/cpumask.h> | 11 | #include <linux/cpumask.h> |
12 | #endif | 12 | #endif |
13 | 13 | ||
14 | #ifdef CONFIG_X86_LOCAL_APIC | 14 | #ifdef CONFIG_X86_LOCAL_APIC |
15 | #ifndef __ASSEMBLY__ | 15 | #ifndef __ASSEMBLY__ |
16 | #include <asm/fixmap.h> | 16 | #include <asm/fixmap.h> |
17 | #include <asm/bitops.h> | 17 | #include <asm/bitops.h> |
18 | #include <asm/mpspec.h> | 18 | #include <asm/mpspec.h> |
19 | #ifdef CONFIG_X86_IO_APIC | 19 | #ifdef CONFIG_X86_IO_APIC |
20 | #include <asm/io_apic.h> | 20 | #include <asm/io_apic.h> |
21 | #endif | 21 | #endif |
22 | #include <asm/apic.h> | 22 | #include <asm/apic.h> |
23 | #endif | 23 | #endif |
24 | #endif | 24 | #endif |
25 | 25 | ||
26 | #define BAD_APICID 0xFFu | 26 | #define BAD_APICID 0xFFu |
27 | #ifdef CONFIG_SMP | 27 | #ifdef CONFIG_SMP |
28 | #ifndef __ASSEMBLY__ | 28 | #ifndef __ASSEMBLY__ |
29 | 29 | ||
30 | /* | 30 | /* |
31 | * Private routines/data | 31 | * Private routines/data |
32 | */ | 32 | */ |
33 | 33 | ||
34 | extern void smp_alloc_memory(void); | 34 | extern void smp_alloc_memory(void); |
35 | extern int pic_mode; | 35 | extern int pic_mode; |
36 | extern int smp_num_siblings; | 36 | extern int smp_num_siblings; |
37 | extern cpumask_t cpu_sibling_map[]; | 37 | extern cpumask_t cpu_sibling_map[]; |
38 | extern cpumask_t cpu_core_map[]; | 38 | extern cpumask_t cpu_core_map[]; |
39 | 39 | ||
40 | extern void smp_flush_tlb(void); | 40 | extern void smp_flush_tlb(void); |
41 | extern void smp_message_irq(int cpl, void *dev_id, struct pt_regs *regs); | 41 | extern void smp_message_irq(int cpl, void *dev_id, struct pt_regs *regs); |
42 | extern void smp_invalidate_rcv(void); /* Process an NMI */ | 42 | extern void smp_invalidate_rcv(void); /* Process an NMI */ |
43 | extern void (*mtrr_hook) (void); | 43 | extern void (*mtrr_hook) (void); |
44 | extern void zap_low_mappings (void); | 44 | extern void zap_low_mappings (void); |
45 | 45 | ||
46 | #define MAX_APICID 256 | 46 | #define MAX_APICID 256 |
47 | extern u8 x86_cpu_to_apicid[]; | 47 | extern u8 x86_cpu_to_apicid[]; |
48 | 48 | ||
49 | /* | 49 | /* |
50 | * This function is needed by all SMP systems. It must _always_ be valid | 50 | * This function is needed by all SMP systems. It must _always_ be valid |
51 | * from the initial startup. We map APIC_BASE very early in page_setup(), | 51 | * from the initial startup. We map APIC_BASE very early in page_setup(), |
52 | * so this is correct in the x86 case. | 52 | * so this is correct in the x86 case. |
53 | */ | 53 | */ |
54 | #define __smp_processor_id() (current_thread_info()->cpu) | 54 | #define raw_smp_processor_id() (current_thread_info()->cpu) |
55 | 55 | ||
56 | extern cpumask_t cpu_callout_map; | 56 | extern cpumask_t cpu_callout_map; |
57 | extern cpumask_t cpu_callin_map; | 57 | extern cpumask_t cpu_callin_map; |
58 | #define cpu_possible_map cpu_callout_map | 58 | #define cpu_possible_map cpu_callout_map |
59 | 59 | ||
60 | /* We don't mark CPUs online until __cpu_up(), so we need another measure */ | 60 | /* We don't mark CPUs online until __cpu_up(), so we need another measure */ |
61 | static inline int num_booting_cpus(void) | 61 | static inline int num_booting_cpus(void) |
62 | { | 62 | { |
63 | return cpus_weight(cpu_callout_map); | 63 | return cpus_weight(cpu_callout_map); |
64 | } | 64 | } |
65 | 65 | ||
66 | #ifdef CONFIG_X86_LOCAL_APIC | 66 | #ifdef CONFIG_X86_LOCAL_APIC |
67 | 67 | ||
68 | #ifdef APIC_DEFINITION | 68 | #ifdef APIC_DEFINITION |
69 | extern int hard_smp_processor_id(void); | 69 | extern int hard_smp_processor_id(void); |
70 | #else | 70 | #else |
71 | #include <mach_apicdef.h> | 71 | #include <mach_apicdef.h> |
72 | static inline int hard_smp_processor_id(void) | 72 | static inline int hard_smp_processor_id(void) |
73 | { | 73 | { |
74 | /* we don't want to mark this access volatile - bad code generation */ | 74 | /* we don't want to mark this access volatile - bad code generation */ |
75 | return GET_APIC_ID(*(unsigned long *)(APIC_BASE+APIC_ID)); | 75 | return GET_APIC_ID(*(unsigned long *)(APIC_BASE+APIC_ID)); |
76 | } | 76 | } |
77 | #endif | 77 | #endif |
78 | 78 | ||
79 | static __inline int logical_smp_processor_id(void) | 79 | static __inline int logical_smp_processor_id(void) |
80 | { | 80 | { |
81 | /* we don't want to mark this access volatile - bad code generation */ | 81 | /* we don't want to mark this access volatile - bad code generation */ |
82 | return GET_APIC_LOGICAL_ID(*(unsigned long *)(APIC_BASE+APIC_LDR)); | 82 | return GET_APIC_LOGICAL_ID(*(unsigned long *)(APIC_BASE+APIC_LDR)); |
83 | } | 83 | } |
84 | 84 | ||
85 | #endif | 85 | #endif |
86 | #endif /* !__ASSEMBLY__ */ | 86 | #endif /* !__ASSEMBLY__ */ |
87 | 87 | ||
88 | #define NO_PROC_ID 0xFF /* No processor magic marker */ | 88 | #define NO_PROC_ID 0xFF /* No processor magic marker */ |
89 | 89 | ||
90 | #endif | 90 | #endif |
91 | #endif | 91 | #endif |
92 | 92 |
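The i386 header above keeps three distinct notions of a CPU's identity: the kernel's logical index in current_thread_info()->cpu, the physical APIC ID read from the APIC_ID register, and the logical APIC ID from the LDR. A hypothetical debug helper (report_cpu_ids is not part of the patch and assumes CONFIG_X86_LOCAL_APIC plus the usual <linux/kernel.h> and <asm/smp.h> includes) shows which accessor returns which value:

static void report_cpu_ids(void)
{
	printk(KERN_DEBUG "cpu %d: apic id %d, logical apic id 0x%x\n",
	       raw_smp_processor_id(),		/* current_thread_info()->cpu */
	       hard_smp_processor_id(),		/* GET_APIC_ID(APIC_ID register) */
	       logical_smp_processor_id());	/* GET_APIC_LOGICAL_ID(LDR) */
}
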
include/asm-ia64/smp.h
1 | /* | 1 | /* |
2 | * SMP Support | 2 | * SMP Support |
3 | * | 3 | * |
4 | * Copyright (C) 1999 VA Linux Systems | 4 | * Copyright (C) 1999 VA Linux Systems |
5 | * Copyright (C) 1999 Walt Drummond <drummond@valinux.com> | 5 | * Copyright (C) 1999 Walt Drummond <drummond@valinux.com> |
6 | * (c) Copyright 2001-2003, 2005 Hewlett-Packard Development Company, L.P. | 6 | * (c) Copyright 2001-2003, 2005 Hewlett-Packard Development Company, L.P. |
7 | * David Mosberger-Tang <davidm@hpl.hp.com> | 7 | * David Mosberger-Tang <davidm@hpl.hp.com> |
8 | * Bjorn Helgaas <bjorn.helgaas@hp.com> | 8 | * Bjorn Helgaas <bjorn.helgaas@hp.com> |
9 | */ | 9 | */ |
10 | #ifndef _ASM_IA64_SMP_H | 10 | #ifndef _ASM_IA64_SMP_H |
11 | #define _ASM_IA64_SMP_H | 11 | #define _ASM_IA64_SMP_H |
12 | 12 | ||
13 | #include <linux/config.h> | 13 | #include <linux/config.h> |
14 | #include <linux/init.h> | 14 | #include <linux/init.h> |
15 | #include <linux/threads.h> | 15 | #include <linux/threads.h> |
16 | #include <linux/kernel.h> | 16 | #include <linux/kernel.h> |
17 | #include <linux/cpumask.h> | 17 | #include <linux/cpumask.h> |
18 | 18 | ||
19 | #include <asm/bitops.h> | 19 | #include <asm/bitops.h> |
20 | #include <asm/io.h> | 20 | #include <asm/io.h> |
21 | #include <asm/param.h> | 21 | #include <asm/param.h> |
22 | #include <asm/processor.h> | 22 | #include <asm/processor.h> |
23 | #include <asm/ptrace.h> | 23 | #include <asm/ptrace.h> |
24 | 24 | ||
25 | static inline unsigned int | 25 | static inline unsigned int |
26 | ia64_get_lid (void) | 26 | ia64_get_lid (void) |
27 | { | 27 | { |
28 | union { | 28 | union { |
29 | struct { | 29 | struct { |
30 | unsigned long reserved : 16; | 30 | unsigned long reserved : 16; |
31 | unsigned long eid : 8; | 31 | unsigned long eid : 8; |
32 | unsigned long id : 8; | 32 | unsigned long id : 8; |
33 | unsigned long ignored : 32; | 33 | unsigned long ignored : 32; |
34 | } f; | 34 | } f; |
35 | unsigned long bits; | 35 | unsigned long bits; |
36 | } lid; | 36 | } lid; |
37 | 37 | ||
38 | lid.bits = ia64_getreg(_IA64_REG_CR_LID); | 38 | lid.bits = ia64_getreg(_IA64_REG_CR_LID); |
39 | return lid.f.id << 8 | lid.f.eid; | 39 | return lid.f.id << 8 | lid.f.eid; |
40 | } | 40 | } |
41 | 41 | ||
42 | #ifdef CONFIG_SMP | 42 | #ifdef CONFIG_SMP |
43 | 43 | ||
44 | #define XTP_OFFSET 0x1e0008 | 44 | #define XTP_OFFSET 0x1e0008 |
45 | 45 | ||
46 | #define SMP_IRQ_REDIRECTION (1 << 0) | 46 | #define SMP_IRQ_REDIRECTION (1 << 0) |
47 | #define SMP_IPI_REDIRECTION (1 << 1) | 47 | #define SMP_IPI_REDIRECTION (1 << 1) |
48 | 48 | ||
49 | #define smp_processor_id() (current_thread_info()->cpu) | 49 | #define raw_smp_processor_id() (current_thread_info()->cpu) |
50 | 50 | ||
51 | extern struct smp_boot_data { | 51 | extern struct smp_boot_data { |
52 | int cpu_count; | 52 | int cpu_count; |
53 | int cpu_phys_id[NR_CPUS]; | 53 | int cpu_phys_id[NR_CPUS]; |
54 | } smp_boot_data __initdata; | 54 | } smp_boot_data __initdata; |
55 | 55 | ||
56 | extern char no_int_routing __devinitdata; | 56 | extern char no_int_routing __devinitdata; |
57 | 57 | ||
58 | extern cpumask_t cpu_online_map; | 58 | extern cpumask_t cpu_online_map; |
59 | extern cpumask_t cpu_core_map[NR_CPUS]; | 59 | extern cpumask_t cpu_core_map[NR_CPUS]; |
60 | extern cpumask_t cpu_sibling_map[NR_CPUS]; | 60 | extern cpumask_t cpu_sibling_map[NR_CPUS]; |
61 | extern int smp_num_siblings; | 61 | extern int smp_num_siblings; |
62 | extern int smp_num_cpucores; | 62 | extern int smp_num_cpucores; |
63 | extern void __iomem *ipi_base_addr; | 63 | extern void __iomem *ipi_base_addr; |
64 | extern unsigned char smp_int_redirect; | 64 | extern unsigned char smp_int_redirect; |
65 | 65 | ||
66 | extern volatile int ia64_cpu_to_sapicid[]; | 66 | extern volatile int ia64_cpu_to_sapicid[]; |
67 | #define cpu_physical_id(i) ia64_cpu_to_sapicid[i] | 67 | #define cpu_physical_id(i) ia64_cpu_to_sapicid[i] |
68 | 68 | ||
69 | extern unsigned long ap_wakeup_vector; | 69 | extern unsigned long ap_wakeup_vector; |
70 | 70 | ||
71 | /* | 71 | /* |
72 | * Function to map hard smp processor id to logical id. Slow, so don't use this in | 72 | * Function to map hard smp processor id to logical id. Slow, so don't use this in |
73 | * performance-critical code. | 73 | * performance-critical code. |
74 | */ | 74 | */ |
75 | static inline int | 75 | static inline int |
76 | cpu_logical_id (int cpuid) | 76 | cpu_logical_id (int cpuid) |
77 | { | 77 | { |
78 | int i; | 78 | int i; |
79 | 79 | ||
80 | for (i = 0; i < NR_CPUS; ++i) | 80 | for (i = 0; i < NR_CPUS; ++i) |
81 | if (cpu_physical_id(i) == cpuid) | 81 | if (cpu_physical_id(i) == cpuid) |
82 | break; | 82 | break; |
83 | return i; | 83 | return i; |
84 | } | 84 | } |
85 | 85 | ||
86 | /* | 86 | /* |
87 | * XTP control functions: | 87 | * XTP control functions: |
88 | * min_xtp : route all interrupts to this CPU | 88 | * min_xtp : route all interrupts to this CPU |
89 | * normal_xtp: nominal XTP value | 89 | * normal_xtp: nominal XTP value |
90 | * max_xtp : never deliver interrupts to this CPU. | 90 | * max_xtp : never deliver interrupts to this CPU. |
91 | */ | 91 | */ |
92 | 92 | ||
93 | static inline void | 93 | static inline void |
94 | min_xtp (void) | 94 | min_xtp (void) |
95 | { | 95 | { |
96 | if (smp_int_redirect & SMP_IRQ_REDIRECTION) | 96 | if (smp_int_redirect & SMP_IRQ_REDIRECTION) |
97 | writeb(0x00, ipi_base_addr + XTP_OFFSET); /* XTP to min */ | 97 | writeb(0x00, ipi_base_addr + XTP_OFFSET); /* XTP to min */ |
98 | } | 98 | } |
99 | 99 | ||
100 | static inline void | 100 | static inline void |
101 | normal_xtp (void) | 101 | normal_xtp (void) |
102 | { | 102 | { |
103 | if (smp_int_redirect & SMP_IRQ_REDIRECTION) | 103 | if (smp_int_redirect & SMP_IRQ_REDIRECTION) |
104 | writeb(0x08, ipi_base_addr + XTP_OFFSET); /* XTP normal */ | 104 | writeb(0x08, ipi_base_addr + XTP_OFFSET); /* XTP normal */ |
105 | } | 105 | } |
106 | 106 | ||
107 | static inline void | 107 | static inline void |
108 | max_xtp (void) | 108 | max_xtp (void) |
109 | { | 109 | { |
110 | if (smp_int_redirect & SMP_IRQ_REDIRECTION) | 110 | if (smp_int_redirect & SMP_IRQ_REDIRECTION) |
111 | writeb(0x0f, ipi_base_addr + XTP_OFFSET); /* Set XTP to max */ | 111 | writeb(0x0f, ipi_base_addr + XTP_OFFSET); /* Set XTP to max */ |
112 | } | 112 | } |
113 | 113 | ||
114 | #define hard_smp_processor_id() ia64_get_lid() | 114 | #define hard_smp_processor_id() ia64_get_lid() |
115 | 115 | ||
116 | /* Upping and downing of CPUs */ | 116 | /* Upping and downing of CPUs */ |
117 | extern int __cpu_disable (void); | 117 | extern int __cpu_disable (void); |
118 | extern void __cpu_die (unsigned int cpu); | 118 | extern void __cpu_die (unsigned int cpu); |
119 | extern void cpu_die (void) __attribute__ ((noreturn)); | 119 | extern void cpu_die (void) __attribute__ ((noreturn)); |
120 | extern int __cpu_up (unsigned int cpu); | 120 | extern int __cpu_up (unsigned int cpu); |
121 | extern void __init smp_build_cpu_map(void); | 121 | extern void __init smp_build_cpu_map(void); |
122 | 122 | ||
123 | extern void __init init_smp_config (void); | 123 | extern void __init init_smp_config (void); |
124 | extern void smp_do_timer (struct pt_regs *regs); | 124 | extern void smp_do_timer (struct pt_regs *regs); |
125 | 125 | ||
126 | extern int smp_call_function_single (int cpuid, void (*func) (void *info), void *info, | 126 | extern int smp_call_function_single (int cpuid, void (*func) (void *info), void *info, |
127 | int retry, int wait); | 127 | int retry, int wait); |
128 | extern void smp_send_reschedule (int cpu); | 128 | extern void smp_send_reschedule (int cpu); |
129 | extern void lock_ipi_calllock(void); | 129 | extern void lock_ipi_calllock(void); |
130 | extern void unlock_ipi_calllock(void); | 130 | extern void unlock_ipi_calllock(void); |
131 | extern void identify_siblings (struct cpuinfo_ia64 *); | 131 | extern void identify_siblings (struct cpuinfo_ia64 *); |
132 | 132 | ||
133 | #else | 133 | #else |
134 | 134 | ||
135 | #define cpu_logical_id(i) 0 | 135 | #define cpu_logical_id(i) 0 |
136 | #define cpu_physical_id(i) ia64_get_lid() | 136 | #define cpu_physical_id(i) ia64_get_lid() |
137 | 137 | ||
138 | #endif /* CONFIG_SMP */ | 138 | #endif /* CONFIG_SMP */ |
139 | #endif /* _ASM_IA64_SMP_H */ | 139 | #endif /* _ASM_IA64_SMP_H */ |
140 | 140 |
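As the comment in the ia64 header above warns, cpu_physical_id() is a direct array lookup while cpu_logical_id() walks up to NR_CPUS entries, so the reverse mapping should not sit on a hot path. A hypothetical round-trip helper (sapicid_of is an illustrative name; assumes the usual kernel headers for BUG_ON):

static int sapicid_of(int cpu)
{
	int sapicid = cpu_physical_id(cpu);	/* O(1) array lookup */

	/* cpu_logical_id() is a linear search; it must map back to cpu. */
	BUG_ON(cpu_logical_id(sapicid) != cpu);
	return sapicid;
}
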
include/asm-m32r/smp.h
1 | #ifndef _ASM_M32R_SMP_H | 1 | #ifndef _ASM_M32R_SMP_H |
2 | #define _ASM_M32R_SMP_H | 2 | #define _ASM_M32R_SMP_H |
3 | 3 | ||
4 | /* $Id$ */ | 4 | /* $Id$ */ |
5 | 5 | ||
6 | #include <linux/config.h> | 6 | #include <linux/config.h> |
7 | 7 | ||
8 | #ifdef CONFIG_SMP | 8 | #ifdef CONFIG_SMP |
9 | #ifndef __ASSEMBLY__ | 9 | #ifndef __ASSEMBLY__ |
10 | 10 | ||
11 | #include <linux/cpumask.h> | 11 | #include <linux/cpumask.h> |
12 | #include <linux/spinlock.h> | 12 | #include <linux/spinlock.h> |
13 | #include <linux/threads.h> | 13 | #include <linux/threads.h> |
14 | #include <asm/m32r.h> | 14 | #include <asm/m32r.h> |
15 | 15 | ||
16 | #define PHYSID_ARRAY_SIZE 1 | 16 | #define PHYSID_ARRAY_SIZE 1 |
17 | 17 | ||
18 | struct physid_mask | 18 | struct physid_mask |
19 | { | 19 | { |
20 | unsigned long mask[PHYSID_ARRAY_SIZE]; | 20 | unsigned long mask[PHYSID_ARRAY_SIZE]; |
21 | }; | 21 | }; |
22 | 22 | ||
23 | typedef struct physid_mask physid_mask_t; | 23 | typedef struct physid_mask physid_mask_t; |
24 | 24 | ||
25 | #define physid_set(physid, map) set_bit(physid, (map).mask) | 25 | #define physid_set(physid, map) set_bit(physid, (map).mask) |
26 | #define physid_clear(physid, map) clear_bit(physid, (map).mask) | 26 | #define physid_clear(physid, map) clear_bit(physid, (map).mask) |
27 | #define physid_isset(physid, map) test_bit(physid, (map).mask) | 27 | #define physid_isset(physid, map) test_bit(physid, (map).mask) |
28 | #define physid_test_and_set(physid, map) test_and_set_bit(physid, (map).mask) | 28 | #define physid_test_and_set(physid, map) test_and_set_bit(physid, (map).mask) |
29 | 29 | ||
30 | #define physids_and(dst, src1, src2) bitmap_and((dst).mask, (src1).mask, (src2).mask, MAX_APICS) | 30 | #define physids_and(dst, src1, src2) bitmap_and((dst).mask, (src1).mask, (src2).mask, MAX_APICS) |
31 | #define physids_or(dst, src1, src2) bitmap_or((dst).mask, (src1).mask, (src2).mask, MAX_APICS) | 31 | #define physids_or(dst, src1, src2) bitmap_or((dst).mask, (src1).mask, (src2).mask, MAX_APICS) |
32 | #define physids_clear(map) bitmap_zero((map).mask, MAX_APICS) | 32 | #define physids_clear(map) bitmap_zero((map).mask, MAX_APICS) |
33 | #define physids_complement(dst, src) bitmap_complement((dst).mask,(src).mask, MAX_APICS) | 33 | #define physids_complement(dst, src) bitmap_complement((dst).mask,(src).mask, MAX_APICS) |
34 | #define physids_empty(map) bitmap_empty((map).mask, MAX_APICS) | 34 | #define physids_empty(map) bitmap_empty((map).mask, MAX_APICS) |
35 | #define physids_equal(map1, map2) bitmap_equal((map1).mask, (map2).mask, MAX_APICS) | 35 | #define physids_equal(map1, map2) bitmap_equal((map1).mask, (map2).mask, MAX_APICS) |
36 | #define physids_weight(map) bitmap_weight((map).mask, MAX_APICS) | 36 | #define physids_weight(map) bitmap_weight((map).mask, MAX_APICS) |
37 | #define physids_shift_right(d, s, n) bitmap_shift_right((d).mask, (s).mask, n, MAX_APICS) | 37 | #define physids_shift_right(d, s, n) bitmap_shift_right((d).mask, (s).mask, n, MAX_APICS) |
38 | #define physids_shift_left(d, s, n) bitmap_shift_left((d).mask, (s).mask, n, MAX_APICS) | 38 | #define physids_shift_left(d, s, n) bitmap_shift_left((d).mask, (s).mask, n, MAX_APICS) |
39 | #define physids_coerce(map) ((map).mask[0]) | 39 | #define physids_coerce(map) ((map).mask[0]) |
40 | 40 | ||
41 | #define physids_promote(physids) \ | 41 | #define physids_promote(physids) \ |
42 | ({ \ | 42 | ({ \ |
43 | physid_mask_t __physid_mask = PHYSID_MASK_NONE; \ | 43 | physid_mask_t __physid_mask = PHYSID_MASK_NONE; \ |
44 | __physid_mask.mask[0] = physids; \ | 44 | __physid_mask.mask[0] = physids; \ |
45 | __physid_mask; \ | 45 | __physid_mask; \ |
46 | }) | 46 | }) |
47 | 47 | ||
48 | #define physid_mask_of_physid(physid) \ | 48 | #define physid_mask_of_physid(physid) \ |
49 | ({ \ | 49 | ({ \ |
50 | physid_mask_t __physid_mask = PHYSID_MASK_NONE; \ | 50 | physid_mask_t __physid_mask = PHYSID_MASK_NONE; \ |
51 | physid_set(physid, __physid_mask); \ | 51 | physid_set(physid, __physid_mask); \ |
52 | __physid_mask; \ | 52 | __physid_mask; \ |
53 | }) | 53 | }) |
54 | 54 | ||
55 | #define PHYSID_MASK_ALL { {[0 ... PHYSID_ARRAY_SIZE-1] = ~0UL} } | 55 | #define PHYSID_MASK_ALL { {[0 ... PHYSID_ARRAY_SIZE-1] = ~0UL} } |
56 | #define PHYSID_MASK_NONE { {[0 ... PHYSID_ARRAY_SIZE-1] = 0UL} } | 56 | #define PHYSID_MASK_NONE { {[0 ... PHYSID_ARRAY_SIZE-1] = 0UL} } |
57 | 57 | ||
58 | extern physid_mask_t phys_cpu_present_map; | 58 | extern physid_mask_t phys_cpu_present_map; |
59 | 59 | ||
60 | /* | 60 | /* |
61 | * Some lowlevel functions might want to know about | 61 | * Some lowlevel functions might want to know about |
62 | * the real CPU ID <-> CPU # mapping. | 62 | * the real CPU ID <-> CPU # mapping. |
63 | */ | 63 | */ |
64 | extern volatile int physid_2_cpu[NR_CPUS]; | 64 | extern volatile int physid_2_cpu[NR_CPUS]; |
65 | extern volatile int cpu_2_physid[NR_CPUS]; | 65 | extern volatile int cpu_2_physid[NR_CPUS]; |
66 | #define physid_to_cpu(physid) physid_2_cpu[physid] | 66 | #define physid_to_cpu(physid) physid_2_cpu[physid] |
67 | #define cpu_to_physid(cpu_id) cpu_2_physid[cpu_id] | 67 | #define cpu_to_physid(cpu_id) cpu_2_physid[cpu_id] |
68 | 68 | ||
69 | #define smp_processor_id() (current_thread_info()->cpu) | 69 | #define raw_smp_processor_id() (current_thread_info()->cpu) |
70 | 70 | ||
71 | extern cpumask_t cpu_callout_map; | 71 | extern cpumask_t cpu_callout_map; |
72 | #define cpu_possible_map cpu_callout_map | 72 | #define cpu_possible_map cpu_callout_map |
73 | 73 | ||
74 | static __inline__ int hard_smp_processor_id(void) | 74 | static __inline__ int hard_smp_processor_id(void) |
75 | { | 75 | { |
76 | return (int)*(volatile long *)M32R_CPUID_PORTL; | 76 | return (int)*(volatile long *)M32R_CPUID_PORTL; |
77 | } | 77 | } |
78 | 78 | ||
79 | static __inline__ int cpu_logical_map(int cpu) | 79 | static __inline__ int cpu_logical_map(int cpu) |
80 | { | 80 | { |
81 | return cpu; | 81 | return cpu; |
82 | } | 82 | } |
83 | 83 | ||
84 | static __inline__ int cpu_number_map(int cpu) | 84 | static __inline__ int cpu_number_map(int cpu) |
85 | { | 85 | { |
86 | return cpu; | 86 | return cpu; |
87 | } | 87 | } |
88 | 88 | ||
89 | static __inline__ unsigned int num_booting_cpus(void) | 89 | static __inline__ unsigned int num_booting_cpus(void) |
90 | { | 90 | { |
91 | return cpus_weight(cpu_callout_map); | 91 | return cpus_weight(cpu_callout_map); |
92 | } | 92 | } |
93 | 93 | ||
94 | extern void smp_send_timer(void); | 94 | extern void smp_send_timer(void); |
95 | extern unsigned long send_IPI_mask_phys(cpumask_t, int, int); | 95 | extern unsigned long send_IPI_mask_phys(cpumask_t, int, int); |
96 | 96 | ||
97 | #endif /* not __ASSEMBLY__ */ | 97 | #endif /* not __ASSEMBLY__ */ |
98 | 98 | ||
99 | #define NO_PROC_ID (0xff) /* No processor magic marker */ | 99 | #define NO_PROC_ID (0xff) /* No processor magic marker */ |
100 | 100 | ||
101 | #define PROC_CHANGE_PENALTY (15) /* Schedule penalty */ | 101 | #define PROC_CHANGE_PENALTY (15) /* Schedule penalty */ |
102 | 102 | ||
103 | /* | 103 | /* |
104 | * M32R-mp IPI | 104 | * M32R-mp IPI |
105 | */ | 105 | */ |
106 | #define RESCHEDULE_IPI (M32R_IRQ_IPI0-M32R_IRQ_IPI0) | 106 | #define RESCHEDULE_IPI (M32R_IRQ_IPI0-M32R_IRQ_IPI0) |
107 | #define INVALIDATE_TLB_IPI (M32R_IRQ_IPI1-M32R_IRQ_IPI0) | 107 | #define INVALIDATE_TLB_IPI (M32R_IRQ_IPI1-M32R_IRQ_IPI0) |
108 | #define CALL_FUNCTION_IPI (M32R_IRQ_IPI2-M32R_IRQ_IPI0) | 108 | #define CALL_FUNCTION_IPI (M32R_IRQ_IPI2-M32R_IRQ_IPI0) |
109 | #define LOCAL_TIMER_IPI (M32R_IRQ_IPI3-M32R_IRQ_IPI0) | 109 | #define LOCAL_TIMER_IPI (M32R_IRQ_IPI3-M32R_IRQ_IPI0) |
110 | #define INVALIDATE_CACHE_IPI (M32R_IRQ_IPI4-M32R_IRQ_IPI0) | 110 | #define INVALIDATE_CACHE_IPI (M32R_IRQ_IPI4-M32R_IRQ_IPI0) |
111 | #define CPU_BOOT_IPI (M32R_IRQ_IPI5-M32R_IRQ_IPI0) | 111 | #define CPU_BOOT_IPI (M32R_IRQ_IPI5-M32R_IRQ_IPI0) |
112 | 112 | ||
113 | #define IPI_SHIFT (0) | 113 | #define IPI_SHIFT (0) |
114 | #define NR_IPIS (8) | 114 | #define NR_IPIS (8) |
115 | 115 | ||
116 | #endif /* CONFIG_SMP */ | 116 | #endif /* CONFIG_SMP */ |
117 | 117 | ||
118 | #endif /* _ASM_M32R_SMP_H */ | 118 | #endif /* _ASM_M32R_SMP_H */ |
119 | 119 |
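The physid_mask helpers in the m32r header above wrap the generic bitmap API so platform code can track physical CPU ids before logical numbers are assigned. Two hypothetical helpers (names are illustrative, not from the patch) show the usual set/test pattern against phys_cpu_present_map:

/* Record a physical CPU id discovered while probing, as board setup might. */
static void mark_physid_present(int physid)
{
	physid_set(physid, phys_cpu_present_map);
}

/* Later, test whether that physical id was recorded. */
static int physid_is_present(int physid)
{
	return physid_isset(physid, phys_cpu_present_map);
}
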
include/asm-mips/smp.h
1 | /* | 1 | /* |
2 | * This file is subject to the terms and conditions of the GNU General | 2 | * This file is subject to the terms and conditions of the GNU General |
3 | * Public License. See the file "COPYING" in the main directory of this | 3 | * Public License. See the file "COPYING" in the main directory of this |
4 | * archive for more details. | 4 | * archive for more details. |
5 | * | 5 | * |
6 | * Copyright (C) 2000 - 2001 by Kanoj Sarcar (kanoj@sgi.com) | 6 | * Copyright (C) 2000 - 2001 by Kanoj Sarcar (kanoj@sgi.com) |
7 | * Copyright (C) 2000 - 2001 by Silicon Graphics, Inc. | 7 | * Copyright (C) 2000 - 2001 by Silicon Graphics, Inc. |
8 | * Copyright (C) 2000, 2001, 2002 Ralf Baechle | 8 | * Copyright (C) 2000, 2001, 2002 Ralf Baechle |
9 | * Copyright (C) 2000, 2001 Broadcom Corporation | 9 | * Copyright (C) 2000, 2001 Broadcom Corporation |
10 | */ | 10 | */ |
11 | #ifndef __ASM_SMP_H | 11 | #ifndef __ASM_SMP_H |
12 | #define __ASM_SMP_H | 12 | #define __ASM_SMP_H |
13 | 13 | ||
14 | #include <linux/config.h> | 14 | #include <linux/config.h> |
15 | 15 | ||
16 | #ifdef CONFIG_SMP | 16 | #ifdef CONFIG_SMP |
17 | 17 | ||
18 | #include <linux/bitops.h> | 18 | #include <linux/bitops.h> |
19 | #include <linux/linkage.h> | 19 | #include <linux/linkage.h> |
20 | #include <linux/threads.h> | 20 | #include <linux/threads.h> |
21 | #include <linux/cpumask.h> | 21 | #include <linux/cpumask.h> |
22 | #include <asm/atomic.h> | 22 | #include <asm/atomic.h> |
23 | 23 | ||
24 | #define smp_processor_id() (current_thread_info()->cpu) | 24 | #define raw_smp_processor_id() (current_thread_info()->cpu) |
25 | 25 | ||
26 | /* Map from cpu id to sequential logical cpu number. This will only | 26 | /* Map from cpu id to sequential logical cpu number. This will only |
27 | not be idempotent when cpus failed to come on-line. */ | 27 | not be idempotent when cpus failed to come on-line. */ |
28 | extern int __cpu_number_map[NR_CPUS]; | 28 | extern int __cpu_number_map[NR_CPUS]; |
29 | #define cpu_number_map(cpu) __cpu_number_map[cpu] | 29 | #define cpu_number_map(cpu) __cpu_number_map[cpu] |
30 | 30 | ||
31 | /* The reverse map from sequential logical cpu number to cpu id. */ | 31 | /* The reverse map from sequential logical cpu number to cpu id. */ |
32 | extern int __cpu_logical_map[NR_CPUS]; | 32 | extern int __cpu_logical_map[NR_CPUS]; |
33 | #define cpu_logical_map(cpu) __cpu_logical_map[cpu] | 33 | #define cpu_logical_map(cpu) __cpu_logical_map[cpu] |
34 | 34 | ||
35 | #define NO_PROC_ID (-1) | 35 | #define NO_PROC_ID (-1) |
36 | 36 | ||
37 | struct call_data_struct { | 37 | struct call_data_struct { |
38 | void (*func)(void *); | 38 | void (*func)(void *); |
39 | void *info; | 39 | void *info; |
40 | atomic_t started; | 40 | atomic_t started; |
41 | atomic_t finished; | 41 | atomic_t finished; |
42 | int wait; | 42 | int wait; |
43 | }; | 43 | }; |
44 | 44 | ||
45 | extern struct call_data_struct *call_data; | 45 | extern struct call_data_struct *call_data; |
46 | 46 | ||
47 | #define SMP_RESCHEDULE_YOURSELF 0x1 /* XXX braindead */ | 47 | #define SMP_RESCHEDULE_YOURSELF 0x1 /* XXX braindead */ |
48 | #define SMP_CALL_FUNCTION 0x2 | 48 | #define SMP_CALL_FUNCTION 0x2 |
49 | 49 | ||
50 | extern cpumask_t phys_cpu_present_map; | 50 | extern cpumask_t phys_cpu_present_map; |
51 | extern cpumask_t cpu_online_map; | 51 | extern cpumask_t cpu_online_map; |
52 | #define cpu_possible_map phys_cpu_present_map | 52 | #define cpu_possible_map phys_cpu_present_map |
53 | 53 | ||
54 | extern cpumask_t cpu_callout_map; | 54 | extern cpumask_t cpu_callout_map; |
55 | /* We don't mark CPUs online until __cpu_up(), so we need another measure */ | 55 | /* We don't mark CPUs online until __cpu_up(), so we need another measure */ |
56 | static inline int num_booting_cpus(void) | 56 | static inline int num_booting_cpus(void) |
57 | { | 57 | { |
58 | return cpus_weight(cpu_callout_map); | 58 | return cpus_weight(cpu_callout_map); |
59 | } | 59 | } |
60 | 60 | ||
61 | /* These are defined by the board-specific code. */ | 61 | /* These are defined by the board-specific code. */ |
62 | 62 | ||
63 | /* | 63 | /* |
64 | * Cause the function described by call_data to be executed on the passed | 64 | * Cause the function described by call_data to be executed on the passed |
65 | * cpu. When the function has finished, increment the finished field of | 65 | * cpu. When the function has finished, increment the finished field of |
66 | * call_data. | 66 | * call_data. |
67 | */ | 67 | */ |
68 | extern void core_send_ipi(int cpu, unsigned int action); | 68 | extern void core_send_ipi(int cpu, unsigned int action); |
69 | 69 | ||
70 | /* | 70 | /* |
71 | * Firmware CPU startup hook | 71 | * Firmware CPU startup hook |
72 | */ | 72 | */ |
73 | extern void prom_boot_secondary(int cpu, struct task_struct *idle); | 73 | extern void prom_boot_secondary(int cpu, struct task_struct *idle); |
74 | 74 | ||
75 | /* | 75 | /* |
76 | * After we've done initial boot, this function is called to allow the | 76 | * After we've done initial boot, this function is called to allow the |
77 | * board code to clean up state, if needed | 77 | * board code to clean up state, if needed |
78 | */ | 78 | */ |
79 | extern void prom_init_secondary(void); | 79 | extern void prom_init_secondary(void); |
80 | 80 | ||
81 | /* | 81 | /* |
82 | * Detect available CPUs, populate phys_cpu_present_map before smp_init | 82 | * Detect available CPUs, populate phys_cpu_present_map before smp_init |
83 | */ | 83 | */ |
84 | extern void prom_prepare_cpus(unsigned int max_cpus); | 84 | extern void prom_prepare_cpus(unsigned int max_cpus); |
85 | 85 | ||
86 | /* | 86 | /* |
87 | * Last chance for the board code to finish SMP initialization before | 87 | * Last chance for the board code to finish SMP initialization before |
88 | * the CPU is "online". | 88 | * the CPU is "online". |
89 | */ | 89 | */ |
90 | extern void prom_smp_finish(void); | 90 | extern void prom_smp_finish(void); |
91 | 91 | ||
92 | /* Hook for after all CPUs are online */ | 92 | /* Hook for after all CPUs are online */ |
93 | extern void prom_cpus_done(void); | 93 | extern void prom_cpus_done(void); |
94 | 94 | ||
95 | extern void asmlinkage smp_bootstrap(void); | 95 | extern void asmlinkage smp_bootstrap(void); |
96 | 96 | ||
97 | /* | 97 | /* |
98 | * this function sends a 'reschedule' IPI to another CPU. | 98 | * this function sends a 'reschedule' IPI to another CPU. |
99 | * it goes straight through and wastes no time serializing | 99 | * it goes straight through and wastes no time serializing |
100 | * anything. Worst case is that we lose a reschedule ... | 100 | * anything. Worst case is that we lose a reschedule ... |
101 | */ | 101 | */ |
102 | static inline void smp_send_reschedule(int cpu) | 102 | static inline void smp_send_reschedule(int cpu) |
103 | { | 103 | { |
104 | core_send_ipi(cpu, SMP_RESCHEDULE_YOURSELF); | 104 | core_send_ipi(cpu, SMP_RESCHEDULE_YOURSELF); |
105 | } | 105 | } |
106 | 106 | ||
107 | extern asmlinkage void smp_call_function_interrupt(void); | 107 | extern asmlinkage void smp_call_function_interrupt(void); |
108 | 108 | ||
109 | #endif /* CONFIG_SMP */ | 109 | #endif /* CONFIG_SMP */ |
110 | 110 | ||
111 | #endif /* __ASM_SMP_H */ | 111 | #endif /* __ASM_SMP_H */ |
112 | 112 |
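The comment block in the mips header above spells out the call_data contract: the sender fills in func/info/wait and raises SMP_CALL_FUNCTION through core_send_ipi(), and the receiver runs the function and bumps the started/finished counters so the sender knows when call_data may be reused. The fragment below is only a rough sketch of the receive side under those assumptions; handle_call_function_ipi is an illustrative name, and the real handler lives in the arch smp code, which is not part of this hunk:

static void handle_call_function_ipi(void)
{
	void (*func)(void *) = call_data->func;
	void *info = call_data->info;
	int wait = call_data->wait;

	atomic_inc(&call_data->started);	/* sender: data has been picked up */
	func(info);
	if (wait)
		atomic_inc(&call_data->finished);	/* sender may reuse call_data */
}
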
include/asm-parisc/smp.h
1 | #ifndef __ASM_SMP_H | 1 | #ifndef __ASM_SMP_H |
2 | #define __ASM_SMP_H | 2 | #define __ASM_SMP_H |
3 | 3 | ||
4 | #include <linux/config.h> | 4 | #include <linux/config.h> |
5 | 5 | ||
6 | #if defined(CONFIG_SMP) | 6 | #if defined(CONFIG_SMP) |
7 | 7 | ||
8 | /* Page Zero Location PDC will look for the address to branch to when we poke | 8 | /* Page Zero Location PDC will look for the address to branch to when we poke |
9 | ** slave CPUs still in "Icache loop". | 9 | ** slave CPUs still in "Icache loop". |
10 | */ | 10 | */ |
11 | #define PDC_OS_BOOT_RENDEZVOUS 0x10 | 11 | #define PDC_OS_BOOT_RENDEZVOUS 0x10 |
12 | #define PDC_OS_BOOT_RENDEZVOUS_HI 0x28 | 12 | #define PDC_OS_BOOT_RENDEZVOUS_HI 0x28 |
13 | 13 | ||
14 | #ifndef ASSEMBLY | 14 | #ifndef ASSEMBLY |
15 | #include <linux/bitops.h> | 15 | #include <linux/bitops.h> |
16 | #include <linux/threads.h> /* for NR_CPUS */ | 16 | #include <linux/threads.h> /* for NR_CPUS */ |
17 | #include <linux/cpumask.h> | 17 | #include <linux/cpumask.h> |
18 | typedef unsigned long address_t; | 18 | typedef unsigned long address_t; |
19 | 19 | ||
20 | extern cpumask_t cpu_online_map; | 20 | extern cpumask_t cpu_online_map; |
21 | 21 | ||
22 | 22 | ||
23 | /* | 23 | /* |
24 | * Private routines/data | 24 | * Private routines/data |
25 | * | 25 | * |
26 | * physical and logical are equivalent until we support CPU hotplug. | 26 | * physical and logical are equivalent until we support CPU hotplug. |
27 | */ | 27 | */ |
28 | #define cpu_number_map(cpu) (cpu) | 28 | #define cpu_number_map(cpu) (cpu) |
29 | #define cpu_logical_map(cpu) (cpu) | 29 | #define cpu_logical_map(cpu) (cpu) |
30 | 30 | ||
31 | extern void smp_send_reschedule(int cpu); | 31 | extern void smp_send_reschedule(int cpu); |
32 | 32 | ||
33 | #endif /* !ASSEMBLY */ | 33 | #endif /* !ASSEMBLY */ |
34 | 34 | ||
35 | /* | 35 | /* |
36 | * This magic constant controls our willingness to transfer | 36 | * This magic constant controls our willingness to transfer |
37 | * a process across CPUs. Such a transfer incurs cache and tlb | 37 | * a process across CPUs. Such a transfer incurs cache and tlb |
38 | * misses. The current value is inherited from i386. Still needs | 38 | * misses. The current value is inherited from i386. Still needs |
39 | * to be tuned for parisc. | 39 | * to be tuned for parisc. |
40 | */ | 40 | */ |
41 | 41 | ||
42 | #define PROC_CHANGE_PENALTY 15 /* Schedule penalty */ | 42 | #define PROC_CHANGE_PENALTY 15 /* Schedule penalty */ |
43 | 43 | ||
44 | #undef ENTRY_SYS_CPUS | 44 | #undef ENTRY_SYS_CPUS |
45 | #ifdef ENTRY_SYS_CPUS | 45 | #ifdef ENTRY_SYS_CPUS |
46 | #define STATE_RENDEZVOUS 0 | 46 | #define STATE_RENDEZVOUS 0 |
47 | #define STATE_STOPPED 1 | 47 | #define STATE_STOPPED 1 |
48 | #define STATE_RUNNING 2 | 48 | #define STATE_RUNNING 2 |
49 | #define STATE_HALTED 3 | 49 | #define STATE_HALTED 3 |
50 | #endif | 50 | #endif |
51 | 51 | ||
52 | extern unsigned long cpu_present_mask; | 52 | extern unsigned long cpu_present_mask; |
53 | 53 | ||
54 | #define smp_processor_id() (current_thread_info()->cpu) | 54 | #define raw_smp_processor_id() (current_thread_info()->cpu) |
55 | 55 | ||
56 | #endif /* CONFIG_SMP */ | 56 | #endif /* CONFIG_SMP */ |
57 | 57 | ||
58 | #define NO_PROC_ID 0xFF /* No processor magic marker */ | 58 | #define NO_PROC_ID 0xFF /* No processor magic marker */ |
59 | #define ANY_PROC_ID 0xFF /* Any processor magic marker */ | 59 | #define ANY_PROC_ID 0xFF /* Any processor magic marker */ |
60 | static inline int __cpu_disable (void) { | 60 | static inline int __cpu_disable (void) { |
61 | return 0; | 61 | return 0; |
62 | } | 62 | } |
63 | static inline void __cpu_die (unsigned int cpu) { | 63 | static inline void __cpu_die (unsigned int cpu) { |
64 | while(1) | 64 | while(1) |
65 | ; | 65 | ; |
66 | } | 66 | } |
67 | extern int __cpu_up (unsigned int cpu); | 67 | extern int __cpu_up (unsigned int cpu); |
68 | 68 | ||
69 | #endif /* __ASM_SMP_H */ | 69 | #endif /* __ASM_SMP_H */ |
70 | 70 |
include/asm-ppc/smp.h
1 | /* smp.h: PPC specific SMP stuff. | 1 | /* smp.h: PPC specific SMP stuff. |
2 | * | 2 | * |
3 | * Original was a copy of sparc smp.h. Now heavily modified | 3 | * Original was a copy of sparc smp.h. Now heavily modified |
4 | * for PPC. | 4 | * for PPC. |
5 | * | 5 | * |
6 | * Copyright (C) 1996 David S. Miller (davem@caip.rutgers.edu) | 6 | * Copyright (C) 1996 David S. Miller (davem@caip.rutgers.edu) |
7 | * Copyright (C) 1996-2001 Cort Dougan <cort@fsmlabs.com> | 7 | * Copyright (C) 1996-2001 Cort Dougan <cort@fsmlabs.com> |
8 | */ | 8 | */ |
9 | #ifdef __KERNEL__ | 9 | #ifdef __KERNEL__ |
10 | #ifndef _PPC_SMP_H | 10 | #ifndef _PPC_SMP_H |
11 | #define _PPC_SMP_H | 11 | #define _PPC_SMP_H |
12 | 12 | ||
13 | #include <linux/config.h> | 13 | #include <linux/config.h> |
14 | #include <linux/kernel.h> | 14 | #include <linux/kernel.h> |
15 | #include <linux/bitops.h> | 15 | #include <linux/bitops.h> |
16 | #include <linux/errno.h> | 16 | #include <linux/errno.h> |
17 | #include <linux/cpumask.h> | 17 | #include <linux/cpumask.h> |
18 | #include <linux/threads.h> | 18 | #include <linux/threads.h> |
19 | 19 | ||
20 | #ifdef CONFIG_SMP | 20 | #ifdef CONFIG_SMP |
21 | 21 | ||
22 | #ifndef __ASSEMBLY__ | 22 | #ifndef __ASSEMBLY__ |
23 | 23 | ||
24 | struct cpuinfo_PPC { | 24 | struct cpuinfo_PPC { |
25 | unsigned long loops_per_jiffy; | 25 | unsigned long loops_per_jiffy; |
26 | unsigned long pvr; | 26 | unsigned long pvr; |
27 | unsigned long *pgd_cache; | 27 | unsigned long *pgd_cache; |
28 | unsigned long *pte_cache; | 28 | unsigned long *pte_cache; |
29 | unsigned long pgtable_cache_sz; | 29 | unsigned long pgtable_cache_sz; |
30 | }; | 30 | }; |
31 | 31 | ||
32 | extern struct cpuinfo_PPC cpu_data[]; | 32 | extern struct cpuinfo_PPC cpu_data[]; |
33 | extern cpumask_t cpu_online_map; | 33 | extern cpumask_t cpu_online_map; |
34 | extern cpumask_t cpu_possible_map; | 34 | extern cpumask_t cpu_possible_map; |
35 | extern unsigned long smp_proc_in_lock[]; | 35 | extern unsigned long smp_proc_in_lock[]; |
36 | extern volatile unsigned long cpu_callin_map[]; | 36 | extern volatile unsigned long cpu_callin_map[]; |
37 | extern int smp_tb_synchronized; | 37 | extern int smp_tb_synchronized; |
38 | 38 | ||
39 | extern void smp_send_tlb_invalidate(int); | 39 | extern void smp_send_tlb_invalidate(int); |
40 | extern void smp_send_xmon_break(int cpu); | 40 | extern void smp_send_xmon_break(int cpu); |
41 | struct pt_regs; | 41 | struct pt_regs; |
42 | extern void smp_message_recv(int, struct pt_regs *); | 42 | extern void smp_message_recv(int, struct pt_regs *); |
43 | 43 | ||
44 | #define NO_PROC_ID 0xFF /* No processor magic marker */ | 44 | #define NO_PROC_ID 0xFF /* No processor magic marker */ |
45 | #define PROC_CHANGE_PENALTY 20 | 45 | #define PROC_CHANGE_PENALTY 20 |
46 | 46 | ||
47 | #define smp_processor_id() (current_thread_info()->cpu) | 47 | #define raw_smp_processor_id() (current_thread_info()->cpu) |
48 | 48 | ||
49 | extern int __cpu_up(unsigned int cpu); | 49 | extern int __cpu_up(unsigned int cpu); |
50 | 50 | ||
51 | extern int smp_hw_index[]; | 51 | extern int smp_hw_index[]; |
52 | #define hard_smp_processor_id() (smp_hw_index[smp_processor_id()]) | 52 | #define hard_smp_processor_id() (smp_hw_index[smp_processor_id()]) |
53 | 53 | ||
54 | struct klock_info_struct { | 54 | struct klock_info_struct { |
55 | unsigned long kernel_flag; | 55 | unsigned long kernel_flag; |
56 | unsigned char akp; | 56 | unsigned char akp; |
57 | }; | 57 | }; |
58 | 58 | ||
59 | extern struct klock_info_struct klock_info; | 59 | extern struct klock_info_struct klock_info; |
60 | #define KLOCK_HELD 0xffffffff | 60 | #define KLOCK_HELD 0xffffffff |
61 | #define KLOCK_CLEAR 0x0 | 61 | #define KLOCK_CLEAR 0x0 |
62 | 62 | ||
63 | #endif /* __ASSEMBLY__ */ | 63 | #endif /* __ASSEMBLY__ */ |
64 | 64 | ||
65 | #else /* !(CONFIG_SMP) */ | 65 | #else /* !(CONFIG_SMP) */ |
66 | 66 | ||
67 | #endif /* !(CONFIG_SMP) */ | 67 | #endif /* !(CONFIG_SMP) */ |
68 | 68 | ||
69 | #endif /* !(_PPC_SMP_H) */ | 69 | #endif /* !(_PPC_SMP_H) */ |
70 | #endif /* __KERNEL__ */ | 70 | #endif /* __KERNEL__ */ |
71 | 71 |
include/asm-ppc64/smp.h
1 | /* | 1 | /* |
2 | * smp.h: PPC64 specific SMP code. | 2 | * smp.h: PPC64 specific SMP code. |
3 | * | 3 | * |
4 | * Original was a copy of sparc smp.h. Now heavily modified | 4 | * Original was a copy of sparc smp.h. Now heavily modified |
5 | * for PPC. | 5 | * for PPC. |
6 | * | 6 | * |
7 | * Copyright (C) 1996 David S. Miller (davem@caip.rutgers.edu) | 7 | * Copyright (C) 1996 David S. Miller (davem@caip.rutgers.edu) |
8 | * Copyright (C) 1996-2001 Cort Dougan <cort@fsmlabs.com> | 8 | * Copyright (C) 1996-2001 Cort Dougan <cort@fsmlabs.com> |
9 | * | 9 | * |
10 | * This program is free software; you can redistribute it and/or | 10 | * This program is free software; you can redistribute it and/or |
11 | * modify it under the terms of the GNU General Public License | 11 | * modify it under the terms of the GNU General Public License |
12 | * as published by the Free Software Foundation; either version | 12 | * as published by the Free Software Foundation; either version |
13 | * 2 of the License, or (at your option) any later version. | 13 | * 2 of the License, or (at your option) any later version. |
14 | */ | 14 | */ |
15 | 15 | ||
16 | #ifdef __KERNEL__ | 16 | #ifdef __KERNEL__ |
17 | #ifndef _PPC64_SMP_H | 17 | #ifndef _PPC64_SMP_H |
18 | #define _PPC64_SMP_H | 18 | #define _PPC64_SMP_H |
19 | 19 | ||
20 | #include <linux/config.h> | 20 | #include <linux/config.h> |
21 | #include <linux/threads.h> | 21 | #include <linux/threads.h> |
22 | #include <linux/cpumask.h> | 22 | #include <linux/cpumask.h> |
23 | #include <linux/kernel.h> | 23 | #include <linux/kernel.h> |
24 | 24 | ||
25 | #ifndef __ASSEMBLY__ | 25 | #ifndef __ASSEMBLY__ |
26 | 26 | ||
27 | #include <asm/paca.h> | 27 | #include <asm/paca.h> |
28 | 28 | ||
29 | extern int boot_cpuid; | 29 | extern int boot_cpuid; |
30 | extern int boot_cpuid_phys; | 30 | extern int boot_cpuid_phys; |
31 | 31 | ||
32 | extern void cpu_die(void); | 32 | extern void cpu_die(void); |
33 | 33 | ||
34 | #ifdef CONFIG_SMP | 34 | #ifdef CONFIG_SMP |
35 | 35 | ||
36 | extern void smp_send_debugger_break(int cpu); | 36 | extern void smp_send_debugger_break(int cpu); |
37 | struct pt_regs; | 37 | struct pt_regs; |
38 | extern void smp_message_recv(int, struct pt_regs *); | 38 | extern void smp_message_recv(int, struct pt_regs *); |
39 | 39 | ||
40 | #ifdef CONFIG_HOTPLUG_CPU | 40 | #ifdef CONFIG_HOTPLUG_CPU |
41 | extern void fixup_irqs(cpumask_t map); | 41 | extern void fixup_irqs(cpumask_t map); |
42 | int generic_cpu_disable(void); | 42 | int generic_cpu_disable(void); |
43 | int generic_cpu_enable(unsigned int cpu); | 43 | int generic_cpu_enable(unsigned int cpu); |
44 | void generic_cpu_die(unsigned int cpu); | 44 | void generic_cpu_die(unsigned int cpu); |
45 | void generic_mach_cpu_die(void); | 45 | void generic_mach_cpu_die(void); |
46 | #endif | 46 | #endif |
47 | 47 | ||
48 | #define __smp_processor_id() (get_paca()->paca_index) | 48 | #define raw_smp_processor_id() (get_paca()->paca_index) |
49 | #define hard_smp_processor_id() (get_paca()->hw_cpu_id) | 49 | #define hard_smp_processor_id() (get_paca()->hw_cpu_id) |
50 | 50 | ||
51 | extern cpumask_t cpu_sibling_map[NR_CPUS]; | 51 | extern cpumask_t cpu_sibling_map[NR_CPUS]; |
52 | 52 | ||
53 | /* Since OpenPIC has only 4 IPIs, we use slightly different message numbers. | 53 | /* Since OpenPIC has only 4 IPIs, we use slightly different message numbers. |
54 | * | 54 | * |
55 | * Make sure this matches openpic_request_IPIs in open_pic.c, or what shows up | 55 | * Make sure this matches openpic_request_IPIs in open_pic.c, or what shows up |
56 | * in /proc/interrupts will be wrong!!! --Troy */ | 56 | * in /proc/interrupts will be wrong!!! --Troy */ |
57 | #define PPC_MSG_CALL_FUNCTION 0 | 57 | #define PPC_MSG_CALL_FUNCTION 0 |
58 | #define PPC_MSG_RESCHEDULE 1 | 58 | #define PPC_MSG_RESCHEDULE 1 |
59 | /* This is unused now */ | 59 | /* This is unused now */ |
60 | #if 0 | 60 | #if 0 |
61 | #define PPC_MSG_MIGRATE_TASK 2 | 61 | #define PPC_MSG_MIGRATE_TASK 2 |
62 | #endif | 62 | #endif |
63 | #define PPC_MSG_DEBUGGER_BREAK 3 | 63 | #define PPC_MSG_DEBUGGER_BREAK 3 |
64 | 64 | ||
65 | void smp_init_iSeries(void); | 65 | void smp_init_iSeries(void); |
66 | void smp_init_pSeries(void); | 66 | void smp_init_pSeries(void); |
67 | 67 | ||
68 | extern int __cpu_disable(void); | 68 | extern int __cpu_disable(void); |
69 | extern void __cpu_die(unsigned int cpu); | 69 | extern void __cpu_die(unsigned int cpu); |
70 | #endif /* CONFIG_SMP */ | 70 | #endif /* CONFIG_SMP */ |
71 | 71 | ||
72 | #define get_hard_smp_processor_id(CPU) (paca[(CPU)].hw_cpu_id) | 72 | #define get_hard_smp_processor_id(CPU) (paca[(CPU)].hw_cpu_id) |
73 | #define set_hard_smp_processor_id(CPU, VAL) \ | 73 | #define set_hard_smp_processor_id(CPU, VAL) \ |
74 | do { (paca[(CPU)].hw_cpu_id = (VAL)); } while (0) | 74 | do { (paca[(CPU)].hw_cpu_id = (VAL)); } while (0) |
75 | 75 | ||
76 | extern int smt_enabled_at_boot; | 76 | extern int smt_enabled_at_boot; |
77 | 77 | ||
78 | extern int smp_mpic_probe(void); | 78 | extern int smp_mpic_probe(void); |
79 | extern void smp_mpic_setup_cpu(int cpu); | 79 | extern void smp_mpic_setup_cpu(int cpu); |
80 | extern void smp_mpic_message_pass(int target, int msg); | 80 | extern void smp_mpic_message_pass(int target, int msg); |
81 | extern void smp_generic_kick_cpu(int nr); | 81 | extern void smp_generic_kick_cpu(int nr); |
82 | 82 | ||
83 | extern void smp_generic_give_timebase(void); | 83 | extern void smp_generic_give_timebase(void); |
84 | extern void smp_generic_take_timebase(void); | 84 | extern void smp_generic_take_timebase(void); |
85 | 85 | ||
86 | extern struct smp_ops_t *smp_ops; | 86 | extern struct smp_ops_t *smp_ops; |
87 | 87 | ||
88 | #endif /* __ASSEMBLY__ */ | 88 | #endif /* __ASSEMBLY__ */ |
89 | 89 | ||
90 | #endif /* !(_PPC64_SMP_H) */ | 90 | #endif /* !(_PPC64_SMP_H) */ |
91 | #endif /* __KERNEL__ */ | 91 | #endif /* __KERNEL__ */ |
92 | 92 |
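On ppc64 both identities live in the paca: raw_smp_processor_id() reads the current CPU's paca_index, while get_hard_smp_processor_id() indexes the paca array for any CPU. A hypothetical dump helper (show_hw_ids is illustrative only and assumes <linux/kernel.h> and <linux/cpumask.h> are in scope):

static void show_hw_ids(void)
{
	int cpu;

	for (cpu = 0; cpu < NR_CPUS; cpu++)
		if (cpu_online(cpu))
			printk(KERN_DEBUG "cpu %d -> hw id %d\n",
			       cpu, get_hard_smp_processor_id(cpu));
}
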
include/asm-s390/smp.h
1 | /* | 1 | /* |
2 | * include/asm-s390/smp.h | 2 | * include/asm-s390/smp.h |
3 | * | 3 | * |
4 | * S390 version | 4 | * S390 version |
5 | * Copyright (C) 1999 IBM Deutschland Entwicklung GmbH, IBM Corporation | 5 | * Copyright (C) 1999 IBM Deutschland Entwicklung GmbH, IBM Corporation |
6 | * Author(s): Denis Joseph Barrow (djbarrow@de.ibm.com,barrow_dj@yahoo.com), | 6 | * Author(s): Denis Joseph Barrow (djbarrow@de.ibm.com,barrow_dj@yahoo.com), |
7 | * Martin Schwidefsky (schwidefsky@de.ibm.com) | 7 | * Martin Schwidefsky (schwidefsky@de.ibm.com) |
8 | * Heiko Carstens (heiko.carstens@de.ibm.com) | 8 | * Heiko Carstens (heiko.carstens@de.ibm.com) |
9 | */ | 9 | */ |
10 | #ifndef __ASM_SMP_H | 10 | #ifndef __ASM_SMP_H |
11 | #define __ASM_SMP_H | 11 | #define __ASM_SMP_H |
12 | 12 | ||
13 | #include <linux/config.h> | 13 | #include <linux/config.h> |
14 | #include <linux/threads.h> | 14 | #include <linux/threads.h> |
15 | #include <linux/cpumask.h> | 15 | #include <linux/cpumask.h> |
16 | #include <linux/bitops.h> | 16 | #include <linux/bitops.h> |
17 | 17 | ||
18 | #if defined(__KERNEL__) && defined(CONFIG_SMP) && !defined(__ASSEMBLY__) | 18 | #if defined(__KERNEL__) && defined(CONFIG_SMP) && !defined(__ASSEMBLY__) |
19 | 19 | ||
20 | #include <asm/lowcore.h> | 20 | #include <asm/lowcore.h> |
21 | #include <asm/sigp.h> | 21 | #include <asm/sigp.h> |
22 | 22 | ||
23 | /* | 23 | /* |
24 | s390 specific smp.c headers | 24 | s390 specific smp.c headers |
25 | */ | 25 | */ |
26 | typedef struct | 26 | typedef struct |
27 | { | 27 | { |
28 | int intresting; | 28 | int intresting; |
29 | sigp_ccode ccode; | 29 | sigp_ccode ccode; |
30 | __u32 status; | 30 | __u32 status; |
31 | __u16 cpu; | 31 | __u16 cpu; |
32 | } sigp_info; | 32 | } sigp_info; |
33 | 33 | ||
34 | extern int smp_call_function_on(void (*func) (void *info), void *info, | 34 | extern int smp_call_function_on(void (*func) (void *info), void *info, |
35 | int nonatomic, int wait, int cpu); | 35 | int nonatomic, int wait, int cpu); |
36 | #define NO_PROC_ID 0xFF /* No processor magic marker */ | 36 | #define NO_PROC_ID 0xFF /* No processor magic marker */ |
37 | 37 | ||
38 | /* | 38 | /* |
39 | * This magic constant controls our willingness to transfer | 39 | * This magic constant controls our willingness to transfer |
40 | * a process across CPUs. Such a transfer incurs misses on the L1 | 40 | * a process across CPUs. Such a transfer incurs misses on the L1 |
41 | * cache, and on a P6 or P5 with multiple L2 caches L2 hits. My | 41 | * cache, and on a P6 or P5 with multiple L2 caches L2 hits. My |
42 | * gut feeling is this will vary by board in value. For a board | 42 | * gut feeling is this will vary by board in value. For a board |
43 | * with separate L2 cache it probably depends also on the RSS, and | 43 | * with separate L2 cache it probably depends also on the RSS, and |
44 | * for a board with shared L2 cache it ought to decay fast as other | 44 | * for a board with shared L2 cache it ought to decay fast as other |
45 | * processes are run. | 45 | * processes are run. |
46 | */ | 46 | */ |
47 | 47 | ||
48 | #define PROC_CHANGE_PENALTY 20 /* Schedule penalty */ | 48 | #define PROC_CHANGE_PENALTY 20 /* Schedule penalty */ |
49 | 49 | ||
50 | #define smp_processor_id() (S390_lowcore.cpu_data.cpu_nr) | 50 | #define raw_smp_processor_id() (S390_lowcore.cpu_data.cpu_nr) |
51 | 51 | ||
52 | extern int smp_get_cpu(cpumask_t cpu_map); | 52 | extern int smp_get_cpu(cpumask_t cpu_map); |
53 | extern void smp_put_cpu(int cpu); | 53 | extern void smp_put_cpu(int cpu); |
54 | 54 | ||
55 | extern __inline__ __u16 hard_smp_processor_id(void) | 55 | extern __inline__ __u16 hard_smp_processor_id(void) |
56 | { | 56 | { |
57 | __u16 cpu_address; | 57 | __u16 cpu_address; |
58 | 58 | ||
59 | __asm__ ("stap %0\n" : "=m" (cpu_address)); | 59 | __asm__ ("stap %0\n" : "=m" (cpu_address)); |
60 | return cpu_address; | 60 | return cpu_address; |
61 | } | 61 | } |
62 | 62 | ||
63 | /* | 63 | /* |
64 | * returns 1 if cpu is in stopped/check stopped state or not operational | 64 | * returns 1 if cpu is in stopped/check stopped state or not operational |
65 | * returns 0 otherwise | 65 | * returns 0 otherwise |
66 | */ | 66 | */ |
67 | static inline int | 67 | static inline int |
68 | smp_cpu_not_running(int cpu) | 68 | smp_cpu_not_running(int cpu) |
69 | { | 69 | { |
70 | __u32 status; | 70 | __u32 status; |
71 | 71 | ||
72 | switch (signal_processor_ps(&status, 0, cpu, sigp_sense)) { | 72 | switch (signal_processor_ps(&status, 0, cpu, sigp_sense)) { |
73 | case sigp_order_code_accepted: | 73 | case sigp_order_code_accepted: |
74 | case sigp_status_stored: | 74 | case sigp_status_stored: |
75 | /* Check for stopped and check stop state */ | 75 | /* Check for stopped and check stop state */ |
76 | if (status & 0x50) | 76 | if (status & 0x50) |
77 | return 1; | 77 | return 1; |
78 | break; | 78 | break; |
79 | case sigp_not_operational: | 79 | case sigp_not_operational: |
80 | return 1; | 80 | return 1; |
81 | default: | 81 | default: |
82 | break; | 82 | break; |
83 | } | 83 | } |
84 | return 0; | 84 | return 0; |
85 | } | 85 | } |
86 | 86 | ||
87 | #define cpu_logical_map(cpu) (cpu) | 87 | #define cpu_logical_map(cpu) (cpu) |
88 | 88 | ||
89 | extern int __cpu_disable (void); | 89 | extern int __cpu_disable (void); |
90 | extern void __cpu_die (unsigned int cpu); | 90 | extern void __cpu_die (unsigned int cpu); |
91 | extern void cpu_die (void) __attribute__ ((noreturn)); | 91 | extern void cpu_die (void) __attribute__ ((noreturn)); |
92 | extern int __cpu_up (unsigned int cpu); | 92 | extern int __cpu_up (unsigned int cpu); |
93 | 93 | ||
94 | #endif | 94 | #endif |
95 | 95 | ||
96 | #ifndef CONFIG_SMP | 96 | #ifndef CONFIG_SMP |
97 | static inline int | 97 | static inline int |
98 | smp_call_function_on(void (*func) (void *info), void *info, | 98 | smp_call_function_on(void (*func) (void *info), void *info, |
99 | int nonatomic, int wait, int cpu) | 99 | int nonatomic, int wait, int cpu) |
100 | { | 100 | { |
101 | func(info); | 101 | func(info); |
102 | return 0; | 102 | return 0; |
103 | } | 103 | } |
104 | #define smp_get_cpu(cpu) ({ 0; }) | 104 | #define smp_get_cpu(cpu) ({ 0; }) |
105 | #define smp_put_cpu(cpu) ({ 0; }) | 105 | #define smp_put_cpu(cpu) ({ 0; }) |
106 | #endif | 106 | #endif |
107 | 107 | ||
108 | #endif | 108 | #endif |
109 | 109 |
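The s390 header above declares smp_call_function_on() for SMP builds and provides an inline !CONFIG_SMP stub that simply runs the callback locally. A minimal caller sketch, with a hypothetical callback:

	/* Illustrative only: run a callback synchronously on one CPU; on
	 * uniprocessor builds the inline stub above just calls func(info). */
	static void hypothetical_flush(void *info)
	{
		/* per-cpu work */
	}

	static int hypothetical_flush_cpu(int cpu)
	{
		return smp_call_function_on(hypothetical_flush, NULL,
					    0 /* nonatomic */, 1 /* wait */, cpu);
	}
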
include/asm-sh/smp.h
1 | /* | 1 | /* |
2 | * include/asm-sh/smp.h | 2 | * include/asm-sh/smp.h |
3 | * | 3 | * |
4 | * Copyright (C) 2002, 2003 Paul Mundt | 4 | * Copyright (C) 2002, 2003 Paul Mundt |
5 | * | 5 | * |
6 | * This file is subject to the terms and conditions of the GNU General Public | 6 | * This file is subject to the terms and conditions of the GNU General Public |
7 | * License. See the file "COPYING" in the main directory of this archive for | 7 | * License. See the file "COPYING" in the main directory of this archive for |
8 | * more details. | 8 | * more details. |
9 | */ | 9 | */ |
10 | #ifndef __ASM_SH_SMP_H | 10 | #ifndef __ASM_SH_SMP_H |
11 | #define __ASM_SH_SMP_H | 11 | #define __ASM_SH_SMP_H |
12 | 12 | ||
13 | #include <linux/config.h> | 13 | #include <linux/config.h> |
14 | #include <linux/bitops.h> | 14 | #include <linux/bitops.h> |
15 | #include <linux/cpumask.h> | 15 | #include <linux/cpumask.h> |
16 | 16 | ||
17 | #ifdef CONFIG_SMP | 17 | #ifdef CONFIG_SMP |
18 | 18 | ||
19 | #include <asm/spinlock.h> | 19 | #include <asm/spinlock.h> |
20 | #include <asm/atomic.h> | 20 | #include <asm/atomic.h> |
21 | #include <asm/current.h> | 21 | #include <asm/current.h> |
22 | 22 | ||
23 | extern cpumask_t cpu_online_map; | 23 | extern cpumask_t cpu_online_map; |
24 | extern cpumask_t cpu_possible_map; | 24 | extern cpumask_t cpu_possible_map; |
25 | 25 | ||
26 | #define cpu_online(cpu) cpu_isset(cpu, cpu_online_map) | 26 | #define cpu_online(cpu) cpu_isset(cpu, cpu_online_map) |
27 | 27 | ||
28 | #define smp_processor_id() (current_thread_info()->cpu) | 28 | #define raw_smp_processor_id() (current_thread_info()->cpu) |
29 | 29 | ||
30 | /* I've no idea what the real meaning of this is */ | 30 | /* I've no idea what the real meaning of this is */ |
31 | #define PROC_CHANGE_PENALTY 20 | 31 | #define PROC_CHANGE_PENALTY 20 |
32 | 32 | ||
33 | #define NO_PROC_ID (-1) | 33 | #define NO_PROC_ID (-1) |
34 | 34 | ||
35 | struct smp_fn_call_struct { | 35 | struct smp_fn_call_struct { |
36 | spinlock_t lock; | 36 | spinlock_t lock; |
37 | atomic_t finished; | 37 | atomic_t finished; |
38 | void (*fn)(void *); | 38 | void (*fn)(void *); |
39 | void *data; | 39 | void *data; |
40 | }; | 40 | }; |
41 | 41 | ||
42 | extern struct smp_fn_call_struct smp_fn_call; | 42 | extern struct smp_fn_call_struct smp_fn_call; |
43 | 43 | ||
44 | #define SMP_MSG_RESCHEDULE 0x0001 | 44 | #define SMP_MSG_RESCHEDULE 0x0001 |
45 | 45 | ||
46 | #endif /* CONFIG_SMP */ | 46 | #endif /* CONFIG_SMP */ |
47 | 47 | ||
48 | #endif /* __ASM_SH_SMP_H */ | 48 | #endif /* __ASM_SH_SMP_H */ |
49 | 49 |
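Several headers in this diff (sh here, sparc and sparc64 below) define raw_smp_processor_id() as current_thread_info()->cpu. A hedged sketch of the usual caller pattern, with a hypothetical per-CPU array:

	/* Illustrative only: index per-CPU data with the raw accessor when the
	 * caller already knows it cannot be migrated off this CPU. */
	static unsigned long hypothetical_event_count[NR_CPUS];

	static void hypothetical_count_event(void)
	{
		hypothetical_event_count[raw_smp_processor_id()]++;
	}
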
include/asm-sparc/smp.h
1 | /* smp.h: Sparc specific SMP stuff. | 1 | /* smp.h: Sparc specific SMP stuff. |
2 | * | 2 | * |
3 | * Copyright (C) 1996 David S. Miller (davem@caip.rutgers.edu) | 3 | * Copyright (C) 1996 David S. Miller (davem@caip.rutgers.edu) |
4 | */ | 4 | */ |
5 | 5 | ||
6 | #ifndef _SPARC_SMP_H | 6 | #ifndef _SPARC_SMP_H |
7 | #define _SPARC_SMP_H | 7 | #define _SPARC_SMP_H |
8 | 8 | ||
9 | #include <linux/config.h> | 9 | #include <linux/config.h> |
10 | #include <linux/threads.h> | 10 | #include <linux/threads.h> |
11 | #include <asm/head.h> | 11 | #include <asm/head.h> |
12 | #include <asm/btfixup.h> | 12 | #include <asm/btfixup.h> |
13 | 13 | ||
14 | #ifndef __ASSEMBLY__ | 14 | #ifndef __ASSEMBLY__ |
15 | 15 | ||
16 | #include <linux/cpumask.h> | 16 | #include <linux/cpumask.h> |
17 | 17 | ||
18 | #endif /* __ASSEMBLY__ */ | 18 | #endif /* __ASSEMBLY__ */ |
19 | 19 | ||
20 | #ifdef CONFIG_SMP | 20 | #ifdef CONFIG_SMP |
21 | 21 | ||
22 | #ifndef __ASSEMBLY__ | 22 | #ifndef __ASSEMBLY__ |
23 | 23 | ||
24 | #include <asm/ptrace.h> | 24 | #include <asm/ptrace.h> |
25 | #include <asm/asi.h> | 25 | #include <asm/asi.h> |
26 | #include <asm/atomic.h> | 26 | #include <asm/atomic.h> |
27 | 27 | ||
28 | /* | 28 | /* |
29 | * Private routines/data | 29 | * Private routines/data |
30 | */ | 30 | */ |
31 | 31 | ||
32 | extern unsigned char boot_cpu_id; | 32 | extern unsigned char boot_cpu_id; |
33 | extern cpumask_t phys_cpu_present_map; | 33 | extern cpumask_t phys_cpu_present_map; |
34 | #define cpu_possible_map phys_cpu_present_map | 34 | #define cpu_possible_map phys_cpu_present_map |
35 | 35 | ||
36 | typedef void (*smpfunc_t)(unsigned long, unsigned long, unsigned long, | 36 | typedef void (*smpfunc_t)(unsigned long, unsigned long, unsigned long, |
37 | unsigned long, unsigned long); | 37 | unsigned long, unsigned long); |
38 | 38 | ||
39 | /* | 39 | /* |
40 | * General functions that each host system must provide. | 40 | * General functions that each host system must provide. |
41 | */ | 41 | */ |
42 | 42 | ||
43 | void sun4m_init_smp(void); | 43 | void sun4m_init_smp(void); |
44 | void sun4d_init_smp(void); | 44 | void sun4d_init_smp(void); |
45 | 45 | ||
46 | void smp_callin(void); | 46 | void smp_callin(void); |
47 | void smp_boot_cpus(void); | 47 | void smp_boot_cpus(void); |
48 | void smp_store_cpu_info(int); | 48 | void smp_store_cpu_info(int); |
49 | 49 | ||
50 | struct seq_file; | 50 | struct seq_file; |
51 | void smp_bogo(struct seq_file *); | 51 | void smp_bogo(struct seq_file *); |
52 | void smp_info(struct seq_file *); | 52 | void smp_info(struct seq_file *); |
53 | 53 | ||
54 | BTFIXUPDEF_CALL(void, smp_cross_call, smpfunc_t, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long) | 54 | BTFIXUPDEF_CALL(void, smp_cross_call, smpfunc_t, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long) |
55 | BTFIXUPDEF_CALL(void, smp_message_pass, int, int, unsigned long, int) | 55 | BTFIXUPDEF_CALL(void, smp_message_pass, int, int, unsigned long, int) |
56 | BTFIXUPDEF_CALL(int, __hard_smp_processor_id, void) | 56 | BTFIXUPDEF_CALL(int, __hard_smp_processor_id, void) |
57 | BTFIXUPDEF_BLACKBOX(hard_smp_processor_id) | 57 | BTFIXUPDEF_BLACKBOX(hard_smp_processor_id) |
58 | BTFIXUPDEF_BLACKBOX(load_current) | 58 | BTFIXUPDEF_BLACKBOX(load_current) |
59 | 59 | ||
60 | #define smp_cross_call(func,arg1,arg2,arg3,arg4,arg5) BTFIXUP_CALL(smp_cross_call)(func,arg1,arg2,arg3,arg4,arg5) | 60 | #define smp_cross_call(func,arg1,arg2,arg3,arg4,arg5) BTFIXUP_CALL(smp_cross_call)(func,arg1,arg2,arg3,arg4,arg5) |
61 | #define smp_message_pass(target,msg,data,wait) BTFIXUP_CALL(smp_message_pass)(target,msg,data,wait) | 61 | #define smp_message_pass(target,msg,data,wait) BTFIXUP_CALL(smp_message_pass)(target,msg,data,wait) |
62 | 62 | ||
63 | extern __inline__ void xc0(smpfunc_t func) { smp_cross_call(func, 0, 0, 0, 0, 0); } | 63 | extern __inline__ void xc0(smpfunc_t func) { smp_cross_call(func, 0, 0, 0, 0, 0); } |
64 | extern __inline__ void xc1(smpfunc_t func, unsigned long arg1) | 64 | extern __inline__ void xc1(smpfunc_t func, unsigned long arg1) |
65 | { smp_cross_call(func, arg1, 0, 0, 0, 0); } | 65 | { smp_cross_call(func, arg1, 0, 0, 0, 0); } |
66 | extern __inline__ void xc2(smpfunc_t func, unsigned long arg1, unsigned long arg2) | 66 | extern __inline__ void xc2(smpfunc_t func, unsigned long arg1, unsigned long arg2) |
67 | { smp_cross_call(func, arg1, arg2, 0, 0, 0); } | 67 | { smp_cross_call(func, arg1, arg2, 0, 0, 0); } |
68 | extern __inline__ void xc3(smpfunc_t func, unsigned long arg1, unsigned long arg2, | 68 | extern __inline__ void xc3(smpfunc_t func, unsigned long arg1, unsigned long arg2, |
69 | unsigned long arg3) | 69 | unsigned long arg3) |
70 | { smp_cross_call(func, arg1, arg2, arg3, 0, 0); } | 70 | { smp_cross_call(func, arg1, arg2, arg3, 0, 0); } |
71 | extern __inline__ void xc4(smpfunc_t func, unsigned long arg1, unsigned long arg2, | 71 | extern __inline__ void xc4(smpfunc_t func, unsigned long arg1, unsigned long arg2, |
72 | unsigned long arg3, unsigned long arg4) | 72 | unsigned long arg3, unsigned long arg4) |
73 | { smp_cross_call(func, arg1, arg2, arg3, arg4, 0); } | 73 | { smp_cross_call(func, arg1, arg2, arg3, arg4, 0); } |
74 | extern __inline__ void xc5(smpfunc_t func, unsigned long arg1, unsigned long arg2, | 74 | extern __inline__ void xc5(smpfunc_t func, unsigned long arg1, unsigned long arg2, |
75 | unsigned long arg3, unsigned long arg4, unsigned long arg5) | 75 | unsigned long arg3, unsigned long arg4, unsigned long arg5) |
76 | { smp_cross_call(func, arg1, arg2, arg3, arg4, arg5); } | 76 | { smp_cross_call(func, arg1, arg2, arg3, arg4, arg5); } |
77 | 77 | ||
78 | extern __inline__ int smp_call_function(void (*func)(void *info), void *info, int nonatomic, int wait) | 78 | extern __inline__ int smp_call_function(void (*func)(void *info), void *info, int nonatomic, int wait) |
79 | { | 79 | { |
80 | xc1((smpfunc_t)func, (unsigned long)info); | 80 | xc1((smpfunc_t)func, (unsigned long)info); |
81 | return 0; | 81 | return 0; |
82 | } | 82 | } |
83 | 83 | ||
84 | extern __volatile__ int __cpu_number_map[NR_CPUS]; | 84 | extern __volatile__ int __cpu_number_map[NR_CPUS]; |
85 | extern __volatile__ int __cpu_logical_map[NR_CPUS]; | 85 | extern __volatile__ int __cpu_logical_map[NR_CPUS]; |
86 | 86 | ||
87 | extern __inline__ int cpu_logical_map(int cpu) | 87 | extern __inline__ int cpu_logical_map(int cpu) |
88 | { | 88 | { |
89 | return __cpu_logical_map[cpu]; | 89 | return __cpu_logical_map[cpu]; |
90 | } | 90 | } |
91 | extern __inline__ int cpu_number_map(int cpu) | 91 | extern __inline__ int cpu_number_map(int cpu) |
92 | { | 92 | { |
93 | return __cpu_number_map[cpu]; | 93 | return __cpu_number_map[cpu]; |
94 | } | 94 | } |
95 | 95 | ||
96 | extern __inline__ int hard_smp4m_processor_id(void) | 96 | extern __inline__ int hard_smp4m_processor_id(void) |
97 | { | 97 | { |
98 | int cpuid; | 98 | int cpuid; |
99 | 99 | ||
100 | __asm__ __volatile__("rd %%tbr, %0\n\t" | 100 | __asm__ __volatile__("rd %%tbr, %0\n\t" |
101 | "srl %0, 12, %0\n\t" | 101 | "srl %0, 12, %0\n\t" |
102 | "and %0, 3, %0\n\t" : | 102 | "and %0, 3, %0\n\t" : |
103 | "=&r" (cpuid)); | 103 | "=&r" (cpuid)); |
104 | return cpuid; | 104 | return cpuid; |
105 | } | 105 | } |
106 | 106 | ||
107 | extern __inline__ int hard_smp4d_processor_id(void) | 107 | extern __inline__ int hard_smp4d_processor_id(void) |
108 | { | 108 | { |
109 | int cpuid; | 109 | int cpuid; |
110 | 110 | ||
111 | __asm__ __volatile__("lda [%%g0] %1, %0\n\t" : | 111 | __asm__ __volatile__("lda [%%g0] %1, %0\n\t" : |
112 | "=&r" (cpuid) : "i" (ASI_M_VIKING_TMP1)); | 112 | "=&r" (cpuid) : "i" (ASI_M_VIKING_TMP1)); |
113 | return cpuid; | 113 | return cpuid; |
114 | } | 114 | } |
115 | 115 | ||
116 | #ifndef MODULE | 116 | #ifndef MODULE |
117 | extern __inline__ int hard_smp_processor_id(void) | 117 | extern __inline__ int hard_smp_processor_id(void) |
118 | { | 118 | { |
119 | int cpuid; | 119 | int cpuid; |
120 | 120 | ||
121 | /* Black box - sun4m | 121 | /* Black box - sun4m |
122 | __asm__ __volatile__("rd %%tbr, %0\n\t" | 122 | __asm__ __volatile__("rd %%tbr, %0\n\t" |
123 | "srl %0, 12, %0\n\t" | 123 | "srl %0, 12, %0\n\t" |
124 | "and %0, 3, %0\n\t" : | 124 | "and %0, 3, %0\n\t" : |
125 | "=&r" (cpuid)); | 125 | "=&r" (cpuid)); |
126 | - sun4d | 126 | - sun4d |
127 | __asm__ __volatile__("lda [%g0] ASI_M_VIKING_TMP1, %0\n\t" | 127 | __asm__ __volatile__("lda [%g0] ASI_M_VIKING_TMP1, %0\n\t" |
128 | "nop; nop" : | 128 | "nop; nop" : |
129 | "=&r" (cpuid)); | 129 | "=&r" (cpuid)); |
130 | See btfixup.h and btfixupprep.c to understand how a blackbox works. | 130 | See btfixup.h and btfixupprep.c to understand how a blackbox works. |
131 | */ | 131 | */ |
132 | __asm__ __volatile__("sethi %%hi(___b_hard_smp_processor_id), %0\n\t" | 132 | __asm__ __volatile__("sethi %%hi(___b_hard_smp_processor_id), %0\n\t" |
133 | "sethi %%hi(boot_cpu_id), %0\n\t" | 133 | "sethi %%hi(boot_cpu_id), %0\n\t" |
134 | "ldub [%0 + %%lo(boot_cpu_id)], %0\n\t" : | 134 | "ldub [%0 + %%lo(boot_cpu_id)], %0\n\t" : |
135 | "=&r" (cpuid)); | 135 | "=&r" (cpuid)); |
136 | return cpuid; | 136 | return cpuid; |
137 | } | 137 | } |
138 | #else | 138 | #else |
139 | extern __inline__ int hard_smp_processor_id(void) | 139 | extern __inline__ int hard_smp_processor_id(void) |
140 | { | 140 | { |
141 | int cpuid; | 141 | int cpuid; |
142 | 142 | ||
143 | __asm__ __volatile__("mov %%o7, %%g1\n\t" | 143 | __asm__ __volatile__("mov %%o7, %%g1\n\t" |
144 | "call ___f___hard_smp_processor_id\n\t" | 144 | "call ___f___hard_smp_processor_id\n\t" |
145 | " nop\n\t" | 145 | " nop\n\t" |
146 | "mov %%g2, %0\n\t" : "=r"(cpuid) : : "g1", "g2"); | 146 | "mov %%g2, %0\n\t" : "=r"(cpuid) : : "g1", "g2"); |
147 | return cpuid; | 147 | return cpuid; |
148 | } | 148 | } |
149 | #endif | 149 | #endif |
150 | 150 | ||
151 | #define smp_processor_id() (current_thread_info()->cpu) | 151 | #define raw_smp_processor_id() (current_thread_info()->cpu) |
152 | 152 | ||
153 | #define prof_multiplier(__cpu) cpu_data(__cpu).multiplier | 153 | #define prof_multiplier(__cpu) cpu_data(__cpu).multiplier |
154 | #define prof_counter(__cpu) cpu_data(__cpu).counter | 154 | #define prof_counter(__cpu) cpu_data(__cpu).counter |
155 | 155 | ||
156 | #endif /* !(__ASSEMBLY__) */ | 156 | #endif /* !(__ASSEMBLY__) */ |
157 | 157 | ||
158 | /* Sparc specific messages. */ | 158 | /* Sparc specific messages. */ |
159 | #define MSG_CROSS_CALL 0x0005 /* run func on cpus */ | 159 | #define MSG_CROSS_CALL 0x0005 /* run func on cpus */ |
160 | 160 | ||
161 | /* Empirical PROM processor mailbox constants. If the per-cpu mailbox | 161 | /* Empirical PROM processor mailbox constants. If the per-cpu mailbox |
162 | * contains something other than one of these then the ipi is from | 162 | * contains something other than one of these then the ipi is from |
163 | * Linux's active_kernel_processor. This facility exists so that | 163 | * Linux's active_kernel_processor. This facility exists so that |
164 | * the boot monitor can capture all the other cpus when one catches | 164 | * the boot monitor can capture all the other cpus when one catches |
165 | * a watchdog reset or the user enters the monitor using L1-A keys. | 165 | * a watchdog reset or the user enters the monitor using L1-A keys. |
166 | */ | 166 | */ |
167 | #define MBOX_STOPCPU 0xFB | 167 | #define MBOX_STOPCPU 0xFB |
168 | #define MBOX_IDLECPU 0xFC | 168 | #define MBOX_IDLECPU 0xFC |
169 | #define MBOX_IDLECPU2 0xFD | 169 | #define MBOX_IDLECPU2 0xFD |
170 | #define MBOX_STOPCPU2 0xFE | 170 | #define MBOX_STOPCPU2 0xFE |
171 | 171 | ||
172 | #endif /* SMP */ | 172 | #endif /* SMP */ |
173 | 173 | ||
174 | #define NO_PROC_ID 0xFF | 174 | #define NO_PROC_ID 0xFF |
175 | 175 | ||
176 | #endif /* !(_SPARC_SMP_H) */ | 176 | #endif /* !(_SPARC_SMP_H) */ |
177 | 177 |
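On sparc, smp_call_function() above is an inline that forwards the callback through xc1() and ignores its nonatomic and wait arguments. A hedged usage sketch with a hypothetical callback:

	/* Illustrative only: cross-call a one-argument callback via the inline
	 * smp_call_function() defined above. */
	static void hypothetical_drain(void *info)
	{
		/* per-cpu work; 'info' carries shared context */
	}

	static void hypothetical_drain_all(void *ctx)
	{
		smp_call_function(hypothetical_drain, ctx, 0, 1);
	}
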
include/asm-sparc64/smp.h
1 | /* smp.h: Sparc64 specific SMP stuff. | 1 | /* smp.h: Sparc64 specific SMP stuff. |
2 | * | 2 | * |
3 | * Copyright (C) 1996 David S. Miller (davem@caip.rutgers.edu) | 3 | * Copyright (C) 1996 David S. Miller (davem@caip.rutgers.edu) |
4 | */ | 4 | */ |
5 | 5 | ||
6 | #ifndef _SPARC64_SMP_H | 6 | #ifndef _SPARC64_SMP_H |
7 | #define _SPARC64_SMP_H | 7 | #define _SPARC64_SMP_H |
8 | 8 | ||
9 | #include <linux/config.h> | 9 | #include <linux/config.h> |
10 | #include <linux/threads.h> | 10 | #include <linux/threads.h> |
11 | #include <asm/asi.h> | 11 | #include <asm/asi.h> |
12 | #include <asm/starfire.h> | 12 | #include <asm/starfire.h> |
13 | #include <asm/spitfire.h> | 13 | #include <asm/spitfire.h> |
14 | 14 | ||
15 | #ifndef __ASSEMBLY__ | 15 | #ifndef __ASSEMBLY__ |
16 | 16 | ||
17 | #include <linux/cpumask.h> | 17 | #include <linux/cpumask.h> |
18 | #include <linux/cache.h> | 18 | #include <linux/cache.h> |
19 | 19 | ||
20 | #endif /* !(__ASSEMBLY__) */ | 20 | #endif /* !(__ASSEMBLY__) */ |
21 | 21 | ||
22 | #ifdef CONFIG_SMP | 22 | #ifdef CONFIG_SMP |
23 | 23 | ||
24 | #ifndef __ASSEMBLY__ | 24 | #ifndef __ASSEMBLY__ |
25 | 25 | ||
26 | /* | 26 | /* |
27 | * Private routines/data | 27 | * Private routines/data |
28 | */ | 28 | */ |
29 | 29 | ||
30 | #include <asm/bitops.h> | 30 | #include <asm/bitops.h> |
31 | #include <asm/atomic.h> | 31 | #include <asm/atomic.h> |
32 | 32 | ||
33 | extern cpumask_t phys_cpu_present_map; | 33 | extern cpumask_t phys_cpu_present_map; |
34 | #define cpu_possible_map phys_cpu_present_map | 34 | #define cpu_possible_map phys_cpu_present_map |
35 | 35 | ||
36 | /* | 36 | /* |
37 | * General functions that each host system must provide. | 37 | * General functions that each host system must provide. |
38 | */ | 38 | */ |
39 | 39 | ||
40 | static __inline__ int hard_smp_processor_id(void) | 40 | static __inline__ int hard_smp_processor_id(void) |
41 | { | 41 | { |
42 | if (tlb_type == cheetah || tlb_type == cheetah_plus) { | 42 | if (tlb_type == cheetah || tlb_type == cheetah_plus) { |
43 | unsigned long cfg, ver; | 43 | unsigned long cfg, ver; |
44 | __asm__ __volatile__("rdpr %%ver, %0" : "=r" (ver)); | 44 | __asm__ __volatile__("rdpr %%ver, %0" : "=r" (ver)); |
45 | if ((ver >> 32) == 0x003e0016) { | 45 | if ((ver >> 32) == 0x003e0016) { |
46 | __asm__ __volatile__("ldxa [%%g0] %1, %0" | 46 | __asm__ __volatile__("ldxa [%%g0] %1, %0" |
47 | : "=r" (cfg) | 47 | : "=r" (cfg) |
48 | : "i" (ASI_JBUS_CONFIG)); | 48 | : "i" (ASI_JBUS_CONFIG)); |
49 | return ((cfg >> 17) & 0x1f); | 49 | return ((cfg >> 17) & 0x1f); |
50 | } else { | 50 | } else { |
51 | __asm__ __volatile__("ldxa [%%g0] %1, %0" | 51 | __asm__ __volatile__("ldxa [%%g0] %1, %0" |
52 | : "=r" (cfg) | 52 | : "=r" (cfg) |
53 | : "i" (ASI_SAFARI_CONFIG)); | 53 | : "i" (ASI_SAFARI_CONFIG)); |
54 | return ((cfg >> 17) & 0x3ff); | 54 | return ((cfg >> 17) & 0x3ff); |
55 | } | 55 | } |
56 | } else if (this_is_starfire != 0) { | 56 | } else if (this_is_starfire != 0) { |
57 | return starfire_hard_smp_processor_id(); | 57 | return starfire_hard_smp_processor_id(); |
58 | } else { | 58 | } else { |
59 | unsigned long upaconfig; | 59 | unsigned long upaconfig; |
60 | __asm__ __volatile__("ldxa [%%g0] %1, %0" | 60 | __asm__ __volatile__("ldxa [%%g0] %1, %0" |
61 | : "=r" (upaconfig) | 61 | : "=r" (upaconfig) |
62 | : "i" (ASI_UPA_CONFIG)); | 62 | : "i" (ASI_UPA_CONFIG)); |
63 | return ((upaconfig >> 17) & 0x1f); | 63 | return ((upaconfig >> 17) & 0x1f); |
64 | } | 64 | } |
65 | } | 65 | } |
66 | 66 | ||
67 | #define smp_processor_id() (current_thread_info()->cpu) | 67 | #define raw_smp_processor_id() (current_thread_info()->cpu) |
68 | 68 | ||
69 | #endif /* !(__ASSEMBLY__) */ | 69 | #endif /* !(__ASSEMBLY__) */ |
70 | 70 | ||
71 | #endif /* !(CONFIG_SMP) */ | 71 | #endif /* !(CONFIG_SMP) */ |
72 | 72 | ||
73 | #define NO_PROC_ID 0xFF | 73 | #define NO_PROC_ID 0xFF |
74 | 74 | ||
75 | #endif /* !(_SPARC64_SMP_H) */ | 75 | #endif /* !(_SPARC64_SMP_H) */ |
76 | 76 |
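Each branch of the sparc64 hard_smp_processor_id() above reads a bus configuration register and extracts an ID field starting at bit 17. The shared extraction step, written out as a plain helper (the helper itself is illustrative):

	/* Illustrative only: recover the CPU/module ID from a configuration
	 * register value, using the same shift and masks as the inline above. */
	static inline int hypothetical_extract_id(unsigned long cfg,
						  unsigned long field_mask)
	{
		return (int)((cfg >> 17) & field_mask);	/* 0x1f or 0x3ff above */
	}
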
include/asm-um/smp.h
1 | #ifndef __UM_SMP_H | 1 | #ifndef __UM_SMP_H |
2 | #define __UM_SMP_H | 2 | #define __UM_SMP_H |
3 | 3 | ||
4 | #ifdef CONFIG_SMP | 4 | #ifdef CONFIG_SMP |
5 | 5 | ||
6 | #include "linux/config.h" | 6 | #include "linux/config.h" |
7 | #include "linux/bitops.h" | 7 | #include "linux/bitops.h" |
8 | #include "asm/current.h" | 8 | #include "asm/current.h" |
9 | #include "linux/cpumask.h" | 9 | #include "linux/cpumask.h" |
10 | 10 | ||
11 | #define smp_processor_id() (current_thread->cpu) | 11 | #define raw_smp_processor_id() (current_thread->cpu) |
12 | |||
12 | #define cpu_logical_map(n) (n) | 13 | #define cpu_logical_map(n) (n) |
13 | #define cpu_number_map(n) (n) | 14 | #define cpu_number_map(n) (n) |
14 | #define PROC_CHANGE_PENALTY 15 /* Pick a number, any number */ | 15 | #define PROC_CHANGE_PENALTY 15 /* Pick a number, any number */ |
15 | extern int hard_smp_processor_id(void); | 16 | extern int hard_smp_processor_id(void); |
16 | #define NO_PROC_ID -1 | 17 | #define NO_PROC_ID -1 |
17 | 18 | ||
18 | extern int ncpus; | 19 | extern int ncpus; |
19 | 20 | ||
20 | 21 | ||
21 | extern inline void smp_cpus_done(unsigned int maxcpus) | 22 | extern inline void smp_cpus_done(unsigned int maxcpus) |
22 | { | 23 | { |
23 | } | 24 | } |
24 | 25 | ||
25 | #endif | 26 | #endif |
26 | 27 | ||
27 | #endif | 28 | #endif |
28 | 29 |
include/asm-x86_64/smp.h
1 | #ifndef __ASM_SMP_H | 1 | #ifndef __ASM_SMP_H |
2 | #define __ASM_SMP_H | 2 | #define __ASM_SMP_H |
3 | 3 | ||
4 | /* | 4 | /* |
5 | * We need the APIC definitions automatically as part of 'smp.h' | 5 | * We need the APIC definitions automatically as part of 'smp.h' |
6 | */ | 6 | */ |
7 | #ifndef __ASSEMBLY__ | 7 | #ifndef __ASSEMBLY__ |
8 | #include <linux/config.h> | 8 | #include <linux/config.h> |
9 | #include <linux/threads.h> | 9 | #include <linux/threads.h> |
10 | #include <linux/cpumask.h> | 10 | #include <linux/cpumask.h> |
11 | #include <linux/bitops.h> | 11 | #include <linux/bitops.h> |
12 | extern int disable_apic; | 12 | extern int disable_apic; |
13 | #endif | 13 | #endif |
14 | 14 | ||
15 | #ifdef CONFIG_X86_LOCAL_APIC | 15 | #ifdef CONFIG_X86_LOCAL_APIC |
16 | #ifndef __ASSEMBLY__ | 16 | #ifndef __ASSEMBLY__ |
17 | #include <asm/fixmap.h> | 17 | #include <asm/fixmap.h> |
18 | #include <asm/mpspec.h> | 18 | #include <asm/mpspec.h> |
19 | #ifdef CONFIG_X86_IO_APIC | 19 | #ifdef CONFIG_X86_IO_APIC |
20 | #include <asm/io_apic.h> | 20 | #include <asm/io_apic.h> |
21 | #endif | 21 | #endif |
22 | #include <asm/apic.h> | 22 | #include <asm/apic.h> |
23 | #include <asm/thread_info.h> | 23 | #include <asm/thread_info.h> |
24 | #endif | 24 | #endif |
25 | #endif | 25 | #endif |
26 | 26 | ||
27 | #ifdef CONFIG_SMP | 27 | #ifdef CONFIG_SMP |
28 | #ifndef ASSEMBLY | 28 | #ifndef ASSEMBLY |
29 | 29 | ||
30 | #include <asm/pda.h> | 30 | #include <asm/pda.h> |
31 | 31 | ||
32 | struct pt_regs; | 32 | struct pt_regs; |
33 | 33 | ||
34 | extern cpumask_t cpu_present_mask; | 34 | extern cpumask_t cpu_present_mask; |
35 | extern cpumask_t cpu_possible_map; | 35 | extern cpumask_t cpu_possible_map; |
36 | extern cpumask_t cpu_online_map; | 36 | extern cpumask_t cpu_online_map; |
37 | extern cpumask_t cpu_callout_map; | 37 | extern cpumask_t cpu_callout_map; |
38 | 38 | ||
39 | /* | 39 | /* |
40 | * Private routines/data | 40 | * Private routines/data |
41 | */ | 41 | */ |
42 | 42 | ||
43 | extern void smp_alloc_memory(void); | 43 | extern void smp_alloc_memory(void); |
44 | extern volatile unsigned long smp_invalidate_needed; | 44 | extern volatile unsigned long smp_invalidate_needed; |
45 | extern int pic_mode; | 45 | extern int pic_mode; |
46 | extern int smp_num_siblings; | 46 | extern int smp_num_siblings; |
47 | extern void smp_flush_tlb(void); | 47 | extern void smp_flush_tlb(void); |
48 | extern void smp_message_irq(int cpl, void *dev_id, struct pt_regs *regs); | 48 | extern void smp_message_irq(int cpl, void *dev_id, struct pt_regs *regs); |
49 | extern void smp_send_reschedule(int cpu); | 49 | extern void smp_send_reschedule(int cpu); |
50 | extern void smp_invalidate_rcv(void); /* Process an NMI */ | 50 | extern void smp_invalidate_rcv(void); /* Process an NMI */ |
51 | extern void zap_low_mappings(void); | 51 | extern void zap_low_mappings(void); |
52 | void smp_stop_cpu(void); | 52 | void smp_stop_cpu(void); |
53 | extern cpumask_t cpu_sibling_map[NR_CPUS]; | 53 | extern cpumask_t cpu_sibling_map[NR_CPUS]; |
54 | extern cpumask_t cpu_core_map[NR_CPUS]; | 54 | extern cpumask_t cpu_core_map[NR_CPUS]; |
55 | extern u8 phys_proc_id[NR_CPUS]; | 55 | extern u8 phys_proc_id[NR_CPUS]; |
56 | extern u8 cpu_core_id[NR_CPUS]; | 56 | extern u8 cpu_core_id[NR_CPUS]; |
57 | 57 | ||
58 | #define SMP_TRAMPOLINE_BASE 0x6000 | 58 | #define SMP_TRAMPOLINE_BASE 0x6000 |
59 | 59 | ||
60 | /* | 60 | /* |
61 | * On x86 all CPUs are mapped 1:1 to the APIC space. | 61 | * On x86 all CPUs are mapped 1:1 to the APIC space. |
62 | * This simplifies scheduling and IPI sending and | 62 | * This simplifies scheduling and IPI sending and |
63 | * compresses data structures. | 63 | * compresses data structures. |
64 | */ | 64 | */ |
65 | 65 | ||
66 | static inline int num_booting_cpus(void) | 66 | static inline int num_booting_cpus(void) |
67 | { | 67 | { |
68 | return cpus_weight(cpu_callout_map); | 68 | return cpus_weight(cpu_callout_map); |
69 | } | 69 | } |
70 | 70 | ||
71 | #define __smp_processor_id() read_pda(cpunumber) | 71 | #define raw_smp_processor_id() read_pda(cpunumber) |
72 | 72 | ||
73 | extern __inline int hard_smp_processor_id(void) | 73 | extern __inline int hard_smp_processor_id(void) |
74 | { | 74 | { |
75 | /* we don't want to mark this access volatile - bad code generation */ | 75 | /* we don't want to mark this access volatile - bad code generation */ |
76 | return GET_APIC_ID(*(unsigned int *)(APIC_BASE+APIC_ID)); | 76 | return GET_APIC_ID(*(unsigned int *)(APIC_BASE+APIC_ID)); |
77 | } | 77 | } |
78 | 78 | ||
79 | extern int safe_smp_processor_id(void); | 79 | extern int safe_smp_processor_id(void); |
80 | 80 | ||
81 | #endif /* !ASSEMBLY */ | 81 | #endif /* !ASSEMBLY */ |
82 | 82 | ||
83 | #define NO_PROC_ID 0xFF /* No processor magic marker */ | 83 | #define NO_PROC_ID 0xFF /* No processor magic marker */ |
84 | 84 | ||
85 | #endif | 85 | #endif |
86 | 86 | ||
87 | #ifndef ASSEMBLY | 87 | #ifndef ASSEMBLY |
88 | /* | 88 | /* |
89 | * Some lowlevel functions might want to know about | 89 | * Some lowlevel functions might want to know about |
90 | * the real APIC ID <-> CPU # mapping. | 90 | * the real APIC ID <-> CPU # mapping. |
91 | */ | 91 | */ |
92 | extern u8 x86_cpu_to_apicid[NR_CPUS]; /* physical ID */ | 92 | extern u8 x86_cpu_to_apicid[NR_CPUS]; /* physical ID */ |
93 | extern u8 x86_cpu_to_log_apicid[NR_CPUS]; | 93 | extern u8 x86_cpu_to_log_apicid[NR_CPUS]; |
94 | extern u8 bios_cpu_apicid[]; | 94 | extern u8 bios_cpu_apicid[]; |
95 | 95 | ||
96 | static inline unsigned int cpu_mask_to_apicid(cpumask_t cpumask) | 96 | static inline unsigned int cpu_mask_to_apicid(cpumask_t cpumask) |
97 | { | 97 | { |
98 | return cpus_addr(cpumask)[0]; | 98 | return cpus_addr(cpumask)[0]; |
99 | } | 99 | } |
100 | 100 | ||
101 | static inline int cpu_present_to_apicid(int mps_cpu) | 101 | static inline int cpu_present_to_apicid(int mps_cpu) |
102 | { | 102 | { |
103 | if (mps_cpu < NR_CPUS) | 103 | if (mps_cpu < NR_CPUS) |
104 | return (int)bios_cpu_apicid[mps_cpu]; | 104 | return (int)bios_cpu_apicid[mps_cpu]; |
105 | else | 105 | else |
106 | return BAD_APICID; | 106 | return BAD_APICID; |
107 | } | 107 | } |
108 | 108 | ||
109 | #endif /* !ASSEMBLY */ | 109 | #endif /* !ASSEMBLY */ |
110 | 110 | ||
111 | #ifndef CONFIG_SMP | 111 | #ifndef CONFIG_SMP |
112 | #define stack_smp_processor_id() 0 | 112 | #define stack_smp_processor_id() 0 |
113 | #define safe_smp_processor_id() 0 | 113 | #define safe_smp_processor_id() 0 |
114 | #define cpu_logical_map(x) (x) | 114 | #define cpu_logical_map(x) (x) |
115 | #else | 115 | #else |
116 | #include <asm/thread_info.h> | 116 | #include <asm/thread_info.h> |
117 | #define stack_smp_processor_id() \ | 117 | #define stack_smp_processor_id() \ |
118 | ({ \ | 118 | ({ \ |
119 | struct thread_info *ti; \ | 119 | struct thread_info *ti; \ |
120 | __asm__("andq %%rsp,%0; ":"=r" (ti) : "0" (CURRENT_MASK)); \ | 120 | __asm__("andq %%rsp,%0; ":"=r" (ti) : "0" (CURRENT_MASK)); \ |
121 | ti->cpu; \ | 121 | ti->cpu; \ |
122 | }) | 122 | }) |
123 | #endif | 123 | #endif |
124 | 124 | ||
125 | #ifndef __ASSEMBLY__ | 125 | #ifndef __ASSEMBLY__ |
126 | static __inline int logical_smp_processor_id(void) | 126 | static __inline int logical_smp_processor_id(void) |
127 | { | 127 | { |
128 | /* we don't want to mark this access volatile - bad code generation */ | 128 | /* we don't want to mark this access volatile - bad code generation */ |
129 | return GET_APIC_LOGICAL_ID(*(unsigned long *)(APIC_BASE+APIC_LDR)); | 129 | return GET_APIC_LOGICAL_ID(*(unsigned long *)(APIC_BASE+APIC_LDR)); |
130 | } | 130 | } |
131 | #endif | 131 | #endif |
132 | 132 | ||
133 | #endif | 133 | #endif |
134 | 134 | ||
135 | 135 |
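The x86_64 stack_smp_processor_id() above masks the stack pointer with CURRENT_MASK to locate the thread_info at the base of the current kernel stack. A rough C rendering under that assumption:

	/* Illustrative only: C equivalent of the andq-based macro above; it
	 * assumes thread_info sits at the CURRENT_MASK-aligned stack base. */
	static inline int hypothetical_stack_cpu(void)
	{
		unsigned long sp;
		struct thread_info *ti;

		__asm__("movq %%rsp, %0" : "=r" (sp));
		ti = (struct thread_info *)(sp & CURRENT_MASK);
		return ti->cpu;
	}
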
include/linux/mmzone.h
1 | #ifndef _LINUX_MMZONE_H | 1 | #ifndef _LINUX_MMZONE_H |
2 | #define _LINUX_MMZONE_H | 2 | #define _LINUX_MMZONE_H |
3 | 3 | ||
4 | #ifdef __KERNEL__ | 4 | #ifdef __KERNEL__ |
5 | #ifndef __ASSEMBLY__ | 5 | #ifndef __ASSEMBLY__ |
6 | 6 | ||
7 | #include <linux/config.h> | 7 | #include <linux/config.h> |
8 | #include <linux/spinlock.h> | 8 | #include <linux/spinlock.h> |
9 | #include <linux/list.h> | 9 | #include <linux/list.h> |
10 | #include <linux/wait.h> | 10 | #include <linux/wait.h> |
11 | #include <linux/cache.h> | 11 | #include <linux/cache.h> |
12 | #include <linux/threads.h> | 12 | #include <linux/threads.h> |
13 | #include <linux/numa.h> | 13 | #include <linux/numa.h> |
14 | #include <linux/init.h> | 14 | #include <linux/init.h> |
15 | #include <asm/atomic.h> | 15 | #include <asm/atomic.h> |
16 | 16 | ||
17 | /* Free memory management - zoned buddy allocator. */ | 17 | /* Free memory management - zoned buddy allocator. */ |
18 | #ifndef CONFIG_FORCE_MAX_ZONEORDER | 18 | #ifndef CONFIG_FORCE_MAX_ZONEORDER |
19 | #define MAX_ORDER 11 | 19 | #define MAX_ORDER 11 |
20 | #else | 20 | #else |
21 | #define MAX_ORDER CONFIG_FORCE_MAX_ZONEORDER | 21 | #define MAX_ORDER CONFIG_FORCE_MAX_ZONEORDER |
22 | #endif | 22 | #endif |
23 | 23 | ||
24 | struct free_area { | 24 | struct free_area { |
25 | struct list_head free_list; | 25 | struct list_head free_list; |
26 | unsigned long nr_free; | 26 | unsigned long nr_free; |
27 | }; | 27 | }; |
28 | 28 | ||
29 | struct pglist_data; | 29 | struct pglist_data; |
30 | 30 | ||
31 | /* | 31 | /* |
32 | * zone->lock and zone->lru_lock are two of the hottest locks in the kernel. | 32 | * zone->lock and zone->lru_lock are two of the hottest locks in the kernel. |
33 | * So add a wild amount of padding here to ensure that they fall into separate | 33 | * So add a wild amount of padding here to ensure that they fall into separate |
34 | * cachelines. There are very few zone structures in the machine, so space | 34 | * cachelines. There are very few zone structures in the machine, so space |
35 | * consumption is not a concern here. | 35 | * consumption is not a concern here. |
36 | */ | 36 | */ |
37 | #if defined(CONFIG_SMP) | 37 | #if defined(CONFIG_SMP) |
38 | struct zone_padding { | 38 | struct zone_padding { |
39 | char x[0]; | 39 | char x[0]; |
40 | } ____cacheline_maxaligned_in_smp; | 40 | } ____cacheline_maxaligned_in_smp; |
41 | #define ZONE_PADDING(name) struct zone_padding name; | 41 | #define ZONE_PADDING(name) struct zone_padding name; |
42 | #else | 42 | #else |
43 | #define ZONE_PADDING(name) | 43 | #define ZONE_PADDING(name) |
44 | #endif | 44 | #endif |
45 | 45 | ||
46 | struct per_cpu_pages { | 46 | struct per_cpu_pages { |
47 | int count; /* number of pages in the list */ | 47 | int count; /* number of pages in the list */ |
48 | int low; /* low watermark, refill needed */ | 48 | int low; /* low watermark, refill needed */ |
49 | int high; /* high watermark, emptying needed */ | 49 | int high; /* high watermark, emptying needed */ |
50 | int batch; /* chunk size for buddy add/remove */ | 50 | int batch; /* chunk size for buddy add/remove */ |
51 | struct list_head list; /* the list of pages */ | 51 | struct list_head list; /* the list of pages */ |
52 | }; | 52 | }; |
53 | 53 | ||
54 | struct per_cpu_pageset { | 54 | struct per_cpu_pageset { |
55 | struct per_cpu_pages pcp[2]; /* 0: hot. 1: cold */ | 55 | struct per_cpu_pages pcp[2]; /* 0: hot. 1: cold */ |
56 | #ifdef CONFIG_NUMA | 56 | #ifdef CONFIG_NUMA |
57 | unsigned long numa_hit; /* allocated in intended node */ | 57 | unsigned long numa_hit; /* allocated in intended node */ |
58 | unsigned long numa_miss; /* allocated in non intended node */ | 58 | unsigned long numa_miss; /* allocated in non intended node */ |
59 | unsigned long numa_foreign; /* was intended here, hit elsewhere */ | 59 | unsigned long numa_foreign; /* was intended here, hit elsewhere */ |
60 | unsigned long interleave_hit; /* interleaver preferred this zone */ | 60 | unsigned long interleave_hit; /* interleaver preferred this zone */ |
61 | unsigned long local_node; /* allocation from local node */ | 61 | unsigned long local_node; /* allocation from local node */ |
62 | unsigned long other_node; /* allocation from other node */ | 62 | unsigned long other_node; /* allocation from other node */ |
63 | #endif | 63 | #endif |
64 | } ____cacheline_aligned_in_smp; | 64 | } ____cacheline_aligned_in_smp; |
65 | 65 | ||
66 | #define ZONE_DMA 0 | 66 | #define ZONE_DMA 0 |
67 | #define ZONE_NORMAL 1 | 67 | #define ZONE_NORMAL 1 |
68 | #define ZONE_HIGHMEM 2 | 68 | #define ZONE_HIGHMEM 2 |
69 | 69 | ||
70 | #define MAX_NR_ZONES 3 /* Sync this with ZONES_SHIFT */ | 70 | #define MAX_NR_ZONES 3 /* Sync this with ZONES_SHIFT */ |
71 | #define ZONES_SHIFT 2 /* ceil(log2(MAX_NR_ZONES)) */ | 71 | #define ZONES_SHIFT 2 /* ceil(log2(MAX_NR_ZONES)) */ |
72 | 72 | ||
73 | 73 | ||
74 | /* | 74 | /* |
75 | * When a memory allocation must conform to specific limitations (such | 75 | * When a memory allocation must conform to specific limitations (such |
76 | * as being suitable for DMA) the caller will pass in hints to the | 76 | * as being suitable for DMA) the caller will pass in hints to the |
77 | * allocator in the gfp_mask, in the zone modifier bits. These bits | 77 | * allocator in the gfp_mask, in the zone modifier bits. These bits |
78 | * are used to select a priority ordered list of memory zones which | 78 | * are used to select a priority ordered list of memory zones which |
79 | * match the requested limits. GFP_ZONEMASK defines which bits within | 79 | * match the requested limits. GFP_ZONEMASK defines which bits within |
80 | * the gfp_mask should be considered as zone modifiers. Each valid | 80 | * the gfp_mask should be considered as zone modifiers. Each valid |
81 | * combination of the zone modifier bits has a corresponding list | 81 | * combination of the zone modifier bits has a corresponding list |
82 | * of zones (in node_zonelists). Thus for two zone modifiers there | 82 | * of zones (in node_zonelists). Thus for two zone modifiers there |
83 | * will be a maximum of 4 (2 ** 2) zonelists, for 3 modifiers there will | 83 | * will be a maximum of 4 (2 ** 2) zonelists, for 3 modifiers there will |
84 | * be 8 (2 ** 3) zonelists. GFP_ZONETYPES defines the number of possible | 84 | * be 8 (2 ** 3) zonelists. GFP_ZONETYPES defines the number of possible |
85 | * combinations of zone modifiers in "zone modifier space". | 85 | * combinations of zone modifiers in "zone modifier space". |
86 | */ | 86 | */ |
87 | #define GFP_ZONEMASK 0x03 | 87 | #define GFP_ZONEMASK 0x03 |
88 | /* | 88 | /* |
89 | * As an optimisation any zone modifier bits which are only valid when | 89 | * As an optimisation any zone modifier bits which are only valid when |
90 | * no other zone modifier bits are set (loners) should be placed in | 90 | * no other zone modifier bits are set (loners) should be placed in |
91 | * the highest order bits of this field. This allows us to reduce the | 91 | * the highest order bits of this field. This allows us to reduce the |
92 | * extent of the zonelists thus saving space. For example in the case | 92 | * extent of the zonelists thus saving space. For example in the case |
93 | * of three zone modifier bits, we could require up to eight zonelists. | 93 | * of three zone modifier bits, we could require up to eight zonelists. |
94 | * If the left most zone modifier is a "loner" then the highest valid | 94 | * If the left most zone modifier is a "loner" then the highest valid |
95 | * zonelist would be four allowing us to allocate only five zonelists. | 95 | * zonelist would be four allowing us to allocate only five zonelists. |
96 | * Use the first form when the left most bit is not a "loner", otherwise | 96 | * Use the first form when the left most bit is not a "loner", otherwise |
97 | * use the second. | 97 | * use the second. |
98 | */ | 98 | */ |
99 | /* #define GFP_ZONETYPES (GFP_ZONEMASK + 1) */ /* Non-loner */ | 99 | /* #define GFP_ZONETYPES (GFP_ZONEMASK + 1) */ /* Non-loner */ |
100 | #define GFP_ZONETYPES ((GFP_ZONEMASK + 1) / 2 + 1) /* Loner */ | 100 | #define GFP_ZONETYPES ((GFP_ZONEMASK + 1) / 2 + 1) /* Loner */ |
101 | 101 | ||
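The comment block above walks through the zonelist-count arithmetic. For the current GFP_ZONEMASK of 0x03 (two modifier bits) the two forms work out as in this small standalone check (user-space, illustrative only):

	#include <assert.h>

	int main(void)
	{
		int zonemask = 0x03;

		/* non-loner form: one zonelist per modifier combination */
		assert(zonemask + 1 == 4);
		/* loner form actually used above */
		assert((zonemask + 1) / 2 + 1 == 3);
		return 0;
	}
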
102 | /* | 102 | /* |
103 | * On machines where it is needed (eg PCs) we divide physical memory | 103 | * On machines where it is needed (eg PCs) we divide physical memory |
104 | * into multiple physical zones. On a PC we have 3 zones: | 104 | * into multiple physical zones. On a PC we have 3 zones: |
105 | * | 105 | * |
106 | * ZONE_DMA < 16 MB ISA DMA capable memory | 106 | * ZONE_DMA < 16 MB ISA DMA capable memory |
107 | * ZONE_NORMAL 16-896 MB direct mapped by the kernel | 107 | * ZONE_NORMAL 16-896 MB direct mapped by the kernel |
108 | * ZONE_HIGHMEM > 896 MB only page cache and user processes | 108 | * ZONE_HIGHMEM > 896 MB only page cache and user processes |
109 | */ | 109 | */ |
110 | 110 | ||
111 | struct zone { | 111 | struct zone { |
112 | /* Fields commonly accessed by the page allocator */ | 112 | /* Fields commonly accessed by the page allocator */ |
113 | unsigned long free_pages; | 113 | unsigned long free_pages; |
114 | unsigned long pages_min, pages_low, pages_high; | 114 | unsigned long pages_min, pages_low, pages_high; |
115 | /* | 115 | /* |
116 | * We don't know if the memory that we're going to allocate will be freeable | 116 | * We don't know if the memory that we're going to allocate will be freeable |
117 | * or/and it will be released eventually, so to avoid totally wasting several | 117 | * or/and it will be released eventually, so to avoid totally wasting several |
118 | * GB of ram we must reserve some of the lower zone memory (otherwise we risk | 118 | * GB of ram we must reserve some of the lower zone memory (otherwise we risk |
119 | * to run OOM on the lower zones despite there's tons of freeable ram | 119 | * to run OOM on the lower zones despite there's tons of freeable ram |
120 | * on the higher zones). This array is recalculated at runtime if the | 120 | * on the higher zones). This array is recalculated at runtime if the |
121 | * sysctl_lowmem_reserve_ratio sysctl changes. | 121 | * sysctl_lowmem_reserve_ratio sysctl changes. |
122 | */ | 122 | */ |
123 | unsigned long lowmem_reserve[MAX_NR_ZONES]; | 123 | unsigned long lowmem_reserve[MAX_NR_ZONES]; |
124 | 124 | ||
125 | struct per_cpu_pageset pageset[NR_CPUS]; | 125 | struct per_cpu_pageset pageset[NR_CPUS]; |
126 | 126 | ||
127 | /* | 127 | /* |
128 | * free areas of different sizes | 128 | * free areas of different sizes |
129 | */ | 129 | */ |
130 | spinlock_t lock; | 130 | spinlock_t lock; |
131 | struct free_area free_area[MAX_ORDER]; | 131 | struct free_area free_area[MAX_ORDER]; |
132 | 132 | ||
133 | 133 | ||
134 | ZONE_PADDING(_pad1_) | 134 | ZONE_PADDING(_pad1_) |
135 | 135 | ||
136 | /* Fields commonly accessed by the page reclaim scanner */ | 136 | /* Fields commonly accessed by the page reclaim scanner */ |
137 | spinlock_t lru_lock; | 137 | spinlock_t lru_lock; |
138 | struct list_head active_list; | 138 | struct list_head active_list; |
139 | struct list_head inactive_list; | 139 | struct list_head inactive_list; |
140 | unsigned long nr_scan_active; | 140 | unsigned long nr_scan_active; |
141 | unsigned long nr_scan_inactive; | 141 | unsigned long nr_scan_inactive; |
142 | unsigned long nr_active; | 142 | unsigned long nr_active; |
143 | unsigned long nr_inactive; | 143 | unsigned long nr_inactive; |
144 | unsigned long pages_scanned; /* since last reclaim */ | 144 | unsigned long pages_scanned; /* since last reclaim */ |
145 | int all_unreclaimable; /* All pages pinned */ | 145 | int all_unreclaimable; /* All pages pinned */ |
146 | 146 | ||
147 | /* | 147 | /* |
148 | * prev_priority holds the scanning priority for this zone. It is | 148 | * prev_priority holds the scanning priority for this zone. It is |
149 | * defined as the scanning priority at which we achieved our reclaim | 149 | * defined as the scanning priority at which we achieved our reclaim |
150 | * target at the previous try_to_free_pages() or balance_pgdat() | 150 | * target at the previous try_to_free_pages() or balance_pgdat() |
151 | * invocation. | 151 | * invocation. |
152 | * | 152 | * |
153 | * We use prev_priority as a measure of how much stress page reclaim is | 153 | * We use prev_priority as a measure of how much stress page reclaim is |
154 | * under - it drives the swappiness decision: whether to unmap mapped | 154 | * under - it drives the swappiness decision: whether to unmap mapped |
155 | * pages. | 155 | * pages. |
156 | * | 156 | * |
157 | * temp_priority is used to remember the scanning priority at which | 157 | * temp_priority is used to remember the scanning priority at which |
158 | * this zone was successfully refilled to free_pages == pages_high. | 158 | * this zone was successfully refilled to free_pages == pages_high. |
159 | * | 159 | * |
160 | * Access to both these fields is quite racy even on uniprocessor. But | 160 | * Access to both these fields is quite racy even on uniprocessor. But |
161 | * it is expected to average out OK. | 161 | * it is expected to average out OK. |
162 | */ | 162 | */ |
163 | int temp_priority; | 163 | int temp_priority; |
164 | int prev_priority; | 164 | int prev_priority; |
165 | 165 | ||
166 | 166 | ||
167 | ZONE_PADDING(_pad2_) | 167 | ZONE_PADDING(_pad2_) |
168 | /* Rarely used or read-mostly fields */ | 168 | /* Rarely used or read-mostly fields */ |
169 | 169 | ||
170 | /* | 170 | /* |
171 | * wait_table -- the array holding the hash table | 171 | * wait_table -- the array holding the hash table |
172 | * wait_table_size -- the size of the hash table array | 172 | * wait_table_size -- the size of the hash table array |
173 | * wait_table_bits -- wait_table_size == (1 << wait_table_bits) | 173 | * wait_table_bits -- wait_table_size == (1 << wait_table_bits) |
174 | * | 174 | * |
175 | * The purpose of all these is to keep track of the people | 175 | * The purpose of all these is to keep track of the people |
176 | * waiting for a page to become available and make them | 176 | * waiting for a page to become available and make them |
177 | * runnable again when possible. The trouble is that this | 177 | * runnable again when possible. The trouble is that this |
178 | * consumes a lot of space, especially when so few things | 178 | * consumes a lot of space, especially when so few things |
179 | * wait on pages at a given time. So instead of using | 179 | * wait on pages at a given time. So instead of using |
180 | * per-page waitqueues, we use a waitqueue hash table. | 180 | * per-page waitqueues, we use a waitqueue hash table. |
181 | * | 181 | * |
182 | * The bucket discipline is to sleep on the same queue when | 182 | * The bucket discipline is to sleep on the same queue when |
183 | * colliding and wake all in that wait queue when removing. | 183 | * colliding and wake all in that wait queue when removing. |
184 | * When something wakes, it must check to be sure its page is | 184 | * When something wakes, it must check to be sure its page is |
185 | * truly available, a la thundering herd. The cost of a | 185 | * truly available, a la thundering herd. The cost of a |
186 | * collision is great, but given the expected load of the | 186 | * collision is great, but given the expected load of the |
187 | * table, they should be so rare as to be outweighed by the | 187 | * table, they should be so rare as to be outweighed by the |
188 | * benefits from the saved space. | 188 | * benefits from the saved space. |
189 | * | 189 | * |
190 | * __wait_on_page_locked() and unlock_page() in mm/filemap.c, are the | 190 | * __wait_on_page_locked() and unlock_page() in mm/filemap.c, are the |
191 | * primary users of these fields, and in mm/page_alloc.c | 191 | * primary users of these fields, and in mm/page_alloc.c |
192 | * free_area_init_core() performs the initialization of them. | 192 | * free_area_init_core() performs the initialization of them. |
193 | */ | 193 | */ |
194 | wait_queue_head_t * wait_table; | 194 | wait_queue_head_t * wait_table; |
195 | unsigned long wait_table_size; | 195 | unsigned long wait_table_size; |
196 | unsigned long wait_table_bits; | 196 | unsigned long wait_table_bits; |
197 | 197 | ||
198 | /* | 198 | /* |
199 | * Discontig memory support fields. | 199 | * Discontig memory support fields. |
200 | */ | 200 | */ |
201 | struct pglist_data *zone_pgdat; | 201 | struct pglist_data *zone_pgdat; |
202 | struct page *zone_mem_map; | 202 | struct page *zone_mem_map; |
203 | /* zone_start_pfn == zone_start_paddr >> PAGE_SHIFT */ | 203 | /* zone_start_pfn == zone_start_paddr >> PAGE_SHIFT */ |
204 | unsigned long zone_start_pfn; | 204 | unsigned long zone_start_pfn; |
205 | 205 | ||
206 | unsigned long spanned_pages; /* total size, including holes */ | 206 | unsigned long spanned_pages; /* total size, including holes */ |
207 | unsigned long present_pages; /* amount of memory (excluding holes) */ | 207 | unsigned long present_pages; /* amount of memory (excluding holes) */ |
208 | 208 | ||
209 | /* | 209 | /* |
210 | * rarely used fields: | 210 | * rarely used fields: |
211 | */ | 211 | */ |
212 | char *name; | 212 | char *name; |
213 | } ____cacheline_maxaligned_in_smp; | 213 | } ____cacheline_maxaligned_in_smp; |
214 | 214 | ||
215 | 215 | ||
216 | /* | 216 | /* |
217 | * The "priority" of VM scanning is how much of the queues we will scan in one | 217 | * The "priority" of VM scanning is how much of the queues we will scan in one |
218 | * go. A value of 12 for DEF_PRIORITY implies that we will scan 1/4096th of the | 218 | * go. A value of 12 for DEF_PRIORITY implies that we will scan 1/4096th of the |
219 | * queues ("queue_length >> 12") during an aging round. | 219 | * queues ("queue_length >> 12") during an aging round. |
220 | */ | 220 | */ |
221 | #define DEF_PRIORITY 12 | 221 | #define DEF_PRIORITY 12 |
222 | 222 | ||
223 | /* | 223 | /* |
224 | * One allocation request operates on a zonelist. A zonelist | 224 | * One allocation request operates on a zonelist. A zonelist |
225 | * is a list of zones, the first one is the 'goal' of the | 225 | * is a list of zones, the first one is the 'goal' of the |
226 | * allocation, the other zones are fallback zones, in decreasing | 226 | * allocation, the other zones are fallback zones, in decreasing |
227 | * priority. | 227 | * priority. |
228 | * | 228 | * |
229 | * Right now a zonelist takes up less than a cacheline. We never | 229 | * Right now a zonelist takes up less than a cacheline. We never |
230 | * modify it apart from boot-up, and only a few indices are used, | 230 | * modify it apart from boot-up, and only a few indices are used, |
231 | * so despite the zonelist table being relatively big, the cache | 231 | * so despite the zonelist table being relatively big, the cache |
232 | * footprint of this construct is very small. | 232 | * footprint of this construct is very small. |
233 | */ | 233 | */ |
234 | struct zonelist { | 234 | struct zonelist { |
235 | struct zone *zones[MAX_NUMNODES * MAX_NR_ZONES + 1]; // NULL delimited | 235 | struct zone *zones[MAX_NUMNODES * MAX_NR_ZONES + 1]; // NULL delimited |
236 | }; | 236 | }; |
237 | 237 | ||
238 | 238 | ||
239 | /* | 239 | /* |
240 | * The pg_data_t structure is used in machines with CONFIG_DISCONTIGMEM | 240 | * The pg_data_t structure is used in machines with CONFIG_DISCONTIGMEM |
241 | * (mostly NUMA machines?) to denote a higher-level memory zone than the | 241 | * (mostly NUMA machines?) to denote a higher-level memory zone than the |
242 | * zone denotes. | 242 | * zone denotes. |
243 | * | 243 | * |
244 | * On NUMA machines, each NUMA node would have a pg_data_t to describe | 244 | * On NUMA machines, each NUMA node would have a pg_data_t to describe |
245 | * its memory layout. | 245 | * its memory layout. |
246 | * | 246 | * |
247 | * Memory statistics and page replacement data structures are maintained on a | 247 | * Memory statistics and page replacement data structures are maintained on a |
248 | * per-zone basis. | 248 | * per-zone basis. |
249 | */ | 249 | */ |
250 | struct bootmem_data; | 250 | struct bootmem_data; |
251 | typedef struct pglist_data { | 251 | typedef struct pglist_data { |
252 | struct zone node_zones[MAX_NR_ZONES]; | 252 | struct zone node_zones[MAX_NR_ZONES]; |
253 | struct zonelist node_zonelists[GFP_ZONETYPES]; | 253 | struct zonelist node_zonelists[GFP_ZONETYPES]; |
254 | int nr_zones; | 254 | int nr_zones; |
255 | struct page *node_mem_map; | 255 | struct page *node_mem_map; |
256 | struct bootmem_data *bdata; | 256 | struct bootmem_data *bdata; |
257 | unsigned long node_start_pfn; | 257 | unsigned long node_start_pfn; |
258 | unsigned long node_present_pages; /* total number of physical pages */ | 258 | unsigned long node_present_pages; /* total number of physical pages */ |
259 | unsigned long node_spanned_pages; /* total size of physical page | 259 | unsigned long node_spanned_pages; /* total size of physical page |
260 | range, including holes */ | 260 | range, including holes */ |
261 | int node_id; | 261 | int node_id; |
262 | struct pglist_data *pgdat_next; | 262 | struct pglist_data *pgdat_next; |
263 | wait_queue_head_t kswapd_wait; | 263 | wait_queue_head_t kswapd_wait; |
264 | struct task_struct *kswapd; | 264 | struct task_struct *kswapd; |
265 | int kswapd_max_order; | 265 | int kswapd_max_order; |
266 | } pg_data_t; | 266 | } pg_data_t; |
267 | 267 | ||
268 | #define node_present_pages(nid) (NODE_DATA(nid)->node_present_pages) | 268 | #define node_present_pages(nid) (NODE_DATA(nid)->node_present_pages) |
269 | #define node_spanned_pages(nid) (NODE_DATA(nid)->node_spanned_pages) | 269 | #define node_spanned_pages(nid) (NODE_DATA(nid)->node_spanned_pages) |
270 | 270 | ||
271 | extern struct pglist_data *pgdat_list; | 271 | extern struct pglist_data *pgdat_list; |
272 | 272 | ||
273 | void __get_zone_counts(unsigned long *active, unsigned long *inactive, | 273 | void __get_zone_counts(unsigned long *active, unsigned long *inactive, |
274 | unsigned long *free, struct pglist_data *pgdat); | 274 | unsigned long *free, struct pglist_data *pgdat); |
275 | void get_zone_counts(unsigned long *active, unsigned long *inactive, | 275 | void get_zone_counts(unsigned long *active, unsigned long *inactive, |
276 | unsigned long *free); | 276 | unsigned long *free); |
277 | void build_all_zonelists(void); | 277 | void build_all_zonelists(void); |
278 | void wakeup_kswapd(struct zone *zone, int order); | 278 | void wakeup_kswapd(struct zone *zone, int order); |
279 | int zone_watermark_ok(struct zone *z, int order, unsigned long mark, | 279 | int zone_watermark_ok(struct zone *z, int order, unsigned long mark, |
280 | int alloc_type, int can_try_harder, int gfp_high); | 280 | int alloc_type, int can_try_harder, int gfp_high); |
281 | 281 | ||
282 | #ifdef CONFIG_HAVE_MEMORY_PRESENT | 282 | #ifdef CONFIG_HAVE_MEMORY_PRESENT |
283 | void memory_present(int nid, unsigned long start, unsigned long end); | 283 | void memory_present(int nid, unsigned long start, unsigned long end); |
284 | #else | 284 | #else |
285 | static inline void memory_present(int nid, unsigned long start, unsigned long end) {} | 285 | static inline void memory_present(int nid, unsigned long start, unsigned long end) {} |
286 | #endif | 286 | #endif |
287 | 287 | ||
288 | #ifdef CONFIG_NEED_NODE_MEMMAP_SIZE | 288 | #ifdef CONFIG_NEED_NODE_MEMMAP_SIZE |
289 | unsigned long __init node_memmap_size_bytes(int, unsigned long, unsigned long); | 289 | unsigned long __init node_memmap_size_bytes(int, unsigned long, unsigned long); |
290 | #endif | 290 | #endif |
291 | 291 | ||
292 | /* | 292 | /* |
293 | * zone_idx() returns 0 for the ZONE_DMA zone, 1 for the ZONE_NORMAL zone, etc. | 293 | * zone_idx() returns 0 for the ZONE_DMA zone, 1 for the ZONE_NORMAL zone, etc. |
294 | */ | 294 | */ |
295 | #define zone_idx(zone) ((zone) - (zone)->zone_pgdat->node_zones) | 295 | #define zone_idx(zone) ((zone) - (zone)->zone_pgdat->node_zones) |
296 | 296 | ||
297 | /** | 297 | /** |
298 | * for_each_pgdat - helper macro to iterate over all nodes | 298 | * for_each_pgdat - helper macro to iterate over all nodes |
299 | * @pgdat - pointer to a pg_data_t variable | 299 | * @pgdat - pointer to a pg_data_t variable |
300 | * | 300 | * |
301 | * Meant to help with common loops of the form | 301 | * Meant to help with common loops of the form |
302 | * pgdat = pgdat_list; | 302 | * pgdat = pgdat_list; |
303 | * while(pgdat) { | 303 | * while(pgdat) { |
304 | * ... | 304 | * ... |
305 | * pgdat = pgdat->pgdat_next; | 305 | * pgdat = pgdat->pgdat_next; |
306 | * } | 306 | * } |
307 | */ | 307 | */ |
308 | #define for_each_pgdat(pgdat) \ | 308 | #define for_each_pgdat(pgdat) \ |
309 | for (pgdat = pgdat_list; pgdat; pgdat = pgdat->pgdat_next) | 309 | for (pgdat = pgdat_list; pgdat; pgdat = pgdat->pgdat_next) |
310 | 310 | ||
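The kernel-doc above already spells out the open-coded loop that for_each_pgdat() replaces. As a minimal sketch of a caller, using only the pgdat_list iterator and the node_id/node_present_pages fields visible earlier in this header (the printk itself is illustrative, not part of this patch):

    /* Sketch: walk every node and report how many pages it contributes. */
    static void report_node_sizes(void)
    {
            pg_data_t *pgdat;

            for_each_pgdat(pgdat)
                    printk(KERN_INFO "node %d: %lu present pages\n",
                           pgdat->node_id, pgdat->node_present_pages);
    }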
311 | /* | 311 | /* |
312 | * next_zone - helper magic for for_each_zone() | 312 | * next_zone - helper magic for for_each_zone() |
313 | * Thanks to William Lee Irwin III for this piece of ingenuity. | 313 | * Thanks to William Lee Irwin III for this piece of ingenuity. |
314 | */ | 314 | */ |
315 | static inline struct zone *next_zone(struct zone *zone) | 315 | static inline struct zone *next_zone(struct zone *zone) |
316 | { | 316 | { |
317 | pg_data_t *pgdat = zone->zone_pgdat; | 317 | pg_data_t *pgdat = zone->zone_pgdat; |
318 | 318 | ||
319 | if (zone < pgdat->node_zones + MAX_NR_ZONES - 1) | 319 | if (zone < pgdat->node_zones + MAX_NR_ZONES - 1) |
320 | zone++; | 320 | zone++; |
321 | else if (pgdat->pgdat_next) { | 321 | else if (pgdat->pgdat_next) { |
322 | pgdat = pgdat->pgdat_next; | 322 | pgdat = pgdat->pgdat_next; |
323 | zone = pgdat->node_zones; | 323 | zone = pgdat->node_zones; |
324 | } else | 324 | } else |
325 | zone = NULL; | 325 | zone = NULL; |
326 | 326 | ||
327 | return zone; | 327 | return zone; |
328 | } | 328 | } |
329 | 329 | ||
330 | /** | 330 | /** |
331 | * for_each_zone - helper macro to iterate over all memory zones | 331 | * for_each_zone - helper macro to iterate over all memory zones |
332 | * @zone - pointer to struct zone variable | 332 | * @zone - pointer to struct zone variable |
333 | * | 333 | * |
334 | * The user only needs to declare the zone variable, for_each_zone | 334 | * The user only needs to declare the zone variable, for_each_zone |
335 | * fills it in. This basically means for_each_zone() is an | 335 | * fills it in. This basically means for_each_zone() is an |
336 | * easier to read version of this piece of code: | 336 | * easier to read version of this piece of code: |
337 | * | 337 | * |
338 | * for (pgdat = pgdat_list; pgdat; pgdat = pgdat->node_next) | 338 | * for (pgdat = pgdat_list; pgdat; pgdat = pgdat->node_next) |
339 | * for (i = 0; i < MAX_NR_ZONES; ++i) { | 339 | * for (i = 0; i < MAX_NR_ZONES; ++i) { |
340 | * struct zone * z = pgdat->node_zones + i; | 340 | * struct zone * z = pgdat->node_zones + i; |
341 | * ... | 341 | * ... |
342 | * } | 342 | * } |
343 | * } | 343 | * } |
344 | */ | 344 | */ |
345 | #define for_each_zone(zone) \ | 345 | #define for_each_zone(zone) \ |
346 | for (zone = pgdat_list->node_zones; zone; zone = next_zone(zone)) | 346 | for (zone = pgdat_list->node_zones; zone; zone = next_zone(zone)) |
347 | 347 | ||
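for_each_zone() simply chains next_zone() across pgdat_list, so callers never have to index node_zones or mention MAX_NR_ZONES themselves. A hedged sketch of a caller that pairs it with zone_idx() from above (the counting function is invented for illustration; the caller is expected to zero the array):

    /* Sketch: count how many zones of each index exist across all nodes. */
    static void count_zone_types(unsigned int counts[MAX_NR_ZONES])
    {
            struct zone *zone;

            for_each_zone(zone)
                    counts[zone_idx(zone)]++;
    }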
348 | static inline int is_highmem_idx(int idx) | 348 | static inline int is_highmem_idx(int idx) |
349 | { | 349 | { |
350 | return (idx == ZONE_HIGHMEM); | 350 | return (idx == ZONE_HIGHMEM); |
351 | } | 351 | } |
352 | 352 | ||
353 | static inline int is_normal_idx(int idx) | 353 | static inline int is_normal_idx(int idx) |
354 | { | 354 | { |
355 | return (idx == ZONE_NORMAL); | 355 | return (idx == ZONE_NORMAL); |
356 | } | 356 | } |
357 | /** | 357 | /** |
358 | * is_highmem - helper function to quickly check if a struct zone is a | 358 | * is_highmem - helper function to quickly check if a struct zone is a |
359 | * highmem zone or not. This is an attempt to keep references | 359 | * highmem zone or not. This is an attempt to keep references |
360 | * to ZONE_{DMA/NORMAL/HIGHMEM/etc} in general code to a minimum. | 360 | * to ZONE_{DMA/NORMAL/HIGHMEM/etc} in general code to a minimum. |
361 | * @zone - pointer to struct zone variable | 361 | * @zone - pointer to struct zone variable |
362 | */ | 362 | */ |
363 | static inline int is_highmem(struct zone *zone) | 363 | static inline int is_highmem(struct zone *zone) |
364 | { | 364 | { |
365 | return zone == zone->zone_pgdat->node_zones + ZONE_HIGHMEM; | 365 | return zone == zone->zone_pgdat->node_zones + ZONE_HIGHMEM; |
366 | } | 366 | } |
367 | 367 | ||
368 | static inline int is_normal(struct zone *zone) | 368 | static inline int is_normal(struct zone *zone) |
369 | { | 369 | { |
370 | return zone == zone->zone_pgdat->node_zones + ZONE_NORMAL; | 370 | return zone == zone->zone_pgdat->node_zones + ZONE_NORMAL; |
371 | } | 371 | } |
372 | 372 | ||
373 | /* These two functions are used to setup the per zone pages min values */ | 373 | /* These two functions are used to setup the per zone pages min values */ |
374 | struct ctl_table; | 374 | struct ctl_table; |
375 | struct file; | 375 | struct file; |
376 | int min_free_kbytes_sysctl_handler(struct ctl_table *, int, struct file *, | 376 | int min_free_kbytes_sysctl_handler(struct ctl_table *, int, struct file *, |
377 | void __user *, size_t *, loff_t *); | 377 | void __user *, size_t *, loff_t *); |
378 | extern int sysctl_lowmem_reserve_ratio[MAX_NR_ZONES-1]; | 378 | extern int sysctl_lowmem_reserve_ratio[MAX_NR_ZONES-1]; |
379 | int lowmem_reserve_ratio_sysctl_handler(struct ctl_table *, int, struct file *, | 379 | int lowmem_reserve_ratio_sysctl_handler(struct ctl_table *, int, struct file *, |
380 | void __user *, size_t *, loff_t *); | 380 | void __user *, size_t *, loff_t *); |
381 | 381 | ||
382 | #include <linux/topology.h> | 382 | #include <linux/topology.h> |
383 | /* Returns the number of the current Node. */ | 383 | /* Returns the number of the current Node. */ |
384 | #define numa_node_id() (cpu_to_node(_smp_processor_id())) | 384 | #define numa_node_id() (cpu_to_node(raw_smp_processor_id())) |
385 | 385 | ||
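numa_node_id() is one of the call sites converted to raw_smp_processor_id(): the result is only a placement hint, so a preemption-induced CPU migration between reading the id and using it is harmless, which is exactly the kind of "legal" use the nondebug variant exists for. A hedged sketch of such a consumer (alloc_pages_node() comes from <linux/gfp.h>, not from this header):

    /* Sketch: use the current node purely as an allocation hint. */
    static struct page *grab_local_page(void)
    {
            return alloc_pages_node(numa_node_id(), GFP_KERNEL, 0);
    }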
386 | #ifndef CONFIG_DISCONTIGMEM | 386 | #ifndef CONFIG_DISCONTIGMEM |
387 | 387 | ||
388 | extern struct pglist_data contig_page_data; | 388 | extern struct pglist_data contig_page_data; |
389 | #define NODE_DATA(nid) (&contig_page_data) | 389 | #define NODE_DATA(nid) (&contig_page_data) |
390 | #define NODE_MEM_MAP(nid) mem_map | 390 | #define NODE_MEM_MAP(nid) mem_map |
391 | #define MAX_NODES_SHIFT 1 | 391 | #define MAX_NODES_SHIFT 1 |
392 | #define pfn_to_nid(pfn) (0) | 392 | #define pfn_to_nid(pfn) (0) |
393 | 393 | ||
394 | #else /* CONFIG_DISCONTIGMEM */ | 394 | #else /* CONFIG_DISCONTIGMEM */ |
395 | 395 | ||
396 | #include <asm/mmzone.h> | 396 | #include <asm/mmzone.h> |
397 | 397 | ||
398 | #if BITS_PER_LONG == 32 || defined(ARCH_HAS_ATOMIC_UNSIGNED) | 398 | #if BITS_PER_LONG == 32 || defined(ARCH_HAS_ATOMIC_UNSIGNED) |
399 | /* | 399 | /* |
400 | * with 32 bit page->flags field, we reserve 8 bits for node/zone info. | 400 | * with 32 bit page->flags field, we reserve 8 bits for node/zone info. |
401 | * there are 3 zones (2 bits) and this leaves 8-2=6 bits for nodes. | 401 | * there are 3 zones (2 bits) and this leaves 8-2=6 bits for nodes. |
402 | */ | 402 | */ |
403 | #define MAX_NODES_SHIFT 6 | 403 | #define MAX_NODES_SHIFT 6 |
404 | #elif BITS_PER_LONG == 64 | 404 | #elif BITS_PER_LONG == 64 |
405 | /* | 405 | /* |
406 | * with 64 bit flags field, there's plenty of room. | 406 | * with 64 bit flags field, there's plenty of room. |
407 | */ | 407 | */ |
408 | #define MAX_NODES_SHIFT 10 | 408 | #define MAX_NODES_SHIFT 10 |
409 | #endif | 409 | #endif |
410 | 410 | ||
411 | #endif /* !CONFIG_DISCONTIGMEM */ | 411 | #endif /* !CONFIG_DISCONTIGMEM */ |
412 | 412 | ||
413 | #if NODES_SHIFT > MAX_NODES_SHIFT | 413 | #if NODES_SHIFT > MAX_NODES_SHIFT |
414 | #error NODES_SHIFT > MAX_NODES_SHIFT | 414 | #error NODES_SHIFT > MAX_NODES_SHIFT |
415 | #endif | 415 | #endif |
416 | 416 | ||
417 | /* There are currently 3 zones: DMA, Normal & Highmem, thus we need 2 bits */ | 417 | /* There are currently 3 zones: DMA, Normal & Highmem, thus we need 2 bits */ |
418 | #define MAX_ZONES_SHIFT 2 | 418 | #define MAX_ZONES_SHIFT 2 |
419 | 419 | ||
420 | #if ZONES_SHIFT > MAX_ZONES_SHIFT | 420 | #if ZONES_SHIFT > MAX_ZONES_SHIFT |
421 | #error ZONES_SHIFT > MAX_ZONES_SHIFT | 421 | #error ZONES_SHIFT > MAX_ZONES_SHIFT |
422 | #endif | 422 | #endif |
423 | 423 | ||
424 | #endif /* !__ASSEMBLY__ */ | 424 | #endif /* !__ASSEMBLY__ */ |
425 | #endif /* __KERNEL__ */ | 425 | #endif /* __KERNEL__ */ |
426 | #endif /* _LINUX_MMZONE_H */ | 426 | #endif /* _LINUX_MMZONE_H */ |
427 | 427 |
include/linux/smp.h
1 | #ifndef __LINUX_SMP_H | 1 | #ifndef __LINUX_SMP_H |
2 | #define __LINUX_SMP_H | 2 | #define __LINUX_SMP_H |
3 | 3 | ||
4 | /* | 4 | /* |
5 | * Generic SMP support | 5 | * Generic SMP support |
6 | * Alan Cox. <alan@redhat.com> | 6 | * Alan Cox. <alan@redhat.com> |
7 | */ | 7 | */ |
8 | 8 | ||
9 | #include <linux/config.h> | 9 | #include <linux/config.h> |
10 | 10 | ||
11 | extern void cpu_idle(void); | 11 | extern void cpu_idle(void); |
12 | 12 | ||
13 | #ifdef CONFIG_SMP | 13 | #ifdef CONFIG_SMP |
14 | 14 | ||
15 | #include <linux/preempt.h> | 15 | #include <linux/preempt.h> |
16 | #include <linux/kernel.h> | 16 | #include <linux/kernel.h> |
17 | #include <linux/compiler.h> | 17 | #include <linux/compiler.h> |
18 | #include <linux/thread_info.h> | 18 | #include <linux/thread_info.h> |
19 | #include <asm/smp.h> | 19 | #include <asm/smp.h> |
20 | #include <asm/bug.h> | 20 | #include <asm/bug.h> |
21 | 21 | ||
22 | /* | 22 | /* |
23 | * main cross-CPU interfaces, handles INIT, TLB flush, STOP, etc. | 23 | * main cross-CPU interfaces, handles INIT, TLB flush, STOP, etc. |
24 | * (defined in asm header): | 24 | * (defined in asm header): |
25 | */ | 25 | */ |
26 | 26 | ||
27 | /* | 27 | /* |
28 | * stops all CPUs but the current one: | 28 | * stops all CPUs but the current one: |
29 | */ | 29 | */ |
30 | extern void smp_send_stop(void); | 30 | extern void smp_send_stop(void); |
31 | 31 | ||
32 | /* | 32 | /* |
33 | * sends a 'reschedule' event to another CPU: | 33 | * sends a 'reschedule' event to another CPU: |
34 | */ | 34 | */ |
35 | extern void smp_send_reschedule(int cpu); | 35 | extern void smp_send_reschedule(int cpu); |
36 | 36 | ||
37 | 37 | ||
38 | /* | 38 | /* |
39 | * Prepare machine for booting other CPUs. | 39 | * Prepare machine for booting other CPUs. |
40 | */ | 40 | */ |
41 | extern void smp_prepare_cpus(unsigned int max_cpus); | 41 | extern void smp_prepare_cpus(unsigned int max_cpus); |
42 | 42 | ||
43 | /* | 43 | /* |
44 | * Bring a CPU up | 44 | * Bring a CPU up |
45 | */ | 45 | */ |
46 | extern int __cpu_up(unsigned int cpunum); | 46 | extern int __cpu_up(unsigned int cpunum); |
47 | 47 | ||
48 | /* | 48 | /* |
49 | * Final polishing of CPUs | 49 | * Final polishing of CPUs |
50 | */ | 50 | */ |
51 | extern void smp_cpus_done(unsigned int max_cpus); | 51 | extern void smp_cpus_done(unsigned int max_cpus); |
52 | 52 | ||
53 | /* | 53 | /* |
54 | * Call a function on all other processors | 54 | * Call a function on all other processors |
55 | */ | 55 | */ |
56 | extern int smp_call_function (void (*func) (void *info), void *info, | 56 | extern int smp_call_function (void (*func) (void *info), void *info, |
57 | int retry, int wait); | 57 | int retry, int wait); |
58 | 58 | ||
59 | /* | 59 | /* |
60 | * Call a function on all processors | 60 | * Call a function on all processors |
61 | */ | 61 | */ |
62 | static inline int on_each_cpu(void (*func) (void *info), void *info, | 62 | static inline int on_each_cpu(void (*func) (void *info), void *info, |
63 | int retry, int wait) | 63 | int retry, int wait) |
64 | { | 64 | { |
65 | int ret = 0; | 65 | int ret = 0; |
66 | 66 | ||
67 | preempt_disable(); | 67 | preempt_disable(); |
68 | ret = smp_call_function(func, info, retry, wait); | 68 | ret = smp_call_function(func, info, retry, wait); |
69 | func(info); | 69 | func(info); |
70 | preempt_enable(); | 70 | preempt_enable(); |
71 | return ret; | 71 | return ret; |
72 | } | 72 | } |
73 | 73 | ||
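on_each_cpu() forwards to smp_call_function() for the other CPUs and then runs the handler locally with preemption disabled, so the handler is entered once per online CPU. A minimal sketch of a caller; the handler and its counter are invented for illustration:

    /* Sketch: run a non-sleeping handler on every CPU, including this one. */
    static atomic_t visits = ATOMIC_INIT(0);

    static void count_visit(void *unused)
    {
            atomic_inc(&visits);    /* runs in IPI context on remote CPUs */
    }

    static void ping_all_cpus(void)
    {
            /* retry=0, wait=1: return only after every CPU ran the handler */
            on_each_cpu(count_visit, NULL, 0, 1);
    }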
74 | #define MSG_ALL_BUT_SELF 0x8000 /* Assume <32768 CPU's */ | 74 | #define MSG_ALL_BUT_SELF 0x8000 /* Assume <32768 CPU's */ |
75 | #define MSG_ALL 0x8001 | 75 | #define MSG_ALL 0x8001 |
76 | 76 | ||
77 | #define MSG_INVALIDATE_TLB 0x0001 /* Remote processor TLB invalidate */ | 77 | #define MSG_INVALIDATE_TLB 0x0001 /* Remote processor TLB invalidate */ |
78 | #define MSG_STOP_CPU 0x0002 /* Sent to shut down slave CPU's | 78 | #define MSG_STOP_CPU 0x0002 /* Sent to shut down slave CPU's |
79 | * when rebooting | 79 | * when rebooting |
80 | */ | 80 | */ |
81 | #define MSG_RESCHEDULE 0x0003 /* Reschedule request from master CPU*/ | 81 | #define MSG_RESCHEDULE 0x0003 /* Reschedule request from master CPU*/ |
82 | #define MSG_CALL_FUNCTION 0x0004 /* Call function on all other CPUs */ | 82 | #define MSG_CALL_FUNCTION 0x0004 /* Call function on all other CPUs */ |
83 | 83 | ||
84 | /* | 84 | /* |
85 | * Mark the boot cpu "online" so that it can call console drivers in | 85 | * Mark the boot cpu "online" so that it can call console drivers in |
86 | * printk() and can access its per-cpu storage. | 86 | * printk() and can access its per-cpu storage. |
87 | */ | 87 | */ |
88 | void smp_prepare_boot_cpu(void); | 88 | void smp_prepare_boot_cpu(void); |
89 | 89 | ||
90 | #else /* !SMP */ | 90 | #else /* !SMP */ |
91 | 91 | ||
92 | /* | 92 | /* |
93 | * These macros fold the SMP functionality into a single CPU system | 93 | * These macros fold the SMP functionality into a single CPU system |
94 | */ | 94 | */ |
95 | 95 | #define raw_smp_processor_id() 0 | |
96 | #if !defined(__smp_processor_id) || !defined(CONFIG_PREEMPT) | ||
97 | # define smp_processor_id() 0 | ||
98 | #endif | ||
99 | #define hard_smp_processor_id() 0 | 96 | #define hard_smp_processor_id() 0 |
100 | #define smp_call_function(func,info,retry,wait) ({ 0; }) | 97 | #define smp_call_function(func,info,retry,wait) ({ 0; }) |
101 | #define on_each_cpu(func,info,retry,wait) ({ func(info); 0; }) | 98 | #define on_each_cpu(func,info,retry,wait) ({ func(info); 0; }) |
102 | static inline void smp_send_reschedule(int cpu) { } | 99 | static inline void smp_send_reschedule(int cpu) { } |
103 | #define num_booting_cpus() 1 | 100 | #define num_booting_cpus() 1 |
104 | #define smp_prepare_boot_cpu() do {} while (0) | 101 | #define smp_prepare_boot_cpu() do {} while (0) |
105 | 102 | ||
106 | #endif /* !SMP */ | 103 | #endif /* !SMP */ |
107 | 104 | ||
108 | /* | 105 | /* |
109 | * DEBUG_PREEMPT support: check whether smp_processor_id() is being | 106 | * smp_processor_id(): get the current CPU ID. |
110 | * used in a preemption-safe way. | ||
111 | * | 107 | * |
112 | * An architecture has to enable this debugging code explicitly. | 108 | * if DEBUG_PREEMPT is enabled then we check whether it is |
113 | * It can do so by renaming the smp_processor_id() macro to | 109 | * used in a preemption-safe way. (smp_processor_id() is safe |
114 | * __smp_processor_id(). This should only be done after some minimal | 110 | * if it's used in a preemption-off critical section, or in |
115 | * testing, because usually there are a number of false positives | 111 | * a thread that is bound to the current CPU.) |
116 | * that an architecture will trigger. | ||
117 | * | 112 | * |
118 | * To fix a false positive (i.e. smp_processor_id() use that the | 113 | * NOTE: raw_smp_processor_id() is for internal use only |
119 | * debugging code reports but which use for some reason is legal), | 114 | * (smp_processor_id() is the preferred variant), but in rare |
120 | * change the smp_processor_id() reference to _smp_processor_id(), | 115 | * instances it might also be used to turn off false positives |
121 | * which is the nondebug variant. NOTE: don't use this to hack around | 116 | * (i.e. smp_processor_id() use that the debugging code reports but |
122 | * real bugs. | 117 | * which use for some reason is legal). Don't use this to hack around |
118 | * the warning message, as your code might not work under PREEMPT. | ||
123 | */ | 119 | */ |
124 | #ifdef __smp_processor_id | 120 | #ifdef CONFIG_DEBUG_PREEMPT |
125 | # if defined(CONFIG_PREEMPT) && defined(CONFIG_DEBUG_PREEMPT) | 121 | extern unsigned int debug_smp_processor_id(void); |
126 | extern unsigned int smp_processor_id(void); | 122 | # define smp_processor_id() debug_smp_processor_id() |
127 | # else | ||
128 | # define smp_processor_id() __smp_processor_id() | ||
129 | # endif | ||
130 | # define _smp_processor_id() __smp_processor_id() | ||
131 | #else | 123 | #else |
132 | # define _smp_processor_id() smp_processor_id() | 124 | # define smp_processor_id() raw_smp_processor_id() |
133 | #endif | 125 | #endif |
134 | 126 | ||
135 | #define get_cpu() ({ preempt_disable(); smp_processor_id(); }) | 127 | #define get_cpu() ({ preempt_disable(); smp_processor_id(); }) |
136 | #define put_cpu() preempt_enable() | 128 | #define put_cpu() preempt_enable() |
137 | #define put_cpu_no_resched() preempt_enable_no_resched() | 129 | #define put_cpu_no_resched() preempt_enable_no_resched() |
138 | 130 | ||
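This hunk is the core of the cleanup: smp_processor_id() is now the checked front end (routed through debug_smp_processor_id() under CONFIG_DEBUG_PREEMPT), raw_smp_processor_id() is the unchecked per-architecture primitive, and get_cpu()/put_cpu() remain the preferred way to obtain a stable CPU number. A hedged sketch of the two legitimate patterns; the per-CPU counter array is invented for illustration:

    /* Sketch: the two ways to read the CPU number under the new API. */
    static unsigned long hits[NR_CPUS];

    static void touch_hits(void)
    {
            int cpu;

            /* Pattern 1: pin the task, then the checked variant is safe. */
            cpu = get_cpu();        /* preempt_disable() + smp_processor_id() */
            hits[cpu]++;
            put_cpu();              /* preempt_enable() */

            /* Pattern 2: a purely statistical use where a stale CPU number
             * is acceptable - the raw variant skips the debug check. */
            hits[raw_smp_processor_id()]++;
    }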
139 | #endif /* __LINUX_SMP_H */ | 131 | #endif /* __LINUX_SMP_H */ |
include/net/route.h
1 | /* | 1 | /* |
2 | * INET An implementation of the TCP/IP protocol suite for the LINUX | 2 | * INET An implementation of the TCP/IP protocol suite for the LINUX |
3 | * operating system. INET is implemented using the BSD Socket | 3 | * operating system. INET is implemented using the BSD Socket |
4 | * interface as the means of communication with the user level. | 4 | * interface as the means of communication with the user level. |
5 | * | 5 | * |
6 | * Definitions for the IP router. | 6 | * Definitions for the IP router. |
7 | * | 7 | * |
8 | * Version: @(#)route.h 1.0.4 05/27/93 | 8 | * Version: @(#)route.h 1.0.4 05/27/93 |
9 | * | 9 | * |
10 | * Authors: Ross Biro | 10 | * Authors: Ross Biro |
11 | * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG> | 11 | * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG> |
12 | * Fixes: | 12 | * Fixes: |
13 | * Alan Cox : Reformatted. Added ip_rt_local() | 13 | * Alan Cox : Reformatted. Added ip_rt_local() |
14 | * Alan Cox : Support for TCP parameters. | 14 | * Alan Cox : Support for TCP parameters. |
15 | * Alexey Kuznetsov: Major changes for new routing code. | 15 | * Alexey Kuznetsov: Major changes for new routing code. |
16 | * Mike McLagan : Routing by source | 16 | * Mike McLagan : Routing by source |
17 | * Robert Olsson : Added rt_cache statistics | 17 | * Robert Olsson : Added rt_cache statistics |
18 | * | 18 | * |
19 | * This program is free software; you can redistribute it and/or | 19 | * This program is free software; you can redistribute it and/or |
20 | * modify it under the terms of the GNU General Public License | 20 | * modify it under the terms of the GNU General Public License |
21 | * as published by the Free Software Foundation; either version | 21 | * as published by the Free Software Foundation; either version |
22 | * 2 of the License, or (at your option) any later version. | 22 | * 2 of the License, or (at your option) any later version. |
23 | */ | 23 | */ |
24 | #ifndef _ROUTE_H | 24 | #ifndef _ROUTE_H |
25 | #define _ROUTE_H | 25 | #define _ROUTE_H |
26 | 26 | ||
27 | #include <linux/config.h> | 27 | #include <linux/config.h> |
28 | #include <net/dst.h> | 28 | #include <net/dst.h> |
29 | #include <net/inetpeer.h> | 29 | #include <net/inetpeer.h> |
30 | #include <net/flow.h> | 30 | #include <net/flow.h> |
31 | #include <linux/in_route.h> | 31 | #include <linux/in_route.h> |
32 | #include <linux/rtnetlink.h> | 32 | #include <linux/rtnetlink.h> |
33 | #include <linux/route.h> | 33 | #include <linux/route.h> |
34 | #include <linux/ip.h> | 34 | #include <linux/ip.h> |
35 | #include <linux/cache.h> | 35 | #include <linux/cache.h> |
36 | 36 | ||
37 | #ifndef __KERNEL__ | 37 | #ifndef __KERNEL__ |
38 | #warning This file is not supposed to be used outside of kernel. | 38 | #warning This file is not supposed to be used outside of kernel. |
39 | #endif | 39 | #endif |
40 | 40 | ||
41 | #define RTO_ONLINK 0x01 | 41 | #define RTO_ONLINK 0x01 |
42 | 42 | ||
43 | #define RTO_CONN 0 | 43 | #define RTO_CONN 0 |
44 | /* RTO_CONN is not used (being alias for 0), but preserved not to break | 44 | /* RTO_CONN is not used (being alias for 0), but preserved not to break |
45 | * some modules referring to it. */ | 45 | * some modules referring to it. */ |
46 | 46 | ||
47 | #define RT_CONN_FLAGS(sk) (RT_TOS(inet_sk(sk)->tos) | sock_flag(sk, SOCK_LOCALROUTE)) | 47 | #define RT_CONN_FLAGS(sk) (RT_TOS(inet_sk(sk)->tos) | sock_flag(sk, SOCK_LOCALROUTE)) |
48 | 48 | ||
49 | struct fib_nh; | 49 | struct fib_nh; |
50 | struct inet_peer; | 50 | struct inet_peer; |
51 | struct rtable | 51 | struct rtable |
52 | { | 52 | { |
53 | union | 53 | union |
54 | { | 54 | { |
55 | struct dst_entry dst; | 55 | struct dst_entry dst; |
56 | struct rtable *rt_next; | 56 | struct rtable *rt_next; |
57 | } u; | 57 | } u; |
58 | 58 | ||
59 | struct in_device *idev; | 59 | struct in_device *idev; |
60 | 60 | ||
61 | unsigned rt_flags; | 61 | unsigned rt_flags; |
62 | __u16 rt_type; | 62 | __u16 rt_type; |
63 | __u16 rt_multipath_alg; | 63 | __u16 rt_multipath_alg; |
64 | 64 | ||
65 | __u32 rt_dst; /* Path destination */ | 65 | __u32 rt_dst; /* Path destination */ |
66 | __u32 rt_src; /* Path source */ | 66 | __u32 rt_src; /* Path source */ |
67 | int rt_iif; | 67 | int rt_iif; |
68 | 68 | ||
69 | /* Info on neighbour */ | 69 | /* Info on neighbour */ |
70 | __u32 rt_gateway; | 70 | __u32 rt_gateway; |
71 | 71 | ||
72 | /* Cache lookup keys */ | 72 | /* Cache lookup keys */ |
73 | struct flowi fl; | 73 | struct flowi fl; |
74 | 74 | ||
75 | /* Miscellaneous cached information */ | 75 | /* Miscellaneous cached information */ |
76 | __u32 rt_spec_dst; /* RFC1122 specific destination */ | 76 | __u32 rt_spec_dst; /* RFC1122 specific destination */ |
77 | struct inet_peer *peer; /* long-living peer info */ | 77 | struct inet_peer *peer; /* long-living peer info */ |
78 | }; | 78 | }; |
79 | 79 | ||
80 | struct ip_rt_acct | 80 | struct ip_rt_acct |
81 | { | 81 | { |
82 | __u32 o_bytes; | 82 | __u32 o_bytes; |
83 | __u32 o_packets; | 83 | __u32 o_packets; |
84 | __u32 i_bytes; | 84 | __u32 i_bytes; |
85 | __u32 i_packets; | 85 | __u32 i_packets; |
86 | }; | 86 | }; |
87 | 87 | ||
88 | struct rt_cache_stat | 88 | struct rt_cache_stat |
89 | { | 89 | { |
90 | unsigned int in_hit; | 90 | unsigned int in_hit; |
91 | unsigned int in_slow_tot; | 91 | unsigned int in_slow_tot; |
92 | unsigned int in_slow_mc; | 92 | unsigned int in_slow_mc; |
93 | unsigned int in_no_route; | 93 | unsigned int in_no_route; |
94 | unsigned int in_brd; | 94 | unsigned int in_brd; |
95 | unsigned int in_martian_dst; | 95 | unsigned int in_martian_dst; |
96 | unsigned int in_martian_src; | 96 | unsigned int in_martian_src; |
97 | unsigned int out_hit; | 97 | unsigned int out_hit; |
98 | unsigned int out_slow_tot; | 98 | unsigned int out_slow_tot; |
99 | unsigned int out_slow_mc; | 99 | unsigned int out_slow_mc; |
100 | unsigned int gc_total; | 100 | unsigned int gc_total; |
101 | unsigned int gc_ignored; | 101 | unsigned int gc_ignored; |
102 | unsigned int gc_goal_miss; | 102 | unsigned int gc_goal_miss; |
103 | unsigned int gc_dst_overflow; | 103 | unsigned int gc_dst_overflow; |
104 | unsigned int in_hlist_search; | 104 | unsigned int in_hlist_search; |
105 | unsigned int out_hlist_search; | 105 | unsigned int out_hlist_search; |
106 | }; | 106 | }; |
107 | 107 | ||
108 | extern struct rt_cache_stat *rt_cache_stat; | 108 | extern struct rt_cache_stat *rt_cache_stat; |
109 | #define RT_CACHE_STAT_INC(field) \ | 109 | #define RT_CACHE_STAT_INC(field) \ |
110 | (per_cpu_ptr(rt_cache_stat, _smp_processor_id())->field++) | 110 | (per_cpu_ptr(rt_cache_stat, raw_smp_processor_id())->field++) |
111 | 111 | ||
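RT_CACHE_STAT_INC() is a typical converted caller: the counters are per-CPU and purely statistical, so an increment that lands on the "wrong" CPU after a preemption point costs nothing, and raw_smp_processor_id() keeps DEBUG_PREEMPT from flagging it. A hedged usage sketch (the wrapper function is illustrative; in_hit is a real field of struct rt_cache_stat above):

    /* Sketch: how route-cache code bumps one of these per-CPU counters. */
    static void note_cache_hit(void)
    {
            RT_CACHE_STAT_INC(in_hit);
    }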
112 | extern struct ip_rt_acct *ip_rt_acct; | 112 | extern struct ip_rt_acct *ip_rt_acct; |
113 | 113 | ||
114 | struct in_device; | 114 | struct in_device; |
115 | extern int ip_rt_init(void); | 115 | extern int ip_rt_init(void); |
116 | extern void ip_rt_redirect(u32 old_gw, u32 dst, u32 new_gw, | 116 | extern void ip_rt_redirect(u32 old_gw, u32 dst, u32 new_gw, |
117 | u32 src, u8 tos, struct net_device *dev); | 117 | u32 src, u8 tos, struct net_device *dev); |
118 | extern void ip_rt_advice(struct rtable **rp, int advice); | 118 | extern void ip_rt_advice(struct rtable **rp, int advice); |
119 | extern void rt_cache_flush(int how); | 119 | extern void rt_cache_flush(int how); |
120 | extern int __ip_route_output_key(struct rtable **, const struct flowi *flp); | 120 | extern int __ip_route_output_key(struct rtable **, const struct flowi *flp); |
121 | extern int ip_route_output_key(struct rtable **, struct flowi *flp); | 121 | extern int ip_route_output_key(struct rtable **, struct flowi *flp); |
122 | extern int ip_route_output_flow(struct rtable **rp, struct flowi *flp, struct sock *sk, int flags); | 122 | extern int ip_route_output_flow(struct rtable **rp, struct flowi *flp, struct sock *sk, int flags); |
123 | extern int ip_route_input(struct sk_buff*, u32 dst, u32 src, u8 tos, struct net_device *devin); | 123 | extern int ip_route_input(struct sk_buff*, u32 dst, u32 src, u8 tos, struct net_device *devin); |
124 | extern unsigned short ip_rt_frag_needed(struct iphdr *iph, unsigned short new_mtu); | 124 | extern unsigned short ip_rt_frag_needed(struct iphdr *iph, unsigned short new_mtu); |
125 | extern void ip_rt_send_redirect(struct sk_buff *skb); | 125 | extern void ip_rt_send_redirect(struct sk_buff *skb); |
126 | 126 | ||
127 | extern unsigned inet_addr_type(u32 addr); | 127 | extern unsigned inet_addr_type(u32 addr); |
128 | extern void ip_rt_multicast_event(struct in_device *); | 128 | extern void ip_rt_multicast_event(struct in_device *); |
129 | extern int ip_rt_ioctl(unsigned int cmd, void __user *arg); | 129 | extern int ip_rt_ioctl(unsigned int cmd, void __user *arg); |
130 | extern void ip_rt_get_source(u8 *src, struct rtable *rt); | 130 | extern void ip_rt_get_source(u8 *src, struct rtable *rt); |
131 | extern int ip_rt_dump(struct sk_buff *skb, struct netlink_callback *cb); | 131 | extern int ip_rt_dump(struct sk_buff *skb, struct netlink_callback *cb); |
132 | 132 | ||
133 | static inline void ip_rt_put(struct rtable * rt) | 133 | static inline void ip_rt_put(struct rtable * rt) |
134 | { | 134 | { |
135 | if (rt) | 135 | if (rt) |
136 | dst_release(&rt->u.dst); | 136 | dst_release(&rt->u.dst); |
137 | } | 137 | } |
138 | 138 | ||
139 | #define IPTOS_RT_MASK (IPTOS_TOS_MASK & ~3) | 139 | #define IPTOS_RT_MASK (IPTOS_TOS_MASK & ~3) |
140 | 140 | ||
141 | extern __u8 ip_tos2prio[16]; | 141 | extern __u8 ip_tos2prio[16]; |
142 | 142 | ||
143 | static inline char rt_tos2priority(u8 tos) | 143 | static inline char rt_tos2priority(u8 tos) |
144 | { | 144 | { |
145 | return ip_tos2prio[IPTOS_TOS(tos)>>1]; | 145 | return ip_tos2prio[IPTOS_TOS(tos)>>1]; |
146 | } | 146 | } |
147 | 147 | ||
148 | static inline int ip_route_connect(struct rtable **rp, u32 dst, | 148 | static inline int ip_route_connect(struct rtable **rp, u32 dst, |
149 | u32 src, u32 tos, int oif, u8 protocol, | 149 | u32 src, u32 tos, int oif, u8 protocol, |
150 | u16 sport, u16 dport, struct sock *sk) | 150 | u16 sport, u16 dport, struct sock *sk) |
151 | { | 151 | { |
152 | struct flowi fl = { .oif = oif, | 152 | struct flowi fl = { .oif = oif, |
153 | .nl_u = { .ip4_u = { .daddr = dst, | 153 | .nl_u = { .ip4_u = { .daddr = dst, |
154 | .saddr = src, | 154 | .saddr = src, |
155 | .tos = tos } }, | 155 | .tos = tos } }, |
156 | .proto = protocol, | 156 | .proto = protocol, |
157 | .uli_u = { .ports = | 157 | .uli_u = { .ports = |
158 | { .sport = sport, | 158 | { .sport = sport, |
159 | .dport = dport } } }; | 159 | .dport = dport } } }; |
160 | 160 | ||
161 | int err; | 161 | int err; |
162 | if (!dst || !src) { | 162 | if (!dst || !src) { |
163 | err = __ip_route_output_key(rp, &fl); | 163 | err = __ip_route_output_key(rp, &fl); |
164 | if (err) | 164 | if (err) |
165 | return err; | 165 | return err; |
166 | fl.fl4_dst = (*rp)->rt_dst; | 166 | fl.fl4_dst = (*rp)->rt_dst; |
167 | fl.fl4_src = (*rp)->rt_src; | 167 | fl.fl4_src = (*rp)->rt_src; |
168 | ip_rt_put(*rp); | 168 | ip_rt_put(*rp); |
169 | *rp = NULL; | 169 | *rp = NULL; |
170 | } | 170 | } |
171 | return ip_route_output_flow(rp, &fl, sk, 0); | 171 | return ip_route_output_flow(rp, &fl, sk, 0); |
172 | } | 172 | } |
173 | 173 | ||
174 | static inline int ip_route_newports(struct rtable **rp, u16 sport, u16 dport, | 174 | static inline int ip_route_newports(struct rtable **rp, u16 sport, u16 dport, |
175 | struct sock *sk) | 175 | struct sock *sk) |
176 | { | 176 | { |
177 | if (sport != (*rp)->fl.fl_ip_sport || | 177 | if (sport != (*rp)->fl.fl_ip_sport || |
178 | dport != (*rp)->fl.fl_ip_dport) { | 178 | dport != (*rp)->fl.fl_ip_dport) { |
179 | struct flowi fl; | 179 | struct flowi fl; |
180 | 180 | ||
181 | memcpy(&fl, &(*rp)->fl, sizeof(fl)); | 181 | memcpy(&fl, &(*rp)->fl, sizeof(fl)); |
182 | fl.fl_ip_sport = sport; | 182 | fl.fl_ip_sport = sport; |
183 | fl.fl_ip_dport = dport; | 183 | fl.fl_ip_dport = dport; |
184 | ip_rt_put(*rp); | 184 | ip_rt_put(*rp); |
185 | *rp = NULL; | 185 | *rp = NULL; |
186 | return ip_route_output_flow(rp, &fl, sk, 0); | 186 | return ip_route_output_flow(rp, &fl, sk, 0); |
187 | } | 187 | } |
188 | return 0; | 188 | return 0; |
189 | } | 189 | } |
190 | 190 | ||
191 | extern void rt_bind_peer(struct rtable *rt, int create); | 191 | extern void rt_bind_peer(struct rtable *rt, int create); |
192 | 192 | ||
193 | static inline struct inet_peer *rt_get_peer(struct rtable *rt) | 193 | static inline struct inet_peer *rt_get_peer(struct rtable *rt) |
194 | { | 194 | { |
195 | if (rt->peer) | 195 | if (rt->peer) |
196 | return rt->peer; | 196 | return rt->peer; |
197 | 197 | ||
198 | rt_bind_peer(rt, 0); | 198 | rt_bind_peer(rt, 0); |
199 | return rt->peer; | 199 | return rt->peer; |
200 | } | 200 | } |
201 | 201 | ||
202 | #endif /* _ROUTE_H */ | 202 | #endif /* _ROUTE_H */ |
203 | 203 |
include/net/snmp.h
1 | /* | 1 | /* |
2 | * | 2 | * |
3 | * SNMP MIB entries for the IP subsystem. | 3 | * SNMP MIB entries for the IP subsystem. |
4 | * | 4 | * |
5 | * Alan Cox <gw4pts@gw4pts.ampr.org> | 5 | * Alan Cox <gw4pts@gw4pts.ampr.org> |
6 | * | 6 | * |
7 | * We don't choose to implement SNMP in the kernel (this would | 7 | * We don't choose to implement SNMP in the kernel (this would |
8 | * be silly as SNMP is a pain in the backside in places). We do | 8 | * be silly as SNMP is a pain in the backside in places). We do |
9 | * however need to collect the MIB statistics and export them | 9 | * however need to collect the MIB statistics and export them |
10 | * out of /proc (eventually) | 10 | * out of /proc (eventually) |
11 | * | 11 | * |
12 | * This program is free software; you can redistribute it and/or | 12 | * This program is free software; you can redistribute it and/or |
13 | * modify it under the terms of the GNU General Public License | 13 | * modify it under the terms of the GNU General Public License |
14 | * as published by the Free Software Foundation; either version | 14 | * as published by the Free Software Foundation; either version |
15 | * 2 of the License, or (at your option) any later version. | 15 | * 2 of the License, or (at your option) any later version. |
16 | * | 16 | * |
17 | * $Id: snmp.h,v 1.19 2001/06/14 13:40:46 davem Exp $ | 17 | * $Id: snmp.h,v 1.19 2001/06/14 13:40:46 davem Exp $ |
18 | * | 18 | * |
19 | */ | 19 | */ |
20 | 20 | ||
21 | #ifndef _SNMP_H | 21 | #ifndef _SNMP_H |
22 | #define _SNMP_H | 22 | #define _SNMP_H |
23 | 23 | ||
24 | #include <linux/cache.h> | 24 | #include <linux/cache.h> |
25 | #include <linux/snmp.h> | 25 | #include <linux/snmp.h> |
26 | 26 | ||
27 | /* | 27 | /* |
28 | * Mibs are stored in array of unsigned long. | 28 | * Mibs are stored in array of unsigned long. |
29 | */ | 29 | */ |
30 | /* | 30 | /* |
31 | * struct snmp_mib{} | 31 | * struct snmp_mib{} |
32 | * - list of entries for particular API (such as /proc/net/snmp) | 32 | * - list of entries for particular API (such as /proc/net/snmp) |
33 | * - name of entries. | 33 | * - name of entries. |
34 | */ | 34 | */ |
35 | struct snmp_mib { | 35 | struct snmp_mib { |
36 | char *name; | 36 | char *name; |
37 | int entry; | 37 | int entry; |
38 | }; | 38 | }; |
39 | 39 | ||
40 | #define SNMP_MIB_ITEM(_name,_entry) { \ | 40 | #define SNMP_MIB_ITEM(_name,_entry) { \ |
41 | .name = _name, \ | 41 | .name = _name, \ |
42 | .entry = _entry, \ | 42 | .entry = _entry, \ |
43 | } | 43 | } |
44 | 44 | ||
45 | #define SNMP_MIB_SENTINEL { \ | 45 | #define SNMP_MIB_SENTINEL { \ |
46 | .name = NULL, \ | 46 | .name = NULL, \ |
47 | .entry = 0, \ | 47 | .entry = 0, \ |
48 | } | 48 | } |
49 | 49 | ||
50 | /* | 50 | /* |
51 | * We use all unsigned longs. Linux will soon be so reliable that even | 51 | * We use all unsigned longs. Linux will soon be so reliable that even |
52 | * these will rapidly get too small 8-). Seriously consider the IpInReceives | 52 | * these will rapidly get too small 8-). Seriously consider the IpInReceives |
53 | * count on the 20Gb/s + networks people expect in a few years time! | 53 | * count on the 20Gb/s + networks people expect in a few years time! |
54 | */ | 54 | */ |
55 | 55 | ||
56 | /* | 56 | /* |
57 | * The rule for padding: | 57 | * The rule for padding: |
58 | * Best is power of two because then the right structure can be found by a | 58 | * Best is power of two because then the right structure can be found by a |
59 | * simple shift. The structure should be always cache line aligned. | 59 | * simple shift. The structure should be always cache line aligned. |
60 | * gcc needs n=alignto(cachelinesize, popcnt(sizeof(bla_mib))) shift/add | 60 | * gcc needs n=alignto(cachelinesize, popcnt(sizeof(bla_mib))) shift/add |
61 | * instructions to emulate multiply in case it is not power-of-two. | 61 | * instructions to emulate multiply in case it is not power-of-two. |
62 | * Currently n is always <=3 for all sizes so simple cache line alignment | 62 | * Currently n is always <=3 for all sizes so simple cache line alignment |
63 | * is enough. | 63 | * is enough. |
64 | * | 64 | * |
65 | * The best solution would be a global CPU local area, especially on 64 | 65 | * The best solution would be a global CPU local area, especially on 64 |
66 | * and 128-byte cacheline machines it makes a *lot* of sense -AK | 66 | * and 128-byte cacheline machines it makes a *lot* of sense -AK |
67 | */ | 67 | */ |
68 | 68 | ||
69 | #define __SNMP_MIB_ALIGN__ ____cacheline_aligned | 69 | #define __SNMP_MIB_ALIGN__ ____cacheline_aligned |
70 | 70 | ||
71 | /* IPstats */ | 71 | /* IPstats */ |
72 | #define IPSTATS_MIB_MAX __IPSTATS_MIB_MAX | 72 | #define IPSTATS_MIB_MAX __IPSTATS_MIB_MAX |
73 | struct ipstats_mib { | 73 | struct ipstats_mib { |
74 | unsigned long mibs[IPSTATS_MIB_MAX]; | 74 | unsigned long mibs[IPSTATS_MIB_MAX]; |
75 | } __SNMP_MIB_ALIGN__; | 75 | } __SNMP_MIB_ALIGN__; |
76 | 76 | ||
77 | /* ICMP */ | 77 | /* ICMP */ |
78 | #define ICMP_MIB_DUMMY __ICMP_MIB_MAX | 78 | #define ICMP_MIB_DUMMY __ICMP_MIB_MAX |
79 | #define ICMP_MIB_MAX (__ICMP_MIB_MAX + 1) | 79 | #define ICMP_MIB_MAX (__ICMP_MIB_MAX + 1) |
80 | 80 | ||
81 | struct icmp_mib { | 81 | struct icmp_mib { |
82 | unsigned long mibs[ICMP_MIB_MAX]; | 82 | unsigned long mibs[ICMP_MIB_MAX]; |
83 | } __SNMP_MIB_ALIGN__; | 83 | } __SNMP_MIB_ALIGN__; |
84 | 84 | ||
85 | /* ICMP6 (IPv6-ICMP) */ | 85 | /* ICMP6 (IPv6-ICMP) */ |
86 | #define ICMP6_MIB_MAX __ICMP6_MIB_MAX | 86 | #define ICMP6_MIB_MAX __ICMP6_MIB_MAX |
87 | struct icmpv6_mib { | 87 | struct icmpv6_mib { |
88 | unsigned long mibs[ICMP6_MIB_MAX]; | 88 | unsigned long mibs[ICMP6_MIB_MAX]; |
89 | } __SNMP_MIB_ALIGN__; | 89 | } __SNMP_MIB_ALIGN__; |
90 | 90 | ||
91 | /* TCP */ | 91 | /* TCP */ |
92 | #define TCP_MIB_MAX __TCP_MIB_MAX | 92 | #define TCP_MIB_MAX __TCP_MIB_MAX |
93 | struct tcp_mib { | 93 | struct tcp_mib { |
94 | unsigned long mibs[TCP_MIB_MAX]; | 94 | unsigned long mibs[TCP_MIB_MAX]; |
95 | } __SNMP_MIB_ALIGN__; | 95 | } __SNMP_MIB_ALIGN__; |
96 | 96 | ||
97 | /* UDP */ | 97 | /* UDP */ |
98 | #define UDP_MIB_MAX __UDP_MIB_MAX | 98 | #define UDP_MIB_MAX __UDP_MIB_MAX |
99 | struct udp_mib { | 99 | struct udp_mib { |
100 | unsigned long mibs[UDP_MIB_MAX]; | 100 | unsigned long mibs[UDP_MIB_MAX]; |
101 | } __SNMP_MIB_ALIGN__; | 101 | } __SNMP_MIB_ALIGN__; |
102 | 102 | ||
103 | /* SCTP */ | 103 | /* SCTP */ |
104 | #define SCTP_MIB_MAX __SCTP_MIB_MAX | 104 | #define SCTP_MIB_MAX __SCTP_MIB_MAX |
105 | struct sctp_mib { | 105 | struct sctp_mib { |
106 | unsigned long mibs[SCTP_MIB_MAX]; | 106 | unsigned long mibs[SCTP_MIB_MAX]; |
107 | } __SNMP_MIB_ALIGN__; | 107 | } __SNMP_MIB_ALIGN__; |
108 | 108 | ||
109 | /* Linux */ | 109 | /* Linux */ |
110 | #define LINUX_MIB_MAX __LINUX_MIB_MAX | 110 | #define LINUX_MIB_MAX __LINUX_MIB_MAX |
111 | struct linux_mib { | 111 | struct linux_mib { |
112 | unsigned long mibs[LINUX_MIB_MAX]; | 112 | unsigned long mibs[LINUX_MIB_MAX]; |
113 | }; | 113 | }; |
114 | 114 | ||
115 | 115 | ||
116 | /* | 116 | /* |
117 | * FIXME: On x86 and some other CPUs the split into user and softirq parts | 117 | * FIXME: On x86 and some other CPUs the split into user and softirq parts |
118 | * is not needed because addl $1,memory is atomic against interrupts (but | 118 | * is not needed because addl $1,memory is atomic against interrupts (but |
119 | * atomic_inc would be overkill because of the lock cycles). Wants new | 119 | * atomic_inc would be overkill because of the lock cycles). Wants new |
120 | * nonlocked_atomic_inc() primitives -AK | 120 | * nonlocked_atomic_inc() primitives -AK |
121 | */ | 121 | */ |
122 | #define DEFINE_SNMP_STAT(type, name) \ | 122 | #define DEFINE_SNMP_STAT(type, name) \ |
123 | __typeof__(type) *name[2] | 123 | __typeof__(type) *name[2] |
124 | #define DECLARE_SNMP_STAT(type, name) \ | 124 | #define DECLARE_SNMP_STAT(type, name) \ |
125 | extern __typeof__(type) *name[2] | 125 | extern __typeof__(type) *name[2] |
126 | 126 | ||
127 | #define SNMP_STAT_BHPTR(name) (name[0]) | 127 | #define SNMP_STAT_BHPTR(name) (name[0]) |
128 | #define SNMP_STAT_USRPTR(name) (name[1]) | 128 | #define SNMP_STAT_USRPTR(name) (name[1]) |
129 | 129 | ||
130 | #define SNMP_INC_STATS_BH(mib, field) \ | 130 | #define SNMP_INC_STATS_BH(mib, field) \ |
131 | (per_cpu_ptr(mib[0], _smp_processor_id())->mibs[field]++) | 131 | (per_cpu_ptr(mib[0], raw_smp_processor_id())->mibs[field]++) |
132 | #define SNMP_INC_STATS_OFFSET_BH(mib, field, offset) \ | 132 | #define SNMP_INC_STATS_OFFSET_BH(mib, field, offset) \ |
133 | (per_cpu_ptr(mib[0], _smp_processor_id())->mibs[field + (offset)]++) | 133 | (per_cpu_ptr(mib[0], raw_smp_processor_id())->mibs[field + (offset)]++) |
134 | #define SNMP_INC_STATS_USER(mib, field) \ | 134 | #define SNMP_INC_STATS_USER(mib, field) \ |
135 | (per_cpu_ptr(mib[1], _smp_processor_id())->mibs[field]++) | 135 | (per_cpu_ptr(mib[1], raw_smp_processor_id())->mibs[field]++) |
136 | #define SNMP_INC_STATS(mib, field) \ | 136 | #define SNMP_INC_STATS(mib, field) \ |
137 | (per_cpu_ptr(mib[!in_softirq()], _smp_processor_id())->mibs[field]++) | 137 | (per_cpu_ptr(mib[!in_softirq()], raw_smp_processor_id())->mibs[field]++) |
138 | #define SNMP_DEC_STATS(mib, field) \ | 138 | #define SNMP_DEC_STATS(mib, field) \ |
139 | (per_cpu_ptr(mib[!in_softirq()], _smp_processor_id())->mibs[field]--) | 139 | (per_cpu_ptr(mib[!in_softirq()], raw_smp_processor_id())->mibs[field]--) |
140 | #define SNMP_ADD_STATS_BH(mib, field, addend) \ | 140 | #define SNMP_ADD_STATS_BH(mib, field, addend) \ |
141 | (per_cpu_ptr(mib[0], _smp_processor_id())->mibs[field] += addend) | 141 | (per_cpu_ptr(mib[0], raw_smp_processor_id())->mibs[field] += addend) |
142 | #define SNMP_ADD_STATS_USER(mib, field, addend) \ | 142 | #define SNMP_ADD_STATS_USER(mib, field, addend) \ |
143 | (per_cpu_ptr(mib[1], _smp_processor_id())->mibs[field] += addend) | 143 | (per_cpu_ptr(mib[1], raw_smp_processor_id())->mibs[field] += addend) |
144 | 144 | ||
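The SNMP macros follow the same pattern: every MIB is declared as a pair of per-CPU arrays (index 0 for BH/softirq context, index 1 for process context), and the increments are cheap per-CPU bumps where occasional misattribution after preemption is tolerable, hence raw_smp_processor_id(). A hedged sketch of declaring and bumping such a MIB ("my_mib" and the wrapper function are invented; IPSTATS_MIB_INRECEIVES comes from <linux/snmp.h>):

    /* Sketch: declare an SNMP-style MIB pair and bump it from softirq. */
    DEFINE_SNMP_STAT(struct ipstats_mib, my_mib);

    static void account_rx_packet(void)
    {
            /* expands to per_cpu_ptr(my_mib[0], raw_smp_processor_id())->... */
            SNMP_INC_STATS_BH(my_mib, IPSTATS_MIB_INRECEIVES);
    }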
145 | #endif | 145 | #endif |
146 | 146 |
kernel/module.c
1 | /* Rewritten by Rusty Russell, on the backs of many others... | 1 | /* Rewritten by Rusty Russell, on the backs of many others... |
2 | Copyright (C) 2002 Richard Henderson | 2 | Copyright (C) 2002 Richard Henderson |
3 | Copyright (C) 2001 Rusty Russell, 2002 Rusty Russell IBM. | 3 | Copyright (C) 2001 Rusty Russell, 2002 Rusty Russell IBM. |
4 | 4 | ||
5 | This program is free software; you can redistribute it and/or modify | 5 | This program is free software; you can redistribute it and/or modify |
6 | it under the terms of the GNU General Public License as published by | 6 | it under the terms of the GNU General Public License as published by |
7 | the Free Software Foundation; either version 2 of the License, or | 7 | the Free Software Foundation; either version 2 of the License, or |
8 | (at your option) any later version. | 8 | (at your option) any later version. |
9 | 9 | ||
10 | This program is distributed in the hope that it will be useful, | 10 | This program is distributed in the hope that it will be useful, |
11 | but WITHOUT ANY WARRANTY; without even the implied warranty of | 11 | but WITHOUT ANY WARRANTY; without even the implied warranty of |
12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | 12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
13 | GNU General Public License for more details. | 13 | GNU General Public License for more details. |
14 | 14 | ||
15 | You should have received a copy of the GNU General Public License | 15 | You should have received a copy of the GNU General Public License |
16 | along with this program; if not, write to the Free Software | 16 | along with this program; if not, write to the Free Software |
17 | Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | 17 | Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA |
18 | */ | 18 | */ |
19 | #include <linux/config.h> | 19 | #include <linux/config.h> |
20 | #include <linux/module.h> | 20 | #include <linux/module.h> |
21 | #include <linux/moduleloader.h> | 21 | #include <linux/moduleloader.h> |
22 | #include <linux/init.h> | 22 | #include <linux/init.h> |
23 | #include <linux/slab.h> | 23 | #include <linux/slab.h> |
24 | #include <linux/vmalloc.h> | 24 | #include <linux/vmalloc.h> |
25 | #include <linux/elf.h> | 25 | #include <linux/elf.h> |
26 | #include <linux/seq_file.h> | 26 | #include <linux/seq_file.h> |
27 | #include <linux/syscalls.h> | 27 | #include <linux/syscalls.h> |
28 | #include <linux/fcntl.h> | 28 | #include <linux/fcntl.h> |
29 | #include <linux/rcupdate.h> | 29 | #include <linux/rcupdate.h> |
30 | #include <linux/cpu.h> | 30 | #include <linux/cpu.h> |
31 | #include <linux/moduleparam.h> | 31 | #include <linux/moduleparam.h> |
32 | #include <linux/errno.h> | 32 | #include <linux/errno.h> |
33 | #include <linux/err.h> | 33 | #include <linux/err.h> |
34 | #include <linux/vermagic.h> | 34 | #include <linux/vermagic.h> |
35 | #include <linux/notifier.h> | 35 | #include <linux/notifier.h> |
36 | #include <linux/stop_machine.h> | 36 | #include <linux/stop_machine.h> |
37 | #include <linux/device.h> | 37 | #include <linux/device.h> |
38 | #include <asm/uaccess.h> | 38 | #include <asm/uaccess.h> |
39 | #include <asm/semaphore.h> | 39 | #include <asm/semaphore.h> |
40 | #include <asm/cacheflush.h> | 40 | #include <asm/cacheflush.h> |
41 | 41 | ||
42 | #if 0 | 42 | #if 0 |
43 | #define DEBUGP printk | 43 | #define DEBUGP printk |
44 | #else | 44 | #else |
45 | #define DEBUGP(fmt , a...) | 45 | #define DEBUGP(fmt , a...) |
46 | #endif | 46 | #endif |
47 | 47 | ||
48 | #ifndef ARCH_SHF_SMALL | 48 | #ifndef ARCH_SHF_SMALL |
49 | #define ARCH_SHF_SMALL 0 | 49 | #define ARCH_SHF_SMALL 0 |
50 | #endif | 50 | #endif |
51 | 51 | ||
52 | /* If this is set, the section belongs in the init part of the module */ | 52 | /* If this is set, the section belongs in the init part of the module */ |
53 | #define INIT_OFFSET_MASK (1UL << (BITS_PER_LONG-1)) | 53 | #define INIT_OFFSET_MASK (1UL << (BITS_PER_LONG-1)) |
54 | 54 | ||
55 | /* Protects module list */ | 55 | /* Protects module list */ |
56 | static DEFINE_SPINLOCK(modlist_lock); | 56 | static DEFINE_SPINLOCK(modlist_lock); |
57 | 57 | ||
58 | /* List of modules, protected by module_mutex AND modlist_lock */ | 58 | /* List of modules, protected by module_mutex AND modlist_lock */ |
59 | static DECLARE_MUTEX(module_mutex); | 59 | static DECLARE_MUTEX(module_mutex); |
60 | static LIST_HEAD(modules); | 60 | static LIST_HEAD(modules); |
61 | 61 | ||
62 | static DECLARE_MUTEX(notify_mutex); | 62 | static DECLARE_MUTEX(notify_mutex); |
63 | static struct notifier_block * module_notify_list; | 63 | static struct notifier_block * module_notify_list; |
64 | 64 | ||
65 | int register_module_notifier(struct notifier_block * nb) | 65 | int register_module_notifier(struct notifier_block * nb) |
66 | { | 66 | { |
67 | int err; | 67 | int err; |
68 | down(¬ify_mutex); | 68 | down(¬ify_mutex); |
69 | err = notifier_chain_register(&module_notify_list, nb); | 69 | err = notifier_chain_register(&module_notify_list, nb); |
70 | up(¬ify_mutex); | 70 | up(¬ify_mutex); |
71 | return err; | 71 | return err; |
72 | } | 72 | } |
73 | EXPORT_SYMBOL(register_module_notifier); | 73 | EXPORT_SYMBOL(register_module_notifier); |
74 | 74 | ||
75 | int unregister_module_notifier(struct notifier_block * nb) | 75 | int unregister_module_notifier(struct notifier_block * nb) |
76 | { | 76 | { |
77 | int err; | 77 | int err; |
78 | down(¬ify_mutex); | 78 | down(¬ify_mutex); |
79 | err = notifier_chain_unregister(&module_notify_list, nb); | 79 | err = notifier_chain_unregister(&module_notify_list, nb); |
80 | up(¬ify_mutex); | 80 | up(¬ify_mutex); |
81 | return err; | 81 | return err; |
82 | } | 82 | } |
83 | EXPORT_SYMBOL(unregister_module_notifier); | 83 | EXPORT_SYMBOL(unregister_module_notifier); |
84 | 84 | ||
85 | /* We require a truly strong try_module_get() */ | 85 | /* We require a truly strong try_module_get() */ |
86 | static inline int strong_try_module_get(struct module *mod) | 86 | static inline int strong_try_module_get(struct module *mod) |
87 | { | 87 | { |
88 | if (mod && mod->state == MODULE_STATE_COMING) | 88 | if (mod && mod->state == MODULE_STATE_COMING) |
89 | return 0; | 89 | return 0; |
90 | return try_module_get(mod); | 90 | return try_module_get(mod); |
91 | } | 91 | } |
92 | 92 | ||
93 | /* A thread that wants to hold a reference to a module only while it | 93 | /* A thread that wants to hold a reference to a module only while it |
94 | * is running can call ths to safely exit. | 94 | * is running can call ths to safely exit. |
95 | * nfsd and lockd use this. | 95 | * nfsd and lockd use this. |
96 | */ | 96 | */ |
97 | void __module_put_and_exit(struct module *mod, long code) | 97 | void __module_put_and_exit(struct module *mod, long code) |
98 | { | 98 | { |
99 | module_put(mod); | 99 | module_put(mod); |
100 | do_exit(code); | 100 | do_exit(code); |
101 | } | 101 | } |
102 | EXPORT_SYMBOL(__module_put_and_exit); | 102 | EXPORT_SYMBOL(__module_put_and_exit); |
103 | 103 | ||
104 | /* Find a module section: 0 means not found. */ | 104 | /* Find a module section: 0 means not found. */ |
105 | static unsigned int find_sec(Elf_Ehdr *hdr, | 105 | static unsigned int find_sec(Elf_Ehdr *hdr, |
106 | Elf_Shdr *sechdrs, | 106 | Elf_Shdr *sechdrs, |
107 | const char *secstrings, | 107 | const char *secstrings, |
108 | const char *name) | 108 | const char *name) |
109 | { | 109 | { |
110 | unsigned int i; | 110 | unsigned int i; |
111 | 111 | ||
112 | for (i = 1; i < hdr->e_shnum; i++) | 112 | for (i = 1; i < hdr->e_shnum; i++) |
113 | /* Alloc bit cleared means "ignore it." */ | 113 | /* Alloc bit cleared means "ignore it." */ |
114 | if ((sechdrs[i].sh_flags & SHF_ALLOC) | 114 | if ((sechdrs[i].sh_flags & SHF_ALLOC) |
115 | && strcmp(secstrings+sechdrs[i].sh_name, name) == 0) | 115 | && strcmp(secstrings+sechdrs[i].sh_name, name) == 0) |
116 | return i; | 116 | return i; |
117 | return 0; | 117 | return 0; |
118 | } | 118 | } |
119 | 119 | ||
120 | /* Provided by the linker */ | 120 | /* Provided by the linker */ |
121 | extern const struct kernel_symbol __start___ksymtab[]; | 121 | extern const struct kernel_symbol __start___ksymtab[]; |
122 | extern const struct kernel_symbol __stop___ksymtab[]; | 122 | extern const struct kernel_symbol __stop___ksymtab[]; |
123 | extern const struct kernel_symbol __start___ksymtab_gpl[]; | 123 | extern const struct kernel_symbol __start___ksymtab_gpl[]; |
124 | extern const struct kernel_symbol __stop___ksymtab_gpl[]; | 124 | extern const struct kernel_symbol __stop___ksymtab_gpl[]; |
125 | extern const unsigned long __start___kcrctab[]; | 125 | extern const unsigned long __start___kcrctab[]; |
126 | extern const unsigned long __start___kcrctab_gpl[]; | 126 | extern const unsigned long __start___kcrctab_gpl[]; |
127 | 127 | ||
128 | #ifndef CONFIG_MODVERSIONS | 128 | #ifndef CONFIG_MODVERSIONS |
129 | #define symversion(base, idx) NULL | 129 | #define symversion(base, idx) NULL |
130 | #else | 130 | #else |
131 | #define symversion(base, idx) ((base) ? ((base) + (idx)) : NULL) | 131 | #define symversion(base, idx) ((base) ? ((base) + (idx)) : NULL) |
132 | #endif | 132 | #endif |
133 | 133 | ||
134 | /* Find a symbol, return value, crc and module which owns it */ | 134 | /* Find a symbol, return value, crc and module which owns it */ |
135 | static unsigned long __find_symbol(const char *name, | 135 | static unsigned long __find_symbol(const char *name, |
136 | struct module **owner, | 136 | struct module **owner, |
137 | const unsigned long **crc, | 137 | const unsigned long **crc, |
138 | int gplok) | 138 | int gplok) |
139 | { | 139 | { |
140 | struct module *mod; | 140 | struct module *mod; |
141 | unsigned int i; | 141 | unsigned int i; |
142 | 142 | ||
143 | /* Core kernel first. */ | 143 | /* Core kernel first. */ |
144 | *owner = NULL; | 144 | *owner = NULL; |
145 | for (i = 0; __start___ksymtab+i < __stop___ksymtab; i++) { | 145 | for (i = 0; __start___ksymtab+i < __stop___ksymtab; i++) { |
146 | if (strcmp(__start___ksymtab[i].name, name) == 0) { | 146 | if (strcmp(__start___ksymtab[i].name, name) == 0) { |
147 | *crc = symversion(__start___kcrctab, i); | 147 | *crc = symversion(__start___kcrctab, i); |
148 | return __start___ksymtab[i].value; | 148 | return __start___ksymtab[i].value; |
149 | } | 149 | } |
150 | } | 150 | } |
151 | if (gplok) { | 151 | if (gplok) { |
152 | for (i = 0; __start___ksymtab_gpl+i<__stop___ksymtab_gpl; i++) | 152 | for (i = 0; __start___ksymtab_gpl+i<__stop___ksymtab_gpl; i++) |
153 | if (strcmp(__start___ksymtab_gpl[i].name, name) == 0) { | 153 | if (strcmp(__start___ksymtab_gpl[i].name, name) == 0) { |
154 | *crc = symversion(__start___kcrctab_gpl, i); | 154 | *crc = symversion(__start___kcrctab_gpl, i); |
155 | return __start___ksymtab_gpl[i].value; | 155 | return __start___ksymtab_gpl[i].value; |
156 | } | 156 | } |
157 | } | 157 | } |
158 | 158 | ||
159 | /* Now try modules. */ | 159 | /* Now try modules. */ |
160 | list_for_each_entry(mod, &modules, list) { | 160 | list_for_each_entry(mod, &modules, list) { |
161 | *owner = mod; | 161 | *owner = mod; |
162 | for (i = 0; i < mod->num_syms; i++) | 162 | for (i = 0; i < mod->num_syms; i++) |
163 | if (strcmp(mod->syms[i].name, name) == 0) { | 163 | if (strcmp(mod->syms[i].name, name) == 0) { |
164 | *crc = symversion(mod->crcs, i); | 164 | *crc = symversion(mod->crcs, i); |
165 | return mod->syms[i].value; | 165 | return mod->syms[i].value; |
166 | } | 166 | } |
167 | 167 | ||
168 | if (gplok) { | 168 | if (gplok) { |
169 | for (i = 0; i < mod->num_gpl_syms; i++) { | 169 | for (i = 0; i < mod->num_gpl_syms; i++) { |
170 | if (strcmp(mod->gpl_syms[i].name, name) == 0) { | 170 | if (strcmp(mod->gpl_syms[i].name, name) == 0) { |
171 | *crc = symversion(mod->gpl_crcs, i); | 171 | *crc = symversion(mod->gpl_crcs, i); |
172 | return mod->gpl_syms[i].value; | 172 | return mod->gpl_syms[i].value; |
173 | } | 173 | } |
174 | } | 174 | } |
175 | } | 175 | } |
176 | } | 176 | } |
177 | DEBUGP("Failed to find symbol %s\n", name); | 177 | DEBUGP("Failed to find symbol %s\n", name); |
178 | return 0; | 178 | return 0; |
179 | } | 179 | } |
180 | 180 | ||
181 | /* Find a symbol in this elf symbol table */ | 181 | /* Find a symbol in this elf symbol table */ |
182 | static unsigned long find_local_symbol(Elf_Shdr *sechdrs, | 182 | static unsigned long find_local_symbol(Elf_Shdr *sechdrs, |
183 | unsigned int symindex, | 183 | unsigned int symindex, |
184 | const char *strtab, | 184 | const char *strtab, |
185 | const char *name) | 185 | const char *name) |
186 | { | 186 | { |
187 | unsigned int i; | 187 | unsigned int i; |
188 | Elf_Sym *sym = (void *)sechdrs[symindex].sh_addr; | 188 | Elf_Sym *sym = (void *)sechdrs[symindex].sh_addr; |
189 | 189 | ||
190 | /* Search (defined) internal symbols first. */ | 190 | /* Search (defined) internal symbols first. */ |
191 | for (i = 1; i < sechdrs[symindex].sh_size/sizeof(*sym); i++) { | 191 | for (i = 1; i < sechdrs[symindex].sh_size/sizeof(*sym); i++) { |
192 | if (sym[i].st_shndx != SHN_UNDEF | 192 | if (sym[i].st_shndx != SHN_UNDEF |
193 | && strcmp(name, strtab + sym[i].st_name) == 0) | 193 | && strcmp(name, strtab + sym[i].st_name) == 0) |
194 | return sym[i].st_value; | 194 | return sym[i].st_value; |
195 | } | 195 | } |
196 | return 0; | 196 | return 0; |
197 | } | 197 | } |
198 | 198 | ||
199 | /* Search for module by name: must hold module_mutex. */ | 199 | /* Search for module by name: must hold module_mutex. */ |
200 | static struct module *find_module(const char *name) | 200 | static struct module *find_module(const char *name) |
201 | { | 201 | { |
202 | struct module *mod; | 202 | struct module *mod; |
203 | 203 | ||
204 | list_for_each_entry(mod, &modules, list) { | 204 | list_for_each_entry(mod, &modules, list) { |
205 | if (strcmp(mod->name, name) == 0) | 205 | if (strcmp(mod->name, name) == 0) |
206 | return mod; | 206 | return mod; |
207 | } | 207 | } |
208 | return NULL; | 208 | return NULL; |
209 | } | 209 | } |
210 | 210 | ||
211 | #ifdef CONFIG_SMP | 211 | #ifdef CONFIG_SMP |
212 | /* Number of blocks used and allocated. */ | 212 | /* Number of blocks used and allocated. */ |
213 | static unsigned int pcpu_num_used, pcpu_num_allocated; | 213 | static unsigned int pcpu_num_used, pcpu_num_allocated; |
214 | /* Size of each block. -ve means used. */ | 214 | /* Size of each block. -ve means used. */ |
215 | static int *pcpu_size; | 215 | static int *pcpu_size; |
216 | 216 | ||
217 | static int split_block(unsigned int i, unsigned short size) | 217 | static int split_block(unsigned int i, unsigned short size) |
218 | { | 218 | { |
219 | /* Reallocation required? */ | 219 | /* Reallocation required? */ |
220 | if (pcpu_num_used + 1 > pcpu_num_allocated) { | 220 | if (pcpu_num_used + 1 > pcpu_num_allocated) { |
221 | int *new = kmalloc(sizeof(new[0]) * pcpu_num_allocated*2, | 221 | int *new = kmalloc(sizeof(new[0]) * pcpu_num_allocated*2, |
222 | GFP_KERNEL); | 222 | GFP_KERNEL); |
223 | if (!new) | 223 | if (!new) |
224 | return 0; | 224 | return 0; |
225 | 225 | ||
226 | memcpy(new, pcpu_size, sizeof(new[0])*pcpu_num_allocated); | 226 | memcpy(new, pcpu_size, sizeof(new[0])*pcpu_num_allocated); |
227 | pcpu_num_allocated *= 2; | 227 | pcpu_num_allocated *= 2; |
228 | kfree(pcpu_size); | 228 | kfree(pcpu_size); |
229 | pcpu_size = new; | 229 | pcpu_size = new; |
230 | } | 230 | } |
231 | 231 | ||
232 | /* Insert a new subblock */ | 232 | /* Insert a new subblock */ |
233 | memmove(&pcpu_size[i+1], &pcpu_size[i], | 233 | memmove(&pcpu_size[i+1], &pcpu_size[i], |
234 | sizeof(pcpu_size[0]) * (pcpu_num_used - i)); | 234 | sizeof(pcpu_size[0]) * (pcpu_num_used - i)); |
235 | pcpu_num_used++; | 235 | pcpu_num_used++; |
236 | 236 | ||
237 | pcpu_size[i+1] -= size; | 237 | pcpu_size[i+1] -= size; |
238 | pcpu_size[i] = size; | 238 | pcpu_size[i] = size; |
239 | return 1; | 239 | return 1; |
240 | } | 240 | } |
241 | 241 | ||
242 | static inline unsigned int block_size(int val) | 242 | static inline unsigned int block_size(int val) |
243 | { | 243 | { |
244 | if (val < 0) | 244 | if (val < 0) |
245 | return -val; | 245 | return -val; |
246 | return val; | 246 | return val; |
247 | } | 247 | } |
248 | 248 | ||
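The per-cpu allocator that follows manages the module per-cpu area with a single bookkeeping array, pcpu_size[]: each entry is a block size whose sign doubles as the "in use" flag (negative means allocated, positive means free), and block_size() above strips the sign so a walk can advance either way. A small worked example of the encoding, with invented numbers:

    /*
     * Sketch of the encoding (numbers are illustrative only):
     *
     *   pcpu_size[] = { -8192, 64, -128, 256 }
     *
     * block 0: the core kernel's per-cpu data, 8192 bytes, in use
     * block 1: 64 bytes, free
     * block 2: 128 bytes, in use
     * block 3: 256 bytes, free
     *
     * Walking with  ptr += block_size(pcpu_size[i])  advances through the
     * area regardless of allocation state, and percpu_modalloc() flips the
     * sign (pcpu_size[i] = -pcpu_size[i]) to mark a block allocated.
     */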
249 | /* Created by linker magic */ | 249 | /* Created by linker magic */ |
250 | extern char __per_cpu_start[], __per_cpu_end[]; | 250 | extern char __per_cpu_start[], __per_cpu_end[]; |
251 | 251 | ||
252 | static void *percpu_modalloc(unsigned long size, unsigned long align) | 252 | static void *percpu_modalloc(unsigned long size, unsigned long align) |
253 | { | 253 | { |
254 | unsigned long extra; | 254 | unsigned long extra; |
255 | unsigned int i; | 255 | unsigned int i; |
256 | void *ptr; | 256 | void *ptr; |
257 | 257 | ||
258 | BUG_ON(align > SMP_CACHE_BYTES); | 258 | BUG_ON(align > SMP_CACHE_BYTES); |
259 | 259 | ||
260 | ptr = __per_cpu_start; | 260 | ptr = __per_cpu_start; |
261 | for (i = 0; i < pcpu_num_used; ptr += block_size(pcpu_size[i]), i++) { | 261 | for (i = 0; i < pcpu_num_used; ptr += block_size(pcpu_size[i]), i++) { |
262 | /* Extra for alignment requirement. */ | 262 | /* Extra for alignment requirement. */ |
263 | extra = ALIGN((unsigned long)ptr, align) - (unsigned long)ptr; | 263 | extra = ALIGN((unsigned long)ptr, align) - (unsigned long)ptr; |
264 | BUG_ON(i == 0 && extra != 0); | 264 | BUG_ON(i == 0 && extra != 0); |
265 | 265 | ||
266 | if (pcpu_size[i] < 0 || pcpu_size[i] < extra + size) | 266 | if (pcpu_size[i] < 0 || pcpu_size[i] < extra + size) |
267 | continue; | 267 | continue; |
268 | 268 | ||
269 | /* Transfer extra to previous block. */ | 269 | /* Transfer extra to previous block. */ |
270 | if (pcpu_size[i-1] < 0) | 270 | if (pcpu_size[i-1] < 0) |
271 | pcpu_size[i-1] -= extra; | 271 | pcpu_size[i-1] -= extra; |
272 | else | 272 | else |
273 | pcpu_size[i-1] += extra; | 273 | pcpu_size[i-1] += extra; |
274 | pcpu_size[i] -= extra; | 274 | pcpu_size[i] -= extra; |
275 | ptr += extra; | 275 | ptr += extra; |
276 | 276 | ||
277 | /* Split block if warranted */ | 277 | /* Split block if warranted */ |
278 | if (pcpu_size[i] - size > sizeof(unsigned long)) | 278 | if (pcpu_size[i] - size > sizeof(unsigned long)) |
279 | if (!split_block(i, size)) | 279 | if (!split_block(i, size)) |
280 | return NULL; | 280 | return NULL; |
281 | 281 | ||
282 | /* Mark allocated */ | 282 | /* Mark allocated */ |
283 | pcpu_size[i] = -pcpu_size[i]; | 283 | pcpu_size[i] = -pcpu_size[i]; |
284 | return ptr; | 284 | return ptr; |
285 | } | 285 | } |
286 | 286 | ||
287 | printk(KERN_WARNING "Could not allocate %lu bytes percpu data\n", | 287 | printk(KERN_WARNING "Could not allocate %lu bytes percpu data\n", |
288 | size); | 288 | size); |
289 | return NULL; | 289 | return NULL; |
290 | } | 290 | } |
291 | 291 | ||
292 | static void percpu_modfree(void *freeme) | 292 | static void percpu_modfree(void *freeme) |
293 | { | 293 | { |
294 | unsigned int i; | 294 | unsigned int i; |
295 | void *ptr = __per_cpu_start + block_size(pcpu_size[0]); | 295 | void *ptr = __per_cpu_start + block_size(pcpu_size[0]); |
296 | 296 | ||
297 | /* First entry is core kernel percpu data. */ | 297 | /* First entry is core kernel percpu data. */ |
298 | for (i = 1; i < pcpu_num_used; ptr += block_size(pcpu_size[i]), i++) { | 298 | for (i = 1; i < pcpu_num_used; ptr += block_size(pcpu_size[i]), i++) { |
299 | if (ptr == freeme) { | 299 | if (ptr == freeme) { |
300 | pcpu_size[i] = -pcpu_size[i]; | 300 | pcpu_size[i] = -pcpu_size[i]; |
301 | goto free; | 301 | goto free; |
302 | } | 302 | } |
303 | } | 303 | } |
304 | BUG(); | 304 | BUG(); |
305 | 305 | ||
306 | free: | 306 | free: |
307 | /* Merge with previous? */ | 307 | /* Merge with previous? */ |
308 | if (pcpu_size[i-1] >= 0) { | 308 | if (pcpu_size[i-1] >= 0) { |
309 | pcpu_size[i-1] += pcpu_size[i]; | 309 | pcpu_size[i-1] += pcpu_size[i]; |
310 | pcpu_num_used--; | 310 | pcpu_num_used--; |
311 | memmove(&pcpu_size[i], &pcpu_size[i+1], | 311 | memmove(&pcpu_size[i], &pcpu_size[i+1], |
312 | (pcpu_num_used - i) * sizeof(pcpu_size[0])); | 312 | (pcpu_num_used - i) * sizeof(pcpu_size[0])); |
313 | i--; | 313 | i--; |
314 | } | 314 | } |
315 | /* Merge with next? */ | 315 | /* Merge with next? */ |
316 | if (i+1 < pcpu_num_used && pcpu_size[i+1] >= 0) { | 316 | if (i+1 < pcpu_num_used && pcpu_size[i+1] >= 0) { |
317 | pcpu_size[i] += pcpu_size[i+1]; | 317 | pcpu_size[i] += pcpu_size[i+1]; |
318 | pcpu_num_used--; | 318 | pcpu_num_used--; |
319 | memmove(&pcpu_size[i+1], &pcpu_size[i+2], | 319 | memmove(&pcpu_size[i+1], &pcpu_size[i+2], |
320 | (pcpu_num_used - (i+1)) * sizeof(pcpu_size[0])); | 320 | (pcpu_num_used - (i+1)) * sizeof(pcpu_size[0])); |
321 | } | 321 | } |
322 | } | 322 | } |
323 | 323 | ||
324 | static unsigned int find_pcpusec(Elf_Ehdr *hdr, | 324 | static unsigned int find_pcpusec(Elf_Ehdr *hdr, |
325 | Elf_Shdr *sechdrs, | 325 | Elf_Shdr *sechdrs, |
326 | const char *secstrings) | 326 | const char *secstrings) |
327 | { | 327 | { |
328 | return find_sec(hdr, sechdrs, secstrings, ".data.percpu"); | 328 | return find_sec(hdr, sechdrs, secstrings, ".data.percpu"); |
329 | } | 329 | } |
330 | 330 | ||
331 | static int percpu_modinit(void) | 331 | static int percpu_modinit(void) |
332 | { | 332 | { |
333 | pcpu_num_used = 2; | 333 | pcpu_num_used = 2; |
334 | pcpu_num_allocated = 2; | 334 | pcpu_num_allocated = 2; |
335 | pcpu_size = kmalloc(sizeof(pcpu_size[0]) * pcpu_num_allocated, | 335 | pcpu_size = kmalloc(sizeof(pcpu_size[0]) * pcpu_num_allocated, |
336 | GFP_KERNEL); | 336 | GFP_KERNEL); |
337 | /* Static in-kernel percpu data (used). */ | 337 | /* Static in-kernel percpu data (used). */ |
338 | pcpu_size[0] = -ALIGN(__per_cpu_end-__per_cpu_start, SMP_CACHE_BYTES); | 338 | pcpu_size[0] = -ALIGN(__per_cpu_end-__per_cpu_start, SMP_CACHE_BYTES); |
339 | /* Free room. */ | 339 | /* Free room. */ |
340 | pcpu_size[1] = PERCPU_ENOUGH_ROOM + pcpu_size[0]; | 340 | pcpu_size[1] = PERCPU_ENOUGH_ROOM + pcpu_size[0]; |
341 | if (pcpu_size[1] < 0) { | 341 | if (pcpu_size[1] < 0) { |
342 | printk(KERN_ERR "No per-cpu room for modules.\n"); | 342 | printk(KERN_ERR "No per-cpu room for modules.\n"); |
343 | pcpu_num_used = 1; | 343 | pcpu_num_used = 1; |
344 | } | 344 | } |
345 | 345 | ||
346 | return 0; | 346 | return 0; |
347 | } | 347 | } |
348 | __initcall(percpu_modinit); | 348 | __initcall(percpu_modinit); |
349 | #else /* ... !CONFIG_SMP */ | 349 | #else /* ... !CONFIG_SMP */ |
350 | static inline void *percpu_modalloc(unsigned long size, unsigned long align) | 350 | static inline void *percpu_modalloc(unsigned long size, unsigned long align) |
351 | { | 351 | { |
352 | return NULL; | 352 | return NULL; |
353 | } | 353 | } |
354 | static inline void percpu_modfree(void *pcpuptr) | 354 | static inline void percpu_modfree(void *pcpuptr) |
355 | { | 355 | { |
356 | BUG(); | 356 | BUG(); |
357 | } | 357 | } |
358 | static inline unsigned int find_pcpusec(Elf_Ehdr *hdr, | 358 | static inline unsigned int find_pcpusec(Elf_Ehdr *hdr, |
359 | Elf_Shdr *sechdrs, | 359 | Elf_Shdr *sechdrs, |
360 | const char *secstrings) | 360 | const char *secstrings) |
361 | { | 361 | { |
362 | return 0; | 362 | return 0; |
363 | } | 363 | } |
364 | static inline void percpu_modcopy(void *pcpudst, const void *src, | 364 | static inline void percpu_modcopy(void *pcpudst, const void *src, |
365 | unsigned long size) | 365 | unsigned long size) |
366 | { | 366 | { |
367 | /* pcpusec should be 0, and size of that section should be 0. */ | 367 | /* pcpusec should be 0, and size of that section should be 0. */ |
368 | BUG_ON(size != 0); | 368 | BUG_ON(size != 0); |
369 | } | 369 | } |
370 | #endif /* CONFIG_SMP */ | 370 | #endif /* CONFIG_SMP */ |
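A minimal user-space sketch of the bookkeeping idea behind percpu_modalloc()/percpu_modfree() above, under the assumption that a plain int array stands in for the real per-cpu region: block sizes are kept positive while free and negated once handed out, so freeing is just flipping the sign back and coalescing with a free neighbour. All names and sizes below are hypothetical; the kernel code additionally handles alignment, growth of the bookkeeping array, and merging with the previous block.

#include <stdio.h>

/* Toy free-list: sizes[] mirrors pcpu_size[]; entry 0 is the "core" block. */
static int sizes[8] = { -64, 192 };	/* -64: core data (used), 192: free room */
static int nused = 2;

static int blk_size(int v) { return v < 0 ? -v : v; }

/* Allocate 'want' bytes: find a free block, split off the remainder, mark used. */
static int toy_alloc(int want)
{
	int off = 0;
	for (int i = 0; i < nused; off += blk_size(sizes[i]), i++) {
		if (sizes[i] < 0 || sizes[i] < want)
			continue;
		if (sizes[i] > want && nused < 8) {	/* split the block */
			for (int j = nused; j > i + 1; j--)
				sizes[j] = sizes[j - 1];
			sizes[i + 1] = sizes[i] - want;
			sizes[i] = want;
			nused++;
		}
		sizes[i] = -sizes[i];			/* mark allocated */
		return off;
	}
	return -1;
}

/* Free the block at 'off': flip the sign, merge with a free successor
 * (the kernel version also merges with the previous block). */
static void toy_free(int off)
{
	int o = 0;
	for (int i = 0; i < nused; o += blk_size(sizes[i]), i++) {
		if (o != off)
			continue;
		sizes[i] = -sizes[i];
		if (i + 1 < nused && sizes[i + 1] >= 0) {
			sizes[i] += sizes[i + 1];
			for (int j = i + 1; j < nused - 1; j++)
				sizes[j] = sizes[j + 1];
			nused--;
		}
		return;
	}
}

int main(void)
{
	int a = toy_alloc(48);
	printf("allocated at offset %d\n", a);		/* expected: 64 */
	toy_free(a);
	printf("blocks: %d %d (nused=%d)\n", sizes[0], sizes[1], nused);
	return 0;
}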
371 | 371 | ||
372 | #ifdef CONFIG_MODULE_UNLOAD | 372 | #ifdef CONFIG_MODULE_UNLOAD |
373 | /* Init the unload section of the module. */ | 373 | /* Init the unload section of the module. */ |
374 | static void module_unload_init(struct module *mod) | 374 | static void module_unload_init(struct module *mod) |
375 | { | 375 | { |
376 | unsigned int i; | 376 | unsigned int i; |
377 | 377 | ||
378 | INIT_LIST_HEAD(&mod->modules_which_use_me); | 378 | INIT_LIST_HEAD(&mod->modules_which_use_me); |
379 | for (i = 0; i < NR_CPUS; i++) | 379 | for (i = 0; i < NR_CPUS; i++) |
380 | local_set(&mod->ref[i].count, 0); | 380 | local_set(&mod->ref[i].count, 0); |
381 | /* Hold reference count during initialization. */ | 381 | /* Hold reference count during initialization. */ |
382 | local_set(&mod->ref[_smp_processor_id()].count, 1); | 382 | local_set(&mod->ref[raw_smp_processor_id()].count, 1); |
383 | /* Backwards compatibility macros put refcount during init. */ | 383 | /* Backwards compatibility macros put refcount during init. */ |
384 | mod->waiter = current; | 384 | mod->waiter = current; |
385 | } | 385 | } |
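The only change to this file is in module_unload_init() above: the initial reference held during load is now seeded via raw_smp_processor_id(). A plausible reading (not spelled out in the code) is that it does not matter which CPU's slot ends up holding that reference, because module_refcount() below simply sums every CPU's local counter. A toy user-space sketch of that summing scheme, with hypothetical names and a fixed CPU count:

#include <stdio.h>

#define NCPUS 4

/* Toy analogue of struct module_ref: one local counter per CPU. */
static long ref[NCPUS];

/* "Get"/"put" a reference on whichever CPU the caller happens to run on. */
static void toy_module_get(int cpu) { ref[cpu]++; }
static void toy_module_put(int cpu) { ref[cpu]--; }

/* Same idea as module_refcount(): only the sum across CPUs is meaningful. */
static long toy_refcount(void)
{
	long total = 0;
	for (int i = 0; i < NCPUS; i++)
		total += ref[i];
	return total;
}

int main(void)
{
	toy_module_get(0);	/* reference taken while "loading" on CPU 0 */
	toy_module_get(3);	/* a user grabs it on CPU 3 ...             */
	toy_module_put(1);	/* ... and drops it later on CPU 1          */
	printf("refcount = %ld\n", toy_refcount());	/* prints 1 */
	return 0;
}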
386 | 386 | ||
387 | /* modules using other modules */ | 387 | /* modules using other modules */ |
388 | struct module_use | 388 | struct module_use |
389 | { | 389 | { |
390 | struct list_head list; | 390 | struct list_head list; |
391 | struct module *module_which_uses; | 391 | struct module *module_which_uses; |
392 | }; | 392 | }; |
393 | 393 | ||
394 | /* Does a already use b? */ | 394 | /* Does a already use b? */ |
395 | static int already_uses(struct module *a, struct module *b) | 395 | static int already_uses(struct module *a, struct module *b) |
396 | { | 396 | { |
397 | struct module_use *use; | 397 | struct module_use *use; |
398 | 398 | ||
399 | list_for_each_entry(use, &b->modules_which_use_me, list) { | 399 | list_for_each_entry(use, &b->modules_which_use_me, list) { |
400 | if (use->module_which_uses == a) { | 400 | if (use->module_which_uses == a) { |
401 | DEBUGP("%s uses %s!\n", a->name, b->name); | 401 | DEBUGP("%s uses %s!\n", a->name, b->name); |
402 | return 1; | 402 | return 1; |
403 | } | 403 | } |
404 | } | 404 | } |
405 | DEBUGP("%s does not use %s!\n", a->name, b->name); | 405 | DEBUGP("%s does not use %s!\n", a->name, b->name); |
406 | return 0; | 406 | return 0; |
407 | } | 407 | } |
408 | 408 | ||
409 | /* Module a uses b */ | 409 | /* Module a uses b */ |
410 | static int use_module(struct module *a, struct module *b) | 410 | static int use_module(struct module *a, struct module *b) |
411 | { | 411 | { |
412 | struct module_use *use; | 412 | struct module_use *use; |
413 | if (b == NULL || already_uses(a, b)) return 1; | 413 | if (b == NULL || already_uses(a, b)) return 1; |
414 | 414 | ||
415 | if (!strong_try_module_get(b)) | 415 | if (!strong_try_module_get(b)) |
416 | return 0; | 416 | return 0; |
417 | 417 | ||
418 | DEBUGP("Allocating new usage for %s.\n", a->name); | 418 | DEBUGP("Allocating new usage for %s.\n", a->name); |
419 | use = kmalloc(sizeof(*use), GFP_ATOMIC); | 419 | use = kmalloc(sizeof(*use), GFP_ATOMIC); |
420 | if (!use) { | 420 | if (!use) { |
421 | printk("%s: out of memory loading\n", a->name); | 421 | printk("%s: out of memory loading\n", a->name); |
422 | module_put(b); | 422 | module_put(b); |
423 | return 0; | 423 | return 0; |
424 | } | 424 | } |
425 | 425 | ||
426 | use->module_which_uses = a; | 426 | use->module_which_uses = a; |
427 | list_add(&use->list, &b->modules_which_use_me); | 427 | list_add(&use->list, &b->modules_which_use_me); |
428 | return 1; | 428 | return 1; |
429 | } | 429 | } |
430 | 430 | ||
431 | /* Clear the unload stuff of the module. */ | 431 | /* Clear the unload stuff of the module. */ |
432 | static void module_unload_free(struct module *mod) | 432 | static void module_unload_free(struct module *mod) |
433 | { | 433 | { |
434 | struct module *i; | 434 | struct module *i; |
435 | 435 | ||
436 | list_for_each_entry(i, &modules, list) { | 436 | list_for_each_entry(i, &modules, list) { |
437 | struct module_use *use; | 437 | struct module_use *use; |
438 | 438 | ||
439 | list_for_each_entry(use, &i->modules_which_use_me, list) { | 439 | list_for_each_entry(use, &i->modules_which_use_me, list) { |
440 | if (use->module_which_uses == mod) { | 440 | if (use->module_which_uses == mod) { |
441 | DEBUGP("%s unusing %s\n", mod->name, i->name); | 441 | DEBUGP("%s unusing %s\n", mod->name, i->name); |
442 | module_put(i); | 442 | module_put(i); |
443 | list_del(&use->list); | 443 | list_del(&use->list); |
444 | kfree(use); | 444 | kfree(use); |
445 | /* There can be at most one match. */ | 445 | /* There can be at most one match. */ |
446 | break; | 446 | break; |
447 | } | 447 | } |
448 | } | 448 | } |
449 | } | 449 | } |
450 | } | 450 | } |
451 | 451 | ||
452 | #ifdef CONFIG_MODULE_FORCE_UNLOAD | 452 | #ifdef CONFIG_MODULE_FORCE_UNLOAD |
453 | static inline int try_force(unsigned int flags) | 453 | static inline int try_force(unsigned int flags) |
454 | { | 454 | { |
455 | int ret = (flags & O_TRUNC); | 455 | int ret = (flags & O_TRUNC); |
456 | if (ret) | 456 | if (ret) |
457 | tainted |= TAINT_FORCED_MODULE; | 457 | tainted |= TAINT_FORCED_MODULE; |
458 | return ret; | 458 | return ret; |
459 | } | 459 | } |
460 | #else | 460 | #else |
461 | static inline int try_force(unsigned int flags) | 461 | static inline int try_force(unsigned int flags) |
462 | { | 462 | { |
463 | return 0; | 463 | return 0; |
464 | } | 464 | } |
465 | #endif /* CONFIG_MODULE_FORCE_UNLOAD */ | 465 | #endif /* CONFIG_MODULE_FORCE_UNLOAD */ |
466 | 466 | ||
467 | struct stopref | 467 | struct stopref |
468 | { | 468 | { |
469 | struct module *mod; | 469 | struct module *mod; |
470 | int flags; | 470 | int flags; |
471 | int *forced; | 471 | int *forced; |
472 | }; | 472 | }; |
473 | 473 | ||
474 | /* Whole machine is stopped with interrupts off when this runs. */ | 474 | /* Whole machine is stopped with interrupts off when this runs. */ |
475 | static int __try_stop_module(void *_sref) | 475 | static int __try_stop_module(void *_sref) |
476 | { | 476 | { |
477 | struct stopref *sref = _sref; | 477 | struct stopref *sref = _sref; |
478 | 478 | ||
479 | /* If it's not unused, quit unless we are told to block. */ | 479 | /* If it's not unused, quit unless we are told to block. */ |
480 | if ((sref->flags & O_NONBLOCK) && module_refcount(sref->mod) != 0) { | 480 | if ((sref->flags & O_NONBLOCK) && module_refcount(sref->mod) != 0) { |
481 | if (!(*sref->forced = try_force(sref->flags))) | 481 | if (!(*sref->forced = try_force(sref->flags))) |
482 | return -EWOULDBLOCK; | 482 | return -EWOULDBLOCK; |
483 | } | 483 | } |
484 | 484 | ||
485 | /* Mark it as dying. */ | 485 | /* Mark it as dying. */ |
486 | sref->mod->state = MODULE_STATE_GOING; | 486 | sref->mod->state = MODULE_STATE_GOING; |
487 | return 0; | 487 | return 0; |
488 | } | 488 | } |
489 | 489 | ||
490 | static int try_stop_module(struct module *mod, int flags, int *forced) | 490 | static int try_stop_module(struct module *mod, int flags, int *forced) |
491 | { | 491 | { |
492 | struct stopref sref = { mod, flags, forced }; | 492 | struct stopref sref = { mod, flags, forced }; |
493 | 493 | ||
494 | return stop_machine_run(__try_stop_module, &sref, NR_CPUS); | 494 | return stop_machine_run(__try_stop_module, &sref, NR_CPUS); |
495 | } | 495 | } |
496 | 496 | ||
497 | unsigned int module_refcount(struct module *mod) | 497 | unsigned int module_refcount(struct module *mod) |
498 | { | 498 | { |
499 | unsigned int i, total = 0; | 499 | unsigned int i, total = 0; |
500 | 500 | ||
501 | for (i = 0; i < NR_CPUS; i++) | 501 | for (i = 0; i < NR_CPUS; i++) |
502 | total += local_read(&mod->ref[i].count); | 502 | total += local_read(&mod->ref[i].count); |
503 | return total; | 503 | return total; |
504 | } | 504 | } |
505 | EXPORT_SYMBOL(module_refcount); | 505 | EXPORT_SYMBOL(module_refcount); |
506 | 506 | ||
507 | /* This exists whether we can unload or not */ | 507 | /* This exists whether we can unload or not */ |
508 | static void free_module(struct module *mod); | 508 | static void free_module(struct module *mod); |
509 | 509 | ||
510 | static void wait_for_zero_refcount(struct module *mod) | 510 | static void wait_for_zero_refcount(struct module *mod) |
511 | { | 511 | { |
512 | /* Since we might sleep for some time, drop the semaphore first */ | 512 | /* Since we might sleep for some time, drop the semaphore first */ |
513 | up(&module_mutex); | 513 | up(&module_mutex); |
514 | for (;;) { | 514 | for (;;) { |
515 | DEBUGP("Looking at refcount...\n"); | 515 | DEBUGP("Looking at refcount...\n"); |
516 | set_current_state(TASK_UNINTERRUPTIBLE); | 516 | set_current_state(TASK_UNINTERRUPTIBLE); |
517 | if (module_refcount(mod) == 0) | 517 | if (module_refcount(mod) == 0) |
518 | break; | 518 | break; |
519 | schedule(); | 519 | schedule(); |
520 | } | 520 | } |
521 | current->state = TASK_RUNNING; | 521 | current->state = TASK_RUNNING; |
522 | down(&module_mutex); | 522 | down(&module_mutex); |
523 | } | 523 | } |
524 | 524 | ||
525 | asmlinkage long | 525 | asmlinkage long |
526 | sys_delete_module(const char __user *name_user, unsigned int flags) | 526 | sys_delete_module(const char __user *name_user, unsigned int flags) |
527 | { | 527 | { |
528 | struct module *mod; | 528 | struct module *mod; |
529 | char name[MODULE_NAME_LEN]; | 529 | char name[MODULE_NAME_LEN]; |
530 | int ret, forced = 0; | 530 | int ret, forced = 0; |
531 | 531 | ||
532 | if (!capable(CAP_SYS_MODULE)) | 532 | if (!capable(CAP_SYS_MODULE)) |
533 | return -EPERM; | 533 | return -EPERM; |
534 | 534 | ||
535 | if (strncpy_from_user(name, name_user, MODULE_NAME_LEN-1) < 0) | 535 | if (strncpy_from_user(name, name_user, MODULE_NAME_LEN-1) < 0) |
536 | return -EFAULT; | 536 | return -EFAULT; |
537 | name[MODULE_NAME_LEN-1] = '\0'; | 537 | name[MODULE_NAME_LEN-1] = '\0'; |
538 | 538 | ||
539 | if (down_interruptible(&module_mutex) != 0) | 539 | if (down_interruptible(&module_mutex) != 0) |
540 | return -EINTR; | 540 | return -EINTR; |
541 | 541 | ||
542 | mod = find_module(name); | 542 | mod = find_module(name); |
543 | if (!mod) { | 543 | if (!mod) { |
544 | ret = -ENOENT; | 544 | ret = -ENOENT; |
545 | goto out; | 545 | goto out; |
546 | } | 546 | } |
547 | 547 | ||
548 | if (!list_empty(&mod->modules_which_use_me)) { | 548 | if (!list_empty(&mod->modules_which_use_me)) { |
549 | /* Other modules depend on us: get rid of them first. */ | 549 | /* Other modules depend on us: get rid of them first. */ |
550 | ret = -EWOULDBLOCK; | 550 | ret = -EWOULDBLOCK; |
551 | goto out; | 551 | goto out; |
552 | } | 552 | } |
553 | 553 | ||
554 | /* Doing init or already dying? */ | 554 | /* Doing init or already dying? */ |
555 | if (mod->state != MODULE_STATE_LIVE) { | 555 | if (mod->state != MODULE_STATE_LIVE) { |
556 | /* FIXME: if (force), slam module count and wake up | 556 | /* FIXME: if (force), slam module count and wake up |
557 | waiter --RR */ | 557 | waiter --RR */ |
558 | DEBUGP("%s already dying\n", mod->name); | 558 | DEBUGP("%s already dying\n", mod->name); |
559 | ret = -EBUSY; | 559 | ret = -EBUSY; |
560 | goto out; | 560 | goto out; |
561 | } | 561 | } |
562 | 562 | ||
563 | /* If it has an init func, it must have an exit func to unload */ | 563 | /* If it has an init func, it must have an exit func to unload */ |
564 | if ((mod->init != NULL && mod->exit == NULL) | 564 | if ((mod->init != NULL && mod->exit == NULL) |
565 | || mod->unsafe) { | 565 | || mod->unsafe) { |
566 | forced = try_force(flags); | 566 | forced = try_force(flags); |
567 | if (!forced) { | 567 | if (!forced) { |
568 | /* This module can't be removed */ | 568 | /* This module can't be removed */ |
569 | ret = -EBUSY; | 569 | ret = -EBUSY; |
570 | goto out; | 570 | goto out; |
571 | } | 571 | } |
572 | } | 572 | } |
573 | 573 | ||
574 | /* Set this up before setting mod->state */ | 574 | /* Set this up before setting mod->state */ |
575 | mod->waiter = current; | 575 | mod->waiter = current; |
576 | 576 | ||
577 | /* Stop the machine so refcounts can't move and disable module. */ | 577 | /* Stop the machine so refcounts can't move and disable module. */ |
578 | ret = try_stop_module(mod, flags, &forced); | 578 | ret = try_stop_module(mod, flags, &forced); |
579 | if (ret != 0) | 579 | if (ret != 0) |
580 | goto out; | 580 | goto out; |
581 | 581 | ||
582 | /* Never wait if forced. */ | 582 | /* Never wait if forced. */ |
583 | if (!forced && module_refcount(mod) != 0) | 583 | if (!forced && module_refcount(mod) != 0) |
584 | wait_for_zero_refcount(mod); | 584 | wait_for_zero_refcount(mod); |
585 | 585 | ||
586 | /* Final destruction now no one is using it. */ | 586 | /* Final destruction now no one is using it. */ |
587 | if (mod->exit != NULL) { | 587 | if (mod->exit != NULL) { |
588 | up(&module_mutex); | 588 | up(&module_mutex); |
589 | mod->exit(); | 589 | mod->exit(); |
590 | down(&module_mutex); | 590 | down(&module_mutex); |
591 | } | 591 | } |
592 | free_module(mod); | 592 | free_module(mod); |
593 | 593 | ||
594 | out: | 594 | out: |
595 | up(&module_mutex); | 595 | up(&module_mutex); |
596 | return ret; | 596 | return ret; |
597 | } | 597 | } |
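sys_delete_module() above is reached through the delete_module(2) system call, and the O_NONBLOCK/O_TRUNC bits tested in __try_stop_module() and try_force() come straight from its flags argument. A hedged user-space sketch of calling it (the module name is hypothetical, and syscall(2) is used directly since a libc wrapper may not be available):

#define _GNU_SOURCE
#include <stdio.h>
#include <string.h>
#include <errno.h>
#include <fcntl.h>		/* O_NONBLOCK, O_TRUNC */
#include <unistd.h>
#include <sys/syscall.h>	/* SYS_delete_module */

int main(void)
{
	/* Ask the kernel to unload "dummy_mod" without blocking on its refcount;
	 * adding O_TRUNC would force the unload (and taint the kernel), as the
	 * try_force() helper above shows. Requires CAP_SYS_MODULE. */
	if (syscall(SYS_delete_module, "dummy_mod", O_NONBLOCK) != 0)
		fprintf(stderr, "delete_module: %s\n", strerror(errno));
	return 0;
}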
598 | 598 | ||
599 | static void print_unload_info(struct seq_file *m, struct module *mod) | 599 | static void print_unload_info(struct seq_file *m, struct module *mod) |
600 | { | 600 | { |
601 | struct module_use *use; | 601 | struct module_use *use; |
602 | int printed_something = 0; | 602 | int printed_something = 0; |
603 | 603 | ||
604 | seq_printf(m, " %u ", module_refcount(mod)); | 604 | seq_printf(m, " %u ", module_refcount(mod)); |
605 | 605 | ||
606 | /* Always include a trailing , so userspace can differentiate | 606 | /* Always include a trailing , so userspace can differentiate |
607 | between this and the old multi-field proc format. */ | 607 | between this and the old multi-field proc format. */ |
608 | list_for_each_entry(use, &mod->modules_which_use_me, list) { | 608 | list_for_each_entry(use, &mod->modules_which_use_me, list) { |
609 | printed_something = 1; | 609 | printed_something = 1; |
610 | seq_printf(m, "%s,", use->module_which_uses->name); | 610 | seq_printf(m, "%s,", use->module_which_uses->name); |
611 | } | 611 | } |
612 | 612 | ||
613 | if (mod->unsafe) { | 613 | if (mod->unsafe) { |
614 | printed_something = 1; | 614 | printed_something = 1; |
615 | seq_printf(m, "[unsafe],"); | 615 | seq_printf(m, "[unsafe],"); |
616 | } | 616 | } |
617 | 617 | ||
618 | if (mod->init != NULL && mod->exit == NULL) { | 618 | if (mod->init != NULL && mod->exit == NULL) { |
619 | printed_something = 1; | 619 | printed_something = 1; |
620 | seq_printf(m, "[permanent],"); | 620 | seq_printf(m, "[permanent],"); |
621 | } | 621 | } |
622 | 622 | ||
623 | if (!printed_something) | 623 | if (!printed_something) |
624 | seq_printf(m, "-"); | 624 | seq_printf(m, "-"); |
625 | } | 625 | } |
626 | 626 | ||
627 | void __symbol_put(const char *symbol) | 627 | void __symbol_put(const char *symbol) |
628 | { | 628 | { |
629 | struct module *owner; | 629 | struct module *owner; |
630 | unsigned long flags; | 630 | unsigned long flags; |
631 | const unsigned long *crc; | 631 | const unsigned long *crc; |
632 | 632 | ||
633 | spin_lock_irqsave(&modlist_lock, flags); | 633 | spin_lock_irqsave(&modlist_lock, flags); |
634 | if (!__find_symbol(symbol, &owner, &crc, 1)) | 634 | if (!__find_symbol(symbol, &owner, &crc, 1)) |
635 | BUG(); | 635 | BUG(); |
636 | module_put(owner); | 636 | module_put(owner); |
637 | spin_unlock_irqrestore(&modlist_lock, flags); | 637 | spin_unlock_irqrestore(&modlist_lock, flags); |
638 | } | 638 | } |
639 | EXPORT_SYMBOL(__symbol_put); | 639 | EXPORT_SYMBOL(__symbol_put); |
640 | 640 | ||
641 | void symbol_put_addr(void *addr) | 641 | void symbol_put_addr(void *addr) |
642 | { | 642 | { |
643 | unsigned long flags; | 643 | unsigned long flags; |
644 | 644 | ||
645 | spin_lock_irqsave(&modlist_lock, flags); | 645 | spin_lock_irqsave(&modlist_lock, flags); |
646 | if (!kernel_text_address((unsigned long)addr)) | 646 | if (!kernel_text_address((unsigned long)addr)) |
647 | BUG(); | 647 | BUG(); |
648 | 648 | ||
649 | module_put(module_text_address((unsigned long)addr)); | 649 | module_put(module_text_address((unsigned long)addr)); |
650 | spin_unlock_irqrestore(&modlist_lock, flags); | 650 | spin_unlock_irqrestore(&modlist_lock, flags); |
651 | } | 651 | } |
652 | EXPORT_SYMBOL_GPL(symbol_put_addr); | 652 | EXPORT_SYMBOL_GPL(symbol_put_addr); |
653 | 653 | ||
654 | static ssize_t show_refcnt(struct module_attribute *mattr, | 654 | static ssize_t show_refcnt(struct module_attribute *mattr, |
655 | struct module *mod, char *buffer) | 655 | struct module *mod, char *buffer) |
656 | { | 656 | { |
657 | /* sysfs holds a reference */ | 657 | /* sysfs holds a reference */ |
658 | return sprintf(buffer, "%u\n", module_refcount(mod)-1); | 658 | return sprintf(buffer, "%u\n", module_refcount(mod)-1); |
659 | } | 659 | } |
660 | 660 | ||
661 | static struct module_attribute refcnt = { | 661 | static struct module_attribute refcnt = { |
662 | .attr = { .name = "refcnt", .mode = 0444, .owner = THIS_MODULE }, | 662 | .attr = { .name = "refcnt", .mode = 0444, .owner = THIS_MODULE }, |
663 | .show = show_refcnt, | 663 | .show = show_refcnt, |
664 | }; | 664 | }; |
665 | 665 | ||
666 | #else /* !CONFIG_MODULE_UNLOAD */ | 666 | #else /* !CONFIG_MODULE_UNLOAD */ |
667 | static void print_unload_info(struct seq_file *m, struct module *mod) | 667 | static void print_unload_info(struct seq_file *m, struct module *mod) |
668 | { | 668 | { |
669 | /* We don't know the usage count, or what modules are using. */ | 669 | /* We don't know the usage count, or what modules are using. */ |
670 | seq_printf(m, " - -"); | 670 | seq_printf(m, " - -"); |
671 | } | 671 | } |
672 | 672 | ||
673 | static inline void module_unload_free(struct module *mod) | 673 | static inline void module_unload_free(struct module *mod) |
674 | { | 674 | { |
675 | } | 675 | } |
676 | 676 | ||
677 | static inline int use_module(struct module *a, struct module *b) | 677 | static inline int use_module(struct module *a, struct module *b) |
678 | { | 678 | { |
679 | return strong_try_module_get(b); | 679 | return strong_try_module_get(b); |
680 | } | 680 | } |
681 | 681 | ||
682 | static inline void module_unload_init(struct module *mod) | 682 | static inline void module_unload_init(struct module *mod) |
683 | { | 683 | { |
684 | } | 684 | } |
685 | #endif /* CONFIG_MODULE_UNLOAD */ | 685 | #endif /* CONFIG_MODULE_UNLOAD */ |
686 | 686 | ||
687 | #ifdef CONFIG_OBSOLETE_MODPARM | 687 | #ifdef CONFIG_OBSOLETE_MODPARM |
688 | /* Bounds checking done below */ | 688 | /* Bounds checking done below */ |
689 | static int obsparm_copy_string(const char *val, struct kernel_param *kp) | 689 | static int obsparm_copy_string(const char *val, struct kernel_param *kp) |
690 | { | 690 | { |
691 | strcpy(kp->arg, val); | 691 | strcpy(kp->arg, val); |
692 | return 0; | 692 | return 0; |
693 | } | 693 | } |
694 | 694 | ||
695 | int set_obsolete(const char *val, struct kernel_param *kp) | 695 | int set_obsolete(const char *val, struct kernel_param *kp) |
696 | { | 696 | { |
697 | unsigned int min, max; | 697 | unsigned int min, max; |
698 | unsigned int size, maxsize; | 698 | unsigned int size, maxsize; |
699 | int dummy; | 699 | int dummy; |
700 | char *endp; | 700 | char *endp; |
701 | const char *p; | 701 | const char *p; |
702 | struct obsolete_modparm *obsparm = kp->arg; | 702 | struct obsolete_modparm *obsparm = kp->arg; |
703 | 703 | ||
704 | if (!val) { | 704 | if (!val) { |
705 | printk(KERN_ERR "Parameter %s needs an argument\n", kp->name); | 705 | printk(KERN_ERR "Parameter %s needs an argument\n", kp->name); |
706 | return -EINVAL; | 706 | return -EINVAL; |
707 | } | 707 | } |
708 | 708 | ||
709 | /* type is: [min[-max]]{b,h,i,l,s} */ | 709 | /* type is: [min[-max]]{b,h,i,l,s} */ |
710 | p = obsparm->type; | 710 | p = obsparm->type; |
711 | min = simple_strtol(p, &endp, 10); | 711 | min = simple_strtol(p, &endp, 10); |
712 | if (endp == obsparm->type) | 712 | if (endp == obsparm->type) |
713 | min = max = 1; | 713 | min = max = 1; |
714 | else if (*endp == '-') { | 714 | else if (*endp == '-') { |
715 | p = endp+1; | 715 | p = endp+1; |
716 | max = simple_strtol(p, &endp, 10); | 716 | max = simple_strtol(p, &endp, 10); |
717 | } else | 717 | } else |
718 | max = min; | 718 | max = min; |
719 | switch (*endp) { | 719 | switch (*endp) { |
720 | case 'b': | 720 | case 'b': |
721 | return param_array(kp->name, val, min, max, obsparm->addr, | 721 | return param_array(kp->name, val, min, max, obsparm->addr, |
722 | 1, param_set_byte, &dummy); | 722 | 1, param_set_byte, &dummy); |
723 | case 'h': | 723 | case 'h': |
724 | return param_array(kp->name, val, min, max, obsparm->addr, | 724 | return param_array(kp->name, val, min, max, obsparm->addr, |
725 | sizeof(short), param_set_short, &dummy); | 725 | sizeof(short), param_set_short, &dummy); |
726 | case 'i': | 726 | case 'i': |
727 | return param_array(kp->name, val, min, max, obsparm->addr, | 727 | return param_array(kp->name, val, min, max, obsparm->addr, |
728 | sizeof(int), param_set_int, &dummy); | 728 | sizeof(int), param_set_int, &dummy); |
729 | case 'l': | 729 | case 'l': |
730 | return param_array(kp->name, val, min, max, obsparm->addr, | 730 | return param_array(kp->name, val, min, max, obsparm->addr, |
731 | sizeof(long), param_set_long, &dummy); | 731 | sizeof(long), param_set_long, &dummy); |
732 | case 's': | 732 | case 's': |
733 | return param_array(kp->name, val, min, max, obsparm->addr, | 733 | return param_array(kp->name, val, min, max, obsparm->addr, |
734 | sizeof(char *), param_set_charp, &dummy); | 734 | sizeof(char *), param_set_charp, &dummy); |
735 | 735 | ||
736 | case 'c': | 736 | case 'c': |
737 | /* Undocumented: 1-5c50 means 1-5 strings of up to 49 chars, | 737 | /* Undocumented: 1-5c50 means 1-5 strings of up to 49 chars, |
738 | and the decl is "char xxx[5][50];" */ | 738 | and the decl is "char xxx[5][50];" */ |
739 | p = endp+1; | 739 | p = endp+1; |
740 | maxsize = simple_strtol(p, &endp, 10); | 740 | maxsize = simple_strtol(p, &endp, 10); |
741 | /* We check lengths here (yes, this is a hack). */ | 741 | /* We check lengths here (yes, this is a hack). */ |
742 | p = val; | 742 | p = val; |
743 | while (p[size = strcspn(p, ",")]) { | 743 | while (p[size = strcspn(p, ",")]) { |
744 | if (size >= maxsize) | 744 | if (size >= maxsize) |
745 | goto oversize; | 745 | goto oversize; |
746 | p += size+1; | 746 | p += size+1; |
747 | } | 747 | } |
748 | if (size >= maxsize) | 748 | if (size >= maxsize) |
749 | goto oversize; | 749 | goto oversize; |
750 | return param_array(kp->name, val, min, max, obsparm->addr, | 750 | return param_array(kp->name, val, min, max, obsparm->addr, |
751 | maxsize, obsparm_copy_string, &dummy); | 751 | maxsize, obsparm_copy_string, &dummy); |
752 | } | 752 | } |
753 | printk(KERN_ERR "Unknown obsolete parameter type %s\n", obsparm->type); | 753 | printk(KERN_ERR "Unknown obsolete parameter type %s\n", obsparm->type); |
754 | return -EINVAL; | 754 | return -EINVAL; |
755 | oversize: | 755 | oversize: |
756 | printk(KERN_ERR | 756 | printk(KERN_ERR |
757 | "Parameter %s doesn't fit in %u chars.\n", kp->name, maxsize); | 757 | "Parameter %s doesn't fit in %u chars.\n", kp->name, maxsize); |
758 | return -EINVAL; | 758 | return -EINVAL; |
759 | } | 759 | } |
760 | 760 | ||
761 | static int obsolete_params(const char *name, | 761 | static int obsolete_params(const char *name, |
762 | char *args, | 762 | char *args, |
763 | struct obsolete_modparm obsparm[], | 763 | struct obsolete_modparm obsparm[], |
764 | unsigned int num, | 764 | unsigned int num, |
765 | Elf_Shdr *sechdrs, | 765 | Elf_Shdr *sechdrs, |
766 | unsigned int symindex, | 766 | unsigned int symindex, |
767 | const char *strtab) | 767 | const char *strtab) |
768 | { | 768 | { |
769 | struct kernel_param *kp; | 769 | struct kernel_param *kp; |
770 | unsigned int i; | 770 | unsigned int i; |
771 | int ret; | 771 | int ret; |
772 | 772 | ||
773 | kp = kmalloc(sizeof(kp[0]) * num, GFP_KERNEL); | 773 | kp = kmalloc(sizeof(kp[0]) * num, GFP_KERNEL); |
774 | if (!kp) | 774 | if (!kp) |
775 | return -ENOMEM; | 775 | return -ENOMEM; |
776 | 776 | ||
777 | for (i = 0; i < num; i++) { | 777 | for (i = 0; i < num; i++) { |
778 | char sym_name[128 + sizeof(MODULE_SYMBOL_PREFIX)]; | 778 | char sym_name[128 + sizeof(MODULE_SYMBOL_PREFIX)]; |
779 | 779 | ||
780 | snprintf(sym_name, sizeof(sym_name), "%s%s", | 780 | snprintf(sym_name, sizeof(sym_name), "%s%s", |
781 | MODULE_SYMBOL_PREFIX, obsparm[i].name); | 781 | MODULE_SYMBOL_PREFIX, obsparm[i].name); |
782 | 782 | ||
783 | kp[i].name = obsparm[i].name; | 783 | kp[i].name = obsparm[i].name; |
784 | kp[i].perm = 000; | 784 | kp[i].perm = 000; |
785 | kp[i].set = set_obsolete; | 785 | kp[i].set = set_obsolete; |
786 | kp[i].get = NULL; | 786 | kp[i].get = NULL; |
787 | obsparm[i].addr | 787 | obsparm[i].addr |
788 | = (void *)find_local_symbol(sechdrs, symindex, strtab, | 788 | = (void *)find_local_symbol(sechdrs, symindex, strtab, |
789 | sym_name); | 789 | sym_name); |
790 | if (!obsparm[i].addr) { | 790 | if (!obsparm[i].addr) { |
791 | printk("%s: falsely claims to have parameter %s\n", | 791 | printk("%s: falsely claims to have parameter %s\n", |
792 | name, obsparm[i].name); | 792 | name, obsparm[i].name); |
793 | ret = -EINVAL; | 793 | ret = -EINVAL; |
794 | goto out; | 794 | goto out; |
795 | } | 795 | } |
796 | kp[i].arg = &obsparm[i]; | 796 | kp[i].arg = &obsparm[i]; |
797 | } | 797 | } |
798 | 798 | ||
799 | ret = parse_args(name, args, kp, num, NULL); | 799 | ret = parse_args(name, args, kp, num, NULL); |
800 | out: | 800 | out: |
801 | kfree(kp); | 801 | kfree(kp); |
802 | return ret; | 802 | return ret; |
803 | } | 803 | } |
804 | #else | 804 | #else |
805 | static int obsolete_params(const char *name, | 805 | static int obsolete_params(const char *name, |
806 | char *args, | 806 | char *args, |
807 | struct obsolete_modparm obsparm[], | 807 | struct obsolete_modparm obsparm[], |
808 | unsigned int num, | 808 | unsigned int num, |
809 | Elf_Shdr *sechdrs, | 809 | Elf_Shdr *sechdrs, |
810 | unsigned int symindex, | 810 | unsigned int symindex, |
811 | const char *strtab) | 811 | const char *strtab) |
812 | { | 812 | { |
813 | if (num != 0) | 813 | if (num != 0) |
814 | printk(KERN_WARNING "%s: Ignoring obsolete parameters\n", | 814 | printk(KERN_WARNING "%s: Ignoring obsolete parameters\n", |
815 | name); | 815 | name); |
816 | return 0; | 816 | return 0; |
817 | } | 817 | } |
818 | #endif /* CONFIG_OBSOLETE_MODPARM */ | 818 | #endif /* CONFIG_OBSOLETE_MODPARM */ |
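The type strings handled by set_obsolete() above follow the old modutils convention [min[-max]]{b,h,i,l,s} (plus the undocumented c form), e.g. "1-4i" for one to four ints. A small sketch of just the min/max/kind parsing step, applied to a hypothetical type string:

#include <stdio.h>
#include <stdlib.h>

int main(void)
{
	/* Hypothetical obsolete-parameter type string: 1 to 4 integers. */
	const char *type = "1-4i";
	char *endp;
	long min, max;

	min = strtol(type, &endp, 10);
	if (endp == type)
		min = max = 1;			/* no count given: exactly one value */
	else if (*endp == '-')
		max = strtol(endp + 1, &endp, 10);
	else
		max = min;

	printf("min=%ld max=%ld kind=%c\n", min, max, *endp);	/* min=1 max=4 kind=i */
	return 0;
}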
819 | 819 | ||
820 | static const char vermagic[] = VERMAGIC_STRING; | 820 | static const char vermagic[] = VERMAGIC_STRING; |
821 | 821 | ||
822 | #ifdef CONFIG_MODVERSIONS | 822 | #ifdef CONFIG_MODVERSIONS |
823 | static int check_version(Elf_Shdr *sechdrs, | 823 | static int check_version(Elf_Shdr *sechdrs, |
824 | unsigned int versindex, | 824 | unsigned int versindex, |
825 | const char *symname, | 825 | const char *symname, |
826 | struct module *mod, | 826 | struct module *mod, |
827 | const unsigned long *crc) | 827 | const unsigned long *crc) |
828 | { | 828 | { |
829 | unsigned int i, num_versions; | 829 | unsigned int i, num_versions; |
830 | struct modversion_info *versions; | 830 | struct modversion_info *versions; |
831 | 831 | ||
832 | /* Exporting module didn't supply crcs? OK, we're already tainted. */ | 832 | /* Exporting module didn't supply crcs? OK, we're already tainted. */ |
833 | if (!crc) | 833 | if (!crc) |
834 | return 1; | 834 | return 1; |
835 | 835 | ||
836 | versions = (void *) sechdrs[versindex].sh_addr; | 836 | versions = (void *) sechdrs[versindex].sh_addr; |
837 | num_versions = sechdrs[versindex].sh_size | 837 | num_versions = sechdrs[versindex].sh_size |
838 | / sizeof(struct modversion_info); | 838 | / sizeof(struct modversion_info); |
839 | 839 | ||
840 | for (i = 0; i < num_versions; i++) { | 840 | for (i = 0; i < num_versions; i++) { |
841 | if (strcmp(versions[i].name, symname) != 0) | 841 | if (strcmp(versions[i].name, symname) != 0) |
842 | continue; | 842 | continue; |
843 | 843 | ||
844 | if (versions[i].crc == *crc) | 844 | if (versions[i].crc == *crc) |
845 | return 1; | 845 | return 1; |
846 | printk("%s: disagrees about version of symbol %s\n", | 846 | printk("%s: disagrees about version of symbol %s\n", |
847 | mod->name, symname); | 847 | mod->name, symname); |
848 | DEBUGP("Found checksum %lX vs module %lX\n", | 848 | DEBUGP("Found checksum %lX vs module %lX\n", |
849 | *crc, versions[i].crc); | 849 | *crc, versions[i].crc); |
850 | return 0; | 850 | return 0; |
851 | } | 851 | } |
852 | /* Not in module's version table. OK, but that taints the kernel. */ | 852 | /* Not in module's version table. OK, but that taints the kernel. */ |
853 | if (!(tainted & TAINT_FORCED_MODULE)) { | 853 | if (!(tainted & TAINT_FORCED_MODULE)) { |
854 | printk("%s: no version for \"%s\" found: kernel tainted.\n", | 854 | printk("%s: no version for \"%s\" found: kernel tainted.\n", |
855 | mod->name, symname); | 855 | mod->name, symname); |
856 | tainted |= TAINT_FORCED_MODULE; | 856 | tainted |= TAINT_FORCED_MODULE; |
857 | } | 857 | } |
858 | return 1; | 858 | return 1; |
859 | } | 859 | } |
860 | 860 | ||
861 | static inline int check_modstruct_version(Elf_Shdr *sechdrs, | 861 | static inline int check_modstruct_version(Elf_Shdr *sechdrs, |
862 | unsigned int versindex, | 862 | unsigned int versindex, |
863 | struct module *mod) | 863 | struct module *mod) |
864 | { | 864 | { |
865 | const unsigned long *crc; | 865 | const unsigned long *crc; |
866 | struct module *owner; | 866 | struct module *owner; |
867 | 867 | ||
868 | if (!__find_symbol("struct_module", &owner, &crc, 1)) | 868 | if (!__find_symbol("struct_module", &owner, &crc, 1)) |
869 | BUG(); | 869 | BUG(); |
870 | return check_version(sechdrs, versindex, "struct_module", mod, | 870 | return check_version(sechdrs, versindex, "struct_module", mod, |
871 | crc); | 871 | crc); |
872 | } | 872 | } |
873 | 873 | ||
874 | /* First part is kernel version, which we ignore. */ | 874 | /* First part is kernel version, which we ignore. */ |
875 | static inline int same_magic(const char *amagic, const char *bmagic) | 875 | static inline int same_magic(const char *amagic, const char *bmagic) |
876 | { | 876 | { |
877 | amagic += strcspn(amagic, " "); | 877 | amagic += strcspn(amagic, " "); |
878 | bmagic += strcspn(bmagic, " "); | 878 | bmagic += strcspn(bmagic, " "); |
879 | return strcmp(amagic, bmagic) == 0; | 879 | return strcmp(amagic, bmagic) == 0; |
880 | } | 880 | } |
881 | #else | 881 | #else |
882 | static inline int check_version(Elf_Shdr *sechdrs, | 882 | static inline int check_version(Elf_Shdr *sechdrs, |
883 | unsigned int versindex, | 883 | unsigned int versindex, |
884 | const char *symname, | 884 | const char *symname, |
885 | struct module *mod, | 885 | struct module *mod, |
886 | const unsigned long *crc) | 886 | const unsigned long *crc) |
887 | { | 887 | { |
888 | return 1; | 888 | return 1; |
889 | } | 889 | } |
890 | 890 | ||
891 | static inline int check_modstruct_version(Elf_Shdr *sechdrs, | 891 | static inline int check_modstruct_version(Elf_Shdr *sechdrs, |
892 | unsigned int versindex, | 892 | unsigned int versindex, |
893 | struct module *mod) | 893 | struct module *mod) |
894 | { | 894 | { |
895 | return 1; | 895 | return 1; |
896 | } | 896 | } |
897 | 897 | ||
898 | static inline int same_magic(const char *amagic, const char *bmagic) | 898 | static inline int same_magic(const char *amagic, const char *bmagic) |
899 | { | 899 | { |
900 | return strcmp(amagic, bmagic) == 0; | 900 | return strcmp(amagic, bmagic) == 0; |
901 | } | 901 | } |
902 | #endif /* CONFIG_MODVERSIONS */ | 902 | #endif /* CONFIG_MODVERSIONS */ |
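With CONFIG_MODVERSIONS, same_magic() above skips everything up to the first space, so the kernel-release token of the version magic is ignored and only the remaining build options have to match (the per-symbol CRCs carry the real compatibility check); without MODVERSIONS the whole string must be identical. A small sketch of that comparison with made-up vermagic strings:

#include <stdio.h>
#include <string.h>

/* Mirror of same_magic() in the CONFIG_MODVERSIONS case: skip the first
 * space-separated token (the kernel release) and compare the rest. */
static int same_magic(const char *a, const char *b)
{
	a += strcspn(a, " ");
	b += strcspn(b, " ");
	return strcmp(a, b) == 0;
}

int main(void)
{
	/* Hypothetical vermagic strings: releases differ, build options match. */
	const char *kernel = "2.6.12-rc1 SMP preempt gcc-3.3";
	const char *module = "2.6.12-rc4 SMP preempt gcc-3.3";

	printf("%s\n", same_magic(kernel, module) ? "accepted" : "rejected");
	return 0;
}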
903 | 903 | ||
904 | /* Resolve a symbol for this module. I.e. if we find one, record usage. | 904 | /* Resolve a symbol for this module. I.e. if we find one, record usage. |
905 | Must be holding module_mutex. */ | 905 | Must be holding module_mutex. */ |
906 | static unsigned long resolve_symbol(Elf_Shdr *sechdrs, | 906 | static unsigned long resolve_symbol(Elf_Shdr *sechdrs, |
907 | unsigned int versindex, | 907 | unsigned int versindex, |
908 | const char *name, | 908 | const char *name, |
909 | struct module *mod) | 909 | struct module *mod) |
910 | { | 910 | { |
911 | struct module *owner; | 911 | struct module *owner; |
912 | unsigned long ret; | 912 | unsigned long ret; |
913 | const unsigned long *crc; | 913 | const unsigned long *crc; |
914 | 914 | ||
915 | spin_lock_irq(&modlist_lock); | 915 | spin_lock_irq(&modlist_lock); |
916 | ret = __find_symbol(name, &owner, &crc, mod->license_gplok); | 916 | ret = __find_symbol(name, &owner, &crc, mod->license_gplok); |
917 | if (ret) { | 917 | if (ret) { |
918 | /* use_module can fail due to OOM, or module unloading */ | 918 | /* use_module can fail due to OOM, or module unloading */ |
919 | if (!check_version(sechdrs, versindex, name, mod, crc) || | 919 | if (!check_version(sechdrs, versindex, name, mod, crc) || |
920 | !use_module(mod, owner)) | 920 | !use_module(mod, owner)) |
921 | ret = 0; | 921 | ret = 0; |
922 | } | 922 | } |
923 | spin_unlock_irq(&modlist_lock); | 923 | spin_unlock_irq(&modlist_lock); |
924 | return ret; | 924 | return ret; |
925 | } | 925 | } |
926 | 926 | ||
927 | 927 | ||
928 | /* | 928 | /* |
929 | * /sys/module/foo/sections stuff | 929 | * /sys/module/foo/sections stuff |
930 | * J. Corbet <corbet@lwn.net> | 930 | * J. Corbet <corbet@lwn.net> |
931 | */ | 931 | */ |
932 | #ifdef CONFIG_KALLSYMS | 932 | #ifdef CONFIG_KALLSYMS |
933 | static ssize_t module_sect_show(struct module_attribute *mattr, | 933 | static ssize_t module_sect_show(struct module_attribute *mattr, |
934 | struct module *mod, char *buf) | 934 | struct module *mod, char *buf) |
935 | { | 935 | { |
936 | struct module_sect_attr *sattr = | 936 | struct module_sect_attr *sattr = |
937 | container_of(mattr, struct module_sect_attr, mattr); | 937 | container_of(mattr, struct module_sect_attr, mattr); |
938 | return sprintf(buf, "0x%lx\n", sattr->address); | 938 | return sprintf(buf, "0x%lx\n", sattr->address); |
939 | } | 939 | } |
940 | 940 | ||
941 | static void add_sect_attrs(struct module *mod, unsigned int nsect, | 941 | static void add_sect_attrs(struct module *mod, unsigned int nsect, |
942 | char *secstrings, Elf_Shdr *sechdrs) | 942 | char *secstrings, Elf_Shdr *sechdrs) |
943 | { | 943 | { |
944 | unsigned int nloaded = 0, i, size[2]; | 944 | unsigned int nloaded = 0, i, size[2]; |
945 | struct module_sect_attrs *sect_attrs; | 945 | struct module_sect_attrs *sect_attrs; |
946 | struct module_sect_attr *sattr; | 946 | struct module_sect_attr *sattr; |
947 | struct attribute **gattr; | 947 | struct attribute **gattr; |
948 | 948 | ||
949 | /* Count loaded sections and allocate structures */ | 949 | /* Count loaded sections and allocate structures */ |
950 | for (i = 0; i < nsect; i++) | 950 | for (i = 0; i < nsect; i++) |
951 | if (sechdrs[i].sh_flags & SHF_ALLOC) | 951 | if (sechdrs[i].sh_flags & SHF_ALLOC) |
952 | nloaded++; | 952 | nloaded++; |
953 | size[0] = ALIGN(sizeof(*sect_attrs) | 953 | size[0] = ALIGN(sizeof(*sect_attrs) |
954 | + nloaded * sizeof(sect_attrs->attrs[0]), | 954 | + nloaded * sizeof(sect_attrs->attrs[0]), |
955 | sizeof(sect_attrs->grp.attrs[0])); | 955 | sizeof(sect_attrs->grp.attrs[0])); |
956 | size[1] = (nloaded + 1) * sizeof(sect_attrs->grp.attrs[0]); | 956 | size[1] = (nloaded + 1) * sizeof(sect_attrs->grp.attrs[0]); |
957 | if (! (sect_attrs = kmalloc(size[0] + size[1], GFP_KERNEL))) | 957 | if (! (sect_attrs = kmalloc(size[0] + size[1], GFP_KERNEL))) |
958 | return; | 958 | return; |
959 | 959 | ||
960 | /* Setup section attributes. */ | 960 | /* Setup section attributes. */ |
961 | sect_attrs->grp.name = "sections"; | 961 | sect_attrs->grp.name = "sections"; |
962 | sect_attrs->grp.attrs = (void *)sect_attrs + size[0]; | 962 | sect_attrs->grp.attrs = (void *)sect_attrs + size[0]; |
963 | 963 | ||
964 | sattr = &sect_attrs->attrs[0]; | 964 | sattr = &sect_attrs->attrs[0]; |
965 | gattr = &sect_attrs->grp.attrs[0]; | 965 | gattr = &sect_attrs->grp.attrs[0]; |
966 | for (i = 0; i < nsect; i++) { | 966 | for (i = 0; i < nsect; i++) { |
967 | if (! (sechdrs[i].sh_flags & SHF_ALLOC)) | 967 | if (! (sechdrs[i].sh_flags & SHF_ALLOC)) |
968 | continue; | 968 | continue; |
969 | sattr->address = sechdrs[i].sh_addr; | 969 | sattr->address = sechdrs[i].sh_addr; |
970 | strlcpy(sattr->name, secstrings + sechdrs[i].sh_name, | 970 | strlcpy(sattr->name, secstrings + sechdrs[i].sh_name, |
971 | MODULE_SECT_NAME_LEN); | 971 | MODULE_SECT_NAME_LEN); |
972 | sattr->mattr.show = module_sect_show; | 972 | sattr->mattr.show = module_sect_show; |
973 | sattr->mattr.store = NULL; | 973 | sattr->mattr.store = NULL; |
974 | sattr->mattr.attr.name = sattr->name; | 974 | sattr->mattr.attr.name = sattr->name; |
975 | sattr->mattr.attr.owner = mod; | 975 | sattr->mattr.attr.owner = mod; |
976 | sattr->mattr.attr.mode = S_IRUGO; | 976 | sattr->mattr.attr.mode = S_IRUGO; |
977 | *(gattr++) = &(sattr++)->mattr.attr; | 977 | *(gattr++) = &(sattr++)->mattr.attr; |
978 | } | 978 | } |
979 | *gattr = NULL; | 979 | *gattr = NULL; |
980 | 980 | ||
981 | if (sysfs_create_group(&mod->mkobj.kobj, &sect_attrs->grp)) | 981 | if (sysfs_create_group(&mod->mkobj.kobj, &sect_attrs->grp)) |
982 | goto out; | 982 | goto out; |
983 | 983 | ||
984 | mod->sect_attrs = sect_attrs; | 984 | mod->sect_attrs = sect_attrs; |
985 | return; | 985 | return; |
986 | out: | 986 | out: |
987 | kfree(sect_attrs); | 987 | kfree(sect_attrs); |
988 | } | 988 | } |
989 | 989 | ||
990 | static void remove_sect_attrs(struct module *mod) | 990 | static void remove_sect_attrs(struct module *mod) |
991 | { | 991 | { |
992 | if (mod->sect_attrs) { | 992 | if (mod->sect_attrs) { |
993 | sysfs_remove_group(&mod->mkobj.kobj, | 993 | sysfs_remove_group(&mod->mkobj.kobj, |
994 | &mod->sect_attrs->grp); | 994 | &mod->sect_attrs->grp); |
995 | /* We are positive that no one is using any sect attrs | 995 | /* We are positive that no one is using any sect attrs |
996 | * at this point. Deallocate immediately. */ | 996 | * at this point. Deallocate immediately. */ |
997 | kfree(mod->sect_attrs); | 997 | kfree(mod->sect_attrs); |
998 | mod->sect_attrs = NULL; | 998 | mod->sect_attrs = NULL; |
999 | } | 999 | } |
1000 | } | 1000 | } |
1001 | 1001 | ||
1002 | 1002 | ||
1003 | #else | 1003 | #else |
1004 | static inline void add_sect_attrs(struct module *mod, unsigned int nsect, | 1004 | static inline void add_sect_attrs(struct module *mod, unsigned int nsect, |
1005 | char *sectstrings, Elf_Shdr *sechdrs) | 1005 | char *sectstrings, Elf_Shdr *sechdrs) |
1006 | { | 1006 | { |
1007 | } | 1007 | } |
1008 | 1008 | ||
1009 | static inline void remove_sect_attrs(struct module *mod) | 1009 | static inline void remove_sect_attrs(struct module *mod) |
1010 | { | 1010 | { |
1011 | } | 1011 | } |
1012 | #endif /* CONFIG_KALLSYMS */ | 1012 | #endif /* CONFIG_KALLSYMS */ |
1013 | 1013 | ||
1014 | 1014 | ||
1015 | #ifdef CONFIG_MODULE_UNLOAD | 1015 | #ifdef CONFIG_MODULE_UNLOAD |
1016 | static inline int module_add_refcnt_attr(struct module *mod) | 1016 | static inline int module_add_refcnt_attr(struct module *mod) |
1017 | { | 1017 | { |
1018 | return sysfs_create_file(&mod->mkobj.kobj, &refcnt.attr); | 1018 | return sysfs_create_file(&mod->mkobj.kobj, &refcnt.attr); |
1019 | } | 1019 | } |
1020 | static void module_remove_refcnt_attr(struct module *mod) | 1020 | static void module_remove_refcnt_attr(struct module *mod) |
1021 | { | 1021 | { |
1022 | return sysfs_remove_file(&mod->mkobj.kobj, &refcnt.attr); | 1022 | return sysfs_remove_file(&mod->mkobj.kobj, &refcnt.attr); |
1023 | } | 1023 | } |
1024 | #else | 1024 | #else |
1025 | static inline int module_add_refcnt_attr(struct module *mod) | 1025 | static inline int module_add_refcnt_attr(struct module *mod) |
1026 | { | 1026 | { |
1027 | return 0; | 1027 | return 0; |
1028 | } | 1028 | } |
1029 | static void module_remove_refcnt_attr(struct module *mod) | 1029 | static void module_remove_refcnt_attr(struct module *mod) |
1030 | { | 1030 | { |
1031 | } | 1031 | } |
1032 | #endif | 1032 | #endif |
1033 | 1033 | ||
1034 | 1034 | ||
1035 | static int mod_sysfs_setup(struct module *mod, | 1035 | static int mod_sysfs_setup(struct module *mod, |
1036 | struct kernel_param *kparam, | 1036 | struct kernel_param *kparam, |
1037 | unsigned int num_params) | 1037 | unsigned int num_params) |
1038 | { | 1038 | { |
1039 | int err; | 1039 | int err; |
1040 | 1040 | ||
1041 | memset(&mod->mkobj.kobj, 0, sizeof(mod->mkobj.kobj)); | 1041 | memset(&mod->mkobj.kobj, 0, sizeof(mod->mkobj.kobj)); |
1042 | err = kobject_set_name(&mod->mkobj.kobj, "%s", mod->name); | 1042 | err = kobject_set_name(&mod->mkobj.kobj, "%s", mod->name); |
1043 | if (err) | 1043 | if (err) |
1044 | goto out; | 1044 | goto out; |
1045 | kobj_set_kset_s(&mod->mkobj, module_subsys); | 1045 | kobj_set_kset_s(&mod->mkobj, module_subsys); |
1046 | mod->mkobj.mod = mod; | 1046 | mod->mkobj.mod = mod; |
1047 | err = kobject_register(&mod->mkobj.kobj); | 1047 | err = kobject_register(&mod->mkobj.kobj); |
1048 | if (err) | 1048 | if (err) |
1049 | goto out; | 1049 | goto out; |
1050 | 1050 | ||
1051 | err = module_add_refcnt_attr(mod); | 1051 | err = module_add_refcnt_attr(mod); |
1052 | if (err) | 1052 | if (err) |
1053 | goto out_unreg; | 1053 | goto out_unreg; |
1054 | 1054 | ||
1055 | err = module_param_sysfs_setup(mod, kparam, num_params); | 1055 | err = module_param_sysfs_setup(mod, kparam, num_params); |
1056 | if (err) | 1056 | if (err) |
1057 | goto out_unreg; | 1057 | goto out_unreg; |
1058 | 1058 | ||
1059 | return 0; | 1059 | return 0; |
1060 | 1060 | ||
1061 | out_unreg: | 1061 | out_unreg: |
1062 | kobject_unregister(&mod->mkobj.kobj); | 1062 | kobject_unregister(&mod->mkobj.kobj); |
1063 | out: | 1063 | out: |
1064 | return err; | 1064 | return err; |
1065 | } | 1065 | } |
1066 | 1066 | ||
1067 | static void mod_kobject_remove(struct module *mod) | 1067 | static void mod_kobject_remove(struct module *mod) |
1068 | { | 1068 | { |
1069 | module_remove_refcnt_attr(mod); | 1069 | module_remove_refcnt_attr(mod); |
1070 | module_param_sysfs_remove(mod); | 1070 | module_param_sysfs_remove(mod); |
1071 | 1071 | ||
1072 | kobject_unregister(&mod->mkobj.kobj); | 1072 | kobject_unregister(&mod->mkobj.kobj); |
1073 | } | 1073 | } |
1074 | 1074 | ||
1075 | /* | 1075 | /* |
1076 | * unlink the module with the whole machine is stopped with interrupts off | 1076 | * unlink the module with the whole machine is stopped with interrupts off |
1077 | * - this defends against kallsyms not taking locks | 1077 | * - this defends against kallsyms not taking locks |
1078 | */ | 1078 | */ |
1079 | static int __unlink_module(void *_mod) | 1079 | static int __unlink_module(void *_mod) |
1080 | { | 1080 | { |
1081 | struct module *mod = _mod; | 1081 | struct module *mod = _mod; |
1082 | list_del(&mod->list); | 1082 | list_del(&mod->list); |
1083 | return 0; | 1083 | return 0; |
1084 | } | 1084 | } |
1085 | 1085 | ||
1086 | /* Free a module, remove from lists, etc (must hold module mutex). */ | 1086 | /* Free a module, remove from lists, etc (must hold module mutex). */ |
1087 | static void free_module(struct module *mod) | 1087 | static void free_module(struct module *mod) |
1088 | { | 1088 | { |
1089 | /* Delete from various lists */ | 1089 | /* Delete from various lists */ |
1090 | stop_machine_run(__unlink_module, mod, NR_CPUS); | 1090 | stop_machine_run(__unlink_module, mod, NR_CPUS); |
1091 | remove_sect_attrs(mod); | 1091 | remove_sect_attrs(mod); |
1092 | mod_kobject_remove(mod); | 1092 | mod_kobject_remove(mod); |
1093 | 1093 | ||
1094 | /* Arch-specific cleanup. */ | 1094 | /* Arch-specific cleanup. */ |
1095 | module_arch_cleanup(mod); | 1095 | module_arch_cleanup(mod); |
1096 | 1096 | ||
1097 | /* Module unload stuff */ | 1097 | /* Module unload stuff */ |
1098 | module_unload_free(mod); | 1098 | module_unload_free(mod); |
1099 | 1099 | ||
1100 | /* This may be NULL, but that's OK */ | 1100 | /* This may be NULL, but that's OK */ |
1101 | module_free(mod, mod->module_init); | 1101 | module_free(mod, mod->module_init); |
1102 | kfree(mod->args); | 1102 | kfree(mod->args); |
1103 | if (mod->percpu) | 1103 | if (mod->percpu) |
1104 | percpu_modfree(mod->percpu); | 1104 | percpu_modfree(mod->percpu); |
1105 | 1105 | ||
1106 | /* Finally, free the core (containing the module structure) */ | 1106 | /* Finally, free the core (containing the module structure) */ |
1107 | module_free(mod, mod->module_core); | 1107 | module_free(mod, mod->module_core); |
1108 | } | 1108 | } |
1109 | 1109 | ||
1110 | void *__symbol_get(const char *symbol) | 1110 | void *__symbol_get(const char *symbol) |
1111 | { | 1111 | { |
1112 | struct module *owner; | 1112 | struct module *owner; |
1113 | unsigned long value, flags; | 1113 | unsigned long value, flags; |
1114 | const unsigned long *crc; | 1114 | const unsigned long *crc; |
1115 | 1115 | ||
1116 | spin_lock_irqsave(&modlist_lock, flags); | 1116 | spin_lock_irqsave(&modlist_lock, flags); |
1117 | value = __find_symbol(symbol, &owner, &crc, 1); | 1117 | value = __find_symbol(symbol, &owner, &crc, 1); |
1118 | if (value && !strong_try_module_get(owner)) | 1118 | if (value && !strong_try_module_get(owner)) |
1119 | value = 0; | 1119 | value = 0; |
1120 | spin_unlock_irqrestore(&modlist_lock, flags); | 1120 | spin_unlock_irqrestore(&modlist_lock, flags); |
1121 | 1121 | ||
1122 | return (void *)value; | 1122 | return (void *)value; |
1123 | } | 1123 | } |
1124 | EXPORT_SYMBOL_GPL(__symbol_get); | 1124 | EXPORT_SYMBOL_GPL(__symbol_get); |
1125 | 1125 | ||
1126 | /* Change all symbols so that sh_value encodes the pointer directly. */ | 1126 | /* Change all symbols so that sh_value encodes the pointer directly. */ |
1127 | static int simplify_symbols(Elf_Shdr *sechdrs, | 1127 | static int simplify_symbols(Elf_Shdr *sechdrs, |
1128 | unsigned int symindex, | 1128 | unsigned int symindex, |
1129 | const char *strtab, | 1129 | const char *strtab, |
1130 | unsigned int versindex, | 1130 | unsigned int versindex, |
1131 | unsigned int pcpuindex, | 1131 | unsigned int pcpuindex, |
1132 | struct module *mod) | 1132 | struct module *mod) |
1133 | { | 1133 | { |
1134 | Elf_Sym *sym = (void *)sechdrs[symindex].sh_addr; | 1134 | Elf_Sym *sym = (void *)sechdrs[symindex].sh_addr; |
1135 | unsigned long secbase; | 1135 | unsigned long secbase; |
1136 | unsigned int i, n = sechdrs[symindex].sh_size / sizeof(Elf_Sym); | 1136 | unsigned int i, n = sechdrs[symindex].sh_size / sizeof(Elf_Sym); |
1137 | int ret = 0; | 1137 | int ret = 0; |
1138 | 1138 | ||
1139 | for (i = 1; i < n; i++) { | 1139 | for (i = 1; i < n; i++) { |
1140 | switch (sym[i].st_shndx) { | 1140 | switch (sym[i].st_shndx) { |
1141 | case SHN_COMMON: | 1141 | case SHN_COMMON: |
1142 | /* We compiled with -fno-common. These are not | 1142 | /* We compiled with -fno-common. These are not |
1143 | supposed to happen. */ | 1143 | supposed to happen. */ |
1144 | DEBUGP("Common symbol: %s\n", strtab + sym[i].st_name); | 1144 | DEBUGP("Common symbol: %s\n", strtab + sym[i].st_name); |
1145 | printk("%s: please compile with -fno-common\n", | 1145 | printk("%s: please compile with -fno-common\n", |
1146 | mod->name); | 1146 | mod->name); |
1147 | ret = -ENOEXEC; | 1147 | ret = -ENOEXEC; |
1148 | break; | 1148 | break; |
1149 | 1149 | ||
1150 | case SHN_ABS: | 1150 | case SHN_ABS: |
1151 | /* Don't need to do anything */ | 1151 | /* Don't need to do anything */ |
1152 | DEBUGP("Absolute symbol: 0x%08lx\n", | 1152 | DEBUGP("Absolute symbol: 0x%08lx\n", |
1153 | (long)sym[i].st_value); | 1153 | (long)sym[i].st_value); |
1154 | break; | 1154 | break; |
1155 | 1155 | ||
1156 | case SHN_UNDEF: | 1156 | case SHN_UNDEF: |
1157 | sym[i].st_value | 1157 | sym[i].st_value |
1158 | = resolve_symbol(sechdrs, versindex, | 1158 | = resolve_symbol(sechdrs, versindex, |
1159 | strtab + sym[i].st_name, mod); | 1159 | strtab + sym[i].st_name, mod); |
1160 | 1160 | ||
1161 | /* Ok if resolved. */ | 1161 | /* Ok if resolved. */ |
1162 | if (sym[i].st_value != 0) | 1162 | if (sym[i].st_value != 0) |
1163 | break; | 1163 | break; |
1164 | /* Ok if weak. */ | 1164 | /* Ok if weak. */ |
1165 | if (ELF_ST_BIND(sym[i].st_info) == STB_WEAK) | 1165 | if (ELF_ST_BIND(sym[i].st_info) == STB_WEAK) |
1166 | break; | 1166 | break; |
1167 | 1167 | ||
1168 | printk(KERN_WARNING "%s: Unknown symbol %s\n", | 1168 | printk(KERN_WARNING "%s: Unknown symbol %s\n", |
1169 | mod->name, strtab + sym[i].st_name); | 1169 | mod->name, strtab + sym[i].st_name); |
1170 | ret = -ENOENT; | 1170 | ret = -ENOENT; |
1171 | break; | 1171 | break; |
1172 | 1172 | ||
1173 | default: | 1173 | default: |
1174 | /* Divert to percpu allocation if a percpu var. */ | 1174 | /* Divert to percpu allocation if a percpu var. */ |
1175 | if (sym[i].st_shndx == pcpuindex) | 1175 | if (sym[i].st_shndx == pcpuindex) |
1176 | secbase = (unsigned long)mod->percpu; | 1176 | secbase = (unsigned long)mod->percpu; |
1177 | else | 1177 | else |
1178 | secbase = sechdrs[sym[i].st_shndx].sh_addr; | 1178 | secbase = sechdrs[sym[i].st_shndx].sh_addr; |
1179 | sym[i].st_value += secbase; | 1179 | sym[i].st_value += secbase; |
1180 | break; | 1180 | break; |
1181 | } | 1181 | } |
1182 | } | 1182 | } |
1183 | 1183 | ||
1184 | return ret; | 1184 | return ret; |
1185 | } | 1185 | } |
1186 | 1186 | ||
1187 | /* Update size with this section: return offset. */ | 1187 | /* Update size with this section: return offset. */ |
1188 | static long get_offset(unsigned long *size, Elf_Shdr *sechdr) | 1188 | static long get_offset(unsigned long *size, Elf_Shdr *sechdr) |
1189 | { | 1189 | { |
1190 | long ret; | 1190 | long ret; |
1191 | 1191 | ||
1192 | ret = ALIGN(*size, sechdr->sh_addralign ?: 1); | 1192 | ret = ALIGN(*size, sechdr->sh_addralign ?: 1); |
1193 | *size = ret + sechdr->sh_size; | 1193 | *size = ret + sechdr->sh_size; |
1194 | return ret; | 1194 | return ret; |
1195 | } | 1195 | } |
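/*
 * For illustration only (hypothetical numbers): with *size == 100 and a
 * section whose sh_addralign == 32 and sh_size == 50, ALIGN(100, 32)
 * rounds up to 128, so get_offset() returns 128 and *size becomes 178.
 * The "?: 1" fallback treats sh_addralign == 0 as an alignment of 1.
 */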
1196 | 1196 | ||
1197 | /* Lay out the SHF_ALLOC sections in a way not dissimilar to how ld | 1197 | /* Lay out the SHF_ALLOC sections in a way not dissimilar to how ld |
1198 | might -- code, read-only data, read-write data, small data. Tally | 1198 | might -- code, read-only data, read-write data, small data. Tally |
1199 | sizes, and place the offsets into sh_entsize fields: high bit means it | 1199 | sizes, and place the offsets into sh_entsize fields: high bit means it |
1200 | belongs in init. */ | 1200 | belongs in init. */ |
1201 | static void layout_sections(struct module *mod, | 1201 | static void layout_sections(struct module *mod, |
1202 | const Elf_Ehdr *hdr, | 1202 | const Elf_Ehdr *hdr, |
1203 | Elf_Shdr *sechdrs, | 1203 | Elf_Shdr *sechdrs, |
1204 | const char *secstrings) | 1204 | const char *secstrings) |
1205 | { | 1205 | { |
1206 | static unsigned long const masks[][2] = { | 1206 | static unsigned long const masks[][2] = { |
1207 | /* NOTE: all executable code must be the first section | 1207 | /* NOTE: all executable code must be the first section |
1208 | * in this array; otherwise modify the text_size | 1208 | * in this array; otherwise modify the text_size |
1209 | * finder in the two loops below */ | 1209 | * finder in the two loops below */ |
1210 | { SHF_EXECINSTR | SHF_ALLOC, ARCH_SHF_SMALL }, | 1210 | { SHF_EXECINSTR | SHF_ALLOC, ARCH_SHF_SMALL }, |
1211 | { SHF_ALLOC, SHF_WRITE | ARCH_SHF_SMALL }, | 1211 | { SHF_ALLOC, SHF_WRITE | ARCH_SHF_SMALL }, |
1212 | { SHF_WRITE | SHF_ALLOC, ARCH_SHF_SMALL }, | 1212 | { SHF_WRITE | SHF_ALLOC, ARCH_SHF_SMALL }, |
1213 | { ARCH_SHF_SMALL | SHF_ALLOC, 0 } | 1213 | { ARCH_SHF_SMALL | SHF_ALLOC, 0 } |
1214 | }; | 1214 | }; |
1215 | unsigned int m, i; | 1215 | unsigned int m, i; |
1216 | 1216 | ||
1217 | for (i = 0; i < hdr->e_shnum; i++) | 1217 | for (i = 0; i < hdr->e_shnum; i++) |
1218 | sechdrs[i].sh_entsize = ~0UL; | 1218 | sechdrs[i].sh_entsize = ~0UL; |
1219 | 1219 | ||
1220 | DEBUGP("Core section allocation order:\n"); | 1220 | DEBUGP("Core section allocation order:\n"); |
1221 | for (m = 0; m < ARRAY_SIZE(masks); ++m) { | 1221 | for (m = 0; m < ARRAY_SIZE(masks); ++m) { |
1222 | for (i = 0; i < hdr->e_shnum; ++i) { | 1222 | for (i = 0; i < hdr->e_shnum; ++i) { |
1223 | Elf_Shdr *s = &sechdrs[i]; | 1223 | Elf_Shdr *s = &sechdrs[i]; |
1224 | 1224 | ||
1225 | if ((s->sh_flags & masks[m][0]) != masks[m][0] | 1225 | if ((s->sh_flags & masks[m][0]) != masks[m][0] |
1226 | || (s->sh_flags & masks[m][1]) | 1226 | || (s->sh_flags & masks[m][1]) |
1227 | || s->sh_entsize != ~0UL | 1227 | || s->sh_entsize != ~0UL |
1228 | || strncmp(secstrings + s->sh_name, | 1228 | || strncmp(secstrings + s->sh_name, |
1229 | ".init", 5) == 0) | 1229 | ".init", 5) == 0) |
1230 | continue; | 1230 | continue; |
1231 | s->sh_entsize = get_offset(&mod->core_size, s); | 1231 | s->sh_entsize = get_offset(&mod->core_size, s); |
1232 | DEBUGP("\t%s\n", secstrings + s->sh_name); | 1232 | DEBUGP("\t%s\n", secstrings + s->sh_name); |
1233 | } | 1233 | } |
1234 | if (m == 0) | 1234 | if (m == 0) |
1235 | mod->core_text_size = mod->core_size; | 1235 | mod->core_text_size = mod->core_size; |
1236 | } | 1236 | } |
1237 | 1237 | ||
1238 | DEBUGP("Init section allocation order:\n"); | 1238 | DEBUGP("Init section allocation order:\n"); |
1239 | for (m = 0; m < ARRAY_SIZE(masks); ++m) { | 1239 | for (m = 0; m < ARRAY_SIZE(masks); ++m) { |
1240 | for (i = 0; i < hdr->e_shnum; ++i) { | 1240 | for (i = 0; i < hdr->e_shnum; ++i) { |
1241 | Elf_Shdr *s = &sechdrs[i]; | 1241 | Elf_Shdr *s = &sechdrs[i]; |
1242 | 1242 | ||
1243 | if ((s->sh_flags & masks[m][0]) != masks[m][0] | 1243 | if ((s->sh_flags & masks[m][0]) != masks[m][0] |
1244 | || (s->sh_flags & masks[m][1]) | 1244 | || (s->sh_flags & masks[m][1]) |
1245 | || s->sh_entsize != ~0UL | 1245 | || s->sh_entsize != ~0UL |
1246 | || strncmp(secstrings + s->sh_name, | 1246 | || strncmp(secstrings + s->sh_name, |
1247 | ".init", 5) != 0) | 1247 | ".init", 5) != 0) |
1248 | continue; | 1248 | continue; |
1249 | s->sh_entsize = (get_offset(&mod->init_size, s) | 1249 | s->sh_entsize = (get_offset(&mod->init_size, s) |
1250 | | INIT_OFFSET_MASK); | 1250 | | INIT_OFFSET_MASK); |
1251 | DEBUGP("\t%s\n", secstrings + s->sh_name); | 1251 | DEBUGP("\t%s\n", secstrings + s->sh_name); |
1252 | } | 1252 | } |
1253 | if (m == 0) | 1253 | if (m == 0) |
1254 | mod->init_text_size = mod->init_size; | 1254 | mod->init_text_size = mod->init_size; |
1255 | } | 1255 | } |
1256 | } | 1256 | } |
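/*
 * The offsets recorded in sh_entsize above are decoded later in
 * load_module() when the sections are copied: an entry with
 * INIT_OFFSET_MASK set is placed at
 *         mod->module_init + (sh_entsize & ~INIT_OFFSET_MASK)
 * and a plain offset at
 *         mod->module_core + sh_entsize,
 * so one field carries both the destination (core vs. init) and the
 * offset within that allocation.
 */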
1257 | 1257 | ||
1258 | static inline int license_is_gpl_compatible(const char *license) | 1258 | static inline int license_is_gpl_compatible(const char *license) |
1259 | { | 1259 | { |
1260 | return (strcmp(license, "GPL") == 0 | 1260 | return (strcmp(license, "GPL") == 0 |
1261 | || strcmp(license, "GPL v2") == 0 | 1261 | || strcmp(license, "GPL v2") == 0 |
1262 | || strcmp(license, "GPL and additional rights") == 0 | 1262 | || strcmp(license, "GPL and additional rights") == 0 |
1263 | || strcmp(license, "Dual BSD/GPL") == 0 | 1263 | || strcmp(license, "Dual BSD/GPL") == 0 |
1264 | || strcmp(license, "Dual MPL/GPL") == 0); | 1264 | || strcmp(license, "Dual MPL/GPL") == 0); |
1265 | } | 1265 | } |
1266 | 1266 | ||
1267 | static void set_license(struct module *mod, const char *license) | 1267 | static void set_license(struct module *mod, const char *license) |
1268 | { | 1268 | { |
1269 | if (!license) | 1269 | if (!license) |
1270 | license = "unspecified"; | 1270 | license = "unspecified"; |
1271 | 1271 | ||
1272 | mod->license_gplok = license_is_gpl_compatible(license); | 1272 | mod->license_gplok = license_is_gpl_compatible(license); |
1273 | if (!mod->license_gplok && !(tainted & TAINT_PROPRIETARY_MODULE)) { | 1273 | if (!mod->license_gplok && !(tainted & TAINT_PROPRIETARY_MODULE)) { |
1274 | printk(KERN_WARNING "%s: module license '%s' taints kernel.\n", | 1274 | printk(KERN_WARNING "%s: module license '%s' taints kernel.\n", |
1275 | mod->name, license); | 1275 | mod->name, license); |
1276 | tainted |= TAINT_PROPRIETARY_MODULE; | 1276 | tainted |= TAINT_PROPRIETARY_MODULE; |
1277 | } | 1277 | } |
1278 | } | 1278 | } |
1279 | 1279 | ||
1280 | /* Parse tag=value strings from .modinfo section */ | 1280 | /* Parse tag=value strings from .modinfo section */ |
1281 | static char *next_string(char *string, unsigned long *secsize) | 1281 | static char *next_string(char *string, unsigned long *secsize) |
1282 | { | 1282 | { |
1283 | /* Skip non-zero chars */ | 1283 | /* Skip non-zero chars */ |
1284 | while (string[0]) { | 1284 | while (string[0]) { |
1285 | string++; | 1285 | string++; |
1286 | if ((*secsize)-- <= 1) | 1286 | if ((*secsize)-- <= 1) |
1287 | return NULL; | 1287 | return NULL; |
1288 | } | 1288 | } |
1289 | 1289 | ||
1290 | /* Skip any zero padding. */ | 1290 | /* Skip any zero padding. */ |
1291 | while (!string[0]) { | 1291 | while (!string[0]) { |
1292 | string++; | 1292 | string++; |
1293 | if ((*secsize)-- <= 1) | 1293 | if ((*secsize)-- <= 1) |
1294 | return NULL; | 1294 | return NULL; |
1295 | } | 1295 | } |
1296 | return string; | 1296 | return string; |
1297 | } | 1297 | } |
1298 | 1298 | ||
1299 | static char *get_modinfo(Elf_Shdr *sechdrs, | 1299 | static char *get_modinfo(Elf_Shdr *sechdrs, |
1300 | unsigned int info, | 1300 | unsigned int info, |
1301 | const char *tag) | 1301 | const char *tag) |
1302 | { | 1302 | { |
1303 | char *p; | 1303 | char *p; |
1304 | unsigned int taglen = strlen(tag); | 1304 | unsigned int taglen = strlen(tag); |
1305 | unsigned long size = sechdrs[info].sh_size; | 1305 | unsigned long size = sechdrs[info].sh_size; |
1306 | 1306 | ||
1307 | for (p = (char *)sechdrs[info].sh_addr; p; p = next_string(p, &size)) { | 1307 | for (p = (char *)sechdrs[info].sh_addr; p; p = next_string(p, &size)) { |
1308 | if (strncmp(p, tag, taglen) == 0 && p[taglen] == '=') | 1308 | if (strncmp(p, tag, taglen) == 0 && p[taglen] == '=') |
1309 | return p + taglen + 1; | 1309 | return p + taglen + 1; |
1310 | } | 1310 | } |
1311 | return NULL; | 1311 | return NULL; |
1312 | } | 1312 | } |
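/*
 * For illustration (hypothetical contents): .modinfo is a run of
 * NUL-terminated "tag=value" strings such as "license=GPL\0vermagic=...\0".
 * get_modinfo(sechdrs, infoindex, "license") walks them via next_string()
 * and would return a pointer to "GPL"; a tag that is absent yields NULL.
 */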
1313 | 1313 | ||
1314 | #ifdef CONFIG_KALLSYMS | 1314 | #ifdef CONFIG_KALLSYMS |
1315 | int is_exported(const char *name, const struct module *mod) | 1315 | int is_exported(const char *name, const struct module *mod) |
1316 | { | 1316 | { |
1317 | unsigned int i; | 1317 | unsigned int i; |
1318 | 1318 | ||
1319 | if (!mod) { | 1319 | if (!mod) { |
1320 | for (i = 0; __start___ksymtab+i < __stop___ksymtab; i++) | 1320 | for (i = 0; __start___ksymtab+i < __stop___ksymtab; i++) |
1321 | if (strcmp(__start___ksymtab[i].name, name) == 0) | 1321 | if (strcmp(__start___ksymtab[i].name, name) == 0) |
1322 | return 1; | 1322 | return 1; |
1323 | return 0; | 1323 | return 0; |
1324 | } | 1324 | } |
1325 | for (i = 0; i < mod->num_syms; i++) | 1325 | for (i = 0; i < mod->num_syms; i++) |
1326 | if (strcmp(mod->syms[i].name, name) == 0) | 1326 | if (strcmp(mod->syms[i].name, name) == 0) |
1327 | return 1; | 1327 | return 1; |
1328 | return 0; | 1328 | return 0; |
1329 | } | 1329 | } |
1330 | 1330 | ||
1331 | /* As per nm */ | 1331 | /* As per nm */ |
1332 | static char elf_type(const Elf_Sym *sym, | 1332 | static char elf_type(const Elf_Sym *sym, |
1333 | Elf_Shdr *sechdrs, | 1333 | Elf_Shdr *sechdrs, |
1334 | const char *secstrings, | 1334 | const char *secstrings, |
1335 | struct module *mod) | 1335 | struct module *mod) |
1336 | { | 1336 | { |
1337 | if (ELF_ST_BIND(sym->st_info) == STB_WEAK) { | 1337 | if (ELF_ST_BIND(sym->st_info) == STB_WEAK) { |
1338 | if (ELF_ST_TYPE(sym->st_info) == STT_OBJECT) | 1338 | if (ELF_ST_TYPE(sym->st_info) == STT_OBJECT) |
1339 | return 'v'; | 1339 | return 'v'; |
1340 | else | 1340 | else |
1341 | return 'w'; | 1341 | return 'w'; |
1342 | } | 1342 | } |
1343 | if (sym->st_shndx == SHN_UNDEF) | 1343 | if (sym->st_shndx == SHN_UNDEF) |
1344 | return 'U'; | 1344 | return 'U'; |
1345 | if (sym->st_shndx == SHN_ABS) | 1345 | if (sym->st_shndx == SHN_ABS) |
1346 | return 'a'; | 1346 | return 'a'; |
1347 | if (sym->st_shndx >= SHN_LORESERVE) | 1347 | if (sym->st_shndx >= SHN_LORESERVE) |
1348 | return '?'; | 1348 | return '?'; |
1349 | if (sechdrs[sym->st_shndx].sh_flags & SHF_EXECINSTR) | 1349 | if (sechdrs[sym->st_shndx].sh_flags & SHF_EXECINSTR) |
1350 | return 't'; | 1350 | return 't'; |
1351 | if (sechdrs[sym->st_shndx].sh_flags & SHF_ALLOC | 1351 | if (sechdrs[sym->st_shndx].sh_flags & SHF_ALLOC |
1352 | && sechdrs[sym->st_shndx].sh_type != SHT_NOBITS) { | 1352 | && sechdrs[sym->st_shndx].sh_type != SHT_NOBITS) { |
1353 | if (!(sechdrs[sym->st_shndx].sh_flags & SHF_WRITE)) | 1353 | if (!(sechdrs[sym->st_shndx].sh_flags & SHF_WRITE)) |
1354 | return 'r'; | 1354 | return 'r'; |
1355 | else if (sechdrs[sym->st_shndx].sh_flags & ARCH_SHF_SMALL) | 1355 | else if (sechdrs[sym->st_shndx].sh_flags & ARCH_SHF_SMALL) |
1356 | return 'g'; | 1356 | return 'g'; |
1357 | else | 1357 | else |
1358 | return 'd'; | 1358 | return 'd'; |
1359 | } | 1359 | } |
1360 | if (sechdrs[sym->st_shndx].sh_type == SHT_NOBITS) { | 1360 | if (sechdrs[sym->st_shndx].sh_type == SHT_NOBITS) { |
1361 | if (sechdrs[sym->st_shndx].sh_flags & ARCH_SHF_SMALL) | 1361 | if (sechdrs[sym->st_shndx].sh_flags & ARCH_SHF_SMALL) |
1362 | return 's'; | 1362 | return 's'; |
1363 | else | 1363 | else |
1364 | return 'b'; | 1364 | return 'b'; |
1365 | } | 1365 | } |
1366 | if (strncmp(secstrings + sechdrs[sym->st_shndx].sh_name, | 1366 | if (strncmp(secstrings + sechdrs[sym->st_shndx].sh_name, |
1367 | ".debug", strlen(".debug")) == 0) | 1367 | ".debug", strlen(".debug")) == 0) |
1368 | return 'n'; | 1368 | return 'n'; |
1369 | return '?'; | 1369 | return '?'; |
1370 | } | 1370 | } |
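/*
 * For illustration (assumed section layouts): a symbol defined in an
 * executable SHF_ALLOC section maps to 't', one in a writable PROGBITS
 * data section to 'd', one in an SHT_NOBITS (.bss-style) section to 'b',
 * and an undefined symbol to 'U', following the nm conventions noted
 * above.
 */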
1371 | 1371 | ||
1372 | static void add_kallsyms(struct module *mod, | 1372 | static void add_kallsyms(struct module *mod, |
1373 | Elf_Shdr *sechdrs, | 1373 | Elf_Shdr *sechdrs, |
1374 | unsigned int symindex, | 1374 | unsigned int symindex, |
1375 | unsigned int strindex, | 1375 | unsigned int strindex, |
1376 | const char *secstrings) | 1376 | const char *secstrings) |
1377 | { | 1377 | { |
1378 | unsigned int i; | 1378 | unsigned int i; |
1379 | 1379 | ||
1380 | mod->symtab = (void *)sechdrs[symindex].sh_addr; | 1380 | mod->symtab = (void *)sechdrs[symindex].sh_addr; |
1381 | mod->num_symtab = sechdrs[symindex].sh_size / sizeof(Elf_Sym); | 1381 | mod->num_symtab = sechdrs[symindex].sh_size / sizeof(Elf_Sym); |
1382 | mod->strtab = (void *)sechdrs[strindex].sh_addr; | 1382 | mod->strtab = (void *)sechdrs[strindex].sh_addr; |
1383 | 1383 | ||
1384 | /* Set types up while we still have access to sections. */ | 1384 | /* Set types up while we still have access to sections. */ |
1385 | for (i = 0; i < mod->num_symtab; i++) | 1385 | for (i = 0; i < mod->num_symtab; i++) |
1386 | mod->symtab[i].st_info | 1386 | mod->symtab[i].st_info |
1387 | = elf_type(&mod->symtab[i], sechdrs, secstrings, mod); | 1387 | = elf_type(&mod->symtab[i], sechdrs, secstrings, mod); |
1388 | } | 1388 | } |
1389 | #else | 1389 | #else |
1390 | static inline void add_kallsyms(struct module *mod, | 1390 | static inline void add_kallsyms(struct module *mod, |
1391 | Elf_Shdr *sechdrs, | 1391 | Elf_Shdr *sechdrs, |
1392 | unsigned int symindex, | 1392 | unsigned int symindex, |
1393 | unsigned int strindex, | 1393 | unsigned int strindex, |
1394 | const char *secstrings) | 1394 | const char *secstrings) |
1395 | { | 1395 | { |
1396 | } | 1396 | } |
1397 | #endif /* CONFIG_KALLSYMS */ | 1397 | #endif /* CONFIG_KALLSYMS */ |
1398 | 1398 | ||
1399 | /* Allocate and load the module: note that size of section 0 is always | 1399 | /* Allocate and load the module: note that size of section 0 is always |
1400 | zero, and we rely on this for optional sections. */ | 1400 | zero, and we rely on this for optional sections. */ |
1401 | static struct module *load_module(void __user *umod, | 1401 | static struct module *load_module(void __user *umod, |
1402 | unsigned long len, | 1402 | unsigned long len, |
1403 | const char __user *uargs) | 1403 | const char __user *uargs) |
1404 | { | 1404 | { |
1405 | Elf_Ehdr *hdr; | 1405 | Elf_Ehdr *hdr; |
1406 | Elf_Shdr *sechdrs; | 1406 | Elf_Shdr *sechdrs; |
1407 | char *secstrings, *args, *modmagic, *strtab = NULL; | 1407 | char *secstrings, *args, *modmagic, *strtab = NULL; |
1408 | unsigned int i, symindex = 0, strindex = 0, setupindex, exindex, | 1408 | unsigned int i, symindex = 0, strindex = 0, setupindex, exindex, |
1409 | exportindex, modindex, obsparmindex, infoindex, gplindex, | 1409 | exportindex, modindex, obsparmindex, infoindex, gplindex, |
1410 | crcindex, gplcrcindex, versindex, pcpuindex; | 1410 | crcindex, gplcrcindex, versindex, pcpuindex; |
1411 | long arglen; | 1411 | long arglen; |
1412 | struct module *mod; | 1412 | struct module *mod; |
1413 | long err = 0; | 1413 | long err = 0; |
1414 | void *percpu = NULL, *ptr = NULL; /* Stops spurious gcc warning */ | 1414 | void *percpu = NULL, *ptr = NULL; /* Stops spurious gcc warning */ |
1415 | struct exception_table_entry *extable; | 1415 | struct exception_table_entry *extable; |
1416 | 1416 | ||
1417 | DEBUGP("load_module: umod=%p, len=%lu, uargs=%p\n", | 1417 | DEBUGP("load_module: umod=%p, len=%lu, uargs=%p\n", |
1418 | umod, len, uargs); | 1418 | umod, len, uargs); |
1419 | if (len < sizeof(*hdr)) | 1419 | if (len < sizeof(*hdr)) |
1420 | return ERR_PTR(-ENOEXEC); | 1420 | return ERR_PTR(-ENOEXEC); |
1421 | 1421 | ||
1422 | /* Suck in entire file: we'll want most of it. */ | 1422 | /* Suck in entire file: we'll want most of it. */ |
1423 | /* vmalloc barfs on "unusual" numbers. Check here */ | 1423 | /* vmalloc barfs on "unusual" numbers. Check here */ |
1424 | if (len > 64 * 1024 * 1024 || (hdr = vmalloc(len)) == NULL) | 1424 | if (len > 64 * 1024 * 1024 || (hdr = vmalloc(len)) == NULL) |
1425 | return ERR_PTR(-ENOMEM); | 1425 | return ERR_PTR(-ENOMEM); |
1426 | if (copy_from_user(hdr, umod, len) != 0) { | 1426 | if (copy_from_user(hdr, umod, len) != 0) { |
1427 | err = -EFAULT; | 1427 | err = -EFAULT; |
1428 | goto free_hdr; | 1428 | goto free_hdr; |
1429 | } | 1429 | } |
1430 | 1430 | ||
1431 | /* Sanity checks against insmoding binaries or wrong arch, | 1431 | /* Sanity checks against insmoding binaries or wrong arch, |
1432 | weird elf version */ | 1432 | weird elf version */ |
1433 | if (memcmp(hdr->e_ident, ELFMAG, 4) != 0 | 1433 | if (memcmp(hdr->e_ident, ELFMAG, 4) != 0 |
1434 | || hdr->e_type != ET_REL | 1434 | || hdr->e_type != ET_REL |
1435 | || !elf_check_arch(hdr) | 1435 | || !elf_check_arch(hdr) |
1436 | || hdr->e_shentsize != sizeof(*sechdrs)) { | 1436 | || hdr->e_shentsize != sizeof(*sechdrs)) { |
1437 | err = -ENOEXEC; | 1437 | err = -ENOEXEC; |
1438 | goto free_hdr; | 1438 | goto free_hdr; |
1439 | } | 1439 | } |
1440 | 1440 | ||
1441 | if (len < hdr->e_shoff + hdr->e_shnum * sizeof(Elf_Shdr)) | 1441 | if (len < hdr->e_shoff + hdr->e_shnum * sizeof(Elf_Shdr)) |
1442 | goto truncated; | 1442 | goto truncated; |
1443 | 1443 | ||
1444 | /* Convenience variables */ | 1444 | /* Convenience variables */ |
1445 | sechdrs = (void *)hdr + hdr->e_shoff; | 1445 | sechdrs = (void *)hdr + hdr->e_shoff; |
1446 | secstrings = (void *)hdr + sechdrs[hdr->e_shstrndx].sh_offset; | 1446 | secstrings = (void *)hdr + sechdrs[hdr->e_shstrndx].sh_offset; |
1447 | sechdrs[0].sh_addr = 0; | 1447 | sechdrs[0].sh_addr = 0; |
1448 | 1448 | ||
1449 | for (i = 1; i < hdr->e_shnum; i++) { | 1449 | for (i = 1; i < hdr->e_shnum; i++) { |
1450 | if (sechdrs[i].sh_type != SHT_NOBITS | 1450 | if (sechdrs[i].sh_type != SHT_NOBITS |
1451 | && len < sechdrs[i].sh_offset + sechdrs[i].sh_size) | 1451 | && len < sechdrs[i].sh_offset + sechdrs[i].sh_size) |
1452 | goto truncated; | 1452 | goto truncated; |
1453 | 1453 | ||
1454 | /* Mark all sections' sh_addr with their address in the | 1454 | /* Mark all sections' sh_addr with their address in the |
1455 | temporary image. */ | 1455 | temporary image. */ |
1456 | sechdrs[i].sh_addr = (size_t)hdr + sechdrs[i].sh_offset; | 1456 | sechdrs[i].sh_addr = (size_t)hdr + sechdrs[i].sh_offset; |
1457 | 1457 | ||
1458 | /* Internal symbols and strings. */ | 1458 | /* Internal symbols and strings. */ |
1459 | if (sechdrs[i].sh_type == SHT_SYMTAB) { | 1459 | if (sechdrs[i].sh_type == SHT_SYMTAB) { |
1460 | symindex = i; | 1460 | symindex = i; |
1461 | strindex = sechdrs[i].sh_link; | 1461 | strindex = sechdrs[i].sh_link; |
1462 | strtab = (char *)hdr + sechdrs[strindex].sh_offset; | 1462 | strtab = (char *)hdr + sechdrs[strindex].sh_offset; |
1463 | } | 1463 | } |
1464 | #ifndef CONFIG_MODULE_UNLOAD | 1464 | #ifndef CONFIG_MODULE_UNLOAD |
1465 | /* Don't load .exit sections */ | 1465 | /* Don't load .exit sections */ |
1466 | if (strncmp(secstrings+sechdrs[i].sh_name, ".exit", 5) == 0) | 1466 | if (strncmp(secstrings+sechdrs[i].sh_name, ".exit", 5) == 0) |
1467 | sechdrs[i].sh_flags &= ~(unsigned long)SHF_ALLOC; | 1467 | sechdrs[i].sh_flags &= ~(unsigned long)SHF_ALLOC; |
1468 | #endif | 1468 | #endif |
1469 | } | 1469 | } |
1470 | 1470 | ||
1471 | modindex = find_sec(hdr, sechdrs, secstrings, | 1471 | modindex = find_sec(hdr, sechdrs, secstrings, |
1472 | ".gnu.linkonce.this_module"); | 1472 | ".gnu.linkonce.this_module"); |
1473 | if (!modindex) { | 1473 | if (!modindex) { |
1474 | printk(KERN_WARNING "No module found in object\n"); | 1474 | printk(KERN_WARNING "No module found in object\n"); |
1475 | err = -ENOEXEC; | 1475 | err = -ENOEXEC; |
1476 | goto free_hdr; | 1476 | goto free_hdr; |
1477 | } | 1477 | } |
1478 | mod = (void *)sechdrs[modindex].sh_addr; | 1478 | mod = (void *)sechdrs[modindex].sh_addr; |
1479 | 1479 | ||
1480 | if (symindex == 0) { | 1480 | if (symindex == 0) { |
1481 | printk(KERN_WARNING "%s: module has no symbols (stripped?)\n", | 1481 | printk(KERN_WARNING "%s: module has no symbols (stripped?)\n", |
1482 | mod->name); | 1482 | mod->name); |
1483 | err = -ENOEXEC; | 1483 | err = -ENOEXEC; |
1484 | goto free_hdr; | 1484 | goto free_hdr; |
1485 | } | 1485 | } |
1486 | 1486 | ||
1487 | /* Optional sections */ | 1487 | /* Optional sections */ |
1488 | exportindex = find_sec(hdr, sechdrs, secstrings, "__ksymtab"); | 1488 | exportindex = find_sec(hdr, sechdrs, secstrings, "__ksymtab"); |
1489 | gplindex = find_sec(hdr, sechdrs, secstrings, "__ksymtab_gpl"); | 1489 | gplindex = find_sec(hdr, sechdrs, secstrings, "__ksymtab_gpl"); |
1490 | crcindex = find_sec(hdr, sechdrs, secstrings, "__kcrctab"); | 1490 | crcindex = find_sec(hdr, sechdrs, secstrings, "__kcrctab"); |
1491 | gplcrcindex = find_sec(hdr, sechdrs, secstrings, "__kcrctab_gpl"); | 1491 | gplcrcindex = find_sec(hdr, sechdrs, secstrings, "__kcrctab_gpl"); |
1492 | setupindex = find_sec(hdr, sechdrs, secstrings, "__param"); | 1492 | setupindex = find_sec(hdr, sechdrs, secstrings, "__param"); |
1493 | exindex = find_sec(hdr, sechdrs, secstrings, "__ex_table"); | 1493 | exindex = find_sec(hdr, sechdrs, secstrings, "__ex_table"); |
1494 | obsparmindex = find_sec(hdr, sechdrs, secstrings, "__obsparm"); | 1494 | obsparmindex = find_sec(hdr, sechdrs, secstrings, "__obsparm"); |
1495 | versindex = find_sec(hdr, sechdrs, secstrings, "__versions"); | 1495 | versindex = find_sec(hdr, sechdrs, secstrings, "__versions"); |
1496 | infoindex = find_sec(hdr, sechdrs, secstrings, ".modinfo"); | 1496 | infoindex = find_sec(hdr, sechdrs, secstrings, ".modinfo"); |
1497 | pcpuindex = find_pcpusec(hdr, sechdrs, secstrings); | 1497 | pcpuindex = find_pcpusec(hdr, sechdrs, secstrings); |
1498 | 1498 | ||
1499 | /* Don't keep modinfo section */ | 1499 | /* Don't keep modinfo section */ |
1500 | sechdrs[infoindex].sh_flags &= ~(unsigned long)SHF_ALLOC; | 1500 | sechdrs[infoindex].sh_flags &= ~(unsigned long)SHF_ALLOC; |
1501 | #ifdef CONFIG_KALLSYMS | 1501 | #ifdef CONFIG_KALLSYMS |
1502 | /* Keep symbol and string tables for decoding later. */ | 1502 | /* Keep symbol and string tables for decoding later. */ |
1503 | sechdrs[symindex].sh_flags |= SHF_ALLOC; | 1503 | sechdrs[symindex].sh_flags |= SHF_ALLOC; |
1504 | sechdrs[strindex].sh_flags |= SHF_ALLOC; | 1504 | sechdrs[strindex].sh_flags |= SHF_ALLOC; |
1505 | #endif | 1505 | #endif |
1506 | 1506 | ||
1507 | /* Check module struct version now, before we try to use module. */ | 1507 | /* Check module struct version now, before we try to use module. */ |
1508 | if (!check_modstruct_version(sechdrs, versindex, mod)) { | 1508 | if (!check_modstruct_version(sechdrs, versindex, mod)) { |
1509 | err = -ENOEXEC; | 1509 | err = -ENOEXEC; |
1510 | goto free_hdr; | 1510 | goto free_hdr; |
1511 | } | 1511 | } |
1512 | 1512 | ||
1513 | modmagic = get_modinfo(sechdrs, infoindex, "vermagic"); | 1513 | modmagic = get_modinfo(sechdrs, infoindex, "vermagic"); |
1514 | /* This is allowed: modprobe --force will invalidate it. */ | 1514 | /* This is allowed: modprobe --force will invalidate it. */ |
1515 | if (!modmagic) { | 1515 | if (!modmagic) { |
1516 | tainted |= TAINT_FORCED_MODULE; | 1516 | tainted |= TAINT_FORCED_MODULE; |
1517 | printk(KERN_WARNING "%s: no version magic, tainting kernel.\n", | 1517 | printk(KERN_WARNING "%s: no version magic, tainting kernel.\n", |
1518 | mod->name); | 1518 | mod->name); |
1519 | } else if (!same_magic(modmagic, vermagic)) { | 1519 | } else if (!same_magic(modmagic, vermagic)) { |
1520 | printk(KERN_ERR "%s: version magic '%s' should be '%s'\n", | 1520 | printk(KERN_ERR "%s: version magic '%s' should be '%s'\n", |
1521 | mod->name, modmagic, vermagic); | 1521 | mod->name, modmagic, vermagic); |
1522 | err = -ENOEXEC; | 1522 | err = -ENOEXEC; |
1523 | goto free_hdr; | 1523 | goto free_hdr; |
1524 | } | 1524 | } |
1525 | 1525 | ||
1526 | /* Now copy in args */ | 1526 | /* Now copy in args */ |
1527 | arglen = strlen_user(uargs); | 1527 | arglen = strlen_user(uargs); |
1528 | if (!arglen) { | 1528 | if (!arglen) { |
1529 | err = -EFAULT; | 1529 | err = -EFAULT; |
1530 | goto free_hdr; | 1530 | goto free_hdr; |
1531 | } | 1531 | } |
1532 | args = kmalloc(arglen, GFP_KERNEL); | 1532 | args = kmalloc(arglen, GFP_KERNEL); |
1533 | if (!args) { | 1533 | if (!args) { |
1534 | err = -ENOMEM; | 1534 | err = -ENOMEM; |
1535 | goto free_hdr; | 1535 | goto free_hdr; |
1536 | } | 1536 | } |
1537 | if (copy_from_user(args, uargs, arglen) != 0) { | 1537 | if (copy_from_user(args, uargs, arglen) != 0) { |
1538 | err = -EFAULT; | 1538 | err = -EFAULT; |
1539 | goto free_mod; | 1539 | goto free_mod; |
1540 | } | 1540 | } |
1541 | 1541 | ||
1542 | if (find_module(mod->name)) { | 1542 | if (find_module(mod->name)) { |
1543 | err = -EEXIST; | 1543 | err = -EEXIST; |
1544 | goto free_mod; | 1544 | goto free_mod; |
1545 | } | 1545 | } |
1546 | 1546 | ||
1547 | mod->state = MODULE_STATE_COMING; | 1547 | mod->state = MODULE_STATE_COMING; |
1548 | 1548 | ||
1549 | /* Allow arches to frob section contents and sizes. */ | 1549 | /* Allow arches to frob section contents and sizes. */ |
1550 | err = module_frob_arch_sections(hdr, sechdrs, secstrings, mod); | 1550 | err = module_frob_arch_sections(hdr, sechdrs, secstrings, mod); |
1551 | if (err < 0) | 1551 | if (err < 0) |
1552 | goto free_mod; | 1552 | goto free_mod; |
1553 | 1553 | ||
1554 | if (pcpuindex) { | 1554 | if (pcpuindex) { |
1555 | /* We have a special allocation for this section. */ | 1555 | /* We have a special allocation for this section. */ |
1556 | percpu = percpu_modalloc(sechdrs[pcpuindex].sh_size, | 1556 | percpu = percpu_modalloc(sechdrs[pcpuindex].sh_size, |
1557 | sechdrs[pcpuindex].sh_addralign); | 1557 | sechdrs[pcpuindex].sh_addralign); |
1558 | if (!percpu) { | 1558 | if (!percpu) { |
1559 | err = -ENOMEM; | 1559 | err = -ENOMEM; |
1560 | goto free_mod; | 1560 | goto free_mod; |
1561 | } | 1561 | } |
1562 | sechdrs[pcpuindex].sh_flags &= ~(unsigned long)SHF_ALLOC; | 1562 | sechdrs[pcpuindex].sh_flags &= ~(unsigned long)SHF_ALLOC; |
1563 | mod->percpu = percpu; | 1563 | mod->percpu = percpu; |
1564 | } | 1564 | } |
1565 | 1565 | ||
1566 | /* Determine total sizes, and put offsets in sh_entsize. For now | 1566 | /* Determine total sizes, and put offsets in sh_entsize. For now |
1567 | this is done generically; there don't appear to be any | 1567 | this is done generically; there don't appear to be any |
1568 | special cases for the architectures. */ | 1568 | special cases for the architectures. */ |
1569 | layout_sections(mod, hdr, sechdrs, secstrings); | 1569 | layout_sections(mod, hdr, sechdrs, secstrings); |
1570 | 1570 | ||
1571 | /* Do the allocs. */ | 1571 | /* Do the allocs. */ |
1572 | ptr = module_alloc(mod->core_size); | 1572 | ptr = module_alloc(mod->core_size); |
1573 | if (!ptr) { | 1573 | if (!ptr) { |
1574 | err = -ENOMEM; | 1574 | err = -ENOMEM; |
1575 | goto free_percpu; | 1575 | goto free_percpu; |
1576 | } | 1576 | } |
1577 | memset(ptr, 0, mod->core_size); | 1577 | memset(ptr, 0, mod->core_size); |
1578 | mod->module_core = ptr; | 1578 | mod->module_core = ptr; |
1579 | 1579 | ||
1580 | ptr = module_alloc(mod->init_size); | 1580 | ptr = module_alloc(mod->init_size); |
1581 | if (!ptr && mod->init_size) { | 1581 | if (!ptr && mod->init_size) { |
1582 | err = -ENOMEM; | 1582 | err = -ENOMEM; |
1583 | goto free_core; | 1583 | goto free_core; |
1584 | } | 1584 | } |
1585 | memset(ptr, 0, mod->init_size); | 1585 | memset(ptr, 0, mod->init_size); |
1586 | mod->module_init = ptr; | 1586 | mod->module_init = ptr; |
1587 | 1587 | ||
1588 | /* Transfer each section which specifies SHF_ALLOC */ | 1588 | /* Transfer each section which specifies SHF_ALLOC */ |
1589 | DEBUGP("final section addresses:\n"); | 1589 | DEBUGP("final section addresses:\n"); |
1590 | for (i = 0; i < hdr->e_shnum; i++) { | 1590 | for (i = 0; i < hdr->e_shnum; i++) { |
1591 | void *dest; | 1591 | void *dest; |
1592 | 1592 | ||
1593 | if (!(sechdrs[i].sh_flags & SHF_ALLOC)) | 1593 | if (!(sechdrs[i].sh_flags & SHF_ALLOC)) |
1594 | continue; | 1594 | continue; |
1595 | 1595 | ||
1596 | if (sechdrs[i].sh_entsize & INIT_OFFSET_MASK) | 1596 | if (sechdrs[i].sh_entsize & INIT_OFFSET_MASK) |
1597 | dest = mod->module_init | 1597 | dest = mod->module_init |
1598 | + (sechdrs[i].sh_entsize & ~INIT_OFFSET_MASK); | 1598 | + (sechdrs[i].sh_entsize & ~INIT_OFFSET_MASK); |
1599 | else | 1599 | else |
1600 | dest = mod->module_core + sechdrs[i].sh_entsize; | 1600 | dest = mod->module_core + sechdrs[i].sh_entsize; |
1601 | 1601 | ||
1602 | if (sechdrs[i].sh_type != SHT_NOBITS) | 1602 | if (sechdrs[i].sh_type != SHT_NOBITS) |
1603 | memcpy(dest, (void *)sechdrs[i].sh_addr, | 1603 | memcpy(dest, (void *)sechdrs[i].sh_addr, |
1604 | sechdrs[i].sh_size); | 1604 | sechdrs[i].sh_size); |
1605 | /* Update sh_addr to point to copy in image. */ | 1605 | /* Update sh_addr to point to copy in image. */ |
1606 | sechdrs[i].sh_addr = (unsigned long)dest; | 1606 | sechdrs[i].sh_addr = (unsigned long)dest; |
1607 | DEBUGP("\t0x%lx %s\n", sechdrs[i].sh_addr, secstrings + sechdrs[i].sh_name); | 1607 | DEBUGP("\t0x%lx %s\n", sechdrs[i].sh_addr, secstrings + sechdrs[i].sh_name); |
1608 | } | 1608 | } |
1609 | /* Module has been moved. */ | 1609 | /* Module has been moved. */ |
1610 | mod = (void *)sechdrs[modindex].sh_addr; | 1610 | mod = (void *)sechdrs[modindex].sh_addr; |
1611 | 1611 | ||
1612 | /* Now we've moved module, initialize linked lists, etc. */ | 1612 | /* Now we've moved module, initialize linked lists, etc. */ |
1613 | module_unload_init(mod); | 1613 | module_unload_init(mod); |
1614 | 1614 | ||
1615 | /* Set up license info based on the info section */ | 1615 | /* Set up license info based on the info section */ |
1616 | set_license(mod, get_modinfo(sechdrs, infoindex, "license")); | 1616 | set_license(mod, get_modinfo(sechdrs, infoindex, "license")); |
1617 | 1617 | ||
1618 | /* Fix up syms, so that st_value is a pointer to location. */ | 1618 | /* Fix up syms, so that st_value is a pointer to location. */ |
1619 | err = simplify_symbols(sechdrs, symindex, strtab, versindex, pcpuindex, | 1619 | err = simplify_symbols(sechdrs, symindex, strtab, versindex, pcpuindex, |
1620 | mod); | 1620 | mod); |
1621 | if (err < 0) | 1621 | if (err < 0) |
1622 | goto cleanup; | 1622 | goto cleanup; |
1623 | 1623 | ||
1624 | /* Set up EXPORTed & EXPORT_GPLed symbols (section 0 is 0 length) */ | 1624 | /* Set up EXPORTed & EXPORT_GPLed symbols (section 0 is 0 length) */ |
1625 | mod->num_syms = sechdrs[exportindex].sh_size / sizeof(*mod->syms); | 1625 | mod->num_syms = sechdrs[exportindex].sh_size / sizeof(*mod->syms); |
1626 | mod->syms = (void *)sechdrs[exportindex].sh_addr; | 1626 | mod->syms = (void *)sechdrs[exportindex].sh_addr; |
1627 | if (crcindex) | 1627 | if (crcindex) |
1628 | mod->crcs = (void *)sechdrs[crcindex].sh_addr; | 1628 | mod->crcs = (void *)sechdrs[crcindex].sh_addr; |
1629 | mod->num_gpl_syms = sechdrs[gplindex].sh_size / sizeof(*mod->gpl_syms); | 1629 | mod->num_gpl_syms = sechdrs[gplindex].sh_size / sizeof(*mod->gpl_syms); |
1630 | mod->gpl_syms = (void *)sechdrs[gplindex].sh_addr; | 1630 | mod->gpl_syms = (void *)sechdrs[gplindex].sh_addr; |
1631 | if (gplcrcindex) | 1631 | if (gplcrcindex) |
1632 | mod->gpl_crcs = (void *)sechdrs[gplcrcindex].sh_addr; | 1632 | mod->gpl_crcs = (void *)sechdrs[gplcrcindex].sh_addr; |
1633 | 1633 | ||
1634 | #ifdef CONFIG_MODVERSIONS | 1634 | #ifdef CONFIG_MODVERSIONS |
1635 | if ((mod->num_syms && !crcindex) || | 1635 | if ((mod->num_syms && !crcindex) || |
1636 | (mod->num_gpl_syms && !gplcrcindex)) { | 1636 | (mod->num_gpl_syms && !gplcrcindex)) { |
1637 | printk(KERN_WARNING "%s: No versions for exported symbols." | 1637 | printk(KERN_WARNING "%s: No versions for exported symbols." |
1638 | " Tainting kernel.\n", mod->name); | 1638 | " Tainting kernel.\n", mod->name); |
1639 | tainted |= TAINT_FORCED_MODULE; | 1639 | tainted |= TAINT_FORCED_MODULE; |
1640 | } | 1640 | } |
1641 | #endif | 1641 | #endif |
1642 | 1642 | ||
1643 | /* Now do relocations. */ | 1643 | /* Now do relocations. */ |
1644 | for (i = 1; i < hdr->e_shnum; i++) { | 1644 | for (i = 1; i < hdr->e_shnum; i++) { |
1645 | const char *strtab = (char *)sechdrs[strindex].sh_addr; | 1645 | const char *strtab = (char *)sechdrs[strindex].sh_addr; |
1646 | unsigned int info = sechdrs[i].sh_info; | 1646 | unsigned int info = sechdrs[i].sh_info; |
1647 | 1647 | ||
1648 | /* Not a valid relocation section? */ | 1648 | /* Not a valid relocation section? */ |
1649 | if (info >= hdr->e_shnum) | 1649 | if (info >= hdr->e_shnum) |
1650 | continue; | 1650 | continue; |
1651 | 1651 | ||
1652 | /* Don't bother with non-allocated sections */ | 1652 | /* Don't bother with non-allocated sections */ |
1653 | if (!(sechdrs[info].sh_flags & SHF_ALLOC)) | 1653 | if (!(sechdrs[info].sh_flags & SHF_ALLOC)) |
1654 | continue; | 1654 | continue; |
1655 | 1655 | ||
1656 | if (sechdrs[i].sh_type == SHT_REL) | 1656 | if (sechdrs[i].sh_type == SHT_REL) |
1657 | err = apply_relocate(sechdrs, strtab, symindex, i,mod); | 1657 | err = apply_relocate(sechdrs, strtab, symindex, i,mod); |
1658 | else if (sechdrs[i].sh_type == SHT_RELA) | 1658 | else if (sechdrs[i].sh_type == SHT_RELA) |
1659 | err = apply_relocate_add(sechdrs, strtab, symindex, i, | 1659 | err = apply_relocate_add(sechdrs, strtab, symindex, i, |
1660 | mod); | 1660 | mod); |
1661 | if (err < 0) | 1661 | if (err < 0) |
1662 | goto cleanup; | 1662 | goto cleanup; |
1663 | } | 1663 | } |
1664 | 1664 | ||
1665 | /* Set up and sort exception table */ | 1665 | /* Set up and sort exception table */ |
1666 | mod->num_exentries = sechdrs[exindex].sh_size / sizeof(*mod->extable); | 1666 | mod->num_exentries = sechdrs[exindex].sh_size / sizeof(*mod->extable); |
1667 | mod->extable = extable = (void *)sechdrs[exindex].sh_addr; | 1667 | mod->extable = extable = (void *)sechdrs[exindex].sh_addr; |
1668 | sort_extable(extable, extable + mod->num_exentries); | 1668 | sort_extable(extable, extable + mod->num_exentries); |
1669 | 1669 | ||
1670 | /* Finally, copy percpu area over. */ | 1670 | /* Finally, copy percpu area over. */ |
1671 | percpu_modcopy(mod->percpu, (void *)sechdrs[pcpuindex].sh_addr, | 1671 | percpu_modcopy(mod->percpu, (void *)sechdrs[pcpuindex].sh_addr, |
1672 | sechdrs[pcpuindex].sh_size); | 1672 | sechdrs[pcpuindex].sh_size); |
1673 | 1673 | ||
1674 | add_kallsyms(mod, sechdrs, symindex, strindex, secstrings); | 1674 | add_kallsyms(mod, sechdrs, symindex, strindex, secstrings); |
1675 | 1675 | ||
1676 | err = module_finalize(hdr, sechdrs, mod); | 1676 | err = module_finalize(hdr, sechdrs, mod); |
1677 | if (err < 0) | 1677 | if (err < 0) |
1678 | goto cleanup; | 1678 | goto cleanup; |
1679 | 1679 | ||
1680 | mod->args = args; | 1680 | mod->args = args; |
1681 | if (obsparmindex) { | 1681 | if (obsparmindex) { |
1682 | err = obsolete_params(mod->name, mod->args, | 1682 | err = obsolete_params(mod->name, mod->args, |
1683 | (struct obsolete_modparm *) | 1683 | (struct obsolete_modparm *) |
1684 | sechdrs[obsparmindex].sh_addr, | 1684 | sechdrs[obsparmindex].sh_addr, |
1685 | sechdrs[obsparmindex].sh_size | 1685 | sechdrs[obsparmindex].sh_size |
1686 | / sizeof(struct obsolete_modparm), | 1686 | / sizeof(struct obsolete_modparm), |
1687 | sechdrs, symindex, | 1687 | sechdrs, symindex, |
1688 | (char *)sechdrs[strindex].sh_addr); | 1688 | (char *)sechdrs[strindex].sh_addr); |
1689 | if (setupindex) | 1689 | if (setupindex) |
1690 | printk(KERN_WARNING "%s: Ignoring new-style " | 1690 | printk(KERN_WARNING "%s: Ignoring new-style " |
1691 | "parameters in presence of obsolete ones\n", | 1691 | "parameters in presence of obsolete ones\n", |
1692 | mod->name); | 1692 | mod->name); |
1693 | } else { | 1693 | } else { |
1694 | /* Size of section 0 is 0, so this works well if no params */ | 1694 | /* Size of section 0 is 0, so this works well if no params */ |
1695 | err = parse_args(mod->name, mod->args, | 1695 | err = parse_args(mod->name, mod->args, |
1696 | (struct kernel_param *) | 1696 | (struct kernel_param *) |
1697 | sechdrs[setupindex].sh_addr, | 1697 | sechdrs[setupindex].sh_addr, |
1698 | sechdrs[setupindex].sh_size | 1698 | sechdrs[setupindex].sh_size |
1699 | / sizeof(struct kernel_param), | 1699 | / sizeof(struct kernel_param), |
1700 | NULL); | 1700 | NULL); |
1701 | } | 1701 | } |
1702 | if (err < 0) | 1702 | if (err < 0) |
1703 | goto arch_cleanup; | 1703 | goto arch_cleanup; |
1704 | 1704 | ||
1705 | err = mod_sysfs_setup(mod, | 1705 | err = mod_sysfs_setup(mod, |
1706 | (struct kernel_param *) | 1706 | (struct kernel_param *) |
1707 | sechdrs[setupindex].sh_addr, | 1707 | sechdrs[setupindex].sh_addr, |
1708 | sechdrs[setupindex].sh_size | 1708 | sechdrs[setupindex].sh_size |
1709 | / sizeof(struct kernel_param)); | 1709 | / sizeof(struct kernel_param)); |
1710 | if (err < 0) | 1710 | if (err < 0) |
1711 | goto arch_cleanup; | 1711 | goto arch_cleanup; |
1712 | add_sect_attrs(mod, hdr->e_shnum, secstrings, sechdrs); | 1712 | add_sect_attrs(mod, hdr->e_shnum, secstrings, sechdrs); |
1713 | 1713 | ||
1714 | /* Get rid of temporary copy */ | 1714 | /* Get rid of temporary copy */ |
1715 | vfree(hdr); | 1715 | vfree(hdr); |
1716 | 1716 | ||
1717 | /* Done! */ | 1717 | /* Done! */ |
1718 | return mod; | 1718 | return mod; |
1719 | 1719 | ||
1720 | arch_cleanup: | 1720 | arch_cleanup: |
1721 | module_arch_cleanup(mod); | 1721 | module_arch_cleanup(mod); |
1722 | cleanup: | 1722 | cleanup: |
1723 | module_unload_free(mod); | 1723 | module_unload_free(mod); |
1724 | module_free(mod, mod->module_init); | 1724 | module_free(mod, mod->module_init); |
1725 | free_core: | 1725 | free_core: |
1726 | module_free(mod, mod->module_core); | 1726 | module_free(mod, mod->module_core); |
1727 | free_percpu: | 1727 | free_percpu: |
1728 | if (percpu) | 1728 | if (percpu) |
1729 | percpu_modfree(percpu); | 1729 | percpu_modfree(percpu); |
1730 | free_mod: | 1730 | free_mod: |
1731 | kfree(args); | 1731 | kfree(args); |
1732 | free_hdr: | 1732 | free_hdr: |
1733 | vfree(hdr); | 1733 | vfree(hdr); |
1734 | if (err < 0) return ERR_PTR(err); | 1734 | if (err < 0) return ERR_PTR(err); |
1735 | else return ptr; | 1735 | else return ptr; |
1736 | 1736 | ||
1737 | truncated: | 1737 | truncated: |
1738 | printk(KERN_ERR "Module len %lu truncated\n", len); | 1738 | printk(KERN_ERR "Module len %lu truncated\n", len); |
1739 | err = -ENOEXEC; | 1739 | err = -ENOEXEC; |
1740 | goto free_hdr; | 1740 | goto free_hdr; |
1741 | } | 1741 | } |
1742 | 1742 | ||
1743 | /* | 1743 | /* |
1744 | * link the module while the whole machine is stopped with interrupts off | 1744 | * link the module while the whole machine is stopped with interrupts off |
1745 | * - this defends against kallsyms not taking locks | 1745 | * - this defends against kallsyms not taking locks |
1746 | */ | 1746 | */ |
1747 | static int __link_module(void *_mod) | 1747 | static int __link_module(void *_mod) |
1748 | { | 1748 | { |
1749 | struct module *mod = _mod; | 1749 | struct module *mod = _mod; |
1750 | list_add(&mod->list, &modules); | 1750 | list_add(&mod->list, &modules); |
1751 | return 0; | 1751 | return 0; |
1752 | } | 1752 | } |
1753 | 1753 | ||
1754 | /* This is where the real work happens */ | 1754 | /* This is where the real work happens */ |
1755 | asmlinkage long | 1755 | asmlinkage long |
1756 | sys_init_module(void __user *umod, | 1756 | sys_init_module(void __user *umod, |
1757 | unsigned long len, | 1757 | unsigned long len, |
1758 | const char __user *uargs) | 1758 | const char __user *uargs) |
1759 | { | 1759 | { |
1760 | struct module *mod; | 1760 | struct module *mod; |
1761 | mm_segment_t old_fs = get_fs(); | 1761 | mm_segment_t old_fs = get_fs(); |
1762 | int ret = 0; | 1762 | int ret = 0; |
1763 | 1763 | ||
1764 | /* Must have permission */ | 1764 | /* Must have permission */ |
1765 | if (!capable(CAP_SYS_MODULE)) | 1765 | if (!capable(CAP_SYS_MODULE)) |
1766 | return -EPERM; | 1766 | return -EPERM; |
1767 | 1767 | ||
1768 | /* Only one module load at a time, please */ | 1768 | /* Only one module load at a time, please */ |
1769 | if (down_interruptible(&module_mutex) != 0) | 1769 | if (down_interruptible(&module_mutex) != 0) |
1770 | return -EINTR; | 1770 | return -EINTR; |
1771 | 1771 | ||
1772 | /* Do all the hard work */ | 1772 | /* Do all the hard work */ |
1773 | mod = load_module(umod, len, uargs); | 1773 | mod = load_module(umod, len, uargs); |
1774 | if (IS_ERR(mod)) { | 1774 | if (IS_ERR(mod)) { |
1775 | up(&module_mutex); | 1775 | up(&module_mutex); |
1776 | return PTR_ERR(mod); | 1776 | return PTR_ERR(mod); |
1777 | } | 1777 | } |
1778 | 1778 | ||
1779 | /* flush the icache in correct context */ | 1779 | /* flush the icache in correct context */ |
1780 | set_fs(KERNEL_DS); | 1780 | set_fs(KERNEL_DS); |
1781 | 1781 | ||
1782 | /* Flush the instruction cache, since we've played with text */ | 1782 | /* Flush the instruction cache, since we've played with text */ |
1783 | if (mod->module_init) | 1783 | if (mod->module_init) |
1784 | flush_icache_range((unsigned long)mod->module_init, | 1784 | flush_icache_range((unsigned long)mod->module_init, |
1785 | (unsigned long)mod->module_init | 1785 | (unsigned long)mod->module_init |
1786 | + mod->init_size); | 1786 | + mod->init_size); |
1787 | flush_icache_range((unsigned long)mod->module_core, | 1787 | flush_icache_range((unsigned long)mod->module_core, |
1788 | (unsigned long)mod->module_core + mod->core_size); | 1788 | (unsigned long)mod->module_core + mod->core_size); |
1789 | 1789 | ||
1790 | set_fs(old_fs); | 1790 | set_fs(old_fs); |
1791 | 1791 | ||
1792 | /* Now sew it into the lists. They won't access us, since | 1792 | /* Now sew it into the lists. They won't access us, since |
1793 | strong_try_module_get() will fail. */ | 1793 | strong_try_module_get() will fail. */ |
1794 | stop_machine_run(__link_module, mod, NR_CPUS); | 1794 | stop_machine_run(__link_module, mod, NR_CPUS); |
1795 | 1795 | ||
1796 | /* Drop lock so they can recurse */ | 1796 | /* Drop lock so they can recurse */ |
1797 | up(&module_mutex); | 1797 | up(&module_mutex); |
1798 | 1798 | ||
1799 | down(¬ify_mutex); | 1799 | down(¬ify_mutex); |
1800 | notifier_call_chain(&module_notify_list, MODULE_STATE_COMING, mod); | 1800 | notifier_call_chain(&module_notify_list, MODULE_STATE_COMING, mod); |
1801 | up(¬ify_mutex); | 1801 | up(¬ify_mutex); |
1802 | 1802 | ||
1803 | /* Start the module */ | 1803 | /* Start the module */ |
1804 | if (mod->init != NULL) | 1804 | if (mod->init != NULL) |
1805 | ret = mod->init(); | 1805 | ret = mod->init(); |
1806 | if (ret < 0) { | 1806 | if (ret < 0) { |
1807 | /* Init routine failed: abort. Try to protect us from | 1807 | /* Init routine failed: abort. Try to protect us from |
1808 | buggy refcounters. */ | 1808 | buggy refcounters. */ |
1809 | mod->state = MODULE_STATE_GOING; | 1809 | mod->state = MODULE_STATE_GOING; |
1810 | synchronize_sched(); | 1810 | synchronize_sched(); |
1811 | if (mod->unsafe) | 1811 | if (mod->unsafe) |
1812 | printk(KERN_ERR "%s: module is now stuck!\n", | 1812 | printk(KERN_ERR "%s: module is now stuck!\n", |
1813 | mod->name); | 1813 | mod->name); |
1814 | else { | 1814 | else { |
1815 | module_put(mod); | 1815 | module_put(mod); |
1816 | down(&module_mutex); | 1816 | down(&module_mutex); |
1817 | free_module(mod); | 1817 | free_module(mod); |
1818 | up(&module_mutex); | 1818 | up(&module_mutex); |
1819 | } | 1819 | } |
1820 | return ret; | 1820 | return ret; |
1821 | } | 1821 | } |
1822 | 1822 | ||
1823 | /* Now it's a first class citizen! */ | 1823 | /* Now it's a first class citizen! */ |
1824 | down(&module_mutex); | 1824 | down(&module_mutex); |
1825 | mod->state = MODULE_STATE_LIVE; | 1825 | mod->state = MODULE_STATE_LIVE; |
1826 | /* Drop initial reference. */ | 1826 | /* Drop initial reference. */ |
1827 | module_put(mod); | 1827 | module_put(mod); |
1828 | module_free(mod, mod->module_init); | 1828 | module_free(mod, mod->module_init); |
1829 | mod->module_init = NULL; | 1829 | mod->module_init = NULL; |
1830 | mod->init_size = 0; | 1830 | mod->init_size = 0; |
1831 | mod->init_text_size = 0; | 1831 | mod->init_text_size = 0; |
1832 | up(&module_mutex); | 1832 | up(&module_mutex); |
1833 | 1833 | ||
1834 | return 0; | 1834 | return 0; |
1835 | } | 1835 | } |
1836 | 1836 | ||
1837 | static inline int within(unsigned long addr, void *start, unsigned long size) | 1837 | static inline int within(unsigned long addr, void *start, unsigned long size) |
1838 | { | 1838 | { |
1839 | return ((void *)addr >= start && (void *)addr < start + size); | 1839 | return ((void *)addr >= start && (void *)addr < start + size); |
1840 | } | 1840 | } |
1841 | 1841 | ||
1842 | #ifdef CONFIG_KALLSYMS | 1842 | #ifdef CONFIG_KALLSYMS |
1843 | /* | 1843 | /* |
1844 | * This ignores the intensely annoying "mapping symbols" found | 1844 | * This ignores the intensely annoying "mapping symbols" found |
1845 | * in ARM ELF files: $a, $t and $d. | 1845 | * in ARM ELF files: $a, $t and $d. |
1846 | */ | 1846 | */ |
1847 | static inline int is_arm_mapping_symbol(const char *str) | 1847 | static inline int is_arm_mapping_symbol(const char *str) |
1848 | { | 1848 | { |
1849 | return str[0] == '$' && strchr("atd", str[1]) | 1849 | return str[0] == '$' && strchr("atd", str[1]) |
1850 | && (str[2] == '\0' || str[2] == '.'); | 1850 | && (str[2] == '\0' || str[2] == '.'); |
1851 | } | 1851 | } |
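/*
 * For illustration: this matches the ARM mapping symbols "$a", "$t" and
 * "$d", as well as suffixed forms like "$d.3" (str[2] == '.'), but not
 * names such as "$x" or "a$t"; matching symbols are skipped by the scan
 * in get_ksymbol() below.
 */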
1852 | 1852 | ||
1853 | static const char *get_ksymbol(struct module *mod, | 1853 | static const char *get_ksymbol(struct module *mod, |
1854 | unsigned long addr, | 1854 | unsigned long addr, |
1855 | unsigned long *size, | 1855 | unsigned long *size, |
1856 | unsigned long *offset) | 1856 | unsigned long *offset) |
1857 | { | 1857 | { |
1858 | unsigned int i, best = 0; | 1858 | unsigned int i, best = 0; |
1859 | unsigned long nextval; | 1859 | unsigned long nextval; |
1860 | 1860 | ||
1861 | /* At worst, next value is at end of module | 1861 | /* At worst, next value is at end of module |
1862 | if (within(addr, mod->module_init, mod->init_size)) | 1862 | if (within(addr, mod->module_init, mod->init_size)) |
1863 | nextval = (unsigned long)mod->module_init+mod->init_text_size; | 1863 | nextval = (unsigned long)mod->module_init+mod->init_text_size; |
1864 | else | 1864 | else |
1865 | nextval = (unsigned long)mod->module_core+mod->core_text_size; | 1865 | nextval = (unsigned long)mod->module_core+mod->core_text_size; |
1866 | 1866 | ||
1867 | /* Scan for closest preceding symbol, and next symbol. (ELF | 1867 | /* Scan for closest preceding symbol, and next symbol. (ELF |
1868 | starts real symbols at 1). */ | 1868 | starts real symbols at 1). */ |
1869 | for (i = 1; i < mod->num_symtab; i++) { | 1869 | for (i = 1; i < mod->num_symtab; i++) { |
1870 | if (mod->symtab[i].st_shndx == SHN_UNDEF) | 1870 | if (mod->symtab[i].st_shndx == SHN_UNDEF) |
1871 | continue; | 1871 | continue; |
1872 | 1872 | ||
1873 | /* We ignore unnamed symbols: they're uninformative | 1873 | /* We ignore unnamed symbols: they're uninformative |
1874 | * and inserted at a whim. */ | 1874 | * and inserted at a whim. */ |
1875 | if (mod->symtab[i].st_value <= addr | 1875 | if (mod->symtab[i].st_value <= addr |
1876 | && mod->symtab[i].st_value > mod->symtab[best].st_value | 1876 | && mod->symtab[i].st_value > mod->symtab[best].st_value |
1877 | && *(mod->strtab + mod->symtab[i].st_name) != '\0' | 1877 | && *(mod->strtab + mod->symtab[i].st_name) != '\0' |
1878 | && !is_arm_mapping_symbol(mod->strtab + mod->symtab[i].st_name)) | 1878 | && !is_arm_mapping_symbol(mod->strtab + mod->symtab[i].st_name)) |
1879 | best = i; | 1879 | best = i; |
1880 | if (mod->symtab[i].st_value > addr | 1880 | if (mod->symtab[i].st_value > addr |
1881 | && mod->symtab[i].st_value < nextval | 1881 | && mod->symtab[i].st_value < nextval |
1882 | && *(mod->strtab + mod->symtab[i].st_name) != '\0' | 1882 | && *(mod->strtab + mod->symtab[i].st_name) != '\0' |
1883 | && !is_arm_mapping_symbol(mod->strtab + mod->symtab[i].st_name)) | 1883 | && !is_arm_mapping_symbol(mod->strtab + mod->symtab[i].st_name)) |
1884 | nextval = mod->symtab[i].st_value; | 1884 | nextval = mod->symtab[i].st_value; |
1885 | } | 1885 | } |
1886 | 1886 | ||
1887 | if (!best) | 1887 | if (!best) |
1888 | return NULL; | 1888 | return NULL; |
1889 | 1889 | ||
1890 | *size = nextval - mod->symtab[best].st_value; | 1890 | *size = nextval - mod->symtab[best].st_value; |
1891 | *offset = addr - mod->symtab[best].st_value; | 1891 | *offset = addr - mod->symtab[best].st_value; |
1892 | return mod->strtab + mod->symtab[best].st_name; | 1892 | return mod->strtab + mod->symtab[best].st_name; |
1893 | } | 1893 | } |
1894 | 1894 | ||
1895 | /* For kallsyms to ask for address resolution. NULL means not found. | 1895 | /* For kallsyms to ask for address resolution. NULL means not found. |
1896 | We don't lock, as this is used for oops resolution and races are a | 1896 | We don't lock, as this is used for oops resolution and races are a |
1897 | lesser concern. */ | 1897 | lesser concern. */ |
1898 | const char *module_address_lookup(unsigned long addr, | 1898 | const char *module_address_lookup(unsigned long addr, |
1899 | unsigned long *size, | 1899 | unsigned long *size, |
1900 | unsigned long *offset, | 1900 | unsigned long *offset, |
1901 | char **modname) | 1901 | char **modname) |
1902 | { | 1902 | { |
1903 | struct module *mod; | 1903 | struct module *mod; |
1904 | 1904 | ||
1905 | list_for_each_entry(mod, &modules, list) { | 1905 | list_for_each_entry(mod, &modules, list) { |
1906 | if (within(addr, mod->module_init, mod->init_size) | 1906 | if (within(addr, mod->module_init, mod->init_size) |
1907 | || within(addr, mod->module_core, mod->core_size)) { | 1907 | || within(addr, mod->module_core, mod->core_size)) { |
1908 | *modname = mod->name; | 1908 | *modname = mod->name; |
1909 | return get_ksymbol(mod, addr, size, offset); | 1909 | return get_ksymbol(mod, addr, size, offset); |
1910 | } | 1910 | } |
1911 | } | 1911 | } |
1912 | return NULL; | 1912 | return NULL; |
1913 | } | 1913 | } |
1914 | 1914 | ||
1915 | struct module *module_get_kallsym(unsigned int symnum, | 1915 | struct module *module_get_kallsym(unsigned int symnum, |
1916 | unsigned long *value, | 1916 | unsigned long *value, |
1917 | char *type, | 1917 | char *type, |
1918 | char namebuf[128]) | 1918 | char namebuf[128]) |
1919 | { | 1919 | { |
1920 | struct module *mod; | 1920 | struct module *mod; |
1921 | 1921 | ||
1922 | down(&module_mutex); | 1922 | down(&module_mutex); |
1923 | list_for_each_entry(mod, &modules, list) { | 1923 | list_for_each_entry(mod, &modules, list) { |
1924 | if (symnum < mod->num_symtab) { | 1924 | if (symnum < mod->num_symtab) { |
1925 | *value = mod->symtab[symnum].st_value; | 1925 | *value = mod->symtab[symnum].st_value; |
1926 | *type = mod->symtab[symnum].st_info; | 1926 | *type = mod->symtab[symnum].st_info; |
1927 | strncpy(namebuf, | 1927 | strncpy(namebuf, |
1928 | mod->strtab + mod->symtab[symnum].st_name, | 1928 | mod->strtab + mod->symtab[symnum].st_name, |
1929 | 127); | 1929 | 127); |
1930 | up(&module_mutex); | 1930 | up(&module_mutex); |
1931 | return mod; | 1931 | return mod; |
1932 | } | 1932 | } |
1933 | symnum -= mod->num_symtab; | 1933 | symnum -= mod->num_symtab; |
1934 | } | 1934 | } |
1935 | up(&module_mutex); | 1935 | up(&module_mutex); |
1936 | return NULL; | 1936 | return NULL; |
1937 | } | 1937 | } |
1938 | 1938 | ||
1939 | static unsigned long mod_find_symname(struct module *mod, const char *name) | 1939 | static unsigned long mod_find_symname(struct module *mod, const char *name) |
1940 | { | 1940 | { |
1941 | unsigned int i; | 1941 | unsigned int i; |
1942 | 1942 | ||
1943 | for (i = 0; i < mod->num_symtab; i++) | 1943 | for (i = 0; i < mod->num_symtab; i++) |
1944 | if (strcmp(name, mod->strtab+mod->symtab[i].st_name) == 0) | 1944 | if (strcmp(name, mod->strtab+mod->symtab[i].st_name) == 0) |
1945 | return mod->symtab[i].st_value; | 1945 | return mod->symtab[i].st_value; |
1946 | return 0; | 1946 | return 0; |
1947 | } | 1947 | } |
1948 | 1948 | ||
1949 | /* Look for this name: can be of form module:name. */ | 1949 | /* Look for this name: can be of form module:name. */ |
1950 | unsigned long module_kallsyms_lookup_name(const char *name) | 1950 | unsigned long module_kallsyms_lookup_name(const char *name) |
1951 | { | 1951 | { |
1952 | struct module *mod; | 1952 | struct module *mod; |
1953 | char *colon; | 1953 | char *colon; |
1954 | unsigned long ret = 0; | 1954 | unsigned long ret = 0; |
1955 | 1955 | ||
1956 | /* Don't lock: we're in enough trouble already. */ | 1956 | /* Don't lock: we're in enough trouble already. */ |
1957 | if ((colon = strchr(name, ':')) != NULL) { | 1957 | if ((colon = strchr(name, ':')) != NULL) { |
1958 | *colon = '\0'; | 1958 | *colon = '\0'; |
1959 | if ((mod = find_module(name)) != NULL) | 1959 | if ((mod = find_module(name)) != NULL) |
1960 | ret = mod_find_symname(mod, colon+1); | 1960 | ret = mod_find_symname(mod, colon+1); |
1961 | *colon = ':'; | 1961 | *colon = ':'; |
1962 | } else { | 1962 | } else { |
1963 | list_for_each_entry(mod, &modules, list) | 1963 | list_for_each_entry(mod, &modules, list) |
1964 | if ((ret = mod_find_symname(mod, name)) != 0) | 1964 | if ((ret = mod_find_symname(mod, name)) != 0) |
1965 | break; | 1965 | break; |
1966 | } | 1966 | } |
1967 | return ret; | 1967 | return ret; |
1968 | } | 1968 | } |
1969 | #endif /* CONFIG_KALLSYMS */ | 1969 | #endif /* CONFIG_KALLSYMS */ |
1970 | 1970 | ||
1971 | /* Called by the /proc file system to return a list of modules. */ | 1971 | /* Called by the /proc file system to return a list of modules. */ |
1972 | static void *m_start(struct seq_file *m, loff_t *pos) | 1972 | static void *m_start(struct seq_file *m, loff_t *pos) |
1973 | { | 1973 | { |
1974 | struct list_head *i; | 1974 | struct list_head *i; |
1975 | loff_t n = 0; | 1975 | loff_t n = 0; |
1976 | 1976 | ||
1977 | down(&module_mutex); | 1977 | down(&module_mutex); |
1978 | list_for_each(i, &modules) { | 1978 | list_for_each(i, &modules) { |
1979 | if (n++ == *pos) | 1979 | if (n++ == *pos) |
1980 | break; | 1980 | break; |
1981 | } | 1981 | } |
1982 | if (i == &modules) | 1982 | if (i == &modules) |
1983 | return NULL; | 1983 | return NULL; |
1984 | return i; | 1984 | return i; |
1985 | } | 1985 | } |
1986 | 1986 | ||
1987 | static void *m_next(struct seq_file *m, void *p, loff_t *pos) | 1987 | static void *m_next(struct seq_file *m, void *p, loff_t *pos) |
1988 | { | 1988 | { |
1989 | struct list_head *i = p; | 1989 | struct list_head *i = p; |
1990 | (*pos)++; | 1990 | (*pos)++; |
1991 | if (i->next == &modules) | 1991 | if (i->next == &modules) |
1992 | return NULL; | 1992 | return NULL; |
1993 | return i->next; | 1993 | return i->next; |
1994 | } | 1994 | } |
1995 | 1995 | ||
1996 | static void m_stop(struct seq_file *m, void *p) | 1996 | static void m_stop(struct seq_file *m, void *p) |
1997 | { | 1997 | { |
1998 | up(&module_mutex); | 1998 | up(&module_mutex); |
1999 | } | 1999 | } |
2000 | 2000 | ||
2001 | static int m_show(struct seq_file *m, void *p) | 2001 | static int m_show(struct seq_file *m, void *p) |
2002 | { | 2002 | { |
2003 | struct module *mod = list_entry(p, struct module, list); | 2003 | struct module *mod = list_entry(p, struct module, list); |
2004 | seq_printf(m, "%s %lu", | 2004 | seq_printf(m, "%s %lu", |
2005 | mod->name, mod->init_size + mod->core_size); | 2005 | mod->name, mod->init_size + mod->core_size); |
2006 | print_unload_info(m, mod); | 2006 | print_unload_info(m, mod); |
2007 | 2007 | ||
2008 | /* Informative for users. */ | 2008 | /* Informative for users. */ |
2009 | seq_printf(m, " %s", | 2009 | seq_printf(m, " %s", |
2010 | mod->state == MODULE_STATE_GOING ? "Unloading": | 2010 | mod->state == MODULE_STATE_GOING ? "Unloading": |
2011 | mod->state == MODULE_STATE_COMING ? "Loading": | 2011 | mod->state == MODULE_STATE_COMING ? "Loading": |
2012 | "Live"); | 2012 | "Live"); |
2013 | /* Used by oprofile and other similar tools. */ | 2013 | /* Used by oprofile and other similar tools. */ |
2014 | seq_printf(m, " 0x%p", mod->module_core); | 2014 | seq_printf(m, " 0x%p", mod->module_core); |
2015 | 2015 | ||
2016 | seq_printf(m, "\n"); | 2016 | seq_printf(m, "\n"); |
2017 | return 0; | 2017 | return 0; |
2018 | } | 2018 | } |
2019 | 2019 | ||
2020 | /* Format: modulename size refcount deps address | 2020 | /* Format: modulename size refcount deps address |
2021 | 2021 | ||
2022 | Where refcount is a number or -, and deps is a comma-separated list | 2022 | Where refcount is a number or -, and deps is a comma-separated list |
2023 | of depends or -. | 2023 | of depends or -. |
2024 | */ | 2024 | */ |
2025 | struct seq_operations modules_op = { | 2025 | struct seq_operations modules_op = { |
2026 | .start = m_start, | 2026 | .start = m_start, |
2027 | .next = m_next, | 2027 | .next = m_next, |
2028 | .stop = m_stop, | 2028 | .stop = m_stop, |
2029 | .show = m_show | 2029 | .show = m_show |
2030 | }; | 2030 | }; |
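For reference, a /proc/modules line built by m_show() in this format looks something like the following (the values and the dependency list are made up for illustration; the refcount and dependency fields come from print_unload_info()):

    usbcore 112644 4 ehci_hcd,uhci_hcd, Live 0xf8a4e000

Note that m_show() also emits the module state ("Live", "Loading" or "Unloading") between the dependency list and the load address, even though the format comment only lists modulename, size, refcount, deps and address.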
2031 | 2031 | ||
2032 | /* Given an address, look for it in the module exception tables. */ | 2032 | /* Given an address, look for it in the module exception tables. */ |
2033 | const struct exception_table_entry *search_module_extables(unsigned long addr) | 2033 | const struct exception_table_entry *search_module_extables(unsigned long addr) |
2034 | { | 2034 | { |
2035 | unsigned long flags; | 2035 | unsigned long flags; |
2036 | const struct exception_table_entry *e = NULL; | 2036 | const struct exception_table_entry *e = NULL; |
2037 | struct module *mod; | 2037 | struct module *mod; |
2038 | 2038 | ||
2039 | spin_lock_irqsave(&modlist_lock, flags); | 2039 | spin_lock_irqsave(&modlist_lock, flags); |
2040 | list_for_each_entry(mod, &modules, list) { | 2040 | list_for_each_entry(mod, &modules, list) { |
2041 | if (mod->num_exentries == 0) | 2041 | if (mod->num_exentries == 0) |
2042 | continue; | 2042 | continue; |
2043 | 2043 | ||
2044 | e = search_extable(mod->extable, | 2044 | e = search_extable(mod->extable, |
2045 | mod->extable + mod->num_exentries - 1, | 2045 | mod->extable + mod->num_exentries - 1, |
2046 | addr); | 2046 | addr); |
2047 | if (e) | 2047 | if (e) |
2048 | break; | 2048 | break; |
2049 | } | 2049 | } |
2050 | spin_unlock_irqrestore(&modlist_lock, flags); | 2050 | spin_unlock_irqrestore(&modlist_lock, flags); |
2051 | 2051 | ||
2052 | /* Now, if we found one, we are running inside it now, hence | 2052 | /* Now, if we found one, we are running inside it now, hence |
2053 | we cannot unload the module, hence no refcnt needed. */ | 2053 | we cannot unload the module, hence no refcnt needed. */ |
2054 | return e; | 2054 | return e; |
2055 | } | 2055 | } |
2056 | 2056 | ||
2057 | /* Is this a valid kernel address? We don't grab the lock: we are oopsing. */ | 2057 | /* Is this a valid kernel address? We don't grab the lock: we are oopsing. */ |
2058 | struct module *__module_text_address(unsigned long addr) | 2058 | struct module *__module_text_address(unsigned long addr) |
2059 | { | 2059 | { |
2060 | struct module *mod; | 2060 | struct module *mod; |
2061 | 2061 | ||
2062 | list_for_each_entry(mod, &modules, list) | 2062 | list_for_each_entry(mod, &modules, list) |
2063 | if (within(addr, mod->module_init, mod->init_text_size) | 2063 | if (within(addr, mod->module_init, mod->init_text_size) |
2064 | || within(addr, mod->module_core, mod->core_text_size)) | 2064 | || within(addr, mod->module_core, mod->core_text_size)) |
2065 | return mod; | 2065 | return mod; |
2066 | return NULL; | 2066 | return NULL; |
2067 | } | 2067 | } |
2068 | 2068 | ||
2069 | struct module *module_text_address(unsigned long addr) | 2069 | struct module *module_text_address(unsigned long addr) |
2070 | { | 2070 | { |
2071 | struct module *mod; | 2071 | struct module *mod; |
2072 | unsigned long flags; | 2072 | unsigned long flags; |
2073 | 2073 | ||
2074 | spin_lock_irqsave(&modlist_lock, flags); | 2074 | spin_lock_irqsave(&modlist_lock, flags); |
2075 | mod = __module_text_address(addr); | 2075 | mod = __module_text_address(addr); |
2076 | spin_unlock_irqrestore(&modlist_lock, flags); | 2076 | spin_unlock_irqrestore(&modlist_lock, flags); |
2077 | 2077 | ||
2078 | return mod; | 2078 | return mod; |
2079 | } | 2079 | } |
2080 | 2080 | ||
2081 | /* Don't grab lock, we're oopsing. */ | 2081 | /* Don't grab lock, we're oopsing. */ |
2082 | void print_modules(void) | 2082 | void print_modules(void) |
2083 | { | 2083 | { |
2084 | struct module *mod; | 2084 | struct module *mod; |
2085 | 2085 | ||
2086 | printk("Modules linked in:"); | 2086 | printk("Modules linked in:"); |
2087 | list_for_each_entry(mod, &modules, list) | 2087 | list_for_each_entry(mod, &modules, list) |
2088 | printk(" %s", mod->name); | 2088 | printk(" %s", mod->name); |
2089 | printk("\n"); | 2089 | printk("\n"); |
2090 | } | 2090 | } |
2091 | 2091 | ||
2092 | void module_add_driver(struct module *mod, struct device_driver *drv) | 2092 | void module_add_driver(struct module *mod, struct device_driver *drv) |
2093 | { | 2093 | { |
2094 | if (!mod || !drv) | 2094 | if (!mod || !drv) |
2095 | return; | 2095 | return; |
2096 | 2096 | ||
2097 | /* Don't check return code; this call is idempotent */ | 2097 | /* Don't check return code; this call is idempotent */ |
2098 | sysfs_create_link(&drv->kobj, &mod->mkobj.kobj, "module"); | 2098 | sysfs_create_link(&drv->kobj, &mod->mkobj.kobj, "module"); |
2099 | } | 2099 | } |
2100 | EXPORT_SYMBOL(module_add_driver); | 2100 | EXPORT_SYMBOL(module_add_driver); |
2101 | 2101 | ||
2102 | void module_remove_driver(struct device_driver *drv) | 2102 | void module_remove_driver(struct device_driver *drv) |
2103 | { | 2103 | { |
2104 | if (!drv) | 2104 | if (!drv) |
2105 | return; | 2105 | return; |
2106 | sysfs_remove_link(&drv->kobj, "module"); | 2106 | sysfs_remove_link(&drv->kobj, "module"); |
2107 | } | 2107 | } |
2108 | EXPORT_SYMBOL(module_remove_driver); | 2108 | EXPORT_SYMBOL(module_remove_driver); |
2109 | 2109 | ||
2110 | #ifdef CONFIG_MODVERSIONS | 2110 | #ifdef CONFIG_MODVERSIONS |
2111 | /* Generate the signature for struct module here, too, for modversions. */ | 2111 | /* Generate the signature for struct module here, too, for modversions. */ |
2112 | void struct_module(struct module *mod) { return; } | 2112 | void struct_module(struct module *mod) { return; } |
2113 | EXPORT_SYMBOL(struct_module); | 2113 | EXPORT_SYMBOL(struct_module); |
2114 | #endif | 2114 | #endif |
2115 | 2115 |
kernel/power/smp.c
1 | /* | 1 | /* |
2 | * kernel/power/smp.c - Functions for stopping other CPUs. | 2 | * kernel/power/smp.c - Functions for stopping other CPUs. |
3 | * | 3 | * |
4 | * Copyright 2004 Pavel Machek <pavel@suse.cz> | 4 | * Copyright 2004 Pavel Machek <pavel@suse.cz> |
5 | * Copyright (C) 2002-2003 Nigel Cunningham <ncunningham@clear.net.nz> | 5 | * Copyright (C) 2002-2003 Nigel Cunningham <ncunningham@clear.net.nz> |
6 | * | 6 | * |
7 | * This file is released under the GPLv2. | 7 | * This file is released under the GPLv2. |
8 | */ | 8 | */ |
9 | 9 | ||
10 | #undef DEBUG | 10 | #undef DEBUG |
11 | 11 | ||
12 | #include <linux/smp_lock.h> | 12 | #include <linux/smp_lock.h> |
13 | #include <linux/interrupt.h> | 13 | #include <linux/interrupt.h> |
14 | #include <linux/suspend.h> | 14 | #include <linux/suspend.h> |
15 | #include <linux/module.h> | 15 | #include <linux/module.h> |
16 | #include <asm/atomic.h> | 16 | #include <asm/atomic.h> |
17 | #include <asm/tlbflush.h> | 17 | #include <asm/tlbflush.h> |
18 | 18 | ||
19 | static atomic_t cpu_counter, freeze; | 19 | static atomic_t cpu_counter, freeze; |
20 | 20 | ||
21 | 21 | ||
22 | static void smp_pause(void * data) | 22 | static void smp_pause(void * data) |
23 | { | 23 | { |
24 | struct saved_context ctxt; | 24 | struct saved_context ctxt; |
25 | __save_processor_state(&ctxt); | 25 | __save_processor_state(&ctxt); |
26 | printk("Sleeping in:\n"); | 26 | printk("Sleeping in:\n"); |
27 | dump_stack(); | 27 | dump_stack(); |
28 | atomic_inc(&cpu_counter); | 28 | atomic_inc(&cpu_counter); |
29 | while (atomic_read(&freeze)) { | 29 | while (atomic_read(&freeze)) { |
30 | /* FIXME: restore takes place at a random point inside this loop. | 30 | /* FIXME: restore takes place at a random point inside this loop. |
31 | This should probably be written in assembly, and | 31 | This should probably be written in assembly, and |
32 | preserve general-purpose registers, too | 32 | preserve general-purpose registers, too |
33 | 33 | ||
34 | What about stack? We may need to move to new stack here. | 34 | What about stack? We may need to move to new stack here. |
35 | 35 | ||
36 | This had better be run with interrupts disabled. | 36 | This had better be run with interrupts disabled. |
37 | */ | 37 | */ |
38 | cpu_relax(); | 38 | cpu_relax(); |
39 | barrier(); | 39 | barrier(); |
40 | } | 40 | } |
41 | atomic_dec(&cpu_counter); | 41 | atomic_dec(&cpu_counter); |
42 | __restore_processor_state(&ctxt); | 42 | __restore_processor_state(&ctxt); |
43 | } | 43 | } |
44 | 44 | ||
45 | static cpumask_t oldmask; | 45 | static cpumask_t oldmask; |
46 | 46 | ||
47 | void disable_nonboot_cpus(void) | 47 | void disable_nonboot_cpus(void) |
48 | { | 48 | { |
49 | oldmask = current->cpus_allowed; | 49 | oldmask = current->cpus_allowed; |
50 | set_cpus_allowed(current, cpumask_of_cpu(0)); | 50 | set_cpus_allowed(current, cpumask_of_cpu(0)); |
51 | printk("Freezing CPUs (at %d)", _smp_processor_id()); | 51 | printk("Freezing CPUs (at %d)", raw_smp_processor_id()); |
52 | current->state = TASK_INTERRUPTIBLE; | 52 | current->state = TASK_INTERRUPTIBLE; |
53 | schedule_timeout(HZ); | 53 | schedule_timeout(HZ); |
54 | printk("..."); | 54 | printk("..."); |
55 | BUG_ON(_smp_processor_id() != 0); | 55 | BUG_ON(raw_smp_processor_id() != 0); |
56 | 56 | ||
57 | /* FIXME: for this to work, all the CPUs must be running | 57 | /* FIXME: for this to work, all the CPUs must be running |
58 | * "idle" thread (or we deadlock). Is that guaranteed? */ | 58 | * "idle" thread (or we deadlock). Is that guaranteed? */ |
59 | 59 | ||
60 | atomic_set(&cpu_counter, 0); | 60 | atomic_set(&cpu_counter, 0); |
61 | atomic_set(&freeze, 1); | 61 | atomic_set(&freeze, 1); |
62 | smp_call_function(smp_pause, NULL, 0, 0); | 62 | smp_call_function(smp_pause, NULL, 0, 0); |
63 | while (atomic_read(&cpu_counter) < (num_online_cpus() - 1)) { | 63 | while (atomic_read(&cpu_counter) < (num_online_cpus() - 1)) { |
64 | cpu_relax(); | 64 | cpu_relax(); |
65 | barrier(); | 65 | barrier(); |
66 | } | 66 | } |
67 | printk("ok\n"); | 67 | printk("ok\n"); |
68 | } | 68 | } |
69 | 69 | ||
70 | void enable_nonboot_cpus(void) | 70 | void enable_nonboot_cpus(void) |
71 | { | 71 | { |
72 | printk("Restarting CPUs"); | 72 | printk("Restarting CPUs"); |
73 | atomic_set(&freeze, 0); | 73 | atomic_set(&freeze, 0); |
74 | while (atomic_read(&cpu_counter)) { | 74 | while (atomic_read(&cpu_counter)) { |
75 | cpu_relax(); | 75 | cpu_relax(); |
76 | barrier(); | 76 | barrier(); |
77 | } | 77 | } |
78 | printk("..."); | 78 | printk("..."); |
79 | set_cpus_allowed(current, oldmask); | 79 | set_cpus_allowed(current, oldmask); |
80 | schedule(); | 80 | schedule(); |
81 | printk("ok\n"); | 81 | printk("ok\n"); |
82 | 82 | ||
83 | } | 83 | } |
84 | 84 | ||
85 | 85 | ||
86 | 86 |
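The two converted call sites above use the raw variant because disable_nonboot_cpus() has just pinned itself to CPU 0 with set_cpus_allowed(current, cpumask_of_cpu(0)): the CPU number it prints (and BUG_ON()s on) cannot change even though the code runs in a preemptible context.

The freeze/cpu_counter handshake between disable_nonboot_cpus(), smp_pause() and enable_nonboot_cpus() can be modelled outside the kernel. The sketch below is only an illustration of that protocol using C11 atomics and POSIX threads; the names mirror the kernel code, but nothing here is kernel API.

#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>

static atomic_int cpu_counter, freeze;

/* analogue of smp_pause(): park until the coordinator drops 'freeze' */
static void *worker_pause(void *unused)
{
	(void)unused;
	atomic_fetch_add(&cpu_counter, 1);	/* "I am parked" */
	while (atomic_load(&freeze))
		;				/* spin, like the cpu_relax() loop */
	atomic_fetch_sub(&cpu_counter, 1);	/* "I am running again" */
	return NULL;
}

int main(void)
{
	enum { NWORKERS = 3 };
	pthread_t tid[NWORKERS];
	int i;

	atomic_store(&freeze, 1);		/* like atomic_set(&freeze, 1) */
	for (i = 0; i < NWORKERS; i++)
		pthread_create(&tid[i], NULL, worker_pause, NULL);

	/* disable_nonboot_cpus(): wait until every worker has checked in */
	while (atomic_load(&cpu_counter) < NWORKERS)
		;
	printf("all %d workers parked\n", NWORKERS);

	/* enable_nonboot_cpus(): release them and wait for the count to drain */
	atomic_store(&freeze, 0);
	while (atomic_load(&cpu_counter))
		;
	printf("all workers released\n");

	for (i = 0; i < NWORKERS; i++)
		pthread_join(tid[i], NULL);
	return 0;
}

In the kernel the "workers" are the other CPUs, which enter smp_pause() via smp_call_function(), and the coordinator is the suspend path running on CPU 0; the FIXME comments above are about what additionally has to be saved and restored around that spin loop, which this user-space model ignores.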
kernel/sched.c
1 | /* | 1 | /* |
2 | * kernel/sched.c | 2 | * kernel/sched.c |
3 | * | 3 | * |
4 | * Kernel scheduler and related syscalls | 4 | * Kernel scheduler and related syscalls |
5 | * | 5 | * |
6 | * Copyright (C) 1991-2002 Linus Torvalds | 6 | * Copyright (C) 1991-2002 Linus Torvalds |
7 | * | 7 | * |
8 | * 1996-12-23 Modified by Dave Grothe to fix bugs in semaphores and | 8 | * 1996-12-23 Modified by Dave Grothe to fix bugs in semaphores and |
9 | * make semaphores SMP safe | 9 | * make semaphores SMP safe |
10 | * 1998-11-19 Implemented schedule_timeout() and related stuff | 10 | * 1998-11-19 Implemented schedule_timeout() and related stuff |
11 | * by Andrea Arcangeli | 11 | * by Andrea Arcangeli |
12 | * 2002-01-04 New ultra-scalable O(1) scheduler by Ingo Molnar: | 12 | * 2002-01-04 New ultra-scalable O(1) scheduler by Ingo Molnar: |
13 | * hybrid priority-list and round-robin design with | 13 | * hybrid priority-list and round-robin design with |
14 | * an array-switch method of distributing timeslices | 14 | * an array-switch method of distributing timeslices |
15 | * and per-CPU runqueues. Cleanups and useful suggestions | 15 | * and per-CPU runqueues. Cleanups and useful suggestions |
16 | * by Davide Libenzi, preemptible kernel bits by Robert Love. | 16 | * by Davide Libenzi, preemptible kernel bits by Robert Love. |
17 | * 2003-09-03 Interactivity tuning by Con Kolivas. | 17 | * 2003-09-03 Interactivity tuning by Con Kolivas. |
18 | * 2004-04-02 Scheduler domains code by Nick Piggin | 18 | * 2004-04-02 Scheduler domains code by Nick Piggin |
19 | */ | 19 | */ |
20 | 20 | ||
21 | #include <linux/mm.h> | 21 | #include <linux/mm.h> |
22 | #include <linux/module.h> | 22 | #include <linux/module.h> |
23 | #include <linux/nmi.h> | 23 | #include <linux/nmi.h> |
24 | #include <linux/init.h> | 24 | #include <linux/init.h> |
25 | #include <asm/uaccess.h> | 25 | #include <asm/uaccess.h> |
26 | #include <linux/highmem.h> | 26 | #include <linux/highmem.h> |
27 | #include <linux/smp_lock.h> | 27 | #include <linux/smp_lock.h> |
28 | #include <asm/mmu_context.h> | 28 | #include <asm/mmu_context.h> |
29 | #include <linux/interrupt.h> | 29 | #include <linux/interrupt.h> |
30 | #include <linux/completion.h> | 30 | #include <linux/completion.h> |
31 | #include <linux/kernel_stat.h> | 31 | #include <linux/kernel_stat.h> |
32 | #include <linux/security.h> | 32 | #include <linux/security.h> |
33 | #include <linux/notifier.h> | 33 | #include <linux/notifier.h> |
34 | #include <linux/profile.h> | 34 | #include <linux/profile.h> |
35 | #include <linux/suspend.h> | 35 | #include <linux/suspend.h> |
36 | #include <linux/blkdev.h> | 36 | #include <linux/blkdev.h> |
37 | #include <linux/delay.h> | 37 | #include <linux/delay.h> |
38 | #include <linux/smp.h> | 38 | #include <linux/smp.h> |
39 | #include <linux/threads.h> | 39 | #include <linux/threads.h> |
40 | #include <linux/timer.h> | 40 | #include <linux/timer.h> |
41 | #include <linux/rcupdate.h> | 41 | #include <linux/rcupdate.h> |
42 | #include <linux/cpu.h> | 42 | #include <linux/cpu.h> |
43 | #include <linux/cpuset.h> | 43 | #include <linux/cpuset.h> |
44 | #include <linux/percpu.h> | 44 | #include <linux/percpu.h> |
45 | #include <linux/kthread.h> | 45 | #include <linux/kthread.h> |
46 | #include <linux/seq_file.h> | 46 | #include <linux/seq_file.h> |
47 | #include <linux/syscalls.h> | 47 | #include <linux/syscalls.h> |
48 | #include <linux/times.h> | 48 | #include <linux/times.h> |
49 | #include <linux/acct.h> | 49 | #include <linux/acct.h> |
50 | #include <asm/tlb.h> | 50 | #include <asm/tlb.h> |
51 | 51 | ||
52 | #include <asm/unistd.h> | 52 | #include <asm/unistd.h> |
53 | 53 | ||
54 | /* | 54 | /* |
55 | * Convert user-nice values [ -20 ... 0 ... 19 ] | 55 | * Convert user-nice values [ -20 ... 0 ... 19 ] |
56 | * to static priority [ MAX_RT_PRIO..MAX_PRIO-1 ], | 56 | * to static priority [ MAX_RT_PRIO..MAX_PRIO-1 ], |
57 | * and back. | 57 | * and back. |
58 | */ | 58 | */ |
59 | #define NICE_TO_PRIO(nice) (MAX_RT_PRIO + (nice) + 20) | 59 | #define NICE_TO_PRIO(nice) (MAX_RT_PRIO + (nice) + 20) |
60 | #define PRIO_TO_NICE(prio) ((prio) - MAX_RT_PRIO - 20) | 60 | #define PRIO_TO_NICE(prio) ((prio) - MAX_RT_PRIO - 20) |
61 | #define TASK_NICE(p) PRIO_TO_NICE((p)->static_prio) | 61 | #define TASK_NICE(p) PRIO_TO_NICE((p)->static_prio) |
62 | 62 | ||
63 | /* | 63 | /* |
64 | * 'User priority' is the nice value converted to something we | 64 | * 'User priority' is the nice value converted to something we |
65 | * can work with better when scaling various scheduler parameters, | 65 | * can work with better when scaling various scheduler parameters, |
66 | * it's a [ 0 ... 39 ] range. | 66 | * it's a [ 0 ... 39 ] range. |
67 | */ | 67 | */ |
68 | #define USER_PRIO(p) ((p)-MAX_RT_PRIO) | 68 | #define USER_PRIO(p) ((p)-MAX_RT_PRIO) |
69 | #define TASK_USER_PRIO(p) USER_PRIO((p)->static_prio) | 69 | #define TASK_USER_PRIO(p) USER_PRIO((p)->static_prio) |
70 | #define MAX_USER_PRIO (USER_PRIO(MAX_PRIO)) | 70 | #define MAX_USER_PRIO (USER_PRIO(MAX_PRIO)) |
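Plugging in the usual values of the constants (MAX_RT_PRIO = 100 and MAX_PRIO = 140, hence MAX_USER_PRIO = 40; those defines live outside this file, so treat the numbers as an assumption): NICE_TO_PRIO(-20) = 100, NICE_TO_PRIO(0) = 120 and NICE_TO_PRIO(19) = 139, so the nice range occupies static priorities 100..139 directly above the real-time priority numbers, and USER_PRIO() maps that band back onto 0..39.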
71 | 71 | ||
72 | /* | 72 | /* |
73 | * Some helpers for converting nanosecond timing to jiffy resolution | 73 | * Some helpers for converting nanosecond timing to jiffy resolution |
74 | */ | 74 | */ |
75 | #define NS_TO_JIFFIES(TIME) ((TIME) / (1000000000 / HZ)) | 75 | #define NS_TO_JIFFIES(TIME) ((TIME) / (1000000000 / HZ)) |
76 | #define JIFFIES_TO_NS(TIME) ((TIME) * (1000000000 / HZ)) | 76 | #define JIFFIES_TO_NS(TIME) ((TIME) * (1000000000 / HZ)) |
77 | 77 | ||
78 | /* | 78 | /* |
79 | * These are the 'tuning knobs' of the scheduler: | 79 | * These are the 'tuning knobs' of the scheduler: |
80 | * | 80 | * |
81 | * Minimum timeslice is 5 msecs (or 1 jiffy, whichever is larger), | 81 | * Minimum timeslice is 5 msecs (or 1 jiffy, whichever is larger), |
82 | * default timeslice is 100 msecs, maximum timeslice is 800 msecs. | 82 | * default timeslice is 100 msecs, maximum timeslice is 800 msecs. |
83 | * Timeslices get refilled after they expire. | 83 | * Timeslices get refilled after they expire. |
84 | */ | 84 | */ |
85 | #define MIN_TIMESLICE max(5 * HZ / 1000, 1) | 85 | #define MIN_TIMESLICE max(5 * HZ / 1000, 1) |
86 | #define DEF_TIMESLICE (100 * HZ / 1000) | 86 | #define DEF_TIMESLICE (100 * HZ / 1000) |
87 | #define ON_RUNQUEUE_WEIGHT 30 | 87 | #define ON_RUNQUEUE_WEIGHT 30 |
88 | #define CHILD_PENALTY 95 | 88 | #define CHILD_PENALTY 95 |
89 | #define PARENT_PENALTY 100 | 89 | #define PARENT_PENALTY 100 |
90 | #define EXIT_WEIGHT 3 | 90 | #define EXIT_WEIGHT 3 |
91 | #define PRIO_BONUS_RATIO 25 | 91 | #define PRIO_BONUS_RATIO 25 |
92 | #define MAX_BONUS (MAX_USER_PRIO * PRIO_BONUS_RATIO / 100) | 92 | #define MAX_BONUS (MAX_USER_PRIO * PRIO_BONUS_RATIO / 100) |
93 | #define INTERACTIVE_DELTA 2 | 93 | #define INTERACTIVE_DELTA 2 |
94 | #define MAX_SLEEP_AVG (DEF_TIMESLICE * MAX_BONUS) | 94 | #define MAX_SLEEP_AVG (DEF_TIMESLICE * MAX_BONUS) |
95 | #define STARVATION_LIMIT (MAX_SLEEP_AVG) | 95 | #define STARVATION_LIMIT (MAX_SLEEP_AVG) |
96 | #define NS_MAX_SLEEP_AVG (JIFFIES_TO_NS(MAX_SLEEP_AVG)) | 96 | #define NS_MAX_SLEEP_AVG (JIFFIES_TO_NS(MAX_SLEEP_AVG)) |
97 | 97 | ||
98 | /* | 98 | /* |
99 | * If a task is 'interactive' then we reinsert it in the active | 99 | * If a task is 'interactive' then we reinsert it in the active |
100 | * array after it has expired its current timeslice. (it will not | 100 | * array after it has expired its current timeslice. (it will not |
101 | * continue to run immediately, it will still roundrobin with | 101 | * continue to run immediately, it will still roundrobin with |
102 | * other interactive tasks.) | 102 | * other interactive tasks.) |
103 | * | 103 | * |
104 | * This part scales the interactivity limit depending on niceness. | 104 | * This part scales the interactivity limit depending on niceness. |
105 | * | 105 | * |
106 | * We scale it linearly, offset by the INTERACTIVE_DELTA delta. | 106 | * We scale it linearly, offset by the INTERACTIVE_DELTA delta. |
107 | * Here are a few examples of different nice levels: | 107 | * Here are a few examples of different nice levels: |
108 | * | 108 | * |
109 | * TASK_INTERACTIVE(-20): [1,1,1,1,1,1,1,1,1,0,0] | 109 | * TASK_INTERACTIVE(-20): [1,1,1,1,1,1,1,1,1,0,0] |
110 | * TASK_INTERACTIVE(-10): [1,1,1,1,1,1,1,0,0,0,0] | 110 | * TASK_INTERACTIVE(-10): [1,1,1,1,1,1,1,0,0,0,0] |
111 | * TASK_INTERACTIVE( 0): [1,1,1,1,0,0,0,0,0,0,0] | 111 | * TASK_INTERACTIVE( 0): [1,1,1,1,0,0,0,0,0,0,0] |
112 | * TASK_INTERACTIVE( 10): [1,1,0,0,0,0,0,0,0,0,0] | 112 | * TASK_INTERACTIVE( 10): [1,1,0,0,0,0,0,0,0,0,0] |
113 | * TASK_INTERACTIVE( 19): [0,0,0,0,0,0,0,0,0,0,0] | 113 | * TASK_INTERACTIVE( 19): [0,0,0,0,0,0,0,0,0,0,0] |
114 | * | 114 | * |
115 | * (the X axis represents the possible -5 ... 0 ... +5 dynamic | 115 | * (the X axis represents the possible -5 ... 0 ... +5 dynamic |
116 | * priority range a task can explore, a value of '1' means the | 116 | * priority range a task can explore, a value of '1' means the |
117 | * task is rated interactive.) | 117 | * task is rated interactive.) |
118 | * | 118 | * |
119 | * Ie. nice +19 tasks can never get 'interactive' enough to be | 119 | * Ie. nice +19 tasks can never get 'interactive' enough to be |
120 | * reinserted into the active array. And only heavily CPU-hog nice -20 | 120 | * reinserted into the active array. And only heavily CPU-hog nice -20 |
121 | * tasks will be expired. Default nice 0 tasks are somewhere between, | 121 | * tasks will be expired. Default nice 0 tasks are somewhere between, |
122 | * it takes some effort for them to get interactive, but it's not | 122 | * it takes some effort for them to get interactive, but it's not |
123 | * too hard. | 123 | * too hard. |
124 | */ | 124 | */ |
125 | 125 | ||
126 | #define CURRENT_BONUS(p) \ | 126 | #define CURRENT_BONUS(p) \ |
127 | (NS_TO_JIFFIES((p)->sleep_avg) * MAX_BONUS / \ | 127 | (NS_TO_JIFFIES((p)->sleep_avg) * MAX_BONUS / \ |
128 | MAX_SLEEP_AVG) | 128 | MAX_SLEEP_AVG) |
129 | 129 | ||
130 | #define GRANULARITY (10 * HZ / 1000 ? : 1) | 130 | #define GRANULARITY (10 * HZ / 1000 ? : 1) |
131 | 131 | ||
132 | #ifdef CONFIG_SMP | 132 | #ifdef CONFIG_SMP |
133 | #define TIMESLICE_GRANULARITY(p) (GRANULARITY * \ | 133 | #define TIMESLICE_GRANULARITY(p) (GRANULARITY * \ |
134 | (1 << (((MAX_BONUS - CURRENT_BONUS(p)) ? : 1) - 1)) * \ | 134 | (1 << (((MAX_BONUS - CURRENT_BONUS(p)) ? : 1) - 1)) * \ |
135 | num_online_cpus()) | 135 | num_online_cpus()) |
136 | #else | 136 | #else |
137 | #define TIMESLICE_GRANULARITY(p) (GRANULARITY * \ | 137 | #define TIMESLICE_GRANULARITY(p) (GRANULARITY * \ |
138 | (1 << (((MAX_BONUS - CURRENT_BONUS(p)) ? : 1) - 1))) | 138 | (1 << (((MAX_BONUS - CURRENT_BONUS(p)) ? : 1) - 1))) |
139 | #endif | 139 | #endif |
140 | 140 | ||
141 | #define SCALE(v1,v1_max,v2_max) \ | 141 | #define SCALE(v1,v1_max,v2_max) \ |
142 | (v1) * (v2_max) / (v1_max) | 142 | (v1) * (v2_max) / (v1_max) |
143 | 143 | ||
144 | #define DELTA(p) \ | 144 | #define DELTA(p) \ |
145 | (SCALE(TASK_NICE(p), 40, MAX_BONUS) + INTERACTIVE_DELTA) | 145 | (SCALE(TASK_NICE(p), 40, MAX_BONUS) + INTERACTIVE_DELTA) |
146 | 146 | ||
147 | #define TASK_INTERACTIVE(p) \ | 147 | #define TASK_INTERACTIVE(p) \ |
148 | ((p)->prio <= (p)->static_prio - DELTA(p)) | 148 | ((p)->prio <= (p)->static_prio - DELTA(p)) |
149 | 149 | ||
150 | #define INTERACTIVE_SLEEP(p) \ | 150 | #define INTERACTIVE_SLEEP(p) \ |
151 | (JIFFIES_TO_NS(MAX_SLEEP_AVG * \ | 151 | (JIFFIES_TO_NS(MAX_SLEEP_AVG * \ |
152 | (MAX_BONUS / 2 + DELTA((p)) + 1) / MAX_BONUS - 1)) | 152 | (MAX_BONUS / 2 + DELTA((p)) + 1) / MAX_BONUS - 1)) |
153 | 153 | ||
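Worked example of the cutoff, with the same assumed constants: MAX_BONUS = 40 * 25 / 100 = 10. For a nice-0 task, DELTA(p) = SCALE(0, 40, 10) + 2 = 2, so TASK_INTERACTIVE() holds once the task has earned a dynamic bonus of at least 2, i.e. p->prio <= 118 against a static priority of 120 (the four leftmost '1' slots in the nice-0 row above). For a nice +19 task, 19 * 10 / 40 rounds down to 4 and DELTA(p) = 6, which is more bonus than the -5 ... +5 dynamic range can ever supply, matching the all-zero row.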
154 | #define TASK_PREEMPTS_CURR(p, rq) \ | 154 | #define TASK_PREEMPTS_CURR(p, rq) \ |
155 | ((p)->prio < (rq)->curr->prio) | 155 | ((p)->prio < (rq)->curr->prio) |
156 | 156 | ||
157 | /* | 157 | /* |
158 | * task_timeslice() scales user-nice values [ -20 ... 0 ... 19 ] | 158 | * task_timeslice() scales user-nice values [ -20 ... 0 ... 19 ] |
159 | * to time slice values: [800ms ... 100ms ... 5ms] | 159 | * to time slice values: [800ms ... 100ms ... 5ms] |
160 | * | 160 | * |
161 | * The higher a thread's priority, the bigger timeslices | 161 | * The higher a thread's priority, the bigger timeslices |
162 | * it gets during one round of execution. But even the lowest | 162 | * it gets during one round of execution. But even the lowest |
163 | * priority thread gets MIN_TIMESLICE worth of execution time. | 163 | * priority thread gets MIN_TIMESLICE worth of execution time. |
164 | */ | 164 | */ |
165 | 165 | ||
166 | #define SCALE_PRIO(x, prio) \ | 166 | #define SCALE_PRIO(x, prio) \ |
167 | max(x * (MAX_PRIO - prio) / (MAX_USER_PRIO/2), MIN_TIMESLICE) | 167 | max(x * (MAX_PRIO - prio) / (MAX_USER_PRIO/2), MIN_TIMESLICE) |
168 | 168 | ||
169 | static inline unsigned int task_timeslice(task_t *p) | 169 | static inline unsigned int task_timeslice(task_t *p) |
170 | { | 170 | { |
171 | if (p->static_prio < NICE_TO_PRIO(0)) | 171 | if (p->static_prio < NICE_TO_PRIO(0)) |
172 | return SCALE_PRIO(DEF_TIMESLICE*4, p->static_prio); | 172 | return SCALE_PRIO(DEF_TIMESLICE*4, p->static_prio); |
173 | else | 173 | else |
174 | return SCALE_PRIO(DEF_TIMESLICE, p->static_prio); | 174 | return SCALE_PRIO(DEF_TIMESLICE, p->static_prio); |
175 | } | 175 | } |
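Worked example, assuming HZ = 1000 (one jiffy = 1 ms, so DEF_TIMESLICE = 100 and MIN_TIMESLICE = 5) and MAX_PRIO = 140: a nice-0 task (static_prio 120) takes the second branch and gets SCALE_PRIO(100, 120) = 100 * 20 / 20 = 100 ms; a nice -20 task (static_prio 100) takes the first branch and gets 400 * 40 / 20 = 800 ms; a nice +19 task (static_prio 139) gets max(100 * 1 / 20, 5) = 5 ms. That is exactly the [800ms ... 100ms ... 5ms] range quoted in the comment above.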
176 | #define task_hot(p, now, sd) ((long long) ((now) - (p)->last_ran) \ | 176 | #define task_hot(p, now, sd) ((long long) ((now) - (p)->last_ran) \ |
177 | < (long long) (sd)->cache_hot_time) | 177 | < (long long) (sd)->cache_hot_time) |
178 | 178 | ||
179 | /* | 179 | /* |
180 | * These are the runqueue data structures: | 180 | * These are the runqueue data structures: |
181 | */ | 181 | */ |
182 | 182 | ||
183 | #define BITMAP_SIZE ((((MAX_PRIO+1+7)/8)+sizeof(long)-1)/sizeof(long)) | 183 | #define BITMAP_SIZE ((((MAX_PRIO+1+7)/8)+sizeof(long)-1)/sizeof(long)) |
184 | 184 | ||
185 | typedef struct runqueue runqueue_t; | 185 | typedef struct runqueue runqueue_t; |
186 | 186 | ||
187 | struct prio_array { | 187 | struct prio_array { |
188 | unsigned int nr_active; | 188 | unsigned int nr_active; |
189 | unsigned long bitmap[BITMAP_SIZE]; | 189 | unsigned long bitmap[BITMAP_SIZE]; |
190 | struct list_head queue[MAX_PRIO]; | 190 | struct list_head queue[MAX_PRIO]; |
191 | }; | 191 | }; |
192 | 192 | ||
193 | /* | 193 | /* |
194 | * This is the main, per-CPU runqueue data structure. | 194 | * This is the main, per-CPU runqueue data structure. |
195 | * | 195 | * |
196 | * Locking rule: those places that want to lock multiple runqueues | 196 | * Locking rule: those places that want to lock multiple runqueues |
197 | * (such as the load balancing or the thread migration code), lock | 197 | * (such as the load balancing or the thread migration code), lock |
198 | * acquire operations must be ordered by ascending &runqueue. | 198 | * acquire operations must be ordered by ascending &runqueue. |
199 | */ | 199 | */ |
200 | struct runqueue { | 200 | struct runqueue { |
201 | spinlock_t lock; | 201 | spinlock_t lock; |
202 | 202 | ||
203 | /* | 203 | /* |
204 | * nr_running and cpu_load should be in the same cacheline because | 204 | * nr_running and cpu_load should be in the same cacheline because |
205 | * remote CPUs use both these fields when doing load calculation. | 205 | * remote CPUs use both these fields when doing load calculation. |
206 | */ | 206 | */ |
207 | unsigned long nr_running; | 207 | unsigned long nr_running; |
208 | #ifdef CONFIG_SMP | 208 | #ifdef CONFIG_SMP |
209 | unsigned long cpu_load; | 209 | unsigned long cpu_load; |
210 | #endif | 210 | #endif |
211 | unsigned long long nr_switches; | 211 | unsigned long long nr_switches; |
212 | 212 | ||
213 | /* | 213 | /* |
214 | * This is part of a global counter where only the total sum | 214 | * This is part of a global counter where only the total sum |
215 | * over all CPUs matters. A task can increase this counter on | 215 | * over all CPUs matters. A task can increase this counter on |
216 | * one CPU and if it got migrated afterwards it may decrease | 216 | * one CPU and if it got migrated afterwards it may decrease |
217 | * it on another CPU. Always updated under the runqueue lock: | 217 | * it on another CPU. Always updated under the runqueue lock: |
218 | */ | 218 | */ |
219 | unsigned long nr_uninterruptible; | 219 | unsigned long nr_uninterruptible; |
220 | 220 | ||
221 | unsigned long expired_timestamp; | 221 | unsigned long expired_timestamp; |
222 | unsigned long long timestamp_last_tick; | 222 | unsigned long long timestamp_last_tick; |
223 | task_t *curr, *idle; | 223 | task_t *curr, *idle; |
224 | struct mm_struct *prev_mm; | 224 | struct mm_struct *prev_mm; |
225 | prio_array_t *active, *expired, arrays[2]; | 225 | prio_array_t *active, *expired, arrays[2]; |
226 | int best_expired_prio; | 226 | int best_expired_prio; |
227 | atomic_t nr_iowait; | 227 | atomic_t nr_iowait; |
228 | 228 | ||
229 | #ifdef CONFIG_SMP | 229 | #ifdef CONFIG_SMP |
230 | struct sched_domain *sd; | 230 | struct sched_domain *sd; |
231 | 231 | ||
232 | /* For active balancing */ | 232 | /* For active balancing */ |
233 | int active_balance; | 233 | int active_balance; |
234 | int push_cpu; | 234 | int push_cpu; |
235 | 235 | ||
236 | task_t *migration_thread; | 236 | task_t *migration_thread; |
237 | struct list_head migration_queue; | 237 | struct list_head migration_queue; |
238 | #endif | 238 | #endif |
239 | 239 | ||
240 | #ifdef CONFIG_SCHEDSTATS | 240 | #ifdef CONFIG_SCHEDSTATS |
241 | /* latency stats */ | 241 | /* latency stats */ |
242 | struct sched_info rq_sched_info; | 242 | struct sched_info rq_sched_info; |
243 | 243 | ||
244 | /* sys_sched_yield() stats */ | 244 | /* sys_sched_yield() stats */ |
245 | unsigned long yld_exp_empty; | 245 | unsigned long yld_exp_empty; |
246 | unsigned long yld_act_empty; | 246 | unsigned long yld_act_empty; |
247 | unsigned long yld_both_empty; | 247 | unsigned long yld_both_empty; |
248 | unsigned long yld_cnt; | 248 | unsigned long yld_cnt; |
249 | 249 | ||
250 | /* schedule() stats */ | 250 | /* schedule() stats */ |
251 | unsigned long sched_switch; | 251 | unsigned long sched_switch; |
252 | unsigned long sched_cnt; | 252 | unsigned long sched_cnt; |
253 | unsigned long sched_goidle; | 253 | unsigned long sched_goidle; |
254 | 254 | ||
255 | /* try_to_wake_up() stats */ | 255 | /* try_to_wake_up() stats */ |
256 | unsigned long ttwu_cnt; | 256 | unsigned long ttwu_cnt; |
257 | unsigned long ttwu_local; | 257 | unsigned long ttwu_local; |
258 | #endif | 258 | #endif |
259 | }; | 259 | }; |
260 | 260 | ||
261 | static DEFINE_PER_CPU(struct runqueue, runqueues); | 261 | static DEFINE_PER_CPU(struct runqueue, runqueues); |
262 | 262 | ||
263 | #define for_each_domain(cpu, domain) \ | 263 | #define for_each_domain(cpu, domain) \ |
264 | for (domain = cpu_rq(cpu)->sd; domain; domain = domain->parent) | 264 | for (domain = cpu_rq(cpu)->sd; domain; domain = domain->parent) |
265 | 265 | ||
266 | #define cpu_rq(cpu) (&per_cpu(runqueues, (cpu))) | 266 | #define cpu_rq(cpu) (&per_cpu(runqueues, (cpu))) |
267 | #define this_rq() (&__get_cpu_var(runqueues)) | 267 | #define this_rq() (&__get_cpu_var(runqueues)) |
268 | #define task_rq(p) cpu_rq(task_cpu(p)) | 268 | #define task_rq(p) cpu_rq(task_cpu(p)) |
269 | #define cpu_curr(cpu) (cpu_rq(cpu)->curr) | 269 | #define cpu_curr(cpu) (cpu_rq(cpu)->curr) |
270 | 270 | ||
271 | /* | 271 | /* |
272 | * Default context-switch locking: | 272 | * Default context-switch locking: |
273 | */ | 273 | */ |
274 | #ifndef prepare_arch_switch | 274 | #ifndef prepare_arch_switch |
275 | # define prepare_arch_switch(rq, next) do { } while (0) | 275 | # define prepare_arch_switch(rq, next) do { } while (0) |
276 | # define finish_arch_switch(rq, next) spin_unlock_irq(&(rq)->lock) | 276 | # define finish_arch_switch(rq, next) spin_unlock_irq(&(rq)->lock) |
277 | # define task_running(rq, p) ((rq)->curr == (p)) | 277 | # define task_running(rq, p) ((rq)->curr == (p)) |
278 | #endif | 278 | #endif |
279 | 279 | ||
280 | /* | 280 | /* |
281 | * task_rq_lock - lock the runqueue a given task resides on and disable | 281 | * task_rq_lock - lock the runqueue a given task resides on and disable |
282 | * interrupts. Note the ordering: we can safely lookup the task_rq without | 282 | * interrupts. Note the ordering: we can safely lookup the task_rq without |
283 | * explicitly disabling preemption. | 283 | * explicitly disabling preemption. |
284 | */ | 284 | */ |
285 | static inline runqueue_t *task_rq_lock(task_t *p, unsigned long *flags) | 285 | static inline runqueue_t *task_rq_lock(task_t *p, unsigned long *flags) |
286 | __acquires(rq->lock) | 286 | __acquires(rq->lock) |
287 | { | 287 | { |
288 | struct runqueue *rq; | 288 | struct runqueue *rq; |
289 | 289 | ||
290 | repeat_lock_task: | 290 | repeat_lock_task: |
291 | local_irq_save(*flags); | 291 | local_irq_save(*flags); |
292 | rq = task_rq(p); | 292 | rq = task_rq(p); |
293 | spin_lock(&rq->lock); | 293 | spin_lock(&rq->lock); |
294 | if (unlikely(rq != task_rq(p))) { | 294 | if (unlikely(rq != task_rq(p))) { |
295 | spin_unlock_irqrestore(&rq->lock, *flags); | 295 | spin_unlock_irqrestore(&rq->lock, *flags); |
296 | goto repeat_lock_task; | 296 | goto repeat_lock_task; |
297 | } | 297 | } |
298 | return rq; | 298 | return rq; |
299 | } | 299 | } |
300 | 300 | ||
301 | static inline void task_rq_unlock(runqueue_t *rq, unsigned long *flags) | 301 | static inline void task_rq_unlock(runqueue_t *rq, unsigned long *flags) |
302 | __releases(rq->lock) | 302 | __releases(rq->lock) |
303 | { | 303 | { |
304 | spin_unlock_irqrestore(&rq->lock, *flags); | 304 | spin_unlock_irqrestore(&rq->lock, *flags); |
305 | } | 305 | } |
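Callers pair these two helpers around any access that needs a stable task-to-runqueue binding; a minimal usage sketch (p is a task_t pointer the caller already holds, the body is illustrative):

	unsigned long flags;
	runqueue_t *rq = task_rq_lock(p, &flags);
	/* p cannot migrate to another runqueue while rq->lock is held */
	task_rq_unlock(rq, &flags);

The retry loop in task_rq_lock() exists because p may be migrated between the task_rq(p) lookup and the spin_lock(); re-checking under the lock guarantees that the lock actually taken is the one currently protecting p.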
306 | 306 | ||
307 | #ifdef CONFIG_SCHEDSTATS | 307 | #ifdef CONFIG_SCHEDSTATS |
308 | /* | 308 | /* |
309 | * bump this up when changing the output format or the meaning of an existing | 309 | * bump this up when changing the output format or the meaning of an existing |
310 | * format, so that tools can adapt (or abort) | 310 | * format, so that tools can adapt (or abort) |
311 | */ | 311 | */ |
312 | #define SCHEDSTAT_VERSION 11 | 312 | #define SCHEDSTAT_VERSION 11 |
313 | 313 | ||
314 | static int show_schedstat(struct seq_file *seq, void *v) | 314 | static int show_schedstat(struct seq_file *seq, void *v) |
315 | { | 315 | { |
316 | int cpu; | 316 | int cpu; |
317 | 317 | ||
318 | seq_printf(seq, "version %d\n", SCHEDSTAT_VERSION); | 318 | seq_printf(seq, "version %d\n", SCHEDSTAT_VERSION); |
319 | seq_printf(seq, "timestamp %lu\n", jiffies); | 319 | seq_printf(seq, "timestamp %lu\n", jiffies); |
320 | for_each_online_cpu(cpu) { | 320 | for_each_online_cpu(cpu) { |
321 | runqueue_t *rq = cpu_rq(cpu); | 321 | runqueue_t *rq = cpu_rq(cpu); |
322 | #ifdef CONFIG_SMP | 322 | #ifdef CONFIG_SMP |
323 | struct sched_domain *sd; | 323 | struct sched_domain *sd; |
324 | int dcnt = 0; | 324 | int dcnt = 0; |
325 | #endif | 325 | #endif |
326 | 326 | ||
327 | /* runqueue-specific stats */ | 327 | /* runqueue-specific stats */ |
328 | seq_printf(seq, | 328 | seq_printf(seq, |
329 | "cpu%d %lu %lu %lu %lu %lu %lu %lu %lu %lu %lu %lu %lu", | 329 | "cpu%d %lu %lu %lu %lu %lu %lu %lu %lu %lu %lu %lu %lu", |
330 | cpu, rq->yld_both_empty, | 330 | cpu, rq->yld_both_empty, |
331 | rq->yld_act_empty, rq->yld_exp_empty, rq->yld_cnt, | 331 | rq->yld_act_empty, rq->yld_exp_empty, rq->yld_cnt, |
332 | rq->sched_switch, rq->sched_cnt, rq->sched_goidle, | 332 | rq->sched_switch, rq->sched_cnt, rq->sched_goidle, |
333 | rq->ttwu_cnt, rq->ttwu_local, | 333 | rq->ttwu_cnt, rq->ttwu_local, |
334 | rq->rq_sched_info.cpu_time, | 334 | rq->rq_sched_info.cpu_time, |
335 | rq->rq_sched_info.run_delay, rq->rq_sched_info.pcnt); | 335 | rq->rq_sched_info.run_delay, rq->rq_sched_info.pcnt); |
336 | 336 | ||
337 | seq_printf(seq, "\n"); | 337 | seq_printf(seq, "\n"); |
338 | 338 | ||
339 | #ifdef CONFIG_SMP | 339 | #ifdef CONFIG_SMP |
340 | /* domain-specific stats */ | 340 | /* domain-specific stats */ |
341 | for_each_domain(cpu, sd) { | 341 | for_each_domain(cpu, sd) { |
342 | enum idle_type itype; | 342 | enum idle_type itype; |
343 | char mask_str[NR_CPUS]; | 343 | char mask_str[NR_CPUS]; |
344 | 344 | ||
345 | cpumask_scnprintf(mask_str, NR_CPUS, sd->span); | 345 | cpumask_scnprintf(mask_str, NR_CPUS, sd->span); |
346 | seq_printf(seq, "domain%d %s", dcnt++, mask_str); | 346 | seq_printf(seq, "domain%d %s", dcnt++, mask_str); |
347 | for (itype = SCHED_IDLE; itype < MAX_IDLE_TYPES; | 347 | for (itype = SCHED_IDLE; itype < MAX_IDLE_TYPES; |
348 | itype++) { | 348 | itype++) { |
349 | seq_printf(seq, " %lu %lu %lu %lu %lu %lu %lu %lu", | 349 | seq_printf(seq, " %lu %lu %lu %lu %lu %lu %lu %lu", |
350 | sd->lb_cnt[itype], | 350 | sd->lb_cnt[itype], |
351 | sd->lb_balanced[itype], | 351 | sd->lb_balanced[itype], |
352 | sd->lb_failed[itype], | 352 | sd->lb_failed[itype], |
353 | sd->lb_imbalance[itype], | 353 | sd->lb_imbalance[itype], |
354 | sd->lb_gained[itype], | 354 | sd->lb_gained[itype], |
355 | sd->lb_hot_gained[itype], | 355 | sd->lb_hot_gained[itype], |
356 | sd->lb_nobusyq[itype], | 356 | sd->lb_nobusyq[itype], |
357 | sd->lb_nobusyg[itype]); | 357 | sd->lb_nobusyg[itype]); |
358 | } | 358 | } |
359 | seq_printf(seq, " %lu %lu %lu %lu %lu %lu %lu %lu\n", | 359 | seq_printf(seq, " %lu %lu %lu %lu %lu %lu %lu %lu\n", |
360 | sd->alb_cnt, sd->alb_failed, sd->alb_pushed, | 360 | sd->alb_cnt, sd->alb_failed, sd->alb_pushed, |
361 | sd->sbe_pushed, sd->sbe_attempts, | 361 | sd->sbe_pushed, sd->sbe_attempts, |
362 | sd->ttwu_wake_remote, sd->ttwu_move_affine, sd->ttwu_move_balance); | 362 | sd->ttwu_wake_remote, sd->ttwu_move_affine, sd->ttwu_move_balance); |
363 | } | 363 | } |
364 | #endif | 364 | #endif |
365 | } | 365 | } |
366 | return 0; | 366 | return 0; |
367 | } | 367 | } |
368 | 368 | ||
369 | static int schedstat_open(struct inode *inode, struct file *file) | 369 | static int schedstat_open(struct inode *inode, struct file *file) |
370 | { | 370 | { |
371 | unsigned int size = PAGE_SIZE * (1 + num_online_cpus() / 32); | 371 | unsigned int size = PAGE_SIZE * (1 + num_online_cpus() / 32); |
372 | char *buf = kmalloc(size, GFP_KERNEL); | 372 | char *buf = kmalloc(size, GFP_KERNEL); |
373 | struct seq_file *m; | 373 | struct seq_file *m; |
374 | int res; | 374 | int res; |
375 | 375 | ||
376 | if (!buf) | 376 | if (!buf) |
377 | return -ENOMEM; | 377 | return -ENOMEM; |
378 | res = single_open(file, show_schedstat, NULL); | 378 | res = single_open(file, show_schedstat, NULL); |
379 | if (!res) { | 379 | if (!res) { |
380 | m = file->private_data; | 380 | m = file->private_data; |
381 | m->buf = buf; | 381 | m->buf = buf; |
382 | m->size = size; | 382 | m->size = size; |
383 | } else | 383 | } else |
384 | kfree(buf); | 384 | kfree(buf); |
385 | return res; | 385 | return res; |
386 | } | 386 | } |
387 | 387 | ||
388 | struct file_operations proc_schedstat_operations = { | 388 | struct file_operations proc_schedstat_operations = { |
389 | .open = schedstat_open, | 389 | .open = schedstat_open, |
390 | .read = seq_read, | 390 | .read = seq_read, |
391 | .llseek = seq_lseek, | 391 | .llseek = seq_lseek, |
392 | .release = single_release, | 392 | .release = single_release, |
393 | }; | 393 | }; |
394 | 394 | ||
395 | # define schedstat_inc(rq, field) do { (rq)->field++; } while (0) | 395 | # define schedstat_inc(rq, field) do { (rq)->field++; } while (0) |
396 | # define schedstat_add(rq, field, amt) do { (rq)->field += (amt); } while (0) | 396 | # define schedstat_add(rq, field, amt) do { (rq)->field += (amt); } while (0) |
397 | #else /* !CONFIG_SCHEDSTATS */ | 397 | #else /* !CONFIG_SCHEDSTATS */ |
398 | # define schedstat_inc(rq, field) do { } while (0) | 398 | # define schedstat_inc(rq, field) do { } while (0) |
399 | # define schedstat_add(rq, field, amt) do { } while (0) | 399 | # define schedstat_add(rq, field, amt) do { } while (0) |
400 | #endif | 400 | #endif |
401 | 401 | ||
402 | /* | 402 | /* |
403 | * this_rq_lock - lock this CPU's runqueue and disable interrupts. | 403 | * this_rq_lock - lock this CPU's runqueue and disable interrupts. |
404 | */ | 404 | */ |
405 | static inline runqueue_t *this_rq_lock(void) | 405 | static inline runqueue_t *this_rq_lock(void) |
406 | __acquires(rq->lock) | 406 | __acquires(rq->lock) |
407 | { | 407 | { |
408 | runqueue_t *rq; | 408 | runqueue_t *rq; |
409 | 409 | ||
410 | local_irq_disable(); | 410 | local_irq_disable(); |
411 | rq = this_rq(); | 411 | rq = this_rq(); |
412 | spin_lock(&rq->lock); | 412 | spin_lock(&rq->lock); |
413 | 413 | ||
414 | return rq; | 414 | return rq; |
415 | } | 415 | } |
416 | 416 | ||
417 | #ifdef CONFIG_SCHED_SMT | 417 | #ifdef CONFIG_SCHED_SMT |
418 | static int cpu_and_siblings_are_idle(int cpu) | 418 | static int cpu_and_siblings_are_idle(int cpu) |
419 | { | 419 | { |
420 | int sib; | 420 | int sib; |
421 | for_each_cpu_mask(sib, cpu_sibling_map[cpu]) { | 421 | for_each_cpu_mask(sib, cpu_sibling_map[cpu]) { |
422 | if (idle_cpu(sib)) | 422 | if (idle_cpu(sib)) |
423 | continue; | 423 | continue; |
424 | return 0; | 424 | return 0; |
425 | } | 425 | } |
426 | 426 | ||
427 | return 1; | 427 | return 1; |
428 | } | 428 | } |
429 | #else | 429 | #else |
430 | #define cpu_and_siblings_are_idle(A) idle_cpu(A) | 430 | #define cpu_and_siblings_are_idle(A) idle_cpu(A) |
431 | #endif | 431 | #endif |
432 | 432 | ||
433 | #ifdef CONFIG_SCHEDSTATS | 433 | #ifdef CONFIG_SCHEDSTATS |
434 | /* | 434 | /* |
435 | * Called when a process is dequeued from the active array and given | 435 | * Called when a process is dequeued from the active array and given |
436 | * the cpu. We should note that with the exception of interactive | 436 | * the cpu. We should note that with the exception of interactive |
437 | * tasks, the expired queue will become the active queue after the active | 437 | * tasks, the expired queue will become the active queue after the active |
438 | * queue is empty, without explicitly dequeuing and requeuing tasks in the | 438 | * queue is empty, without explicitly dequeuing and requeuing tasks in the |
439 | * expired queue. (Interactive tasks may be requeued directly to the | 439 | * expired queue. (Interactive tasks may be requeued directly to the |
440 | * active queue, thus delaying tasks in the expired queue from running; | 440 | * active queue, thus delaying tasks in the expired queue from running; |
441 | * see scheduler_tick()). | 441 | * see scheduler_tick()). |
442 | * | 442 | * |
443 | * This function is only called from sched_info_arrive(), rather than | 443 | * This function is only called from sched_info_arrive(), rather than |
444 | * dequeue_task(). Even though a task may be queued and dequeued multiple | 444 | * dequeue_task(). Even though a task may be queued and dequeued multiple |
445 | * times as it is shuffled about, we're really interested in knowing how | 445 | * times as it is shuffled about, we're really interested in knowing how |
446 | * long it was from the *first* time it was queued to the time that it | 446 | * long it was from the *first* time it was queued to the time that it |
447 | * finally hit a cpu. | 447 | * finally hit a cpu. |
448 | */ | 448 | */ |
449 | static inline void sched_info_dequeued(task_t *t) | 449 | static inline void sched_info_dequeued(task_t *t) |
450 | { | 450 | { |
451 | t->sched_info.last_queued = 0; | 451 | t->sched_info.last_queued = 0; |
452 | } | 452 | } |
453 | 453 | ||
454 | /* | 454 | /* |
455 | * Called when a task finally hits the cpu. We can now calculate how | 455 | * Called when a task finally hits the cpu. We can now calculate how |
456 | * long it was waiting to run. We also note when it began so that we | 456 | * long it was waiting to run. We also note when it began so that we |
457 | * can keep stats on how long its timeslice is. | 457 | * can keep stats on how long its timeslice is. |
458 | */ | 458 | */ |
459 | static inline void sched_info_arrive(task_t *t) | 459 | static inline void sched_info_arrive(task_t *t) |
460 | { | 460 | { |
461 | unsigned long now = jiffies, diff = 0; | 461 | unsigned long now = jiffies, diff = 0; |
462 | struct runqueue *rq = task_rq(t); | 462 | struct runqueue *rq = task_rq(t); |
463 | 463 | ||
464 | if (t->sched_info.last_queued) | 464 | if (t->sched_info.last_queued) |
465 | diff = now - t->sched_info.last_queued; | 465 | diff = now - t->sched_info.last_queued; |
466 | sched_info_dequeued(t); | 466 | sched_info_dequeued(t); |
467 | t->sched_info.run_delay += diff; | 467 | t->sched_info.run_delay += diff; |
468 | t->sched_info.last_arrival = now; | 468 | t->sched_info.last_arrival = now; |
469 | t->sched_info.pcnt++; | 469 | t->sched_info.pcnt++; |
470 | 470 | ||
471 | if (!rq) | 471 | if (!rq) |
472 | return; | 472 | return; |
473 | 473 | ||
474 | rq->rq_sched_info.run_delay += diff; | 474 | rq->rq_sched_info.run_delay += diff; |
475 | rq->rq_sched_info.pcnt++; | 475 | rq->rq_sched_info.pcnt++; |
476 | } | 476 | } |
477 | 477 | ||
478 | /* | 478 | /* |
479 | * Called when a process is queued into either the active or expired | 479 | * Called when a process is queued into either the active or expired |
480 | * array. The time is noted and later used to determine how long we | 480 | * array. The time is noted and later used to determine how long we |
481 | * had to wait to reach the cpu. Since the expired queue will | 481 | * had to wait to reach the cpu. Since the expired queue will |
482 | * become the active queue after active queue is empty, without dequeuing | 482 | * become the active queue after active queue is empty, without dequeuing |
483 | * and requeuing any tasks, we are interested in queuing to either. It | 483 | * and requeuing any tasks, we are interested in queuing to either. It |
484 | * is unusual but not impossible for tasks to be dequeued and immediately | 484 | * is unusual but not impossible for tasks to be dequeued and immediately |
485 | * requeued in the same or another array: this can happen in sched_yield(), | 485 | * requeued in the same or another array: this can happen in sched_yield(), |
486 | * set_user_nice(), and even load_balance() as it moves tasks from runqueue | 486 | * set_user_nice(), and even load_balance() as it moves tasks from runqueue |
487 | * to runqueue. | 487 | * to runqueue. |
488 | * | 488 | * |
489 | * This function is only called from enqueue_task(), but also only updates | 489 | * This function is only called from enqueue_task(), but also only updates |
490 | * the timestamp if it is already not set. It's assumed that | 490 | * the timestamp if it is already not set. It's assumed that |
491 | * sched_info_dequeued() will clear that stamp when appropriate. | 491 | * sched_info_dequeued() will clear that stamp when appropriate. |
492 | */ | 492 | */ |
493 | static inline void sched_info_queued(task_t *t) | 493 | static inline void sched_info_queued(task_t *t) |
494 | { | 494 | { |
495 | if (!t->sched_info.last_queued) | 495 | if (!t->sched_info.last_queued) |
496 | t->sched_info.last_queued = jiffies; | 496 | t->sched_info.last_queued = jiffies; |
497 | } | 497 | } |
498 | 498 | ||
499 | /* | 499 | /* |
500 | * Called when a process ceases being the active-running process, either | 500 | * Called when a process ceases being the active-running process, either |
501 | * voluntarily or involuntarily. Now we can calculate how long we ran. | 501 | * voluntarily or involuntarily. Now we can calculate how long we ran. |
502 | */ | 502 | */ |
503 | static inline void sched_info_depart(task_t *t) | 503 | static inline void sched_info_depart(task_t *t) |
504 | { | 504 | { |
505 | struct runqueue *rq = task_rq(t); | 505 | struct runqueue *rq = task_rq(t); |
506 | unsigned long diff = jiffies - t->sched_info.last_arrival; | 506 | unsigned long diff = jiffies - t->sched_info.last_arrival; |
507 | 507 | ||
508 | t->sched_info.cpu_time += diff; | 508 | t->sched_info.cpu_time += diff; |
509 | 509 | ||
510 | if (rq) | 510 | if (rq) |
511 | rq->rq_sched_info.cpu_time += diff; | 511 | rq->rq_sched_info.cpu_time += diff; |
512 | } | 512 | } |
513 | 513 | ||
514 | /* | 514 | /* |
515 | * Called when tasks are switched involuntarily due, typically, to expiring | 515 | * Called when tasks are switched involuntarily due, typically, to expiring |
516 | * their time slice. (This may also be called when switching to or from | 516 | * their time slice. (This may also be called when switching to or from |
517 | * the idle task.) We are only called when prev != next. | 517 | * the idle task.) We are only called when prev != next. |
518 | */ | 518 | */ |
519 | static inline void sched_info_switch(task_t *prev, task_t *next) | 519 | static inline void sched_info_switch(task_t *prev, task_t *next) |
520 | { | 520 | { |
521 | struct runqueue *rq = task_rq(prev); | 521 | struct runqueue *rq = task_rq(prev); |
522 | 522 | ||
523 | /* | 523 | /* |
524 | * prev now departs the cpu. It's not interesting to record | 524 | * prev now departs the cpu. It's not interesting to record |
525 | * stats about how efficient we were at scheduling the idle | 525 | * stats about how efficient we were at scheduling the idle |
526 | * process, however. | 526 | * process, however. |
527 | */ | 527 | */ |
528 | if (prev != rq->idle) | 528 | if (prev != rq->idle) |
529 | sched_info_depart(prev); | 529 | sched_info_depart(prev); |
530 | 530 | ||
531 | if (next != rq->idle) | 531 | if (next != rq->idle) |
532 | sched_info_arrive(next); | 532 | sched_info_arrive(next); |
533 | } | 533 | } |
534 | #else | 534 | #else |
535 | #define sched_info_queued(t) do { } while (0) | 535 | #define sched_info_queued(t) do { } while (0) |
536 | #define sched_info_switch(t, next) do { } while (0) | 536 | #define sched_info_switch(t, next) do { } while (0) |
537 | #endif /* CONFIG_SCHEDSTATS */ | 537 | #endif /* CONFIG_SCHEDSTATS */ |
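A concrete run through these hooks, using jiffies as the unit just as the code does: a task enqueued at jiffies = 1000 gets last_queued = 1000 from sched_info_queued(); if it first reaches a CPU at jiffies = 1007, sched_info_arrive() adds 7 to its run_delay (and to the runqueue's rq_sched_info.run_delay) and bumps pcnt; when sched_info_switch() sees it leave the CPU at jiffies = 1012, sched_info_depart() adds 5 to cpu_time. Intermediate dequeue/requeue cycles do not reset last_queued, which is the "first time it was queued" behaviour described in the comment above sched_info_dequeued().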
538 | 538 | ||
539 | /* | 539 | /* |
540 | * Adding/removing a task to/from a priority array: | 540 | * Adding/removing a task to/from a priority array: |
541 | */ | 541 | */ |
542 | static void dequeue_task(struct task_struct *p, prio_array_t *array) | 542 | static void dequeue_task(struct task_struct *p, prio_array_t *array) |
543 | { | 543 | { |
544 | array->nr_active--; | 544 | array->nr_active--; |
545 | list_del(&p->run_list); | 545 | list_del(&p->run_list); |
546 | if (list_empty(array->queue + p->prio)) | 546 | if (list_empty(array->queue + p->prio)) |
547 | __clear_bit(p->prio, array->bitmap); | 547 | __clear_bit(p->prio, array->bitmap); |
548 | } | 548 | } |
549 | 549 | ||
550 | static void enqueue_task(struct task_struct *p, prio_array_t *array) | 550 | static void enqueue_task(struct task_struct *p, prio_array_t *array) |
551 | { | 551 | { |
552 | sched_info_queued(p); | 552 | sched_info_queued(p); |
553 | list_add_tail(&p->run_list, array->queue + p->prio); | 553 | list_add_tail(&p->run_list, array->queue + p->prio); |
554 | __set_bit(p->prio, array->bitmap); | 554 | __set_bit(p->prio, array->bitmap); |
555 | array->nr_active++; | 555 | array->nr_active++; |
556 | p->array = array; | 556 | p->array = array; |
557 | } | 557 | } |
558 | 558 | ||
559 | /* | 559 | /* |
560 | * Put task to the end of the run list without the overhead of dequeue | 560 | * Put task to the end of the run list without the overhead of dequeue |
561 | * followed by enqueue. | 561 | * followed by enqueue. |
562 | */ | 562 | */ |
563 | static void requeue_task(struct task_struct *p, prio_array_t *array) | 563 | static void requeue_task(struct task_struct *p, prio_array_t *array) |
564 | { | 564 | { |
565 | list_move_tail(&p->run_list, array->queue + p->prio); | 565 | list_move_tail(&p->run_list, array->queue + p->prio); |
566 | } | 566 | } |
567 | 567 | ||
568 | static inline void enqueue_task_head(struct task_struct *p, prio_array_t *array) | 568 | static inline void enqueue_task_head(struct task_struct *p, prio_array_t *array) |
569 | { | 569 | { |
570 | list_add(&p->run_list, array->queue + p->prio); | 570 | list_add(&p->run_list, array->queue + p->prio); |
571 | __set_bit(p->prio, array->bitmap); | 571 | __set_bit(p->prio, array->bitmap); |
572 | array->nr_active++; | 572 | array->nr_active++; |
573 | p->array = array; | 573 | p->array = array; |
574 | } | 574 | } |
575 | 575 | ||
576 | /* | 576 | /* |
577 | * effective_prio - return the priority that is based on the static | 577 | * effective_prio - return the priority that is based on the static |
578 | * priority but is modified by bonuses/penalties. | 578 | * priority but is modified by bonuses/penalties. |
579 | * | 579 | * |
580 | * We scale the actual sleep average [0 .... MAX_SLEEP_AVG] | 580 | * We scale the actual sleep average [0 .... MAX_SLEEP_AVG] |
581 | * into the -5 ... 0 ... +5 bonus/penalty range. | 581 | * into the -5 ... 0 ... +5 bonus/penalty range. |
582 | * | 582 | * |
583 | * We use 25% of the full 0...39 priority range so that: | 583 | * We use 25% of the full 0...39 priority range so that: |
584 | * | 584 | * |
585 | * 1) nice +19 interactive tasks do not preempt nice 0 CPU hogs. | 585 | * 1) nice +19 interactive tasks do not preempt nice 0 CPU hogs. |
586 | * 2) nice -20 CPU hogs do not get preempted by nice 0 tasks. | 586 | * 2) nice -20 CPU hogs do not get preempted by nice 0 tasks. |
587 | * | 587 | * |
588 | * Both properties are important to certain workloads. | 588 | * Both properties are important to certain workloads. |
589 | */ | 589 | */ |
590 | static int effective_prio(task_t *p) | 590 | static int effective_prio(task_t *p) |
591 | { | 591 | { |
592 | int bonus, prio; | 592 | int bonus, prio; |
593 | 593 | ||
594 | if (rt_task(p)) | 594 | if (rt_task(p)) |
595 | return p->prio; | 595 | return p->prio; |
596 | 596 | ||
597 | bonus = CURRENT_BONUS(p) - MAX_BONUS / 2; | 597 | bonus = CURRENT_BONUS(p) - MAX_BONUS / 2; |
598 | 598 | ||
599 | prio = p->static_prio - bonus; | 599 | prio = p->static_prio - bonus; |
600 | if (prio < MAX_RT_PRIO) | 600 | if (prio < MAX_RT_PRIO) |
601 | prio = MAX_RT_PRIO; | 601 | prio = MAX_RT_PRIO; |
602 | if (prio > MAX_PRIO-1) | 602 | if (prio > MAX_PRIO-1) |
603 | prio = MAX_PRIO-1; | 603 | prio = MAX_PRIO-1; |
604 | return prio; | 604 | return prio; |
605 | } | 605 | } |
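Worked example with the assumed constants (MAX_BONUS = 10): a task whose sleep_avg has saturated at NS_MAX_SLEEP_AVG has CURRENT_BONUS = 10, giving bonus = +5, so a nice-0 task runs at prio 115; a task that never sleeps has CURRENT_BONUS = 0, bonus = -5, and runs at prio 125. The two clamps then keep the result inside the non-real-time band MAX_RT_PRIO .. MAX_PRIO-1, i.e. 100..139 with the values assumed earlier.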
606 | 606 | ||
607 | /* | 607 | /* |
608 | * __activate_task - move a task to the runqueue. | 608 | * __activate_task - move a task to the runqueue. |
609 | */ | 609 | */ |
610 | static inline void __activate_task(task_t *p, runqueue_t *rq) | 610 | static inline void __activate_task(task_t *p, runqueue_t *rq) |
611 | { | 611 | { |
612 | enqueue_task(p, rq->active); | 612 | enqueue_task(p, rq->active); |
613 | rq->nr_running++; | 613 | rq->nr_running++; |
614 | } | 614 | } |
615 | 615 | ||
616 | /* | 616 | /* |
617 | * __activate_idle_task - move idle task to the _front_ of runqueue. | 617 | * __activate_idle_task - move idle task to the _front_ of runqueue. |
618 | */ | 618 | */ |
619 | static inline void __activate_idle_task(task_t *p, runqueue_t *rq) | 619 | static inline void __activate_idle_task(task_t *p, runqueue_t *rq) |
620 | { | 620 | { |
621 | enqueue_task_head(p, rq->active); | 621 | enqueue_task_head(p, rq->active); |
622 | rq->nr_running++; | 622 | rq->nr_running++; |
623 | } | 623 | } |
624 | 624 | ||
625 | static void recalc_task_prio(task_t *p, unsigned long long now) | 625 | static void recalc_task_prio(task_t *p, unsigned long long now) |
626 | { | 626 | { |
627 | /* Caller must always ensure 'now >= p->timestamp' */ | 627 | /* Caller must always ensure 'now >= p->timestamp' */ |
628 | unsigned long long __sleep_time = now - p->timestamp; | 628 | unsigned long long __sleep_time = now - p->timestamp; |
629 | unsigned long sleep_time; | 629 | unsigned long sleep_time; |
630 | 630 | ||
631 | if (__sleep_time > NS_MAX_SLEEP_AVG) | 631 | if (__sleep_time > NS_MAX_SLEEP_AVG) |
632 | sleep_time = NS_MAX_SLEEP_AVG; | 632 | sleep_time = NS_MAX_SLEEP_AVG; |
633 | else | 633 | else |
634 | sleep_time = (unsigned long)__sleep_time; | 634 | sleep_time = (unsigned long)__sleep_time; |
635 | 635 | ||
636 | if (likely(sleep_time > 0)) { | 636 | if (likely(sleep_time > 0)) { |
637 | /* | 637 | /* |
638 | * User tasks that sleep a long time are categorised as | 638 | * User tasks that sleep a long time are categorised as |
639 | * idle and will get just interactive status to stay active & | 639 | * idle and will get just interactive status to stay active & |
640 | * prevent them suddenly becoming cpu hogs and starving | 640 | * prevent them suddenly becoming cpu hogs and starving |
641 | * other processes. | 641 | * other processes. |
642 | */ | 642 | */ |
643 | if (p->mm && p->activated != -1 && | 643 | if (p->mm && p->activated != -1 && |
644 | sleep_time > INTERACTIVE_SLEEP(p)) { | 644 | sleep_time > INTERACTIVE_SLEEP(p)) { |
645 | p->sleep_avg = JIFFIES_TO_NS(MAX_SLEEP_AVG - | 645 | p->sleep_avg = JIFFIES_TO_NS(MAX_SLEEP_AVG - |
646 | DEF_TIMESLICE); | 646 | DEF_TIMESLICE); |
647 | } else { | 647 | } else { |
648 | /* | 648 | /* |
649 | * The lower the sleep avg a task has the more | 649 | * The lower the sleep avg a task has the more |
650 | * rapidly it will rise with sleep time. | 650 | * rapidly it will rise with sleep time. |
651 | */ | 651 | */ |
652 | sleep_time *= (MAX_BONUS - CURRENT_BONUS(p)) ? : 1; | 652 | sleep_time *= (MAX_BONUS - CURRENT_BONUS(p)) ? : 1; |
653 | 653 | ||
654 | /* | 654 | /* |
655 | * Tasks waking from uninterruptible sleep are | 655 | * Tasks waking from uninterruptible sleep are |
656 | * limited in their sleep_avg rise as they | 656 | * limited in their sleep_avg rise as they |
657 | * are likely to be waiting on I/O | 657 | * are likely to be waiting on I/O |
658 | */ | 658 | */ |
659 | if (p->activated == -1 && p->mm) { | 659 | if (p->activated == -1 && p->mm) { |
660 | if (p->sleep_avg >= INTERACTIVE_SLEEP(p)) | 660 | if (p->sleep_avg >= INTERACTIVE_SLEEP(p)) |
661 | sleep_time = 0; | 661 | sleep_time = 0; |
662 | else if (p->sleep_avg + sleep_time >= | 662 | else if (p->sleep_avg + sleep_time >= |
663 | INTERACTIVE_SLEEP(p)) { | 663 | INTERACTIVE_SLEEP(p)) { |
664 | p->sleep_avg = INTERACTIVE_SLEEP(p); | 664 | p->sleep_avg = INTERACTIVE_SLEEP(p); |
665 | sleep_time = 0; | 665 | sleep_time = 0; |
666 | } | 666 | } |
667 | } | 667 | } |
668 | 668 | ||
669 | /* | 669 | /* |
670 | * This code gives a bonus to interactive tasks. | 670 | * This code gives a bonus to interactive tasks. |
671 | * | 671 | * |
672 | * The boost works by updating the 'average sleep time' | 672 | * The boost works by updating the 'average sleep time' |
673 | * value here, based on ->timestamp. The more time a | 673 | * value here, based on ->timestamp. The more time a |
674 | * task spends sleeping, the higher the average gets - | 674 | * task spends sleeping, the higher the average gets - |
675 | * and the higher the priority boost gets as well. | 675 | * and the higher the priority boost gets as well. |
676 | */ | 676 | */ |
677 | p->sleep_avg += sleep_time; | 677 | p->sleep_avg += sleep_time; |
678 | 678 | ||
679 | if (p->sleep_avg > NS_MAX_SLEEP_AVG) | 679 | if (p->sleep_avg > NS_MAX_SLEEP_AVG) |
680 | p->sleep_avg = NS_MAX_SLEEP_AVG; | 680 | p->sleep_avg = NS_MAX_SLEEP_AVG; |
681 | } | 681 | } |
682 | } | 682 | } |
683 | 683 | ||
684 | p->prio = effective_prio(p); | 684 | p->prio = effective_prio(p); |
685 | } | 685 | } |
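
The else-branch above gives low-sleep_avg tasks a steeper credit for the time they just slept, then caps the running average at NS_MAX_SLEEP_AVG. A compact sketch of that arithmetic follows; MAX_BONUS and NS_MAX_SLEEP_AVG are assumed values, and the "?:" shorthand is the same GNU C extension the kernel uses, so build it with gcc:

#include <stdio.h>

#define MAX_BONUS        10                     /* assumed */
#define NS_MAX_SLEEP_AVG 1000000000ULL          /* assumed: 1 second in ns */

/* The lower the current bonus, the more each slept nanosecond counts. */
static unsigned long long add_sleep(unsigned long long sleep_avg,
                                    unsigned long long sleep_time, int cur_bonus)
{
        sleep_time *= (MAX_BONUS - cur_bonus) ? : 1;    /* GNU C "a ?: b" */
        sleep_avg += sleep_time;
        if (sleep_avg > NS_MAX_SLEEP_AVG)
                sleep_avg = NS_MAX_SLEEP_AVG;
        return sleep_avg;
}

int main(void)
{
        /* A task with a low bonus (2) gets 8x credit for 10ms of sleep. */
        printf("%llu\n", add_sleep(0, 10000000ULL, 2)); /* 80000000 */
        return 0;
}
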
686 | 686 | ||
687 | /* | 687 | /* |
688 | * activate_task - move a task to the runqueue and do priority recalculation | 688 | * activate_task - move a task to the runqueue and do priority recalculation |
689 | * | 689 | * |
690 | * Update all the scheduling statistics stuff. (sleep average | 690 | * Update all the scheduling statistics stuff. (sleep average |
691 | * calculation, priority modifiers, etc.) | 691 | * calculation, priority modifiers, etc.) |
692 | */ | 692 | */ |
693 | static void activate_task(task_t *p, runqueue_t *rq, int local) | 693 | static void activate_task(task_t *p, runqueue_t *rq, int local) |
694 | { | 694 | { |
695 | unsigned long long now; | 695 | unsigned long long now; |
696 | 696 | ||
697 | now = sched_clock(); | 697 | now = sched_clock(); |
698 | #ifdef CONFIG_SMP | 698 | #ifdef CONFIG_SMP |
699 | if (!local) { | 699 | if (!local) { |
700 | /* Compensate for drifting sched_clock */ | 700 | /* Compensate for drifting sched_clock */ |
701 | runqueue_t *this_rq = this_rq(); | 701 | runqueue_t *this_rq = this_rq(); |
702 | now = (now - this_rq->timestamp_last_tick) | 702 | now = (now - this_rq->timestamp_last_tick) |
703 | + rq->timestamp_last_tick; | 703 | + rq->timestamp_last_tick; |
704 | } | 704 | } |
705 | #endif | 705 | #endif |
706 | 706 | ||
707 | recalc_task_prio(p, now); | 707 | recalc_task_prio(p, now); |
708 | 708 | ||
709 | /* | 709 | /* |
710 | * This checks to make sure it's not an uninterruptible task | 710 | * This checks to make sure it's not an uninterruptible task |
711 | * that is now waking up. | 711 | * that is now waking up. |
712 | */ | 712 | */ |
713 | if (!p->activated) { | 713 | if (!p->activated) { |
714 | /* | 714 | /* |
715 | * Tasks which were woken up by interrupts (ie. hw events) | 715 | * Tasks which were woken up by interrupts (ie. hw events) |
716 | * are most likely of interactive nature. So we give them | 716 | * are most likely of interactive nature. So we give them |
717 | * the credit of extending their sleep time to the period | 717 | * the credit of extending their sleep time to the period |
718 | * of time they spend on the runqueue, waiting for execution | 718 | * of time they spend on the runqueue, waiting for execution |
719 | * on a CPU, first time around: | 719 | * on a CPU, first time around: |
720 | */ | 720 | */ |
721 | if (in_interrupt()) | 721 | if (in_interrupt()) |
722 | p->activated = 2; | 722 | p->activated = 2; |
723 | else { | 723 | else { |
724 | /* | 724 | /* |
725 | * Normal first-time wakeups get a credit too for | 725 | * Normal first-time wakeups get a credit too for |
726 | * on-runqueue time, but it will be weighted down: | 726 | * on-runqueue time, but it will be weighted down: |
727 | */ | 727 | */ |
728 | p->activated = 1; | 728 | p->activated = 1; |
729 | } | 729 | } |
730 | } | 730 | } |
731 | p->timestamp = now; | 731 | p->timestamp = now; |
732 | 732 | ||
733 | __activate_task(p, rq); | 733 | __activate_task(p, rq); |
734 | } | 734 | } |
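
When activate_task() runs for a remote runqueue, it re-anchors the locally sampled sched_clock() value on the two per-runqueue tick timestamps so both CPUs agree on "now" despite clock drift. A self-contained sketch of that translation; the struct below is a stand-in, not the kernel's runqueue layout:

#include <stdio.h>

/* Stand-in for the per-runqueue timestamp bookkeeping. */
struct rq_stub {
        unsigned long long timestamp_last_tick;
};

/* Re-express 'now', sampled on the local CPU, in the remote CPU's time base. */
static unsigned long long xlate_now(unsigned long long now,
                                    const struct rq_stub *local,
                                    const struct rq_stub *remote)
{
        return (now - local->timestamp_last_tick) + remote->timestamp_last_tick;
}

int main(void)
{
        struct rq_stub local  = { .timestamp_last_tick = 5000 };
        struct rq_stub remote = { .timestamp_last_tick = 4700 };

        /* 200ns after the local tick maps to 200ns after the remote tick. */
        printf("%llu\n", xlate_now(5200, &local, &remote));     /* 4900 */
        return 0;
}
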
735 | 735 | ||
736 | /* | 736 | /* |
737 | * deactivate_task - remove a task from the runqueue. | 737 | * deactivate_task - remove a task from the runqueue. |
738 | */ | 738 | */ |
739 | static void deactivate_task(struct task_struct *p, runqueue_t *rq) | 739 | static void deactivate_task(struct task_struct *p, runqueue_t *rq) |
740 | { | 740 | { |
741 | rq->nr_running--; | 741 | rq->nr_running--; |
742 | dequeue_task(p, p->array); | 742 | dequeue_task(p, p->array); |
743 | p->array = NULL; | 743 | p->array = NULL; |
744 | } | 744 | } |
745 | 745 | ||
746 | /* | 746 | /* |
747 | * resched_task - mark a task 'to be rescheduled now'. | 747 | * resched_task - mark a task 'to be rescheduled now'. |
748 | * | 748 | * |
749 | * On UP this means the setting of the need_resched flag, on SMP it | 749 | * On UP this means the setting of the need_resched flag, on SMP it |
750 | * might also involve a cross-CPU call to trigger the scheduler on | 750 | * might also involve a cross-CPU call to trigger the scheduler on |
751 | * the target CPU. | 751 | * the target CPU. |
752 | */ | 752 | */ |
753 | #ifdef CONFIG_SMP | 753 | #ifdef CONFIG_SMP |
754 | static void resched_task(task_t *p) | 754 | static void resched_task(task_t *p) |
755 | { | 755 | { |
756 | int need_resched, nrpolling; | 756 | int need_resched, nrpolling; |
757 | 757 | ||
758 | assert_spin_locked(&task_rq(p)->lock); | 758 | assert_spin_locked(&task_rq(p)->lock); |
759 | 759 | ||
760 | /* minimise the chance of sending an interrupt to poll_idle() */ | 760 | /* minimise the chance of sending an interrupt to poll_idle() */ |
761 | nrpolling = test_tsk_thread_flag(p,TIF_POLLING_NRFLAG); | 761 | nrpolling = test_tsk_thread_flag(p,TIF_POLLING_NRFLAG); |
762 | need_resched = test_and_set_tsk_thread_flag(p,TIF_NEED_RESCHED); | 762 | need_resched = test_and_set_tsk_thread_flag(p,TIF_NEED_RESCHED); |
763 | nrpolling |= test_tsk_thread_flag(p,TIF_POLLING_NRFLAG); | 763 | nrpolling |= test_tsk_thread_flag(p,TIF_POLLING_NRFLAG); |
764 | 764 | ||
765 | if (!need_resched && !nrpolling && (task_cpu(p) != smp_processor_id())) | 765 | if (!need_resched && !nrpolling && (task_cpu(p) != smp_processor_id())) |
766 | smp_send_reschedule(task_cpu(p)); | 766 | smp_send_reschedule(task_cpu(p)); |
767 | } | 767 | } |
768 | #else | 768 | #else |
769 | static inline void resched_task(task_t *p) | 769 | static inline void resched_task(task_t *p) |
770 | { | 770 | { |
771 | set_tsk_need_resched(p); | 771 | set_tsk_need_resched(p); |
772 | } | 772 | } |
773 | #endif | 773 | #endif |
774 | 774 | ||
775 | /** | 775 | /** |
776 | * task_curr - is this task currently executing on a CPU? | 776 | * task_curr - is this task currently executing on a CPU? |
777 | * @p: the task in question. | 777 | * @p: the task in question. |
778 | */ | 778 | */ |
779 | inline int task_curr(const task_t *p) | 779 | inline int task_curr(const task_t *p) |
780 | { | 780 | { |
781 | return cpu_curr(task_cpu(p)) == p; | 781 | return cpu_curr(task_cpu(p)) == p; |
782 | } | 782 | } |
783 | 783 | ||
784 | #ifdef CONFIG_SMP | 784 | #ifdef CONFIG_SMP |
785 | enum request_type { | 785 | enum request_type { |
786 | REQ_MOVE_TASK, | 786 | REQ_MOVE_TASK, |
787 | REQ_SET_DOMAIN, | 787 | REQ_SET_DOMAIN, |
788 | }; | 788 | }; |
789 | 789 | ||
790 | typedef struct { | 790 | typedef struct { |
791 | struct list_head list; | 791 | struct list_head list; |
792 | enum request_type type; | 792 | enum request_type type; |
793 | 793 | ||
794 | /* For REQ_MOVE_TASK */ | 794 | /* For REQ_MOVE_TASK */ |
795 | task_t *task; | 795 | task_t *task; |
796 | int dest_cpu; | 796 | int dest_cpu; |
797 | 797 | ||
798 | /* For REQ_SET_DOMAIN */ | 798 | /* For REQ_SET_DOMAIN */ |
799 | struct sched_domain *sd; | 799 | struct sched_domain *sd; |
800 | 800 | ||
801 | struct completion done; | 801 | struct completion done; |
802 | } migration_req_t; | 802 | } migration_req_t; |
803 | 803 | ||
804 | /* | 804 | /* |
805 | * The task's runqueue lock must be held. | 805 | * The task's runqueue lock must be held. |
806 | * Returns true if you have to wait for migration thread. | 806 | * Returns true if you have to wait for migration thread. |
807 | */ | 807 | */ |
808 | static int migrate_task(task_t *p, int dest_cpu, migration_req_t *req) | 808 | static int migrate_task(task_t *p, int dest_cpu, migration_req_t *req) |
809 | { | 809 | { |
810 | runqueue_t *rq = task_rq(p); | 810 | runqueue_t *rq = task_rq(p); |
811 | 811 | ||
812 | /* | 812 | /* |
813 | * If the task is not on a runqueue (and not running), then | 813 | * If the task is not on a runqueue (and not running), then |
814 | * it is sufficient to simply update the task's cpu field. | 814 | * it is sufficient to simply update the task's cpu field. |
815 | */ | 815 | */ |
816 | if (!p->array && !task_running(rq, p)) { | 816 | if (!p->array && !task_running(rq, p)) { |
817 | set_task_cpu(p, dest_cpu); | 817 | set_task_cpu(p, dest_cpu); |
818 | return 0; | 818 | return 0; |
819 | } | 819 | } |
820 | 820 | ||
821 | init_completion(&req->done); | 821 | init_completion(&req->done); |
822 | req->type = REQ_MOVE_TASK; | 822 | req->type = REQ_MOVE_TASK; |
823 | req->task = p; | 823 | req->task = p; |
824 | req->dest_cpu = dest_cpu; | 824 | req->dest_cpu = dest_cpu; |
825 | list_add(&req->list, &rq->migration_queue); | 825 | list_add(&req->list, &rq->migration_queue); |
826 | return 1; | 826 | return 1; |
827 | } | 827 | } |
828 | 828 | ||
829 | /* | 829 | /* |
830 | * wait_task_inactive - wait for a thread to unschedule. | 830 | * wait_task_inactive - wait for a thread to unschedule. |
831 | * | 831 | * |
832 | * The caller must ensure that the task *will* unschedule sometime soon, | 832 | * The caller must ensure that the task *will* unschedule sometime soon, |
833 | * else this function might spin for a *long* time. This function can't | 833 | * else this function might spin for a *long* time. This function can't |
834 | * be called with interrupts off, or it may introduce deadlock with | 834 | * be called with interrupts off, or it may introduce deadlock with |
835 | * smp_call_function() if an IPI is sent by the same process we are | 835 | * smp_call_function() if an IPI is sent by the same process we are |
836 | * waiting to become inactive. | 836 | * waiting to become inactive. |
837 | */ | 837 | */ |
838 | void wait_task_inactive(task_t * p) | 838 | void wait_task_inactive(task_t * p) |
839 | { | 839 | { |
840 | unsigned long flags; | 840 | unsigned long flags; |
841 | runqueue_t *rq; | 841 | runqueue_t *rq; |
842 | int preempted; | 842 | int preempted; |
843 | 843 | ||
844 | repeat: | 844 | repeat: |
845 | rq = task_rq_lock(p, &flags); | 845 | rq = task_rq_lock(p, &flags); |
846 | /* Must be off runqueue entirely, not preempted. */ | 846 | /* Must be off runqueue entirely, not preempted. */ |
847 | if (unlikely(p->array || task_running(rq, p))) { | 847 | if (unlikely(p->array || task_running(rq, p))) { |
848 | /* If it's preempted, we yield. It could be a while. */ | 848 | /* If it's preempted, we yield. It could be a while. */ |
849 | preempted = !task_running(rq, p); | 849 | preempted = !task_running(rq, p); |
850 | task_rq_unlock(rq, &flags); | 850 | task_rq_unlock(rq, &flags); |
851 | cpu_relax(); | 851 | cpu_relax(); |
852 | if (preempted) | 852 | if (preempted) |
853 | yield(); | 853 | yield(); |
854 | goto repeat; | 854 | goto repeat; |
855 | } | 855 | } |
856 | task_rq_unlock(rq, &flags); | 856 | task_rq_unlock(rq, &flags); |
857 | } | 857 | } |
858 | 858 | ||
859 | /*** | 859 | /*** |
860 | * kick_process - kick a running thread to enter/exit the kernel | 860 | * kick_process - kick a running thread to enter/exit the kernel |
861 | * @p: the to-be-kicked thread | 861 | * @p: the to-be-kicked thread |
862 | * | 862 | * |
863 | * Cause a process which is running on another CPU to enter | 863 | * Cause a process which is running on another CPU to enter |
864 | * kernel-mode, without any delay. (to get signals handled.) | 864 | * kernel-mode, without any delay. (to get signals handled.) |
865 | * | 865 | * |
866 | * NOTE: this function doesn't have to take the runqueue lock, | 866 | * NOTE: this function doesn't have to take the runqueue lock, |
867 | * because all it wants to ensure is that the remote task enters | 867 | * because all it wants to ensure is that the remote task enters |
868 | * the kernel. If the IPI races and the task has been migrated | 868 | * the kernel. If the IPI races and the task has been migrated |
869 | * to another CPU then no harm is done and the purpose has been | 869 | * to another CPU then no harm is done and the purpose has been |
870 | * achieved as well. | 870 | * achieved as well. |
871 | */ | 871 | */ |
872 | void kick_process(task_t *p) | 872 | void kick_process(task_t *p) |
873 | { | 873 | { |
874 | int cpu; | 874 | int cpu; |
875 | 875 | ||
876 | preempt_disable(); | 876 | preempt_disable(); |
877 | cpu = task_cpu(p); | 877 | cpu = task_cpu(p); |
878 | if ((cpu != smp_processor_id()) && task_curr(p)) | 878 | if ((cpu != smp_processor_id()) && task_curr(p)) |
879 | smp_send_reschedule(cpu); | 879 | smp_send_reschedule(cpu); |
880 | preempt_enable(); | 880 | preempt_enable(); |
881 | } | 881 | } |
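
kick_process() shows the pattern this whole patch is policing: smp_processor_id() is only meaningful while the caller cannot be migrated, so it sits inside a preempt_disable()/preempt_enable() pair. A grossly simplified user-space model of the debug check (the real debug_smp_processor_id() also tolerates irqs-off and single-CPU affinity; everything here is illustrative):

#include <assert.h>
#include <stdio.h>

/* Toy model only: real preemption state is per-CPU and per-thread. */
static int preempt_count;
static int fake_cpu = 3;

static void preempt_disable(void) { preempt_count++; }
static void preempt_enable(void)  { preempt_count--; }

/* Mimic the debug variant: reading the CPU id while preemptible is a bug,
 * because the caller could be migrated right after the read. */
static int debug_smp_processor_id(void)
{
        assert(preempt_count > 0);
        return fake_cpu;
}

int main(void)
{
        preempt_disable();
        printf("running on cpu %d\n", debug_smp_processor_id());
        preempt_enable();
        return 0;
}
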
882 | 882 | ||
883 | /* | 883 | /* |
884 | * Return a low guess at the load of a migration-source cpu. | 884 | * Return a low guess at the load of a migration-source cpu. |
885 | * | 885 | * |
886 | * We want to under-estimate the load of migration sources, to | 886 | * We want to under-estimate the load of migration sources, to |
887 | * balance conservatively. | 887 | * balance conservatively. |
888 | */ | 888 | */ |
889 | static inline unsigned long source_load(int cpu) | 889 | static inline unsigned long source_load(int cpu) |
890 | { | 890 | { |
891 | runqueue_t *rq = cpu_rq(cpu); | 891 | runqueue_t *rq = cpu_rq(cpu); |
892 | unsigned long load_now = rq->nr_running * SCHED_LOAD_SCALE; | 892 | unsigned long load_now = rq->nr_running * SCHED_LOAD_SCALE; |
893 | 893 | ||
894 | return min(rq->cpu_load, load_now); | 894 | return min(rq->cpu_load, load_now); |
895 | } | 895 | } |
896 | 896 | ||
897 | /* | 897 | /* |
898 | * Return a high guess at the load of a migration-target cpu | 898 | * Return a high guess at the load of a migration-target cpu |
899 | */ | 899 | */ |
900 | static inline unsigned long target_load(int cpu) | 900 | static inline unsigned long target_load(int cpu) |
901 | { | 901 | { |
902 | runqueue_t *rq = cpu_rq(cpu); | 902 | runqueue_t *rq = cpu_rq(cpu); |
903 | unsigned long load_now = rq->nr_running * SCHED_LOAD_SCALE; | 903 | unsigned long load_now = rq->nr_running * SCHED_LOAD_SCALE; |
904 | 904 | ||
905 | return max(rq->cpu_load, load_now); | 905 | return max(rq->cpu_load, load_now); |
906 | } | 906 | } |
907 | 907 | ||
908 | #endif | 908 | #endif |
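
source_load() and target_load() deliberately bias their guesses in opposite directions: a CPU we might pull from is estimated low, a CPU we might push to is estimated high, which keeps the balancer conservative. The same min/max trick in isolation; SCHED_LOAD_SCALE here is an illustrative fixed-point scale:

#include <stdio.h>

#define SCHED_LOAD_SCALE 128UL          /* illustrative fixed-point scale */

static unsigned long min_ul(unsigned long a, unsigned long b) { return a < b ? a : b; }
static unsigned long max_ul(unsigned long a, unsigned long b) { return a > b ? a : b; }

/* Low guess for a CPU we might pull from... */
static unsigned long src_load(unsigned long cpu_load, unsigned long nr_running)
{
        return min_ul(cpu_load, nr_running * SCHED_LOAD_SCALE);
}

/* ...high guess for a CPU we might push to. */
static unsigned long tgt_load(unsigned long cpu_load, unsigned long nr_running)
{
        return max_ul(cpu_load, nr_running * SCHED_LOAD_SCALE);
}

int main(void)
{
        printf("%lu %lu\n", src_load(300, 2), tgt_load(300, 2));        /* 256 300 */
        return 0;
}
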
909 | 909 | ||
910 | /* | 910 | /* |
911 | * wake_idle() will wake a task on an idle cpu if task->cpu is | 911 | * wake_idle() will wake a task on an idle cpu if task->cpu is |
912 | * not idle and an idle cpu is available. The span of cpus to | 912 | * not idle and an idle cpu is available. The span of cpus to |
913 | * search starts with cpus closest then further out as needed, | 913 | * search starts with cpus closest then further out as needed, |
914 | * so we always favor a closer, idle cpu. | 914 | * so we always favor a closer, idle cpu. |
915 | * | 915 | * |
916 | * Returns the CPU we should wake onto. | 916 | * Returns the CPU we should wake onto. |
917 | */ | 917 | */ |
918 | #if defined(ARCH_HAS_SCHED_WAKE_IDLE) | 918 | #if defined(ARCH_HAS_SCHED_WAKE_IDLE) |
919 | static int wake_idle(int cpu, task_t *p) | 919 | static int wake_idle(int cpu, task_t *p) |
920 | { | 920 | { |
921 | cpumask_t tmp; | 921 | cpumask_t tmp; |
922 | struct sched_domain *sd; | 922 | struct sched_domain *sd; |
923 | int i; | 923 | int i; |
924 | 924 | ||
925 | if (idle_cpu(cpu)) | 925 | if (idle_cpu(cpu)) |
926 | return cpu; | 926 | return cpu; |
927 | 927 | ||
928 | for_each_domain(cpu, sd) { | 928 | for_each_domain(cpu, sd) { |
929 | if (sd->flags & SD_WAKE_IDLE) { | 929 | if (sd->flags & SD_WAKE_IDLE) { |
930 | cpus_and(tmp, sd->span, cpu_online_map); | 930 | cpus_and(tmp, sd->span, cpu_online_map); |
931 | cpus_and(tmp, tmp, p->cpus_allowed); | 931 | cpus_and(tmp, tmp, p->cpus_allowed); |
932 | for_each_cpu_mask(i, tmp) { | 932 | for_each_cpu_mask(i, tmp) { |
933 | if (idle_cpu(i)) | 933 | if (idle_cpu(i)) |
934 | return i; | 934 | return i; |
935 | } | 935 | } |
936 | } | 936 | } |
937 | else break; | 937 | else break; |
938 | } | 938 | } |
939 | return cpu; | 939 | return cpu; |
940 | } | 940 | } |
941 | #else | 941 | #else |
942 | static inline int wake_idle(int cpu, task_t *p) | 942 | static inline int wake_idle(int cpu, task_t *p) |
943 | { | 943 | { |
944 | return cpu; | 944 | return cpu; |
945 | } | 945 | } |
946 | #endif | 946 | #endif |
947 | 947 | ||
948 | /*** | 948 | /*** |
949 | * try_to_wake_up - wake up a thread | 949 | * try_to_wake_up - wake up a thread |
950 | * @p: the to-be-woken-up thread | 950 | * @p: the to-be-woken-up thread |
951 | * @state: the mask of task states that can be woken | 951 | * @state: the mask of task states that can be woken |
952 | * @sync: do a synchronous wakeup? | 952 | * @sync: do a synchronous wakeup? |
953 | * | 953 | * |
954 | * Put it on the run-queue if it's not already there. The "current" | 954 | * Put it on the run-queue if it's not already there. The "current" |
955 | * thread is always on the run-queue (except when the actual | 955 | * thread is always on the run-queue (except when the actual |
956 | * re-schedule is in progress), and as such you're allowed to do | 956 | * re-schedule is in progress), and as such you're allowed to do |
957 | * the simpler "current->state = TASK_RUNNING" to mark yourself | 957 | * the simpler "current->state = TASK_RUNNING" to mark yourself |
958 | * runnable without the overhead of this. | 958 | * runnable without the overhead of this. |
959 | * | 959 | * |
960 | * returns failure only if the task is already active. | 960 | * returns failure only if the task is already active. |
961 | */ | 961 | */ |
962 | static int try_to_wake_up(task_t * p, unsigned int state, int sync) | 962 | static int try_to_wake_up(task_t * p, unsigned int state, int sync) |
963 | { | 963 | { |
964 | int cpu, this_cpu, success = 0; | 964 | int cpu, this_cpu, success = 0; |
965 | unsigned long flags; | 965 | unsigned long flags; |
966 | long old_state; | 966 | long old_state; |
967 | runqueue_t *rq; | 967 | runqueue_t *rq; |
968 | #ifdef CONFIG_SMP | 968 | #ifdef CONFIG_SMP |
969 | unsigned long load, this_load; | 969 | unsigned long load, this_load; |
970 | struct sched_domain *sd; | 970 | struct sched_domain *sd; |
971 | int new_cpu; | 971 | int new_cpu; |
972 | #endif | 972 | #endif |
973 | 973 | ||
974 | rq = task_rq_lock(p, &flags); | 974 | rq = task_rq_lock(p, &flags); |
975 | old_state = p->state; | 975 | old_state = p->state; |
976 | if (!(old_state & state)) | 976 | if (!(old_state & state)) |
977 | goto out; | 977 | goto out; |
978 | 978 | ||
979 | if (p->array) | 979 | if (p->array) |
980 | goto out_running; | 980 | goto out_running; |
981 | 981 | ||
982 | cpu = task_cpu(p); | 982 | cpu = task_cpu(p); |
983 | this_cpu = smp_processor_id(); | 983 | this_cpu = smp_processor_id(); |
984 | 984 | ||
985 | #ifdef CONFIG_SMP | 985 | #ifdef CONFIG_SMP |
986 | if (unlikely(task_running(rq, p))) | 986 | if (unlikely(task_running(rq, p))) |
987 | goto out_activate; | 987 | goto out_activate; |
988 | 988 | ||
989 | #ifdef CONFIG_SCHEDSTATS | 989 | #ifdef CONFIG_SCHEDSTATS |
990 | schedstat_inc(rq, ttwu_cnt); | 990 | schedstat_inc(rq, ttwu_cnt); |
991 | if (cpu == this_cpu) { | 991 | if (cpu == this_cpu) { |
992 | schedstat_inc(rq, ttwu_local); | 992 | schedstat_inc(rq, ttwu_local); |
993 | } else { | 993 | } else { |
994 | for_each_domain(this_cpu, sd) { | 994 | for_each_domain(this_cpu, sd) { |
995 | if (cpu_isset(cpu, sd->span)) { | 995 | if (cpu_isset(cpu, sd->span)) { |
996 | schedstat_inc(sd, ttwu_wake_remote); | 996 | schedstat_inc(sd, ttwu_wake_remote); |
997 | break; | 997 | break; |
998 | } | 998 | } |
999 | } | 999 | } |
1000 | } | 1000 | } |
1001 | #endif | 1001 | #endif |
1002 | 1002 | ||
1003 | new_cpu = cpu; | 1003 | new_cpu = cpu; |
1004 | if (cpu == this_cpu || unlikely(!cpu_isset(this_cpu, p->cpus_allowed))) | 1004 | if (cpu == this_cpu || unlikely(!cpu_isset(this_cpu, p->cpus_allowed))) |
1005 | goto out_set_cpu; | 1005 | goto out_set_cpu; |
1006 | 1006 | ||
1007 | load = source_load(cpu); | 1007 | load = source_load(cpu); |
1008 | this_load = target_load(this_cpu); | 1008 | this_load = target_load(this_cpu); |
1009 | 1009 | ||
1010 | /* | 1010 | /* |
1011 | * If sync wakeup then subtract the (maximum possible) effect of | 1011 | * If sync wakeup then subtract the (maximum possible) effect of |
1012 | * the currently running task from the load of the current CPU: | 1012 | * the currently running task from the load of the current CPU: |
1013 | */ | 1013 | */ |
1014 | if (sync) | 1014 | if (sync) |
1015 | this_load -= SCHED_LOAD_SCALE; | 1015 | this_load -= SCHED_LOAD_SCALE; |
1016 | 1016 | ||
1017 | /* Don't pull the task off an idle CPU to a busy one */ | 1017 | /* Don't pull the task off an idle CPU to a busy one */ |
1018 | if (load < SCHED_LOAD_SCALE/2 && this_load > SCHED_LOAD_SCALE/2) | 1018 | if (load < SCHED_LOAD_SCALE/2 && this_load > SCHED_LOAD_SCALE/2) |
1019 | goto out_set_cpu; | 1019 | goto out_set_cpu; |
1020 | 1020 | ||
1021 | new_cpu = this_cpu; /* Wake to this CPU if we can */ | 1021 | new_cpu = this_cpu; /* Wake to this CPU if we can */ |
1022 | 1022 | ||
1023 | /* | 1023 | /* |
1024 | * Scan domains for affine wakeup and passive balancing | 1024 | * Scan domains for affine wakeup and passive balancing |
1025 | * possibilities. | 1025 | * possibilities. |
1026 | */ | 1026 | */ |
1027 | for_each_domain(this_cpu, sd) { | 1027 | for_each_domain(this_cpu, sd) { |
1028 | unsigned int imbalance; | 1028 | unsigned int imbalance; |
1029 | /* | 1029 | /* |
1030 | * Start passive balancing when half the imbalance_pct | 1030 | * Start passive balancing when half the imbalance_pct |
1031 | * limit is reached. | 1031 | * limit is reached. |
1032 | */ | 1032 | */ |
1033 | imbalance = sd->imbalance_pct + (sd->imbalance_pct - 100) / 2; | 1033 | imbalance = sd->imbalance_pct + (sd->imbalance_pct - 100) / 2; |
1034 | 1034 | ||
1035 | if ((sd->flags & SD_WAKE_AFFINE) && | 1035 | if ((sd->flags & SD_WAKE_AFFINE) && |
1036 | !task_hot(p, rq->timestamp_last_tick, sd)) { | 1036 | !task_hot(p, rq->timestamp_last_tick, sd)) { |
1037 | /* | 1037 | /* |
1038 | * This domain has SD_WAKE_AFFINE and p is cache cold | 1038 | * This domain has SD_WAKE_AFFINE and p is cache cold |
1039 | * in this domain. | 1039 | * in this domain. |
1040 | */ | 1040 | */ |
1041 | if (cpu_isset(cpu, sd->span)) { | 1041 | if (cpu_isset(cpu, sd->span)) { |
1042 | schedstat_inc(sd, ttwu_move_affine); | 1042 | schedstat_inc(sd, ttwu_move_affine); |
1043 | goto out_set_cpu; | 1043 | goto out_set_cpu; |
1044 | } | 1044 | } |
1045 | } else if ((sd->flags & SD_WAKE_BALANCE) && | 1045 | } else if ((sd->flags & SD_WAKE_BALANCE) && |
1046 | imbalance*this_load <= 100*load) { | 1046 | imbalance*this_load <= 100*load) { |
1047 | /* | 1047 | /* |
1048 | * This domain has SD_WAKE_BALANCE and there is | 1048 | * This domain has SD_WAKE_BALANCE and there is |
1049 | * an imbalance. | 1049 | * an imbalance. |
1050 | */ | 1050 | */ |
1051 | if (cpu_isset(cpu, sd->span)) { | 1051 | if (cpu_isset(cpu, sd->span)) { |
1052 | schedstat_inc(sd, ttwu_move_balance); | 1052 | schedstat_inc(sd, ttwu_move_balance); |
1053 | goto out_set_cpu; | 1053 | goto out_set_cpu; |
1054 | } | 1054 | } |
1055 | } | 1055 | } |
1056 | } | 1056 | } |
1057 | 1057 | ||
1058 | new_cpu = cpu; /* Could not wake to this_cpu. Wake to cpu instead */ | 1058 | new_cpu = cpu; /* Could not wake to this_cpu. Wake to cpu instead */ |
1059 | out_set_cpu: | 1059 | out_set_cpu: |
1060 | new_cpu = wake_idle(new_cpu, p); | 1060 | new_cpu = wake_idle(new_cpu, p); |
1061 | if (new_cpu != cpu) { | 1061 | if (new_cpu != cpu) { |
1062 | set_task_cpu(p, new_cpu); | 1062 | set_task_cpu(p, new_cpu); |
1063 | task_rq_unlock(rq, &flags); | 1063 | task_rq_unlock(rq, &flags); |
1064 | /* might preempt at this point */ | 1064 | /* might preempt at this point */ |
1065 | rq = task_rq_lock(p, &flags); | 1065 | rq = task_rq_lock(p, &flags); |
1066 | old_state = p->state; | 1066 | old_state = p->state; |
1067 | if (!(old_state & state)) | 1067 | if (!(old_state & state)) |
1068 | goto out; | 1068 | goto out; |
1069 | if (p->array) | 1069 | if (p->array) |
1070 | goto out_running; | 1070 | goto out_running; |
1071 | 1071 | ||
1072 | this_cpu = smp_processor_id(); | 1072 | this_cpu = smp_processor_id(); |
1073 | cpu = task_cpu(p); | 1073 | cpu = task_cpu(p); |
1074 | } | 1074 | } |
1075 | 1075 | ||
1076 | out_activate: | 1076 | out_activate: |
1077 | #endif /* CONFIG_SMP */ | 1077 | #endif /* CONFIG_SMP */ |
1078 | if (old_state == TASK_UNINTERRUPTIBLE) { | 1078 | if (old_state == TASK_UNINTERRUPTIBLE) { |
1079 | rq->nr_uninterruptible--; | 1079 | rq->nr_uninterruptible--; |
1080 | /* | 1080 | /* |
1081 | * Tasks on involuntary sleep don't earn | 1081 | * Tasks on involuntary sleep don't earn |
1082 | * sleep_avg beyond just interactive state. | 1082 | * sleep_avg beyond just interactive state. |
1083 | */ | 1083 | */ |
1084 | p->activated = -1; | 1084 | p->activated = -1; |
1085 | } | 1085 | } |
1086 | 1086 | ||
1087 | /* | 1087 | /* |
1088 | * Sync wakeups (i.e. those types of wakeups where the waker | 1088 | * Sync wakeups (i.e. those types of wakeups where the waker |
1089 | * has indicated that it will leave the CPU in short order) | 1089 | * has indicated that it will leave the CPU in short order) |
1090 | * don't trigger a preemption, if the woken up task will run on | 1090 | * don't trigger a preemption, if the woken up task will run on |
1091 | * this cpu. (in this case the 'I will reschedule' promise of | 1091 | * this cpu. (in this case the 'I will reschedule' promise of |
1092 | * the waker guarantees that the freshly woken up task is going | 1092 | * the waker guarantees that the freshly woken up task is going |
1093 | * to be considered on this CPU.) | 1093 | * to be considered on this CPU.) |
1094 | */ | 1094 | */ |
1095 | activate_task(p, rq, cpu == this_cpu); | 1095 | activate_task(p, rq, cpu == this_cpu); |
1096 | if (!sync || cpu != this_cpu) { | 1096 | if (!sync || cpu != this_cpu) { |
1097 | if (TASK_PREEMPTS_CURR(p, rq)) | 1097 | if (TASK_PREEMPTS_CURR(p, rq)) |
1098 | resched_task(rq->curr); | 1098 | resched_task(rq->curr); |
1099 | } | 1099 | } |
1100 | success = 1; | 1100 | success = 1; |
1101 | 1101 | ||
1102 | out_running: | 1102 | out_running: |
1103 | p->state = TASK_RUNNING; | 1103 | p->state = TASK_RUNNING; |
1104 | out: | 1104 | out: |
1105 | task_rq_unlock(rq, &flags); | 1105 | task_rq_unlock(rq, &flags); |
1106 | 1106 | ||
1107 | return success; | 1107 | return success; |
1108 | } | 1108 | } |
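
The SD_WAKE_BALANCE test inside try_to_wake_up() is an integer percentage comparison: the waking CPU's load, scaled by imbalance_pct plus half of its excess over 100, must not exceed the previous CPU's load. A sketch of that predicate; imbalance_pct=125 below is an assumed example value, not one read from any sched_domain setup:

#include <stdio.h>

/* Pull the wakeup to the waking CPU only if the previous CPU is loaded
 * enough; all arithmetic stays in integers, as in the kernel. */
static int wake_balance_ok(unsigned long this_load, unsigned long prev_load,
                           unsigned int imbalance_pct)
{
        unsigned int imbalance = imbalance_pct + (imbalance_pct - 100) / 2;

        return imbalance * this_load <= 100 * prev_load;
}

int main(void)
{
        /* imbalance_pct=125 gives an effective threshold of 137%. */
        printf("%d\n", wake_balance_ok(100, 140, 125)); /* 1: imbalanced enough */
        printf("%d\n", wake_balance_ok(100, 130, 125)); /* 0: not imbalanced enough */
        return 0;
}
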
1109 | 1109 | ||
1110 | int fastcall wake_up_process(task_t * p) | 1110 | int fastcall wake_up_process(task_t * p) |
1111 | { | 1111 | { |
1112 | return try_to_wake_up(p, TASK_STOPPED | TASK_TRACED | | 1112 | return try_to_wake_up(p, TASK_STOPPED | TASK_TRACED | |
1113 | TASK_INTERRUPTIBLE | TASK_UNINTERRUPTIBLE, 0); | 1113 | TASK_INTERRUPTIBLE | TASK_UNINTERRUPTIBLE, 0); |
1114 | } | 1114 | } |
1115 | 1115 | ||
1116 | EXPORT_SYMBOL(wake_up_process); | 1116 | EXPORT_SYMBOL(wake_up_process); |
1117 | 1117 | ||
1118 | int fastcall wake_up_state(task_t *p, unsigned int state) | 1118 | int fastcall wake_up_state(task_t *p, unsigned int state) |
1119 | { | 1119 | { |
1120 | return try_to_wake_up(p, state, 0); | 1120 | return try_to_wake_up(p, state, 0); |
1121 | } | 1121 | } |
1122 | 1122 | ||
1123 | #ifdef CONFIG_SMP | 1123 | #ifdef CONFIG_SMP |
1124 | static int find_idlest_cpu(struct task_struct *p, int this_cpu, | 1124 | static int find_idlest_cpu(struct task_struct *p, int this_cpu, |
1125 | struct sched_domain *sd); | 1125 | struct sched_domain *sd); |
1126 | #endif | 1126 | #endif |
1127 | 1127 | ||
1128 | /* | 1128 | /* |
1129 | * Perform scheduler related setup for a newly forked process p. | 1129 | * Perform scheduler related setup for a newly forked process p. |
1130 | * p is forked by current. | 1130 | * p is forked by current. |
1131 | */ | 1131 | */ |
1132 | void fastcall sched_fork(task_t *p) | 1132 | void fastcall sched_fork(task_t *p) |
1133 | { | 1133 | { |
1134 | /* | 1134 | /* |
1135 | * We mark the process as running here, but have not actually | 1135 | * We mark the process as running here, but have not actually |
1136 | * inserted it onto the runqueue yet. This guarantees that | 1136 | * inserted it onto the runqueue yet. This guarantees that |
1137 | * nobody will actually run it, and a signal or other external | 1137 | * nobody will actually run it, and a signal or other external |
1138 | * event cannot wake it up and insert it on the runqueue either. | 1138 | * event cannot wake it up and insert it on the runqueue either. |
1139 | */ | 1139 | */ |
1140 | p->state = TASK_RUNNING; | 1140 | p->state = TASK_RUNNING; |
1141 | INIT_LIST_HEAD(&p->run_list); | 1141 | INIT_LIST_HEAD(&p->run_list); |
1142 | p->array = NULL; | 1142 | p->array = NULL; |
1143 | spin_lock_init(&p->switch_lock); | 1143 | spin_lock_init(&p->switch_lock); |
1144 | #ifdef CONFIG_SCHEDSTATS | 1144 | #ifdef CONFIG_SCHEDSTATS |
1145 | memset(&p->sched_info, 0, sizeof(p->sched_info)); | 1145 | memset(&p->sched_info, 0, sizeof(p->sched_info)); |
1146 | #endif | 1146 | #endif |
1147 | #ifdef CONFIG_PREEMPT | 1147 | #ifdef CONFIG_PREEMPT |
1148 | /* | 1148 | /* |
1149 | * During context-switch we hold precisely one spinlock, which | 1149 | * During context-switch we hold precisely one spinlock, which |
1150 | * schedule_tail drops. (in the common case it's this_rq()->lock, | 1150 | * schedule_tail drops. (in the common case it's this_rq()->lock, |
1151 | * but it also can be p->switch_lock.) So we compensate with a count | 1151 | * but it also can be p->switch_lock.) So we compensate with a count |
1152 | * of 1. Also, we want to start with kernel preemption disabled. | 1152 | * of 1. Also, we want to start with kernel preemption disabled. |
1153 | */ | 1153 | */ |
1154 | p->thread_info->preempt_count = 1; | 1154 | p->thread_info->preempt_count = 1; |
1155 | #endif | 1155 | #endif |
1156 | /* | 1156 | /* |
1157 | * Share the timeslice between parent and child, thus the | 1157 | * Share the timeslice between parent and child, thus the |
1158 | * total amount of pending timeslices in the system doesn't change, | 1158 | * total amount of pending timeslices in the system doesn't change, |
1159 | * resulting in more scheduling fairness. | 1159 | * resulting in more scheduling fairness. |
1160 | */ | 1160 | */ |
1161 | local_irq_disable(); | 1161 | local_irq_disable(); |
1162 | p->time_slice = (current->time_slice + 1) >> 1; | 1162 | p->time_slice = (current->time_slice + 1) >> 1; |
1163 | /* | 1163 | /* |
1164 | * The remainder of the first timeslice might be recovered by | 1164 | * The remainder of the first timeslice might be recovered by |
1165 | * the parent if the child exits early enough. | 1165 | * the parent if the child exits early enough. |
1166 | */ | 1166 | */ |
1167 | p->first_time_slice = 1; | 1167 | p->first_time_slice = 1; |
1168 | current->time_slice >>= 1; | 1168 | current->time_slice >>= 1; |
1169 | p->timestamp = sched_clock(); | 1169 | p->timestamp = sched_clock(); |
1170 | if (unlikely(!current->time_slice)) { | 1170 | if (unlikely(!current->time_slice)) { |
1171 | /* | 1171 | /* |
1172 | * This case is rare, it happens when the parent has only | 1172 | * This case is rare, it happens when the parent has only |
1173 | * a single jiffy left from its timeslice. Taking the | 1173 | * a single jiffy left from its timeslice. Taking the |
1174 | * runqueue lock is not a problem. | 1174 | * runqueue lock is not a problem. |
1175 | */ | 1175 | */ |
1176 | current->time_slice = 1; | 1176 | current->time_slice = 1; |
1177 | preempt_disable(); | 1177 | preempt_disable(); |
1178 | scheduler_tick(); | 1178 | scheduler_tick(); |
1179 | local_irq_enable(); | 1179 | local_irq_enable(); |
1180 | preempt_enable(); | 1180 | preempt_enable(); |
1181 | } else | 1181 | } else |
1182 | local_irq_enable(); | 1182 | local_irq_enable(); |
1183 | } | 1183 | } |
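
sched_fork() splits the parent's remaining timeslice rather than minting new CPU time: the child takes the rounded-up half and the parent keeps the rounded-down half. The arithmetic in isolation:

#include <stdio.h>

/* Child gets the rounded-up half, parent keeps the rounded-down half,
 * so the total number of pending ticks is unchanged. */
static void split_timeslice(unsigned int *parent, unsigned int *child)
{
        *child = (*parent + 1) >> 1;
        *parent >>= 1;
}

int main(void)
{
        unsigned int parent = 7, child;

        split_timeslice(&parent, &child);
        printf("parent=%u child=%u\n", parent, child);  /* parent=3 child=4 */
        return 0;
}
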
1184 | 1184 | ||
1185 | /* | 1185 | /* |
1186 | * wake_up_new_task - wake up a newly created task for the first time. | 1186 | * wake_up_new_task - wake up a newly created task for the first time. |
1187 | * | 1187 | * |
1188 | * This function will do some initial scheduler statistics housekeeping | 1188 | * This function will do some initial scheduler statistics housekeeping |
1189 | * that must be done for every newly created context, then puts the task | 1189 | * that must be done for every newly created context, then puts the task |
1190 | * on the runqueue and wakes it. | 1190 | * on the runqueue and wakes it. |
1191 | */ | 1191 | */ |
1192 | void fastcall wake_up_new_task(task_t * p, unsigned long clone_flags) | 1192 | void fastcall wake_up_new_task(task_t * p, unsigned long clone_flags) |
1193 | { | 1193 | { |
1194 | unsigned long flags; | 1194 | unsigned long flags; |
1195 | int this_cpu, cpu; | 1195 | int this_cpu, cpu; |
1196 | runqueue_t *rq, *this_rq; | 1196 | runqueue_t *rq, *this_rq; |
1197 | 1197 | ||
1198 | rq = task_rq_lock(p, &flags); | 1198 | rq = task_rq_lock(p, &flags); |
1199 | cpu = task_cpu(p); | 1199 | cpu = task_cpu(p); |
1200 | this_cpu = smp_processor_id(); | 1200 | this_cpu = smp_processor_id(); |
1201 | 1201 | ||
1202 | BUG_ON(p->state != TASK_RUNNING); | 1202 | BUG_ON(p->state != TASK_RUNNING); |
1203 | 1203 | ||
1204 | /* | 1204 | /* |
1205 | * We decrease the sleep average of forking parents | 1205 | * We decrease the sleep average of forking parents |
1206 | * and children as well, to keep max-interactive tasks | 1206 | * and children as well, to keep max-interactive tasks |
1207 | * from forking tasks that are max-interactive. The parent | 1207 | * from forking tasks that are max-interactive. The parent |
1208 | * (current) is done further down, under its lock. | 1208 | * (current) is done further down, under its lock. |
1209 | */ | 1209 | */ |
1210 | p->sleep_avg = JIFFIES_TO_NS(CURRENT_BONUS(p) * | 1210 | p->sleep_avg = JIFFIES_TO_NS(CURRENT_BONUS(p) * |
1211 | CHILD_PENALTY / 100 * MAX_SLEEP_AVG / MAX_BONUS); | 1211 | CHILD_PENALTY / 100 * MAX_SLEEP_AVG / MAX_BONUS); |
1212 | 1212 | ||
1213 | p->prio = effective_prio(p); | 1213 | p->prio = effective_prio(p); |
1214 | 1214 | ||
1215 | if (likely(cpu == this_cpu)) { | 1215 | if (likely(cpu == this_cpu)) { |
1216 | if (!(clone_flags & CLONE_VM)) { | 1216 | if (!(clone_flags & CLONE_VM)) { |
1217 | /* | 1217 | /* |
1218 | * The VM isn't cloned, so we're in a good position to | 1218 | * The VM isn't cloned, so we're in a good position to |
1219 | * do child-runs-first in anticipation of an exec. This | 1219 | * do child-runs-first in anticipation of an exec. This |
1220 | * usually avoids a lot of COW overhead. | 1220 | * usually avoids a lot of COW overhead. |
1221 | */ | 1221 | */ |
1222 | if (unlikely(!current->array)) | 1222 | if (unlikely(!current->array)) |
1223 | __activate_task(p, rq); | 1223 | __activate_task(p, rq); |
1224 | else { | 1224 | else { |
1225 | p->prio = current->prio; | 1225 | p->prio = current->prio; |
1226 | list_add_tail(&p->run_list, ¤t->run_list); | 1226 | list_add_tail(&p->run_list, ¤t->run_list); |
1227 | p->array = current->array; | 1227 | p->array = current->array; |
1228 | p->array->nr_active++; | 1228 | p->array->nr_active++; |
1229 | rq->nr_running++; | 1229 | rq->nr_running++; |
1230 | } | 1230 | } |
1231 | set_need_resched(); | 1231 | set_need_resched(); |
1232 | } else | 1232 | } else |
1233 | /* Run child last */ | 1233 | /* Run child last */ |
1234 | __activate_task(p, rq); | 1234 | __activate_task(p, rq); |
1235 | /* | 1235 | /* |
1236 | * We skip the following code due to cpu == this_cpu | 1236 | * We skip the following code due to cpu == this_cpu |
1237 | * | 1237 | * |
1238 | * task_rq_unlock(rq, &flags); | 1238 | * task_rq_unlock(rq, &flags); |
1239 | * this_rq = task_rq_lock(current, &flags); | 1239 | * this_rq = task_rq_lock(current, &flags); |
1240 | */ | 1240 | */ |
1241 | this_rq = rq; | 1241 | this_rq = rq; |
1242 | } else { | 1242 | } else { |
1243 | this_rq = cpu_rq(this_cpu); | 1243 | this_rq = cpu_rq(this_cpu); |
1244 | 1244 | ||
1245 | /* | 1245 | /* |
1246 | * Not the local CPU - must adjust timestamp. This should | 1246 | * Not the local CPU - must adjust timestamp. This should |
1247 | * get optimised away in the !CONFIG_SMP case. | 1247 | * get optimised away in the !CONFIG_SMP case. |
1248 | */ | 1248 | */ |
1249 | p->timestamp = (p->timestamp - this_rq->timestamp_last_tick) | 1249 | p->timestamp = (p->timestamp - this_rq->timestamp_last_tick) |
1250 | + rq->timestamp_last_tick; | 1250 | + rq->timestamp_last_tick; |
1251 | __activate_task(p, rq); | 1251 | __activate_task(p, rq); |
1252 | if (TASK_PREEMPTS_CURR(p, rq)) | 1252 | if (TASK_PREEMPTS_CURR(p, rq)) |
1253 | resched_task(rq->curr); | 1253 | resched_task(rq->curr); |
1254 | 1254 | ||
1255 | /* | 1255 | /* |
1256 | * Parent and child are on different CPUs, now get the | 1256 | * Parent and child are on different CPUs, now get the |
1257 | * parent runqueue to update the parent's ->sleep_avg: | 1257 | * parent runqueue to update the parent's ->sleep_avg: |
1258 | */ | 1258 | */ |
1259 | task_rq_unlock(rq, &flags); | 1259 | task_rq_unlock(rq, &flags); |
1260 | this_rq = task_rq_lock(current, &flags); | 1260 | this_rq = task_rq_lock(current, &flags); |
1261 | } | 1261 | } |
1262 | current->sleep_avg = JIFFIES_TO_NS(CURRENT_BONUS(current) * | 1262 | current->sleep_avg = JIFFIES_TO_NS(CURRENT_BONUS(current) * |
1263 | PARENT_PENALTY / 100 * MAX_SLEEP_AVG / MAX_BONUS); | 1263 | PARENT_PENALTY / 100 * MAX_SLEEP_AVG / MAX_BONUS); |
1264 | task_rq_unlock(this_rq, &flags); | 1264 | task_rq_unlock(this_rq, &flags); |
1265 | } | 1265 | } |
1266 | 1266 | ||
1267 | /* | 1267 | /* |
1268 | * Potentially available exiting-child timeslices are | 1268 | * Potentially available exiting-child timeslices are |
1269 | * retrieved here - this way the parent does not get | 1269 | * retrieved here - this way the parent does not get |
1270 | * penalized for creating too many threads. | 1270 | * penalized for creating too many threads. |
1271 | * | 1271 | * |
1272 | * (this cannot be used to 'generate' timeslices | 1272 | * (this cannot be used to 'generate' timeslices |
1273 | * artificially, because any timeslice recovered here | 1273 | * artificially, because any timeslice recovered here |
1274 | * was given away by the parent in the first place.) | 1274 | * was given away by the parent in the first place.) |
1275 | */ | 1275 | */ |
1276 | void fastcall sched_exit(task_t * p) | 1276 | void fastcall sched_exit(task_t * p) |
1277 | { | 1277 | { |
1278 | unsigned long flags; | 1278 | unsigned long flags; |
1279 | runqueue_t *rq; | 1279 | runqueue_t *rq; |
1280 | 1280 | ||
1281 | /* | 1281 | /* |
1282 | * If the child was a (relative-) CPU hog then decrease | 1282 | * If the child was a (relative-) CPU hog then decrease |
1283 | * the sleep_avg of the parent as well. | 1283 | * the sleep_avg of the parent as well. |
1284 | */ | 1284 | */ |
1285 | rq = task_rq_lock(p->parent, &flags); | 1285 | rq = task_rq_lock(p->parent, &flags); |
1286 | if (p->first_time_slice) { | 1286 | if (p->first_time_slice) { |
1287 | p->parent->time_slice += p->time_slice; | 1287 | p->parent->time_slice += p->time_slice; |
1288 | if (unlikely(p->parent->time_slice > task_timeslice(p))) | 1288 | if (unlikely(p->parent->time_slice > task_timeslice(p))) |
1289 | p->parent->time_slice = task_timeslice(p); | 1289 | p->parent->time_slice = task_timeslice(p); |
1290 | } | 1290 | } |
1291 | if (p->sleep_avg < p->parent->sleep_avg) | 1291 | if (p->sleep_avg < p->parent->sleep_avg) |
1292 | p->parent->sleep_avg = p->parent->sleep_avg / | 1292 | p->parent->sleep_avg = p->parent->sleep_avg / |
1293 | (EXIT_WEIGHT + 1) * EXIT_WEIGHT + p->sleep_avg / | 1293 | (EXIT_WEIGHT + 1) * EXIT_WEIGHT + p->sleep_avg / |
1294 | (EXIT_WEIGHT + 1); | 1294 | (EXIT_WEIGHT + 1); |
1295 | task_rq_unlock(rq, &flags); | 1295 | task_rq_unlock(rq, &flags); |
1296 | } | 1296 | } |
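
The sleep_avg update in sched_exit() is a weighted average that pulls the parent partway toward a CPU-hungry child's lower value. A sketch of the same integer math; EXIT_WEIGHT below is an assumed value, not the kernel's definition:

#include <stdio.h>

#define EXIT_WEIGHT 3   /* assumed weighting, not the kernel's definition */

/* Blend the parent's sleep_avg toward a less-interactive exiting child,
 * EXIT_WEIGHT parts parent to one part child, in integer arithmetic. */
static unsigned long blend_sleep_avg(unsigned long parent, unsigned long child)
{
        if (child < parent)
                parent = parent / (EXIT_WEIGHT + 1) * EXIT_WEIGHT +
                         child / (EXIT_WEIGHT + 1);
        return parent;
}

int main(void)
{
        printf("%lu\n", blend_sleep_avg(800, 400));     /* 700 */
        return 0;
}
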
1297 | 1297 | ||
1298 | /** | 1298 | /** |
1299 | * finish_task_switch - clean up after a task-switch | 1299 | * finish_task_switch - clean up after a task-switch |
1300 | * @prev: the thread we just switched away from. | 1300 | * @prev: the thread we just switched away from. |
1301 | * | 1301 | * |
1302 | * We enter this with the runqueue still locked, and finish_arch_switch() | 1302 | * We enter this with the runqueue still locked, and finish_arch_switch() |
1303 | * will unlock it along with doing any other architecture-specific cleanup | 1303 | * will unlock it along with doing any other architecture-specific cleanup |
1304 | * actions. | 1304 | * actions. |
1305 | * | 1305 | * |
1306 | * Note that we may have delayed dropping an mm in context_switch(). If | 1306 | * Note that we may have delayed dropping an mm in context_switch(). If |
1307 | * so, we finish that here outside of the runqueue lock. (Doing it | 1307 | * so, we finish that here outside of the runqueue lock. (Doing it |
1308 | * with the lock held can cause deadlocks; see schedule() for | 1308 | * with the lock held can cause deadlocks; see schedule() for |
1309 | * details.) | 1309 | * details.) |
1310 | */ | 1310 | */ |
1311 | static inline void finish_task_switch(task_t *prev) | 1311 | static inline void finish_task_switch(task_t *prev) |
1312 | __releases(rq->lock) | 1312 | __releases(rq->lock) |
1313 | { | 1313 | { |
1314 | runqueue_t *rq = this_rq(); | 1314 | runqueue_t *rq = this_rq(); |
1315 | struct mm_struct *mm = rq->prev_mm; | 1315 | struct mm_struct *mm = rq->prev_mm; |
1316 | unsigned long prev_task_flags; | 1316 | unsigned long prev_task_flags; |
1317 | 1317 | ||
1318 | rq->prev_mm = NULL; | 1318 | rq->prev_mm = NULL; |
1319 | 1319 | ||
1320 | /* | 1320 | /* |
1321 | * A task struct has one reference for the use as "current". | 1321 | * A task struct has one reference for the use as "current". |
1322 | * If a task dies, then it sets EXIT_ZOMBIE in tsk->exit_state and | 1322 | * If a task dies, then it sets EXIT_ZOMBIE in tsk->exit_state and |
1323 | * calls schedule one last time. The schedule call will never return, | 1323 | * calls schedule one last time. The schedule call will never return, |
1324 | * and the scheduled task must drop that reference. | 1324 | * and the scheduled task must drop that reference. |
1325 | * The test for EXIT_ZOMBIE must occur while the runqueue locks are | 1325 | * The test for EXIT_ZOMBIE must occur while the runqueue locks are |
1326 | * still held, otherwise prev could be scheduled on another cpu, die | 1326 | * still held, otherwise prev could be scheduled on another cpu, die |
1327 | * there before we look at prev->state, and then the reference would | 1327 | * there before we look at prev->state, and then the reference would |
1328 | * be dropped twice. | 1328 | * be dropped twice. |
1329 | * Manfred Spraul <manfred@colorfullife.com> | 1329 | * Manfred Spraul <manfred@colorfullife.com> |
1330 | */ | 1330 | */ |
1331 | prev_task_flags = prev->flags; | 1331 | prev_task_flags = prev->flags; |
1332 | finish_arch_switch(rq, prev); | 1332 | finish_arch_switch(rq, prev); |
1333 | if (mm) | 1333 | if (mm) |
1334 | mmdrop(mm); | 1334 | mmdrop(mm); |
1335 | if (unlikely(prev_task_flags & PF_DEAD)) | 1335 | if (unlikely(prev_task_flags & PF_DEAD)) |
1336 | put_task_struct(prev); | 1336 | put_task_struct(prev); |
1337 | } | 1337 | } |
1338 | 1338 | ||
1339 | /** | 1339 | /** |
1340 | * schedule_tail - first thing a freshly forked thread must call. | 1340 | * schedule_tail - first thing a freshly forked thread must call. |
1341 | * @prev: the thread we just switched away from. | 1341 | * @prev: the thread we just switched away from. |
1342 | */ | 1342 | */ |
1343 | asmlinkage void schedule_tail(task_t *prev) | 1343 | asmlinkage void schedule_tail(task_t *prev) |
1344 | __releases(rq->lock) | 1344 | __releases(rq->lock) |
1345 | { | 1345 | { |
1346 | finish_task_switch(prev); | 1346 | finish_task_switch(prev); |
1347 | 1347 | ||
1348 | if (current->set_child_tid) | 1348 | if (current->set_child_tid) |
1349 | put_user(current->pid, current->set_child_tid); | 1349 | put_user(current->pid, current->set_child_tid); |
1350 | } | 1350 | } |
1351 | 1351 | ||
1352 | /* | 1352 | /* |
1353 | * context_switch - switch to the new MM and the new | 1353 | * context_switch - switch to the new MM and the new |
1354 | * thread's register state. | 1354 | * thread's register state. |
1355 | */ | 1355 | */ |
1356 | static inline | 1356 | static inline |
1357 | task_t * context_switch(runqueue_t *rq, task_t *prev, task_t *next) | 1357 | task_t * context_switch(runqueue_t *rq, task_t *prev, task_t *next) |
1358 | { | 1358 | { |
1359 | struct mm_struct *mm = next->mm; | 1359 | struct mm_struct *mm = next->mm; |
1360 | struct mm_struct *oldmm = prev->active_mm; | 1360 | struct mm_struct *oldmm = prev->active_mm; |
1361 | 1361 | ||
1362 | if (unlikely(!mm)) { | 1362 | if (unlikely(!mm)) { |
1363 | next->active_mm = oldmm; | 1363 | next->active_mm = oldmm; |
1364 | atomic_inc(&oldmm->mm_count); | 1364 | atomic_inc(&oldmm->mm_count); |
1365 | enter_lazy_tlb(oldmm, next); | 1365 | enter_lazy_tlb(oldmm, next); |
1366 | } else | 1366 | } else |
1367 | switch_mm(oldmm, mm, next); | 1367 | switch_mm(oldmm, mm, next); |
1368 | 1368 | ||
1369 | if (unlikely(!prev->mm)) { | 1369 | if (unlikely(!prev->mm)) { |
1370 | prev->active_mm = NULL; | 1370 | prev->active_mm = NULL; |
1371 | WARN_ON(rq->prev_mm); | 1371 | WARN_ON(rq->prev_mm); |
1372 | rq->prev_mm = oldmm; | 1372 | rq->prev_mm = oldmm; |
1373 | } | 1373 | } |
1374 | 1374 | ||
1375 | /* Here we just switch the register state and the stack. */ | 1375 | /* Here we just switch the register state and the stack. */ |
1376 | switch_to(prev, next, prev); | 1376 | switch_to(prev, next, prev); |
1377 | 1377 | ||
1378 | return prev; | 1378 | return prev; |
1379 | } | 1379 | } |
1380 | 1380 | ||
1381 | /* | 1381 | /* |
1382 | * nr_running, nr_uninterruptible and nr_context_switches: | 1382 | * nr_running, nr_uninterruptible and nr_context_switches: |
1383 | * | 1383 | * |
1384 | * externally visible scheduler statistics: current number of runnable | 1384 | * externally visible scheduler statistics: current number of runnable |
1385 | * threads, current number of uninterruptible-sleeping threads, total | 1385 | * threads, current number of uninterruptible-sleeping threads, total |
1386 | * number of context switches performed since bootup. | 1386 | * number of context switches performed since bootup. |
1387 | */ | 1387 | */ |
1388 | unsigned long nr_running(void) | 1388 | unsigned long nr_running(void) |
1389 | { | 1389 | { |
1390 | unsigned long i, sum = 0; | 1390 | unsigned long i, sum = 0; |
1391 | 1391 | ||
1392 | for_each_online_cpu(i) | 1392 | for_each_online_cpu(i) |
1393 | sum += cpu_rq(i)->nr_running; | 1393 | sum += cpu_rq(i)->nr_running; |
1394 | 1394 | ||
1395 | return sum; | 1395 | return sum; |
1396 | } | 1396 | } |
1397 | 1397 | ||
1398 | unsigned long nr_uninterruptible(void) | 1398 | unsigned long nr_uninterruptible(void) |
1399 | { | 1399 | { |
1400 | unsigned long i, sum = 0; | 1400 | unsigned long i, sum = 0; |
1401 | 1401 | ||
1402 | for_each_cpu(i) | 1402 | for_each_cpu(i) |
1403 | sum += cpu_rq(i)->nr_uninterruptible; | 1403 | sum += cpu_rq(i)->nr_uninterruptible; |
1404 | 1404 | ||
1405 | /* | 1405 | /* |
1406 | * Since we read the counters lockless, it might be slightly | 1406 | * Since we read the counters lockless, it might be slightly |
1407 | * inaccurate. Do not allow it to go below zero though: | 1407 | * inaccurate. Do not allow it to go below zero though: |
1408 | */ | 1408 | */ |
1409 | if (unlikely((long)sum < 0)) | 1409 | if (unlikely((long)sum < 0)) |
1410 | sum = 0; | 1410 | sum = 0; |
1411 | 1411 | ||
1412 | return sum; | 1412 | return sum; |
1413 | } | 1413 | } |
1414 | 1414 | ||
1415 | unsigned long long nr_context_switches(void) | 1415 | unsigned long long nr_context_switches(void) |
1416 | { | 1416 | { |
1417 | unsigned long long i, sum = 0; | 1417 | unsigned long long i, sum = 0; |
1418 | 1418 | ||
1419 | for_each_cpu(i) | 1419 | for_each_cpu(i) |
1420 | sum += cpu_rq(i)->nr_switches; | 1420 | sum += cpu_rq(i)->nr_switches; |
1421 | 1421 | ||
1422 | return sum; | 1422 | return sum; |
1423 | } | 1423 | } |
1424 | 1424 | ||
1425 | unsigned long nr_iowait(void) | 1425 | unsigned long nr_iowait(void) |
1426 | { | 1426 | { |
1427 | unsigned long i, sum = 0; | 1427 | unsigned long i, sum = 0; |
1428 | 1428 | ||
1429 | for_each_cpu(i) | 1429 | for_each_cpu(i) |
1430 | sum += atomic_read(&cpu_rq(i)->nr_iowait); | 1430 | sum += atomic_read(&cpu_rq(i)->nr_iowait); |
1431 | 1431 | ||
1432 | return sum; | 1432 | return sum; |
1433 | } | 1433 | } |
1434 | 1434 | ||
1435 | #ifdef CONFIG_SMP | 1435 | #ifdef CONFIG_SMP |
1436 | 1436 | ||
1437 | /* | 1437 | /* |
1438 | * double_rq_lock - safely lock two runqueues | 1438 | * double_rq_lock - safely lock two runqueues |
1439 | * | 1439 | * |
1440 | * Note this does not disable interrupts like task_rq_lock, | 1440 | * Note this does not disable interrupts like task_rq_lock, |
1441 | * you need to do so manually before calling. | 1441 | * you need to do so manually before calling. |
1442 | */ | 1442 | */ |
1443 | static void double_rq_lock(runqueue_t *rq1, runqueue_t *rq2) | 1443 | static void double_rq_lock(runqueue_t *rq1, runqueue_t *rq2) |
1444 | __acquires(rq1->lock) | 1444 | __acquires(rq1->lock) |
1445 | __acquires(rq2->lock) | 1445 | __acquires(rq2->lock) |
1446 | { | 1446 | { |
1447 | if (rq1 == rq2) { | 1447 | if (rq1 == rq2) { |
1448 | spin_lock(&rq1->lock); | 1448 | spin_lock(&rq1->lock); |
1449 | __acquire(rq2->lock); /* Fake it out ;) */ | 1449 | __acquire(rq2->lock); /* Fake it out ;) */ |
1450 | } else { | 1450 | } else { |
1451 | if (rq1 < rq2) { | 1451 | if (rq1 < rq2) { |
1452 | spin_lock(&rq1->lock); | 1452 | spin_lock(&rq1->lock); |
1453 | spin_lock(&rq2->lock); | 1453 | spin_lock(&rq2->lock); |
1454 | } else { | 1454 | } else { |
1455 | spin_lock(&rq2->lock); | 1455 | spin_lock(&rq2->lock); |
1456 | spin_lock(&rq1->lock); | 1456 | spin_lock(&rq1->lock); |
1457 | } | 1457 | } |
1458 | } | 1458 | } |
1459 | } | 1459 | } |
1460 | 1460 | ||
1461 | /* | 1461 | /* |
1462 | * double_rq_unlock - safely unlock two runqueues | 1462 | * double_rq_unlock - safely unlock two runqueues |
1463 | * | 1463 | * |
1464 | * Note this does not restore interrupts like task_rq_unlock, | 1464 | * Note this does not restore interrupts like task_rq_unlock, |
1465 | * you need to do so manually after calling. | 1465 | * you need to do so manually after calling. |
1466 | */ | 1466 | */ |
1467 | static void double_rq_unlock(runqueue_t *rq1, runqueue_t *rq2) | 1467 | static void double_rq_unlock(runqueue_t *rq1, runqueue_t *rq2) |
1468 | __releases(rq1->lock) | 1468 | __releases(rq1->lock) |
1469 | __releases(rq2->lock) | 1469 | __releases(rq2->lock) |
1470 | { | 1470 | { |
1471 | spin_unlock(&rq1->lock); | 1471 | spin_unlock(&rq1->lock); |
1472 | if (rq1 != rq2) | 1472 | if (rq1 != rq2) |
1473 | spin_unlock(&rq2->lock); | 1473 | spin_unlock(&rq2->lock); |
1474 | else | 1474 | else |
1475 | __release(rq2->lock); | 1475 | __release(rq2->lock); |
1476 | } | 1476 | } |
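
double_rq_lock()/double_rq_unlock() dodge ABBA deadlock by always taking the lower-addressed lock first and by faking the second acquisition when both arguments are the same queue. The same ordering discipline as a self-contained pthread analogue (build with -pthread); it mirrors the kernel's rq1 < rq2 comparison, which is well-defined there because runqueues live in one per-CPU array:

/* build: cc lockpair.c -pthread */
#include <pthread.h>
#include <stdio.h>

/* Always lock the lower-addressed mutex first so two threads locking the
 * same pair in opposite argument order cannot deadlock. */
static void double_lock(pthread_mutex_t *a, pthread_mutex_t *b)
{
        if (a == b) {
                pthread_mutex_lock(a);          /* only one real lock to take */
        } else if (a < b) {
                pthread_mutex_lock(a);
                pthread_mutex_lock(b);
        } else {
                pthread_mutex_lock(b);
                pthread_mutex_lock(a);
        }
}

static void double_unlock(pthread_mutex_t *a, pthread_mutex_t *b)
{
        pthread_mutex_unlock(a);
        if (a != b)
                pthread_mutex_unlock(b);
}

int main(void)
{
        pthread_mutex_t x = PTHREAD_MUTEX_INITIALIZER;
        pthread_mutex_t y = PTHREAD_MUTEX_INITIALIZER;

        double_lock(&x, &y);
        printf("both locked\n");
        double_unlock(&x, &y);
        return 0;
}
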
1477 | 1477 | ||
1478 | /* | 1478 | /* |
1479 | * double_lock_balance - lock the busiest runqueue, this_rq is locked already. | 1479 | * double_lock_balance - lock the busiest runqueue, this_rq is locked already. |
1480 | */ | 1480 | */ |
1481 | static void double_lock_balance(runqueue_t *this_rq, runqueue_t *busiest) | 1481 | static void double_lock_balance(runqueue_t *this_rq, runqueue_t *busiest) |
1482 | __releases(this_rq->lock) | 1482 | __releases(this_rq->lock) |
1483 | __acquires(busiest->lock) | 1483 | __acquires(busiest->lock) |
1484 | __acquires(this_rq->lock) | 1484 | __acquires(this_rq->lock) |
1485 | { | 1485 | { |
1486 | if (unlikely(!spin_trylock(&busiest->lock))) { | 1486 | if (unlikely(!spin_trylock(&busiest->lock))) { |
1487 | if (busiest < this_rq) { | 1487 | if (busiest < this_rq) { |
1488 | spin_unlock(&this_rq->lock); | 1488 | spin_unlock(&this_rq->lock); |
1489 | spin_lock(&busiest->lock); | 1489 | spin_lock(&busiest->lock); |
1490 | spin_lock(&this_rq->lock); | 1490 | spin_lock(&this_rq->lock); |
1491 | } else | 1491 | } else |
1492 | spin_lock(&busiest->lock); | 1492 | spin_lock(&busiest->lock); |
1493 | } | 1493 | } |
1494 | } | 1494 | } |
1495 | 1495 | ||
1496 | /* | 1496 | /* |
1497 | * find_idlest_cpu - find the least busy runqueue. | 1497 | * find_idlest_cpu - find the least busy runqueue. |
1498 | */ | 1498 | */ |
1499 | static int find_idlest_cpu(struct task_struct *p, int this_cpu, | 1499 | static int find_idlest_cpu(struct task_struct *p, int this_cpu, |
1500 | struct sched_domain *sd) | 1500 | struct sched_domain *sd) |
1501 | { | 1501 | { |
1502 | unsigned long load, min_load, this_load; | 1502 | unsigned long load, min_load, this_load; |
1503 | int i, min_cpu; | 1503 | int i, min_cpu; |
1504 | cpumask_t mask; | 1504 | cpumask_t mask; |
1505 | 1505 | ||
1506 | min_cpu = UINT_MAX; | 1506 | min_cpu = UINT_MAX; |
1507 | min_load = ULONG_MAX; | 1507 | min_load = ULONG_MAX; |
1508 | 1508 | ||
1509 | cpus_and(mask, sd->span, p->cpus_allowed); | 1509 | cpus_and(mask, sd->span, p->cpus_allowed); |
1510 | 1510 | ||
1511 | for_each_cpu_mask(i, mask) { | 1511 | for_each_cpu_mask(i, mask) { |
1512 | load = target_load(i); | 1512 | load = target_load(i); |
1513 | 1513 | ||
1514 | if (load < min_load) { | 1514 | if (load < min_load) { |
1515 | min_cpu = i; | 1515 | min_cpu = i; |
1516 | min_load = load; | 1516 | min_load = load; |
1517 | 1517 | ||
1518 | /* break out early on an idle CPU: */ | 1518 | /* break out early on an idle CPU: */ |
1519 | if (!min_load) | 1519 | if (!min_load) |
1520 | break; | 1520 | break; |
1521 | } | 1521 | } |
1522 | } | 1522 | } |
1523 | 1523 | ||
1524 | /* add +1 to account for the new task */ | 1524 | /* add +1 to account for the new task */ |
1525 | this_load = source_load(this_cpu) + SCHED_LOAD_SCALE; | 1525 | this_load = source_load(this_cpu) + SCHED_LOAD_SCALE; |
1526 | 1526 | ||
1527 | /* | 1527 | /* |
1528 | * With the new task added to the current CPU, would | 1528 | * With the new task added to the current CPU, would |
1529 | * there be an imbalance between this CPU and the | 1529 | * there be an imbalance between this CPU and the |
1530 | * idlest CPU? | 1530 | * idlest CPU? |
1531 | * | 1531 | * |
1532 | * Use half of the balancing threshold - new-context is | 1532 | * Use half of the balancing threshold - new-context is |
1533 | * a good opportunity to balance. | 1533 | * a good opportunity to balance. |
1534 | */ | 1534 | */ |
1535 | if (min_load*(100 + (sd->imbalance_pct-100)/2) < this_load*100) | 1535 | if (min_load*(100 + (sd->imbalance_pct-100)/2) < this_load*100) |
1536 | return min_cpu; | 1536 | return min_cpu; |
1537 | 1537 | ||
1538 | return this_cpu; | 1538 | return this_cpu; |
1539 | } | 1539 | } |
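
The closing comparison in find_idlest_cpu() demands only half of the domain's usual imbalance before moving the task away, since placing a brand-new context is a cheap moment to balance, and it charges the local CPU one task's worth of load up front. A sketch of that decision; imbalance_pct and SCHED_LOAD_SCALE are illustrative values:

#include <stdio.h>

#define SCHED_LOAD_SCALE 128UL          /* illustrative */

/* Move to the least-loaded CPU only if the local CPU, charged with the new
 * task, exceeds it by half of the domain's normal imbalance threshold. */
static int pick_cpu(int this_cpu, unsigned long this_load,
                    int min_cpu, unsigned long min_load,
                    unsigned int imbalance_pct)
{
        this_load += SCHED_LOAD_SCALE;  /* +1 task for the context being placed */

        if (min_load * (100 + (imbalance_pct - 100) / 2) < this_load * 100)
                return min_cpu;
        return this_cpu;
}

int main(void)
{
        printf("%d\n", pick_cpu(0, 200, 2, 100, 125));  /* 2: go to the idlest CPU */
        printf("%d\n", pick_cpu(0, 0, 2, 200, 125));    /* 0: stay on this CPU */
        return 0;
}
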
1540 | 1540 | ||
1541 | /* | 1541 | /* |
1542 | * If dest_cpu is allowed for this process, migrate the task to it. | 1542 | * If dest_cpu is allowed for this process, migrate the task to it. |
1543 | * This is accomplished by forcing the cpu_allowed mask to only | 1543 | * This is accomplished by forcing the cpu_allowed mask to only |
1544 | * allow dest_cpu, which will force the cpu onto dest_cpu. Then | 1544 | * allow dest_cpu, which will force the cpu onto dest_cpu. Then |
1545 | * the cpu_allowed mask is restored. | 1545 | * the cpu_allowed mask is restored. |
1546 | */ | 1546 | */ |
1547 | static void sched_migrate_task(task_t *p, int dest_cpu) | 1547 | static void sched_migrate_task(task_t *p, int dest_cpu) |
1548 | { | 1548 | { |
1549 | migration_req_t req; | 1549 | migration_req_t req; |
1550 | runqueue_t *rq; | 1550 | runqueue_t *rq; |
1551 | unsigned long flags; | 1551 | unsigned long flags; |
1552 | 1552 | ||
1553 | rq = task_rq_lock(p, &flags); | 1553 | rq = task_rq_lock(p, &flags); |
1554 | if (!cpu_isset(dest_cpu, p->cpus_allowed) | 1554 | if (!cpu_isset(dest_cpu, p->cpus_allowed) |
1555 | || unlikely(cpu_is_offline(dest_cpu))) | 1555 | || unlikely(cpu_is_offline(dest_cpu))) |
1556 | goto out; | 1556 | goto out; |
1557 | 1557 | ||
1558 | /* force the process onto the specified CPU */ | 1558 | /* force the process onto the specified CPU */ |
1559 | if (migrate_task(p, dest_cpu, &req)) { | 1559 | if (migrate_task(p, dest_cpu, &req)) { |
1560 | /* Need to wait for migration thread (might exit: take ref). */ | 1560 | /* Need to wait for migration thread (might exit: take ref). */ |
1561 | struct task_struct *mt = rq->migration_thread; | 1561 | struct task_struct *mt = rq->migration_thread; |
1562 | get_task_struct(mt); | 1562 | get_task_struct(mt); |
1563 | task_rq_unlock(rq, &flags); | 1563 | task_rq_unlock(rq, &flags); |
1564 | wake_up_process(mt); | 1564 | wake_up_process(mt); |
1565 | put_task_struct(mt); | 1565 | put_task_struct(mt); |
1566 | wait_for_completion(&req.done); | 1566 | wait_for_completion(&req.done); |
1567 | return; | 1567 | return; |
1568 | } | 1568 | } |
1569 | out: | 1569 | out: |
1570 | task_rq_unlock(rq, &flags); | 1570 | task_rq_unlock(rq, &flags); |
1571 | } | 1571 | } |
1572 | 1572 | ||
1573 | /* | 1573 | /* |
1574 | * sched_exec(): find the highest-level, exec-balance-capable | 1574 | * sched_exec(): find the highest-level, exec-balance-capable |
1575 | * domain and try to migrate the task to the least loaded CPU. | 1575 | * domain and try to migrate the task to the least loaded CPU. |
1576 | * | 1576 | * |
1577 | * execve() is a valuable balancing opportunity, because at this point | 1577 | * execve() is a valuable balancing opportunity, because at this point |
1578 | * the task has the smallest effective memory and cache footprint. | 1578 | * the task has the smallest effective memory and cache footprint. |
1579 | */ | 1579 | */ |
1580 | void sched_exec(void) | 1580 | void sched_exec(void) |
1581 | { | 1581 | { |
1582 | struct sched_domain *tmp, *sd = NULL; | 1582 | struct sched_domain *tmp, *sd = NULL; |
1583 | int new_cpu, this_cpu = get_cpu(); | 1583 | int new_cpu, this_cpu = get_cpu(); |
1584 | 1584 | ||
1585 | /* Prefer the current CPU if there's only this task running */ | 1585 | /* Prefer the current CPU if there's only this task running */ |
1586 | if (this_rq()->nr_running <= 1) | 1586 | if (this_rq()->nr_running <= 1) |
1587 | goto out; | 1587 | goto out; |
1588 | 1588 | ||
1589 | for_each_domain(this_cpu, tmp) | 1589 | for_each_domain(this_cpu, tmp) |
1590 | if (tmp->flags & SD_BALANCE_EXEC) | 1590 | if (tmp->flags & SD_BALANCE_EXEC) |
1591 | sd = tmp; | 1591 | sd = tmp; |
1592 | 1592 | ||
1593 | if (sd) { | 1593 | if (sd) { |
1594 | schedstat_inc(sd, sbe_attempts); | 1594 | schedstat_inc(sd, sbe_attempts); |
1595 | new_cpu = find_idlest_cpu(current, this_cpu, sd); | 1595 | new_cpu = find_idlest_cpu(current, this_cpu, sd); |
1596 | if (new_cpu != this_cpu) { | 1596 | if (new_cpu != this_cpu) { |
1597 | schedstat_inc(sd, sbe_pushed); | 1597 | schedstat_inc(sd, sbe_pushed); |
1598 | put_cpu(); | 1598 | put_cpu(); |
1599 | sched_migrate_task(current, new_cpu); | 1599 | sched_migrate_task(current, new_cpu); |
1600 | return; | 1600 | return; |
1601 | } | 1601 | } |
1602 | } | 1602 | } |
1603 | out: | 1603 | out: |
1604 | put_cpu(); | 1604 | put_cpu(); |
1605 | } | 1605 | } |
1606 | 1606 | ||
1607 | /* | 1607 | /* |
1608 | * pull_task - move a task from a remote runqueue to the local runqueue. | 1608 | * pull_task - move a task from a remote runqueue to the local runqueue. |
1609 | * Both runqueues must be locked. | 1609 | * Both runqueues must be locked. |
1610 | */ | 1610 | */ |
1611 | static inline | 1611 | static inline |
1612 | void pull_task(runqueue_t *src_rq, prio_array_t *src_array, task_t *p, | 1612 | void pull_task(runqueue_t *src_rq, prio_array_t *src_array, task_t *p, |
1613 | runqueue_t *this_rq, prio_array_t *this_array, int this_cpu) | 1613 | runqueue_t *this_rq, prio_array_t *this_array, int this_cpu) |
1614 | { | 1614 | { |
1615 | dequeue_task(p, src_array); | 1615 | dequeue_task(p, src_array); |
1616 | src_rq->nr_running--; | 1616 | src_rq->nr_running--; |
1617 | set_task_cpu(p, this_cpu); | 1617 | set_task_cpu(p, this_cpu); |
1618 | this_rq->nr_running++; | 1618 | this_rq->nr_running++; |
1619 | enqueue_task(p, this_array); | 1619 | enqueue_task(p, this_array); |
1620 | p->timestamp = (p->timestamp - src_rq->timestamp_last_tick) | 1620 | p->timestamp = (p->timestamp - src_rq->timestamp_last_tick) |
1621 | + this_rq->timestamp_last_tick; | 1621 | + this_rq->timestamp_last_tick; |
1622 | /* | 1622 | /* |
1623 | * Note that idle threads have a prio of MAX_PRIO, so this test | 1623 | * Note that idle threads have a prio of MAX_PRIO, so this test |
1624 | * is always true for them. | 1624 | * is always true for them. |
1625 | */ | 1625 | */ |
1626 | if (TASK_PREEMPTS_CURR(p, this_rq)) | 1626 | if (TASK_PREEMPTS_CURR(p, this_rq)) |
1627 | resched_task(this_rq->curr); | 1627 | resched_task(this_rq->curr); |
1628 | } | 1628 | } |
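The timestamp adjustment at the end is needed because sched_clock() is a per-CPU clock: the task's timestamp keeps its offset from the source runqueue's timestamp_last_tick but is re-expressed against the destination's. A stand-alone demo with made-up nanosecond values (not kernel code):

    #include <stdio.h>

    /* Replay of the rebasing arithmetic in pull_task(): preserve the
     * task's offset from the source CPU's last tick, re-anchored to the
     * destination CPU's last tick.  All values are illustrative. */
    int main(void)
    {
        unsigned long long p_timestamp  = 1000300ULL; /* task clock, source CPU */
        unsigned long long src_last_tick = 1000000ULL; /* source rq->timestamp_last_tick */
        unsigned long long dst_last_tick = 2000000ULL; /* dest rq->timestamp_last_tick */

        unsigned long long rebased = (p_timestamp - src_last_tick) + dst_last_tick;
        printf("rebased timestamp: %llu\n", rebased); /* prints 2000300 */
        return 0;
    }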
1629 | 1629 | ||
1630 | /* | 1630 | /* |
1631 | * can_migrate_task - may task p from runqueue rq be migrated to this_cpu? | 1631 | * can_migrate_task - may task p from runqueue rq be migrated to this_cpu? |
1632 | */ | 1632 | */ |
1633 | static inline | 1633 | static inline |
1634 | int can_migrate_task(task_t *p, runqueue_t *rq, int this_cpu, | 1634 | int can_migrate_task(task_t *p, runqueue_t *rq, int this_cpu, |
1635 | struct sched_domain *sd, enum idle_type idle) | 1635 | struct sched_domain *sd, enum idle_type idle) |
1636 | { | 1636 | { |
1637 | /* | 1637 | /* |
1638 | * We do not migrate tasks that are: | 1638 | * We do not migrate tasks that are: |
1639 | * 1) running (obviously), or | 1639 | * 1) running (obviously), or |
1640 | * 2) cannot be migrated to this CPU due to cpus_allowed, or | 1640 | * 2) cannot be migrated to this CPU due to cpus_allowed, or |
1641 | * 3) are cache-hot on their current CPU. | 1641 | * 3) are cache-hot on their current CPU. |
1642 | */ | 1642 | */ |
1643 | if (task_running(rq, p)) | 1643 | if (task_running(rq, p)) |
1644 | return 0; | 1644 | return 0; |
1645 | if (!cpu_isset(this_cpu, p->cpus_allowed)) | 1645 | if (!cpu_isset(this_cpu, p->cpus_allowed)) |
1646 | return 0; | 1646 | return 0; |
1647 | 1647 | ||
1648 | /* | 1648 | /* |
1649 | * Aggressive migration if: | 1649 | * Aggressive migration if: |
1650 | * 1) the [whole] cpu is idle, or | 1650 | * 1) the [whole] cpu is idle, or |
1651 | * 2) too many balance attempts have failed. | 1651 | * 2) too many balance attempts have failed. |
1652 | */ | 1652 | */ |
1653 | 1653 | ||
1654 | if (cpu_and_siblings_are_idle(this_cpu) || \ | 1654 | if (cpu_and_siblings_are_idle(this_cpu) || \ |
1655 | sd->nr_balance_failed > sd->cache_nice_tries) | 1655 | sd->nr_balance_failed > sd->cache_nice_tries) |
1656 | return 1; | 1656 | return 1; |
1657 | 1657 | ||
1658 | if (task_hot(p, rq->timestamp_last_tick, sd)) | 1658 | if (task_hot(p, rq->timestamp_last_tick, sd)) |
1659 | return 0; | 1659 | return 0; |
1660 | return 1; | 1660 | return 1; |
1661 | } | 1661 | } |
1662 | 1662 | ||
1663 | /* | 1663 | /* |
1664 | * move_tasks tries to move up to max_nr_move tasks from busiest to this_rq, | 1664 | * move_tasks tries to move up to max_nr_move tasks from busiest to this_rq, |
1665 | * as part of a balancing operation within "domain". Returns the number of | 1665 | * as part of a balancing operation within "domain". Returns the number of |
1666 | * tasks moved. | 1666 | * tasks moved. |
1667 | * | 1667 | * |
1668 | * Called with both runqueues locked. | 1668 | * Called with both runqueues locked. |
1669 | */ | 1669 | */ |
1670 | static int move_tasks(runqueue_t *this_rq, int this_cpu, runqueue_t *busiest, | 1670 | static int move_tasks(runqueue_t *this_rq, int this_cpu, runqueue_t *busiest, |
1671 | unsigned long max_nr_move, struct sched_domain *sd, | 1671 | unsigned long max_nr_move, struct sched_domain *sd, |
1672 | enum idle_type idle) | 1672 | enum idle_type idle) |
1673 | { | 1673 | { |
1674 | prio_array_t *array, *dst_array; | 1674 | prio_array_t *array, *dst_array; |
1675 | struct list_head *head, *curr; | 1675 | struct list_head *head, *curr; |
1676 | int idx, pulled = 0; | 1676 | int idx, pulled = 0; |
1677 | task_t *tmp; | 1677 | task_t *tmp; |
1678 | 1678 | ||
1679 | if (max_nr_move <= 0 || busiest->nr_running <= 1) | 1679 | if (max_nr_move <= 0 || busiest->nr_running <= 1) |
1680 | goto out; | 1680 | goto out; |
1681 | 1681 | ||
1682 | /* | 1682 | /* |
1683 | * We first consider expired tasks. Those will likely not be | 1683 | * We first consider expired tasks. Those will likely not be |
1684 | * executed in the near future, and they are most likely to | 1684 | * executed in the near future, and they are most likely to |
1685 | * be cache-cold, thus switching CPUs has the least effect | 1685 | * be cache-cold, thus switching CPUs has the least effect |
1686 | * on them. | 1686 | * on them. |
1687 | */ | 1687 | */ |
1688 | if (busiest->expired->nr_active) { | 1688 | if (busiest->expired->nr_active) { |
1689 | array = busiest->expired; | 1689 | array = busiest->expired; |
1690 | dst_array = this_rq->expired; | 1690 | dst_array = this_rq->expired; |
1691 | } else { | 1691 | } else { |
1692 | array = busiest->active; | 1692 | array = busiest->active; |
1693 | dst_array = this_rq->active; | 1693 | dst_array = this_rq->active; |
1694 | } | 1694 | } |
1695 | 1695 | ||
1696 | new_array: | 1696 | new_array: |
1697 | /* Start searching at priority 0: */ | 1697 | /* Start searching at priority 0: */ |
1698 | idx = 0; | 1698 | idx = 0; |
1699 | skip_bitmap: | 1699 | skip_bitmap: |
1700 | if (!idx) | 1700 | if (!idx) |
1701 | idx = sched_find_first_bit(array->bitmap); | 1701 | idx = sched_find_first_bit(array->bitmap); |
1702 | else | 1702 | else |
1703 | idx = find_next_bit(array->bitmap, MAX_PRIO, idx); | 1703 | idx = find_next_bit(array->bitmap, MAX_PRIO, idx); |
1704 | if (idx >= MAX_PRIO) { | 1704 | if (idx >= MAX_PRIO) { |
1705 | if (array == busiest->expired && busiest->active->nr_active) { | 1705 | if (array == busiest->expired && busiest->active->nr_active) { |
1706 | array = busiest->active; | 1706 | array = busiest->active; |
1707 | dst_array = this_rq->active; | 1707 | dst_array = this_rq->active; |
1708 | goto new_array; | 1708 | goto new_array; |
1709 | } | 1709 | } |
1710 | goto out; | 1710 | goto out; |
1711 | } | 1711 | } |
1712 | 1712 | ||
1713 | head = array->queue + idx; | 1713 | head = array->queue + idx; |
1714 | curr = head->prev; | 1714 | curr = head->prev; |
1715 | skip_queue: | 1715 | skip_queue: |
1716 | tmp = list_entry(curr, task_t, run_list); | 1716 | tmp = list_entry(curr, task_t, run_list); |
1717 | 1717 | ||
1718 | curr = curr->prev; | 1718 | curr = curr->prev; |
1719 | 1719 | ||
1720 | if (!can_migrate_task(tmp, busiest, this_cpu, sd, idle)) { | 1720 | if (!can_migrate_task(tmp, busiest, this_cpu, sd, idle)) { |
1721 | if (curr != head) | 1721 | if (curr != head) |
1722 | goto skip_queue; | 1722 | goto skip_queue; |
1723 | idx++; | 1723 | idx++; |
1724 | goto skip_bitmap; | 1724 | goto skip_bitmap; |
1725 | } | 1725 | } |
1726 | 1726 | ||
1727 | #ifdef CONFIG_SCHEDSTATS | 1727 | #ifdef CONFIG_SCHEDSTATS |
1728 | if (task_hot(tmp, busiest->timestamp_last_tick, sd)) | 1728 | if (task_hot(tmp, busiest->timestamp_last_tick, sd)) |
1729 | schedstat_inc(sd, lb_hot_gained[idle]); | 1729 | schedstat_inc(sd, lb_hot_gained[idle]); |
1730 | #endif | 1730 | #endif |
1731 | 1731 | ||
1732 | pull_task(busiest, array, tmp, this_rq, dst_array, this_cpu); | 1732 | pull_task(busiest, array, tmp, this_rq, dst_array, this_cpu); |
1733 | pulled++; | 1733 | pulled++; |
1734 | 1734 | ||
1735 | /* We only want to steal up to the prescribed number of tasks. */ | 1735 | /* We only want to steal up to the prescribed number of tasks. */ |
1736 | if (pulled < max_nr_move) { | 1736 | if (pulled < max_nr_move) { |
1737 | if (curr != head) | 1737 | if (curr != head) |
1738 | goto skip_queue; | 1738 | goto skip_queue; |
1739 | idx++; | 1739 | idx++; |
1740 | goto skip_bitmap; | 1740 | goto skip_bitmap; |
1741 | } | 1741 | } |
1742 | out: | 1742 | out: |
1743 | /* | 1743 | /* |
1744 | * Right now, this is the only place pull_task() is called, | 1744 | * Right now, this is the only place pull_task() is called, |
1745 | * so we can safely collect pull_task() stats here rather than | 1745 | * so we can safely collect pull_task() stats here rather than |
1746 | * inside pull_task(). | 1746 | * inside pull_task(). |
1747 | */ | 1747 | */ |
1748 | schedstat_add(sd, lb_gained[idle], pulled); | 1748 | schedstat_add(sd, lb_gained[idle], pulled); |
1749 | return pulled; | 1749 | return pulled; |
1750 | } | 1750 | } |
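move_tasks() walks the source array's priority bitmap from index 0 (highest priority) upward and pulls candidates from the tail of each non-empty list, preferring the expired array because those tasks are the most likely to be cache-cold. Below is a plain-C stand-in for the bitmap walk; the kernel uses sched_find_first_bit()/find_next_bit(), and the bit positions here are purely illustrative:

    #include <stdio.h>

    #define MAX_PRIO 140
    #define BITS_PER_LONG (8 * sizeof(unsigned long))
    #define BITMAP_LONGS ((MAX_PRIO + BITS_PER_LONG - 1) / BITS_PER_LONG)

    /* Return the next set bit at or above 'start', or MAX_PRIO if none. */
    static int next_set_bit(const unsigned long *map, int start)
    {
        int i;
        for (i = start; i < MAX_PRIO; i++)
            if (map[i / BITS_PER_LONG] & (1UL << (i % BITS_PER_LONG)))
                return i;
        return MAX_PRIO;
    }

    int main(void)
    {
        unsigned long bitmap[BITMAP_LONGS] = { 0 };
        int idx;

        /* Pretend priority levels 3 and 120 have runnable tasks queued. */
        bitmap[3 / BITS_PER_LONG]   |= 1UL << (3 % BITS_PER_LONG);
        bitmap[120 / BITS_PER_LONG] |= 1UL << (120 % BITS_PER_LONG);

        for (idx = next_set_bit(bitmap, 0); idx < MAX_PRIO;
             idx = next_set_bit(bitmap, idx + 1))
            printf("non-empty priority level: %d\n", idx);
        return 0;
    }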
1751 | 1751 | ||
1752 | /* | 1752 | /* |
1753 | * find_busiest_group finds and returns the busiest CPU group within the | 1753 | * find_busiest_group finds and returns the busiest CPU group within the |
1754 | * domain. It calculates and returns the number of tasks which should be | 1754 | * domain. It calculates and returns the number of tasks which should be |
1755 | * moved to restore balance via the imbalance parameter. | 1755 | * moved to restore balance via the imbalance parameter. |
1756 | */ | 1756 | */ |
1757 | static struct sched_group * | 1757 | static struct sched_group * |
1758 | find_busiest_group(struct sched_domain *sd, int this_cpu, | 1758 | find_busiest_group(struct sched_domain *sd, int this_cpu, |
1759 | unsigned long *imbalance, enum idle_type idle) | 1759 | unsigned long *imbalance, enum idle_type idle) |
1760 | { | 1760 | { |
1761 | struct sched_group *busiest = NULL, *this = NULL, *group = sd->groups; | 1761 | struct sched_group *busiest = NULL, *this = NULL, *group = sd->groups; |
1762 | unsigned long max_load, avg_load, total_load, this_load, total_pwr; | 1762 | unsigned long max_load, avg_load, total_load, this_load, total_pwr; |
1763 | 1763 | ||
1764 | max_load = this_load = total_load = total_pwr = 0; | 1764 | max_load = this_load = total_load = total_pwr = 0; |
1765 | 1765 | ||
1766 | do { | 1766 | do { |
1767 | unsigned long load; | 1767 | unsigned long load; |
1768 | int local_group; | 1768 | int local_group; |
1769 | int i; | 1769 | int i; |
1770 | 1770 | ||
1771 | local_group = cpu_isset(this_cpu, group->cpumask); | 1771 | local_group = cpu_isset(this_cpu, group->cpumask); |
1772 | 1772 | ||
1773 | /* Tally up the load of all CPUs in the group */ | 1773 | /* Tally up the load of all CPUs in the group */ |
1774 | avg_load = 0; | 1774 | avg_load = 0; |
1775 | 1775 | ||
1776 | for_each_cpu_mask(i, group->cpumask) { | 1776 | for_each_cpu_mask(i, group->cpumask) { |
1777 | /* Bias balancing toward cpus of our domain */ | 1777 | /* Bias balancing toward cpus of our domain */ |
1778 | if (local_group) | 1778 | if (local_group) |
1779 | load = target_load(i); | 1779 | load = target_load(i); |
1780 | else | 1780 | else |
1781 | load = source_load(i); | 1781 | load = source_load(i); |
1782 | 1782 | ||
1783 | avg_load += load; | 1783 | avg_load += load; |
1784 | } | 1784 | } |
1785 | 1785 | ||
1786 | total_load += avg_load; | 1786 | total_load += avg_load; |
1787 | total_pwr += group->cpu_power; | 1787 | total_pwr += group->cpu_power; |
1788 | 1788 | ||
1789 | /* Adjust by relative CPU power of the group */ | 1789 | /* Adjust by relative CPU power of the group */ |
1790 | avg_load = (avg_load * SCHED_LOAD_SCALE) / group->cpu_power; | 1790 | avg_load = (avg_load * SCHED_LOAD_SCALE) / group->cpu_power; |
1791 | 1791 | ||
1792 | if (local_group) { | 1792 | if (local_group) { |
1793 | this_load = avg_load; | 1793 | this_load = avg_load; |
1794 | this = group; | 1794 | this = group; |
1795 | goto nextgroup; | 1795 | goto nextgroup; |
1796 | } else if (avg_load > max_load) { | 1796 | } else if (avg_load > max_load) { |
1797 | max_load = avg_load; | 1797 | max_load = avg_load; |
1798 | busiest = group; | 1798 | busiest = group; |
1799 | } | 1799 | } |
1800 | nextgroup: | 1800 | nextgroup: |
1801 | group = group->next; | 1801 | group = group->next; |
1802 | } while (group != sd->groups); | 1802 | } while (group != sd->groups); |
1803 | 1803 | ||
1804 | if (!busiest || this_load >= max_load) | 1804 | if (!busiest || this_load >= max_load) |
1805 | goto out_balanced; | 1805 | goto out_balanced; |
1806 | 1806 | ||
1807 | avg_load = (SCHED_LOAD_SCALE * total_load) / total_pwr; | 1807 | avg_load = (SCHED_LOAD_SCALE * total_load) / total_pwr; |
1808 | 1808 | ||
1809 | if (this_load >= avg_load || | 1809 | if (this_load >= avg_load || |
1810 | 100*max_load <= sd->imbalance_pct*this_load) | 1810 | 100*max_load <= sd->imbalance_pct*this_load) |
1811 | goto out_balanced; | 1811 | goto out_balanced; |
1812 | 1812 | ||
1813 | /* | 1813 | /* |
1814 | * We're trying to get all the cpus to the average_load, so we don't | 1814 | * We're trying to get all the cpus to the average_load, so we don't |
1815 | * want to push ourselves above the average load, nor do we wish to | 1815 | * want to push ourselves above the average load, nor do we wish to |
1816 | * reduce the max loaded cpu below the average load, as either of these | 1816 | * reduce the max loaded cpu below the average load, as either of these |
1817 | * actions would just result in more rebalancing later, and ping-pong | 1817 | * actions would just result in more rebalancing later, and ping-pong |
1818 | * tasks around. Thus we look for the minimum possible imbalance. | 1818 | * tasks around. Thus we look for the minimum possible imbalance. |
1819 | * Negative imbalances (*we* are more loaded than anyone else) will | 1819 | * Negative imbalances (*we* are more loaded than anyone else) will |
1820 | * be counted as no imbalance for these purposes -- we can't fix that | 1820 | * be counted as no imbalance for these purposes -- we can't fix that |
1821 | * by pulling tasks to us. Be careful of negative numbers as they'll | 1821 | * by pulling tasks to us. Be careful of negative numbers as they'll |
1822 | * appear as very large values with unsigned longs. | 1822 | * appear as very large values with unsigned longs. |
1823 | */ | 1823 | */ |
1824 | /* How much load to actually move to equalise the imbalance */ | 1824 | /* How much load to actually move to equalise the imbalance */ |
1825 | *imbalance = min((max_load - avg_load) * busiest->cpu_power, | 1825 | *imbalance = min((max_load - avg_load) * busiest->cpu_power, |
1826 | (avg_load - this_load) * this->cpu_power) | 1826 | (avg_load - this_load) * this->cpu_power) |
1827 | / SCHED_LOAD_SCALE; | 1827 | / SCHED_LOAD_SCALE; |
1828 | 1828 | ||
1829 | if (*imbalance < SCHED_LOAD_SCALE) { | 1829 | if (*imbalance < SCHED_LOAD_SCALE) { |
1830 | unsigned long pwr_now = 0, pwr_move = 0; | 1830 | unsigned long pwr_now = 0, pwr_move = 0; |
1831 | unsigned long tmp; | 1831 | unsigned long tmp; |
1832 | 1832 | ||
1833 | if (max_load - this_load >= SCHED_LOAD_SCALE*2) { | 1833 | if (max_load - this_load >= SCHED_LOAD_SCALE*2) { |
1834 | *imbalance = 1; | 1834 | *imbalance = 1; |
1835 | return busiest; | 1835 | return busiest; |
1836 | } | 1836 | } |
1837 | 1837 | ||
1838 | /* | 1838 | /* |
1839 | * OK, we don't have enough imbalance to justify moving tasks, | 1839 | * OK, we don't have enough imbalance to justify moving tasks, |
1840 | * however we may be able to increase total CPU power used by | 1840 | * however we may be able to increase total CPU power used by |
1841 | * moving them. | 1841 | * moving them. |
1842 | */ | 1842 | */ |
1843 | 1843 | ||
1844 | pwr_now += busiest->cpu_power*min(SCHED_LOAD_SCALE, max_load); | 1844 | pwr_now += busiest->cpu_power*min(SCHED_LOAD_SCALE, max_load); |
1845 | pwr_now += this->cpu_power*min(SCHED_LOAD_SCALE, this_load); | 1845 | pwr_now += this->cpu_power*min(SCHED_LOAD_SCALE, this_load); |
1846 | pwr_now /= SCHED_LOAD_SCALE; | 1846 | pwr_now /= SCHED_LOAD_SCALE; |
1847 | 1847 | ||
1848 | /* Amount of load we'd subtract */ | 1848 | /* Amount of load we'd subtract */ |
1849 | tmp = SCHED_LOAD_SCALE*SCHED_LOAD_SCALE/busiest->cpu_power; | 1849 | tmp = SCHED_LOAD_SCALE*SCHED_LOAD_SCALE/busiest->cpu_power; |
1850 | if (max_load > tmp) | 1850 | if (max_load > tmp) |
1851 | pwr_move += busiest->cpu_power*min(SCHED_LOAD_SCALE, | 1851 | pwr_move += busiest->cpu_power*min(SCHED_LOAD_SCALE, |
1852 | max_load - tmp); | 1852 | max_load - tmp); |
1853 | 1853 | ||
1854 | /* Amount of load we'd add */ | 1854 | /* Amount of load we'd add */ |
1855 | if (max_load*busiest->cpu_power < | 1855 | if (max_load*busiest->cpu_power < |
1856 | SCHED_LOAD_SCALE*SCHED_LOAD_SCALE) | 1856 | SCHED_LOAD_SCALE*SCHED_LOAD_SCALE) |
1857 | tmp = max_load*busiest->cpu_power/this->cpu_power; | 1857 | tmp = max_load*busiest->cpu_power/this->cpu_power; |
1858 | else | 1858 | else |
1859 | tmp = SCHED_LOAD_SCALE*SCHED_LOAD_SCALE/this->cpu_power; | 1859 | tmp = SCHED_LOAD_SCALE*SCHED_LOAD_SCALE/this->cpu_power; |
1860 | pwr_move += this->cpu_power*min(SCHED_LOAD_SCALE, this_load + tmp); | 1860 | pwr_move += this->cpu_power*min(SCHED_LOAD_SCALE, this_load + tmp); |
1861 | pwr_move /= SCHED_LOAD_SCALE; | 1861 | pwr_move /= SCHED_LOAD_SCALE; |
1862 | 1862 | ||
1863 | /* Move if we gain throughput */ | 1863 | /* Move if we gain throughput */ |
1864 | if (pwr_move <= pwr_now) | 1864 | if (pwr_move <= pwr_now) |
1865 | goto out_balanced; | 1865 | goto out_balanced; |
1866 | 1866 | ||
1867 | *imbalance = 1; | 1867 | *imbalance = 1; |
1868 | return busiest; | 1868 | return busiest; |
1869 | } | 1869 | } |
1870 | 1870 | ||
1871 | /* Get rid of the scaling factor, rounding down as we divide */ | 1871 | /* Get rid of the scaling factor, rounding down as we divide */ |
1872 | *imbalance = *imbalance / SCHED_LOAD_SCALE; | 1872 | *imbalance = *imbalance / SCHED_LOAD_SCALE; |
1873 | 1873 | ||
1874 | return busiest; | 1874 | return busiest; |
1875 | 1875 | ||
1876 | out_balanced: | 1876 | out_balanced: |
1877 | if (busiest && (idle == NEWLY_IDLE || | 1877 | if (busiest && (idle == NEWLY_IDLE || |
1878 | (idle == SCHED_IDLE && max_load > SCHED_LOAD_SCALE)) ) { | 1878 | (idle == SCHED_IDLE && max_load > SCHED_LOAD_SCALE)) ) { |
1879 | *imbalance = 1; | 1879 | *imbalance = 1; |
1880 | return busiest; | 1880 | return busiest; |
1881 | } | 1881 | } |
1882 | 1882 | ||
1883 | *imbalance = 0; | 1883 | *imbalance = 0; |
1884 | return NULL; | 1884 | return NULL; |
1885 | } | 1885 | } |
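Once group loads have been normalised by cpu_power, the imbalance is the smaller of "how far the busiest group sits above the domain average" and "how far the local group sits below it", so pulling can never push either side past the average. The stand-alone rerun below uses illustrative numbers (two single-CPU groups with the default cpu_power of SCHED_LOAD_SCALE, which was 128 in this era) and follows only the large-imbalance path; smaller imbalances fall into the pwr_now/pwr_move throughput check instead:

    #include <stdio.h>

    #define SCHED_LOAD_SCALE 128UL

    static unsigned long min_ul(unsigned long a, unsigned long b)
    {
        return a < b ? a : b;
    }

    /* Replay of the imbalance arithmetic in find_busiest_group() for one
     * made-up case: busiest group running ~4 tasks, local group ~1 task,
     * loads expressed in SCHED_LOAD_SCALE units. */
    int main(void)
    {
        unsigned long cpu_power = SCHED_LOAD_SCALE;
        unsigned long max_load  = 4 * SCHED_LOAD_SCALE;  /* busiest group */
        unsigned long this_load = 1 * SCHED_LOAD_SCALE;  /* local group */
        unsigned long avg_load  = (max_load + this_load) / 2;
        unsigned long imbalance;

        imbalance = min_ul((max_load - avg_load) * cpu_power,
                           (avg_load - this_load) * cpu_power) / SCHED_LOAD_SCALE;
        if (imbalance >= SCHED_LOAD_SCALE)      /* large-imbalance path only */
            imbalance /= SCHED_LOAD_SCALE;      /* drop the scaling factor   */

        printf("tasks to move: %lu\n", imbalance);  /* prints 1 */
        return 0;
    }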
1886 | 1886 | ||
1887 | /* | 1887 | /* |
1888 | * find_busiest_queue - find the busiest runqueue among the cpus in group. | 1888 | * find_busiest_queue - find the busiest runqueue among the cpus in group. |
1889 | */ | 1889 | */ |
1890 | static runqueue_t *find_busiest_queue(struct sched_group *group) | 1890 | static runqueue_t *find_busiest_queue(struct sched_group *group) |
1891 | { | 1891 | { |
1892 | unsigned long load, max_load = 0; | 1892 | unsigned long load, max_load = 0; |
1893 | runqueue_t *busiest = NULL; | 1893 | runqueue_t *busiest = NULL; |
1894 | int i; | 1894 | int i; |
1895 | 1895 | ||
1896 | for_each_cpu_mask(i, group->cpumask) { | 1896 | for_each_cpu_mask(i, group->cpumask) { |
1897 | load = source_load(i); | 1897 | load = source_load(i); |
1898 | 1898 | ||
1899 | if (load > max_load) { | 1899 | if (load > max_load) { |
1900 | max_load = load; | 1900 | max_load = load; |
1901 | busiest = cpu_rq(i); | 1901 | busiest = cpu_rq(i); |
1902 | } | 1902 | } |
1903 | } | 1903 | } |
1904 | 1904 | ||
1905 | return busiest; | 1905 | return busiest; |
1906 | } | 1906 | } |
1907 | 1907 | ||
1908 | /* | 1908 | /* |
1909 | * Check this_cpu to ensure it is balanced within domain. Attempt to move | 1909 | * Check this_cpu to ensure it is balanced within domain. Attempt to move |
1910 | * tasks if there is an imbalance. | 1910 | * tasks if there is an imbalance. |
1911 | * | 1911 | * |
1912 | * Called with this_rq unlocked. | 1912 | * Called with this_rq unlocked. |
1913 | */ | 1913 | */ |
1914 | static int load_balance(int this_cpu, runqueue_t *this_rq, | 1914 | static int load_balance(int this_cpu, runqueue_t *this_rq, |
1915 | struct sched_domain *sd, enum idle_type idle) | 1915 | struct sched_domain *sd, enum idle_type idle) |
1916 | { | 1916 | { |
1917 | struct sched_group *group; | 1917 | struct sched_group *group; |
1918 | runqueue_t *busiest; | 1918 | runqueue_t *busiest; |
1919 | unsigned long imbalance; | 1919 | unsigned long imbalance; |
1920 | int nr_moved; | 1920 | int nr_moved; |
1921 | 1921 | ||
1922 | spin_lock(&this_rq->lock); | 1922 | spin_lock(&this_rq->lock); |
1923 | schedstat_inc(sd, lb_cnt[idle]); | 1923 | schedstat_inc(sd, lb_cnt[idle]); |
1924 | 1924 | ||
1925 | group = find_busiest_group(sd, this_cpu, &imbalance, idle); | 1925 | group = find_busiest_group(sd, this_cpu, &imbalance, idle); |
1926 | if (!group) { | 1926 | if (!group) { |
1927 | schedstat_inc(sd, lb_nobusyg[idle]); | 1927 | schedstat_inc(sd, lb_nobusyg[idle]); |
1928 | goto out_balanced; | 1928 | goto out_balanced; |
1929 | } | 1929 | } |
1930 | 1930 | ||
1931 | busiest = find_busiest_queue(group); | 1931 | busiest = find_busiest_queue(group); |
1932 | if (!busiest) { | 1932 | if (!busiest) { |
1933 | schedstat_inc(sd, lb_nobusyq[idle]); | 1933 | schedstat_inc(sd, lb_nobusyq[idle]); |
1934 | goto out_balanced; | 1934 | goto out_balanced; |
1935 | } | 1935 | } |
1936 | 1936 | ||
1937 | /* | 1937 | /* |
1938 | * This should be "impossible", but since load | 1938 | * This should be "impossible", but since load |
1939 | * balancing is inherently racy and statistical, | 1939 | * balancing is inherently racy and statistical, |
1940 | * it could happen in theory. | 1940 | * it could happen in theory. |
1941 | */ | 1941 | */ |
1942 | if (unlikely(busiest == this_rq)) { | 1942 | if (unlikely(busiest == this_rq)) { |
1943 | WARN_ON(1); | 1943 | WARN_ON(1); |
1944 | goto out_balanced; | 1944 | goto out_balanced; |
1945 | } | 1945 | } |
1946 | 1946 | ||
1947 | schedstat_add(sd, lb_imbalance[idle], imbalance); | 1947 | schedstat_add(sd, lb_imbalance[idle], imbalance); |
1948 | 1948 | ||
1949 | nr_moved = 0; | 1949 | nr_moved = 0; |
1950 | if (busiest->nr_running > 1) { | 1950 | if (busiest->nr_running > 1) { |
1951 | /* | 1951 | /* |
1952 | * Attempt to move tasks. If find_busiest_group has found | 1952 | * Attempt to move tasks. If find_busiest_group has found |
1953 | * an imbalance but busiest->nr_running <= 1, the group is | 1953 | * an imbalance but busiest->nr_running <= 1, the group is |
1954 | * still unbalanced. nr_moved simply stays zero, so it is | 1954 | * still unbalanced. nr_moved simply stays zero, so it is |
1955 | * correctly treated as an imbalance. | 1955 | * correctly treated as an imbalance. |
1956 | */ | 1956 | */ |
1957 | double_lock_balance(this_rq, busiest); | 1957 | double_lock_balance(this_rq, busiest); |
1958 | nr_moved = move_tasks(this_rq, this_cpu, busiest, | 1958 | nr_moved = move_tasks(this_rq, this_cpu, busiest, |
1959 | imbalance, sd, idle); | 1959 | imbalance, sd, idle); |
1960 | spin_unlock(&busiest->lock); | 1960 | spin_unlock(&busiest->lock); |
1961 | } | 1961 | } |
1962 | spin_unlock(&this_rq->lock); | 1962 | spin_unlock(&this_rq->lock); |
1963 | 1963 | ||
1964 | if (!nr_moved) { | 1964 | if (!nr_moved) { |
1965 | schedstat_inc(sd, lb_failed[idle]); | 1965 | schedstat_inc(sd, lb_failed[idle]); |
1966 | sd->nr_balance_failed++; | 1966 | sd->nr_balance_failed++; |
1967 | 1967 | ||
1968 | if (unlikely(sd->nr_balance_failed > sd->cache_nice_tries+2)) { | 1968 | if (unlikely(sd->nr_balance_failed > sd->cache_nice_tries+2)) { |
1969 | int wake = 0; | 1969 | int wake = 0; |
1970 | 1970 | ||
1971 | spin_lock(&busiest->lock); | 1971 | spin_lock(&busiest->lock); |
1972 | if (!busiest->active_balance) { | 1972 | if (!busiest->active_balance) { |
1973 | busiest->active_balance = 1; | 1973 | busiest->active_balance = 1; |
1974 | busiest->push_cpu = this_cpu; | 1974 | busiest->push_cpu = this_cpu; |
1975 | wake = 1; | 1975 | wake = 1; |
1976 | } | 1976 | } |
1977 | spin_unlock(&busiest->lock); | 1977 | spin_unlock(&busiest->lock); |
1978 | if (wake) | 1978 | if (wake) |
1979 | wake_up_process(busiest->migration_thread); | 1979 | wake_up_process(busiest->migration_thread); |
1980 | 1980 | ||
1981 | /* | 1981 | /* |
1982 | * We've kicked active balancing, reset the failure | 1982 | * We've kicked active balancing, reset the failure |
1983 | * counter. | 1983 | * counter. |
1984 | */ | 1984 | */ |
1985 | sd->nr_balance_failed = sd->cache_nice_tries; | 1985 | sd->nr_balance_failed = sd->cache_nice_tries; |
1986 | } | 1986 | } |
1987 | 1987 | ||
1988 | /* | 1988 | /* |
1989 | * We were unbalanced, but unsuccessful in move_tasks(), | 1989 | * We were unbalanced, but unsuccessful in move_tasks(), |
1990 | * so bump the balance_interval to lessen the lock contention. | 1990 | * so bump the balance_interval to lessen the lock contention. |
1991 | */ | 1991 | */ |
1992 | if (sd->balance_interval < sd->max_interval) | 1992 | if (sd->balance_interval < sd->max_interval) |
1993 | sd->balance_interval++; | 1993 | sd->balance_interval++; |
1994 | } else { | 1994 | } else { |
1995 | sd->nr_balance_failed = 0; | 1995 | sd->nr_balance_failed = 0; |
1996 | 1996 | ||
1997 | /* We were unbalanced, so reset the balancing interval */ | 1997 | /* We were unbalanced, so reset the balancing interval */ |
1998 | sd->balance_interval = sd->min_interval; | 1998 | sd->balance_interval = sd->min_interval; |
1999 | } | 1999 | } |
2000 | 2000 | ||
2001 | return nr_moved; | 2001 | return nr_moved; |
2002 | 2002 | ||
2003 | out_balanced: | 2003 | out_balanced: |
2004 | spin_unlock(&this_rq->lock); | 2004 | spin_unlock(&this_rq->lock); |
2005 | 2005 | ||
2006 | schedstat_inc(sd, lb_balanced[idle]); | 2006 | schedstat_inc(sd, lb_balanced[idle]); |
2007 | 2007 | ||
2008 | /* tune up the balancing interval */ | 2008 | /* tune up the balancing interval */ |
2009 | if (sd->balance_interval < sd->max_interval) | 2009 | if (sd->balance_interval < sd->max_interval) |
2010 | sd->balance_interval *= 2; | 2010 | sd->balance_interval *= 2; |
2011 | 2011 | ||
2012 | return 0; | 2012 | return 0; |
2013 | } | 2013 | } |
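The failure handling amounts to a slow escalation. Assuming a cache_nice_tries of 1 (a typical value for CPU-level domains of this era, stated here as an assumption), nr_balance_failed > cache_nice_tries + 2 first becomes true on the fourth consecutive failed attempt; the busiest CPU's migration thread is then woken to push a running task away, the counter is reset to 1 so the next kick needs three further failures, and balance_interval creeps upward (or doubles on the out_balanced path) to keep runqueue-lock contention down.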
2014 | 2014 | ||
2015 | /* | 2015 | /* |
2016 | * Check this_cpu to ensure it is balanced within domain. Attempt to move | 2016 | * Check this_cpu to ensure it is balanced within domain. Attempt to move |
2017 | * tasks if there is an imbalance. | 2017 | * tasks if there is an imbalance. |
2018 | * | 2018 | * |
2019 | * Called from schedule when this_rq is about to become idle (NEWLY_IDLE). | 2019 | * Called from schedule when this_rq is about to become idle (NEWLY_IDLE). |
2020 | * this_rq is locked. | 2020 | * this_rq is locked. |
2021 | */ | 2021 | */ |
2022 | static int load_balance_newidle(int this_cpu, runqueue_t *this_rq, | 2022 | static int load_balance_newidle(int this_cpu, runqueue_t *this_rq, |
2023 | struct sched_domain *sd) | 2023 | struct sched_domain *sd) |
2024 | { | 2024 | { |
2025 | struct sched_group *group; | 2025 | struct sched_group *group; |
2026 | runqueue_t *busiest = NULL; | 2026 | runqueue_t *busiest = NULL; |
2027 | unsigned long imbalance; | 2027 | unsigned long imbalance; |
2028 | int nr_moved = 0; | 2028 | int nr_moved = 0; |
2029 | 2029 | ||
2030 | schedstat_inc(sd, lb_cnt[NEWLY_IDLE]); | 2030 | schedstat_inc(sd, lb_cnt[NEWLY_IDLE]); |
2031 | group = find_busiest_group(sd, this_cpu, &imbalance, NEWLY_IDLE); | 2031 | group = find_busiest_group(sd, this_cpu, &imbalance, NEWLY_IDLE); |
2032 | if (!group) { | 2032 | if (!group) { |
2033 | schedstat_inc(sd, lb_balanced[NEWLY_IDLE]); | 2033 | schedstat_inc(sd, lb_balanced[NEWLY_IDLE]); |
2034 | schedstat_inc(sd, lb_nobusyg[NEWLY_IDLE]); | 2034 | schedstat_inc(sd, lb_nobusyg[NEWLY_IDLE]); |
2035 | goto out; | 2035 | goto out; |
2036 | } | 2036 | } |
2037 | 2037 | ||
2038 | busiest = find_busiest_queue(group); | 2038 | busiest = find_busiest_queue(group); |
2039 | if (!busiest || busiest == this_rq) { | 2039 | if (!busiest || busiest == this_rq) { |
2040 | schedstat_inc(sd, lb_balanced[NEWLY_IDLE]); | 2040 | schedstat_inc(sd, lb_balanced[NEWLY_IDLE]); |
2041 | schedstat_inc(sd, lb_nobusyq[NEWLY_IDLE]); | 2041 | schedstat_inc(sd, lb_nobusyq[NEWLY_IDLE]); |
2042 | goto out; | 2042 | goto out; |
2043 | } | 2043 | } |
2044 | 2044 | ||
2045 | /* Attempt to move tasks */ | 2045 | /* Attempt to move tasks */ |
2046 | double_lock_balance(this_rq, busiest); | 2046 | double_lock_balance(this_rq, busiest); |
2047 | 2047 | ||
2048 | schedstat_add(sd, lb_imbalance[NEWLY_IDLE], imbalance); | 2048 | schedstat_add(sd, lb_imbalance[NEWLY_IDLE], imbalance); |
2049 | nr_moved = move_tasks(this_rq, this_cpu, busiest, | 2049 | nr_moved = move_tasks(this_rq, this_cpu, busiest, |
2050 | imbalance, sd, NEWLY_IDLE); | 2050 | imbalance, sd, NEWLY_IDLE); |
2051 | if (!nr_moved) | 2051 | if (!nr_moved) |
2052 | schedstat_inc(sd, lb_failed[NEWLY_IDLE]); | 2052 | schedstat_inc(sd, lb_failed[NEWLY_IDLE]); |
2053 | 2053 | ||
2054 | spin_unlock(&busiest->lock); | 2054 | spin_unlock(&busiest->lock); |
2055 | 2055 | ||
2056 | out: | 2056 | out: |
2057 | return nr_moved; | 2057 | return nr_moved; |
2058 | } | 2058 | } |
2059 | 2059 | ||
2060 | /* | 2060 | /* |
2061 | * idle_balance is called by schedule() if this_cpu is about to become | 2061 | * idle_balance is called by schedule() if this_cpu is about to become |
2062 | * idle. Attempts to pull tasks from other CPUs. | 2062 | * idle. Attempts to pull tasks from other CPUs. |
2063 | */ | 2063 | */ |
2064 | static inline void idle_balance(int this_cpu, runqueue_t *this_rq) | 2064 | static inline void idle_balance(int this_cpu, runqueue_t *this_rq) |
2065 | { | 2065 | { |
2066 | struct sched_domain *sd; | 2066 | struct sched_domain *sd; |
2067 | 2067 | ||
2068 | for_each_domain(this_cpu, sd) { | 2068 | for_each_domain(this_cpu, sd) { |
2069 | if (sd->flags & SD_BALANCE_NEWIDLE) { | 2069 | if (sd->flags & SD_BALANCE_NEWIDLE) { |
2070 | if (load_balance_newidle(this_cpu, this_rq, sd)) { | 2070 | if (load_balance_newidle(this_cpu, this_rq, sd)) { |
2071 | /* We've pulled tasks over so stop searching */ | 2071 | /* We've pulled tasks over so stop searching */ |
2072 | break; | 2072 | break; |
2073 | } | 2073 | } |
2074 | } | 2074 | } |
2075 | } | 2075 | } |
2076 | } | 2076 | } |
2077 | 2077 | ||
2078 | /* | 2078 | /* |
2079 | * active_load_balance is run by migration threads. It pushes running tasks | 2079 | * active_load_balance is run by migration threads. It pushes running tasks |
2080 | * off the busiest CPU onto idle CPUs. It requires at least 1 task to be | 2080 | * off the busiest CPU onto idle CPUs. It requires at least 1 task to be |
2081 | * running on each physical CPU where possible, and avoids physical / | 2081 | * running on each physical CPU where possible, and avoids physical / |
2082 | * logical imbalances. | 2082 | * logical imbalances. |
2083 | * | 2083 | * |
2084 | * Called with busiest_rq locked. | 2084 | * Called with busiest_rq locked. |
2085 | */ | 2085 | */ |
2086 | static void active_load_balance(runqueue_t *busiest_rq, int busiest_cpu) | 2086 | static void active_load_balance(runqueue_t *busiest_rq, int busiest_cpu) |
2087 | { | 2087 | { |
2088 | struct sched_domain *sd; | 2088 | struct sched_domain *sd; |
2089 | struct sched_group *cpu_group; | 2089 | struct sched_group *cpu_group; |
2090 | runqueue_t *target_rq; | 2090 | runqueue_t *target_rq; |
2091 | cpumask_t visited_cpus; | 2091 | cpumask_t visited_cpus; |
2092 | int cpu; | 2092 | int cpu; |
2093 | 2093 | ||
2094 | /* | 2094 | /* |
2095 | * Search for suitable CPUs to push tasks to in successively higher | 2095 | * Search for suitable CPUs to push tasks to in successively higher |
2096 | * domains with SD_LOAD_BALANCE set. | 2096 | * domains with SD_LOAD_BALANCE set. |
2097 | */ | 2097 | */ |
2098 | visited_cpus = CPU_MASK_NONE; | 2098 | visited_cpus = CPU_MASK_NONE; |
2099 | for_each_domain(busiest_cpu, sd) { | 2099 | for_each_domain(busiest_cpu, sd) { |
2100 | if (!(sd->flags & SD_LOAD_BALANCE)) | 2100 | if (!(sd->flags & SD_LOAD_BALANCE)) |
2101 | /* no more domains to search */ | 2101 | /* no more domains to search */ |
2102 | break; | 2102 | break; |
2103 | 2103 | ||
2104 | schedstat_inc(sd, alb_cnt); | 2104 | schedstat_inc(sd, alb_cnt); |
2105 | 2105 | ||
2106 | cpu_group = sd->groups; | 2106 | cpu_group = sd->groups; |
2107 | do { | 2107 | do { |
2108 | for_each_cpu_mask(cpu, cpu_group->cpumask) { | 2108 | for_each_cpu_mask(cpu, cpu_group->cpumask) { |
2109 | if (busiest_rq->nr_running <= 1) | 2109 | if (busiest_rq->nr_running <= 1) |
2110 | /* no more tasks left to move */ | 2110 | /* no more tasks left to move */ |
2111 | return; | 2111 | return; |
2112 | if (cpu_isset(cpu, visited_cpus)) | 2112 | if (cpu_isset(cpu, visited_cpus)) |
2113 | continue; | 2113 | continue; |
2114 | cpu_set(cpu, visited_cpus); | 2114 | cpu_set(cpu, visited_cpus); |
2115 | if (!cpu_and_siblings_are_idle(cpu) || cpu == busiest_cpu) | 2115 | if (!cpu_and_siblings_are_idle(cpu) || cpu == busiest_cpu) |
2116 | continue; | 2116 | continue; |
2117 | 2117 | ||
2118 | target_rq = cpu_rq(cpu); | 2118 | target_rq = cpu_rq(cpu); |
2119 | /* | 2119 | /* |
2120 | * This condition is "impossible", if it occurs | 2120 | * This condition is "impossible", if it occurs |
2121 | * we need to fix it. Originally reported by | 2121 | * we need to fix it. Originally reported by |
2122 | * Bjorn Helgaas on a 128-cpu setup. | 2122 | * Bjorn Helgaas on a 128-cpu setup. |
2123 | */ | 2123 | */ |
2124 | BUG_ON(busiest_rq == target_rq); | 2124 | BUG_ON(busiest_rq == target_rq); |
2125 | 2125 | ||
2126 | /* move a task from busiest_rq to target_rq */ | 2126 | /* move a task from busiest_rq to target_rq */ |
2127 | double_lock_balance(busiest_rq, target_rq); | 2127 | double_lock_balance(busiest_rq, target_rq); |
2128 | if (move_tasks(target_rq, cpu, busiest_rq, | 2128 | if (move_tasks(target_rq, cpu, busiest_rq, |
2129 | 1, sd, SCHED_IDLE)) { | 2129 | 1, sd, SCHED_IDLE)) { |
2130 | schedstat_inc(sd, alb_pushed); | 2130 | schedstat_inc(sd, alb_pushed); |
2131 | } else { | 2131 | } else { |
2132 | schedstat_inc(sd, alb_failed); | 2132 | schedstat_inc(sd, alb_failed); |
2133 | } | 2133 | } |
2134 | spin_unlock(&target_rq->lock); | 2134 | spin_unlock(&target_rq->lock); |
2135 | } | 2135 | } |
2136 | cpu_group = cpu_group->next; | 2136 | cpu_group = cpu_group->next; |
2137 | } while (cpu_group != sd->groups); | 2137 | } while (cpu_group != sd->groups); |
2138 | } | 2138 | } |
2139 | } | 2139 | } |
2140 | 2140 | ||
2141 | /* | 2141 | /* |
2142 | * rebalance_tick will get called every timer tick, on every CPU. | 2142 | * rebalance_tick will get called every timer tick, on every CPU. |
2143 | * | 2143 | * |
2144 | * It checks each scheduling domain to see if it is due to be balanced, | 2144 | * It checks each scheduling domain to see if it is due to be balanced, |
2145 | * and initiates a balancing operation if so. | 2145 | * and initiates a balancing operation if so. |
2146 | * | 2146 | * |
2147 | * Balancing parameters are set up in arch_init_sched_domains. | 2147 | * Balancing parameters are set up in arch_init_sched_domains. |
2148 | */ | 2148 | */ |
2149 | 2149 | ||
2150 | /* Don't have all balancing operations going off at once */ | 2150 | /* Don't have all balancing operations going off at once */ |
2151 | #define CPU_OFFSET(cpu) (HZ * cpu / NR_CPUS) | 2151 | #define CPU_OFFSET(cpu) (HZ * cpu / NR_CPUS) |
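CPU_OFFSET() staggers each CPU's view of jiffies so that the per-domain balance intervals do not all expire on the same tick across CPUs. For example, with HZ = 1000 and NR_CPUS = 8 (illustrative config values), CPU 3 evaluates its intervals against jiffies + 1000*3/8 = jiffies + 375.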
2152 | 2152 | ||
2153 | static void rebalance_tick(int this_cpu, runqueue_t *this_rq, | 2153 | static void rebalance_tick(int this_cpu, runqueue_t *this_rq, |
2154 | enum idle_type idle) | 2154 | enum idle_type idle) |
2155 | { | 2155 | { |
2156 | unsigned long old_load, this_load; | 2156 | unsigned long old_load, this_load; |
2157 | unsigned long j = jiffies + CPU_OFFSET(this_cpu); | 2157 | unsigned long j = jiffies + CPU_OFFSET(this_cpu); |
2158 | struct sched_domain *sd; | 2158 | struct sched_domain *sd; |
2159 | 2159 | ||
2160 | /* Update our load */ | 2160 | /* Update our load */ |
2161 | old_load = this_rq->cpu_load; | 2161 | old_load = this_rq->cpu_load; |
2162 | this_load = this_rq->nr_running * SCHED_LOAD_SCALE; | 2162 | this_load = this_rq->nr_running * SCHED_LOAD_SCALE; |
2163 | /* | 2163 | /* |
2164 | * Round up the averaging division if load is increasing. This | 2164 | * Round up the averaging division if load is increasing. This |
2165 | * prevents us from getting stuck on 9 if the load is 10, for | 2165 | * prevents us from getting stuck on 9 if the load is 10, for |
2166 | * example. | 2166 | * example. |
2167 | */ | 2167 | */ |
2168 | if (this_load > old_load) | 2168 | if (this_load > old_load) |
2169 | old_load++; | 2169 | old_load++; |
2170 | this_rq->cpu_load = (old_load + this_load) / 2; | 2170 | this_rq->cpu_load = (old_load + this_load) / 2; |
2171 | 2171 | ||
2172 | for_each_domain(this_cpu, sd) { | 2172 | for_each_domain(this_cpu, sd) { |
2173 | unsigned long interval; | 2173 | unsigned long interval; |
2174 | 2174 | ||
2175 | if (!(sd->flags & SD_LOAD_BALANCE)) | 2175 | if (!(sd->flags & SD_LOAD_BALANCE)) |
2176 | continue; | 2176 | continue; |
2177 | 2177 | ||
2178 | interval = sd->balance_interval; | 2178 | interval = sd->balance_interval; |
2179 | if (idle != SCHED_IDLE) | 2179 | if (idle != SCHED_IDLE) |
2180 | interval *= sd->busy_factor; | 2180 | interval *= sd->busy_factor; |
2181 | 2181 | ||
2182 | /* scale ms to jiffies */ | 2182 | /* scale ms to jiffies */ |
2183 | interval = msecs_to_jiffies(interval); | 2183 | interval = msecs_to_jiffies(interval); |
2184 | if (unlikely(!interval)) | 2184 | if (unlikely(!interval)) |
2185 | interval = 1; | 2185 | interval = 1; |
2186 | 2186 | ||
2187 | if (j - sd->last_balance >= interval) { | 2187 | if (j - sd->last_balance >= interval) { |
2188 | if (load_balance(this_cpu, this_rq, sd, idle)) { | 2188 | if (load_balance(this_cpu, this_rq, sd, idle)) { |
2189 | /* We've pulled tasks over so no longer idle */ | 2189 | /* We've pulled tasks over so no longer idle */ |
2190 | idle = NOT_IDLE; | 2190 | idle = NOT_IDLE; |
2191 | } | 2191 | } |
2192 | sd->last_balance += interval; | 2192 | sd->last_balance += interval; |
2193 | } | 2193 | } |
2194 | } | 2194 | } |
2195 | } | 2195 | } |
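The cpu_load update is a simple one-pole average of the instantaneous load, with the rising case rounded up so that integer division cannot keep the average stuck just below the new value. The stand-alone demo below replays that arithmetic with arbitrary units (not kernel code):

    #include <stdio.h>

    /* Replay of the cpu_load averaging in rebalance_tick(): bump old_load
     * by one when load is rising, otherwise (9 + 10) / 2 would stay at 9
     * forever instead of converging to 10. */
    int main(void)
    {
        unsigned long cpu_load = 0, this_load = 10;
        int tick;

        for (tick = 1; tick <= 6; tick++) {
            unsigned long old_load = cpu_load;

            if (this_load > old_load)
                old_load++;                     /* round the average up */
            cpu_load = (old_load + this_load) / 2;
            printf("tick %d: cpu_load = %lu\n", tick, cpu_load);
        }
        return 0;   /* reaches 10 by tick 4 and stays there */
    }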
2196 | #else | 2196 | #else |
2197 | /* | 2197 | /* |
2198 | * on UP we do not need to balance between CPUs: | 2198 | * on UP we do not need to balance between CPUs: |
2199 | */ | 2199 | */ |
2200 | static inline void rebalance_tick(int cpu, runqueue_t *rq, enum idle_type idle) | 2200 | static inline void rebalance_tick(int cpu, runqueue_t *rq, enum idle_type idle) |
2201 | { | 2201 | { |
2202 | } | 2202 | } |
2203 | static inline void idle_balance(int cpu, runqueue_t *rq) | 2203 | static inline void idle_balance(int cpu, runqueue_t *rq) |
2204 | { | 2204 | { |
2205 | } | 2205 | } |
2206 | #endif | 2206 | #endif |
2207 | 2207 | ||
2208 | static inline int wake_priority_sleeper(runqueue_t *rq) | 2208 | static inline int wake_priority_sleeper(runqueue_t *rq) |
2209 | { | 2209 | { |
2210 | int ret = 0; | 2210 | int ret = 0; |
2211 | #ifdef CONFIG_SCHED_SMT | 2211 | #ifdef CONFIG_SCHED_SMT |
2212 | spin_lock(&rq->lock); | 2212 | spin_lock(&rq->lock); |
2213 | /* | 2213 | /* |
2214 | * If an SMT sibling task has been put to sleep for priority | 2214 | * If an SMT sibling task has been put to sleep for priority |
2215 | * reasons reschedule the idle task to see if it can now run. | 2215 | * reasons reschedule the idle task to see if it can now run. |
2216 | */ | 2216 | */ |
2217 | if (rq->nr_running) { | 2217 | if (rq->nr_running) { |
2218 | resched_task(rq->idle); | 2218 | resched_task(rq->idle); |
2219 | ret = 1; | 2219 | ret = 1; |
2220 | } | 2220 | } |
2221 | spin_unlock(&rq->lock); | 2221 | spin_unlock(&rq->lock); |
2222 | #endif | 2222 | #endif |
2223 | return ret; | 2223 | return ret; |
2224 | } | 2224 | } |
2225 | 2225 | ||
2226 | DEFINE_PER_CPU(struct kernel_stat, kstat); | 2226 | DEFINE_PER_CPU(struct kernel_stat, kstat); |
2227 | 2227 | ||
2228 | EXPORT_PER_CPU_SYMBOL(kstat); | 2228 | EXPORT_PER_CPU_SYMBOL(kstat); |
2229 | 2229 | ||
2230 | /* | 2230 | /* |
2231 | * This is called on clock ticks and on context switches. | 2231 | * This is called on clock ticks and on context switches. |
2232 | * Bank in p->sched_time the ns elapsed since the last tick or switch. | 2232 | * Bank in p->sched_time the ns elapsed since the last tick or switch. |
2233 | */ | 2233 | */ |
2234 | static inline void update_cpu_clock(task_t *p, runqueue_t *rq, | 2234 | static inline void update_cpu_clock(task_t *p, runqueue_t *rq, |
2235 | unsigned long long now) | 2235 | unsigned long long now) |
2236 | { | 2236 | { |
2237 | unsigned long long last = max(p->timestamp, rq->timestamp_last_tick); | 2237 | unsigned long long last = max(p->timestamp, rq->timestamp_last_tick); |
2238 | p->sched_time += now - last; | 2238 | p->sched_time += now - last; |
2239 | } | 2239 | } |
2240 | 2240 | ||
2241 | /* | 2241 | /* |
2242 | * Return current->sched_time plus any more ns on the sched_clock | 2242 | * Return current->sched_time plus any more ns on the sched_clock |
2243 | * that have not yet been banked. | 2243 | * that have not yet been banked. |
2244 | */ | 2244 | */ |
2245 | unsigned long long current_sched_time(const task_t *tsk) | 2245 | unsigned long long current_sched_time(const task_t *tsk) |
2246 | { | 2246 | { |
2247 | unsigned long long ns; | 2247 | unsigned long long ns; |
2248 | unsigned long flags; | 2248 | unsigned long flags; |
2249 | local_irq_save(flags); | 2249 | local_irq_save(flags); |
2250 | ns = max(tsk->timestamp, task_rq(tsk)->timestamp_last_tick); | 2250 | ns = max(tsk->timestamp, task_rq(tsk)->timestamp_last_tick); |
2251 | ns = tsk->sched_time + (sched_clock() - ns); | 2251 | ns = tsk->sched_time + (sched_clock() - ns); |
2252 | local_irq_restore(flags); | 2252 | local_irq_restore(flags); |
2253 | return ns; | 2253 | return ns; |
2254 | } | 2254 | } |
2255 | 2255 | ||
2256 | /* | 2256 | /* |
2257 | * We place interactive tasks back into the active array, if possible. | 2257 | * We place interactive tasks back into the active array, if possible. |
2258 | * | 2258 | * |
2259 | * To guarantee that this does not starve expired tasks we ignore the | 2259 | * To guarantee that this does not starve expired tasks we ignore the |
2260 | * interactivity of a task if the first expired task had to wait more | 2260 | * interactivity of a task if the first expired task had to wait more |
2261 | * than a 'reasonable' amount of time. This deadline timeout is | 2261 | * than a 'reasonable' amount of time. This deadline timeout is |
2262 | * load-dependent, as the frequency of array switches decreases with | 2262 | * load-dependent, as the frequency of array switches decreases with |
2263 | * increasing number of running tasks. We also ignore the interactivity | 2263 | * increasing number of running tasks. We also ignore the interactivity |
2264 | * if a better static_prio task has expired: | 2264 | * if a better static_prio task has expired: |
2265 | */ | 2265 | */ |
2266 | #define EXPIRED_STARVING(rq) \ | 2266 | #define EXPIRED_STARVING(rq) \ |
2267 | ((STARVATION_LIMIT && ((rq)->expired_timestamp && \ | 2267 | ((STARVATION_LIMIT && ((rq)->expired_timestamp && \ |
2268 | (jiffies - (rq)->expired_timestamp >= \ | 2268 | (jiffies - (rq)->expired_timestamp >= \ |
2269 | STARVATION_LIMIT * ((rq)->nr_running) + 1))) || \ | 2269 | STARVATION_LIMIT * ((rq)->nr_running) + 1))) || \ |
2270 | ((rq)->curr->static_prio > (rq)->best_expired_prio)) | 2270 | ((rq)->curr->static_prio > (rq)->best_expired_prio)) |
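Read as a formula, the first clause declares starvation once jiffies - expired_timestamp >= STARVATION_LIMIT * nr_running + 1, so the tolerated wait for the first expired task stretches linearly with the number of runnable tasks (with four runnable tasks, roughly four times STARVATION_LIMIT). The second clause triggers at once whenever the running task's static_prio is numerically higher, i.e. worse, than best_expired_prio, meaning a better-priority task is already sitting in the expired array; in either case interactive tasks stop being requeued into the active array.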
2271 | 2271 | ||
2272 | /* | 2272 | /* |
2273 | * Account user cpu time to a process. | 2273 | * Account user cpu time to a process. |
2274 | * @p: the process that the cpu time gets accounted to | 2274 | * @p: the process that the cpu time gets accounted to |
2275 | * @hardirq_offset: the offset to subtract from hardirq_count() | 2275 | * @hardirq_offset: the offset to subtract from hardirq_count() |
2276 | * @cputime: the cpu time spent in user space since the last update | 2276 | * @cputime: the cpu time spent in user space since the last update |
2277 | */ | 2277 | */ |
2278 | void account_user_time(struct task_struct *p, cputime_t cputime) | 2278 | void account_user_time(struct task_struct *p, cputime_t cputime) |
2279 | { | 2279 | { |
2280 | struct cpu_usage_stat *cpustat = &kstat_this_cpu.cpustat; | 2280 | struct cpu_usage_stat *cpustat = &kstat_this_cpu.cpustat; |
2281 | cputime64_t tmp; | 2281 | cputime64_t tmp; |
2282 | 2282 | ||
2283 | p->utime = cputime_add(p->utime, cputime); | 2283 | p->utime = cputime_add(p->utime, cputime); |
2284 | 2284 | ||
2285 | /* Add user time to cpustat. */ | 2285 | /* Add user time to cpustat. */ |
2286 | tmp = cputime_to_cputime64(cputime); | 2286 | tmp = cputime_to_cputime64(cputime); |
2287 | if (TASK_NICE(p) > 0) | 2287 | if (TASK_NICE(p) > 0) |
2288 | cpustat->nice = cputime64_add(cpustat->nice, tmp); | 2288 | cpustat->nice = cputime64_add(cpustat->nice, tmp); |
2289 | else | 2289 | else |
2290 | cpustat->user = cputime64_add(cpustat->user, tmp); | 2290 | cpustat->user = cputime64_add(cpustat->user, tmp); |
2291 | } | 2291 | } |
2292 | 2292 | ||
2293 | /* | 2293 | /* |
2294 | * Account system cpu time to a process. | 2294 | * Account system cpu time to a process. |
2295 | * @p: the process that the cpu time gets accounted to | 2295 | * @p: the process that the cpu time gets accounted to |
2296 | * @hardirq_offset: the offset to subtract from hardirq_count() | 2296 | * @hardirq_offset: the offset to subtract from hardirq_count() |
2297 | * @cputime: the cpu time spent in kernel space since the last update | 2297 | * @cputime: the cpu time spent in kernel space since the last update |
2298 | */ | 2298 | */ |
2299 | void account_system_time(struct task_struct *p, int hardirq_offset, | 2299 | void account_system_time(struct task_struct *p, int hardirq_offset, |
2300 | cputime_t cputime) | 2300 | cputime_t cputime) |
2301 | { | 2301 | { |
2302 | struct cpu_usage_stat *cpustat = &kstat_this_cpu.cpustat; | 2302 | struct cpu_usage_stat *cpustat = &kstat_this_cpu.cpustat; |
2303 | runqueue_t *rq = this_rq(); | 2303 | runqueue_t *rq = this_rq(); |
2304 | cputime64_t tmp; | 2304 | cputime64_t tmp; |
2305 | 2305 | ||
2306 | p->stime = cputime_add(p->stime, cputime); | 2306 | p->stime = cputime_add(p->stime, cputime); |
2307 | 2307 | ||
2308 | /* Add system time to cpustat. */ | 2308 | /* Add system time to cpustat. */ |
2309 | tmp = cputime_to_cputime64(cputime); | 2309 | tmp = cputime_to_cputime64(cputime); |
2310 | if (hardirq_count() - hardirq_offset) | 2310 | if (hardirq_count() - hardirq_offset) |
2311 | cpustat->irq = cputime64_add(cpustat->irq, tmp); | 2311 | cpustat->irq = cputime64_add(cpustat->irq, tmp); |
2312 | else if (softirq_count()) | 2312 | else if (softirq_count()) |
2313 | cpustat->softirq = cputime64_add(cpustat->softirq, tmp); | 2313 | cpustat->softirq = cputime64_add(cpustat->softirq, tmp); |
2314 | else if (p != rq->idle) | 2314 | else if (p != rq->idle) |
2315 | cpustat->system = cputime64_add(cpustat->system, tmp); | 2315 | cpustat->system = cputime64_add(cpustat->system, tmp); |
2316 | else if (atomic_read(&rq->nr_iowait) > 0) | 2316 | else if (atomic_read(&rq->nr_iowait) > 0) |
2317 | cpustat->iowait = cputime64_add(cpustat->iowait, tmp); | 2317 | cpustat->iowait = cputime64_add(cpustat->iowait, tmp); |
2318 | else | 2318 | else |
2319 | cpustat->idle = cputime64_add(cpustat->idle, tmp); | 2319 | cpustat->idle = cputime64_add(cpustat->idle, tmp); |
2320 | /* Account for system time used */ | 2320 | /* Account for system time used */ |
2321 | acct_update_integrals(p); | 2321 | acct_update_integrals(p); |
2322 | /* Update rss highwater mark */ | 2322 | /* Update rss highwater mark */ |
2323 | update_mem_hiwater(p); | 2323 | update_mem_hiwater(p); |
2324 | } | 2324 | } |
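The cascade charges each tick to exactly one bucket, tested in order: hardirq context, softirq context, any non-idle task, then the idle task, which is further split by whether I/O is outstanding. For example, a tick that lands while the idle task runs but rq->nr_iowait is non-zero is charged to iowait rather than idle, which is what makes the iowait column in tools such as vmstat distinct from plain idle time.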
2325 | 2325 | ||
2326 | /* | 2326 | /* |
2327 | * Account for involuntary wait time. | 2327 | * Account for involuntary wait time. |
2328 | * @p: the process from which the cpu time has been stolen | 2328 | * @p: the process from which the cpu time has been stolen |
2329 | * @steal: the cpu time spent in involuntary wait | 2329 | * @steal: the cpu time spent in involuntary wait |
2330 | */ | 2330 | */ |
2331 | void account_steal_time(struct task_struct *p, cputime_t steal) | 2331 | void account_steal_time(struct task_struct *p, cputime_t steal) |
2332 | { | 2332 | { |
2333 | struct cpu_usage_stat *cpustat = &kstat_this_cpu.cpustat; | 2333 | struct cpu_usage_stat *cpustat = &kstat_this_cpu.cpustat; |
2334 | cputime64_t tmp = cputime_to_cputime64(steal); | 2334 | cputime64_t tmp = cputime_to_cputime64(steal); |
2335 | runqueue_t *rq = this_rq(); | 2335 | runqueue_t *rq = this_rq(); |
2336 | 2336 | ||
2337 | if (p == rq->idle) { | 2337 | if (p == rq->idle) { |
2338 | p->stime = cputime_add(p->stime, steal); | 2338 | p->stime = cputime_add(p->stime, steal); |
2339 | if (atomic_read(&rq->nr_iowait) > 0) | 2339 | if (atomic_read(&rq->nr_iowait) > 0) |
2340 | cpustat->iowait = cputime64_add(cpustat->iowait, tmp); | 2340 | cpustat->iowait = cputime64_add(cpustat->iowait, tmp); |
2341 | else | 2341 | else |
2342 | cpustat->idle = cputime64_add(cpustat->idle, tmp); | 2342 | cpustat->idle = cputime64_add(cpustat->idle, tmp); |
2343 | } else | 2343 | } else |
2344 | cpustat->steal = cputime64_add(cpustat->steal, tmp); | 2344 | cpustat->steal = cputime64_add(cpustat->steal, tmp); |
2345 | } | 2345 | } |
2346 | 2346 | ||
2347 | /* | 2347 | /* |
2348 | * This function gets called by the timer code, with HZ frequency. | 2348 | * This function gets called by the timer code, with HZ frequency. |
2349 | * We call it with interrupts disabled. | 2349 | * We call it with interrupts disabled. |
2350 | * | 2350 | * |
2351 | * It also gets called by the fork code, when changing the parent's | 2351 | * It also gets called by the fork code, when changing the parent's |
2352 | * timeslices. | 2352 | * timeslices. |
2353 | */ | 2353 | */ |
2354 | void scheduler_tick(void) | 2354 | void scheduler_tick(void) |
2355 | { | 2355 | { |
2356 | int cpu = smp_processor_id(); | 2356 | int cpu = smp_processor_id(); |
2357 | runqueue_t *rq = this_rq(); | 2357 | runqueue_t *rq = this_rq(); |
2358 | task_t *p = current; | 2358 | task_t *p = current; |
2359 | unsigned long long now = sched_clock(); | 2359 | unsigned long long now = sched_clock(); |
2360 | 2360 | ||
2361 | update_cpu_clock(p, rq, now); | 2361 | update_cpu_clock(p, rq, now); |
2362 | 2362 | ||
2363 | rq->timestamp_last_tick = now; | 2363 | rq->timestamp_last_tick = now; |
2364 | 2364 | ||
2365 | if (p == rq->idle) { | 2365 | if (p == rq->idle) { |
2366 | if (wake_priority_sleeper(rq)) | 2366 | if (wake_priority_sleeper(rq)) |
2367 | goto out; | 2367 | goto out; |
2368 | rebalance_tick(cpu, rq, SCHED_IDLE); | 2368 | rebalance_tick(cpu, rq, SCHED_IDLE); |
2369 | return; | 2369 | return; |
2370 | } | 2370 | } |
2371 | 2371 | ||
2372 | /* Task might have expired already, but not scheduled off yet */ | 2372 | /* Task might have expired already, but not scheduled off yet */ |
2373 | if (p->array != rq->active) { | 2373 | if (p->array != rq->active) { |
2374 | set_tsk_need_resched(p); | 2374 | set_tsk_need_resched(p); |
2375 | goto out; | 2375 | goto out; |
2376 | } | 2376 | } |
2377 | spin_lock(&rq->lock); | 2377 | spin_lock(&rq->lock); |
2378 | /* | 2378 | /* |
2379 | * The task was running during this tick - update the | 2379 | * The task was running during this tick - update the |
2380 | * time slice counter. Note: we do not update a thread's | 2380 | * time slice counter. Note: we do not update a thread's |
2381 | * priority until it either goes to sleep or uses up its | 2381 | * priority until it either goes to sleep or uses up its |
2382 | * timeslice. This makes it possible for interactive tasks | 2382 | * timeslice. This makes it possible for interactive tasks |
2383 | * to use up their timeslices at their highest priority levels. | 2383 | * to use up their timeslices at their highest priority levels. |
2384 | */ | 2384 | */ |
2385 | if (rt_task(p)) { | 2385 | if (rt_task(p)) { |
2386 | /* | 2386 | /* |
2387 | * RR tasks need a special form of timeslice management. | 2387 | * RR tasks need a special form of timeslice management. |
2388 | * FIFO tasks have no timeslices. | 2388 | * FIFO tasks have no timeslices. |
2389 | */ | 2389 | */ |
2390 | if ((p->policy == SCHED_RR) && !--p->time_slice) { | 2390 | if ((p->policy == SCHED_RR) && !--p->time_slice) { |
2391 | p->time_slice = task_timeslice(p); | 2391 | p->time_slice = task_timeslice(p); |
2392 | p->first_time_slice = 0; | 2392 | p->first_time_slice = 0; |
2393 | set_tsk_need_resched(p); | 2393 | set_tsk_need_resched(p); |
2394 | 2394 | ||
2395 | /* put it at the end of the queue: */ | 2395 | /* put it at the end of the queue: */ |
2396 | requeue_task(p, rq->active); | 2396 | requeue_task(p, rq->active); |
2397 | } | 2397 | } |
2398 | goto out_unlock; | 2398 | goto out_unlock; |
2399 | } | 2399 | } |
2400 | if (!--p->time_slice) { | 2400 | if (!--p->time_slice) { |
2401 | dequeue_task(p, rq->active); | 2401 | dequeue_task(p, rq->active); |
2402 | set_tsk_need_resched(p); | 2402 | set_tsk_need_resched(p); |
2403 | p->prio = effective_prio(p); | 2403 | p->prio = effective_prio(p); |
2404 | p->time_slice = task_timeslice(p); | 2404 | p->time_slice = task_timeslice(p); |
2405 | p->first_time_slice = 0; | 2405 | p->first_time_slice = 0; |
2406 | 2406 | ||
2407 | if (!rq->expired_timestamp) | 2407 | if (!rq->expired_timestamp) |
2408 | rq->expired_timestamp = jiffies; | 2408 | rq->expired_timestamp = jiffies; |
2409 | if (!TASK_INTERACTIVE(p) || EXPIRED_STARVING(rq)) { | 2409 | if (!TASK_INTERACTIVE(p) || EXPIRED_STARVING(rq)) { |
2410 | enqueue_task(p, rq->expired); | 2410 | enqueue_task(p, rq->expired); |
2411 | if (p->static_prio < rq->best_expired_prio) | 2411 | if (p->static_prio < rq->best_expired_prio) |
2412 | rq->best_expired_prio = p->static_prio; | 2412 | rq->best_expired_prio = p->static_prio; |
2413 | } else | 2413 | } else |
2414 | enqueue_task(p, rq->active); | 2414 | enqueue_task(p, rq->active); |
2415 | } else { | 2415 | } else { |
2416 | /* | 2416 | /* |
2417 | * Prevent a too long timeslice allowing a task to monopolize | 2417 | * Prevent a too long timeslice allowing a task to monopolize |
2418 | * the CPU. We do this by splitting up the timeslice into | 2418 | * the CPU. We do this by splitting up the timeslice into |
2419 | * smaller pieces. | 2419 | * smaller pieces. |
2420 | * | 2420 | * |
2421 | * Note: this does not mean the task's timeslices expire or | 2421 | * Note: this does not mean the task's timeslices expire or |
2422 | * get lost in any way, they just might be preempted by | 2422 | * get lost in any way, they just might be preempted by |
2423 | * another task of equal priority. (one with higher | 2423 | * another task of equal priority. (one with higher |
2424 | * priority would have preempted this task already.) We | 2424 | * priority would have preempted this task already.) We |
2425 | * requeue this task to the end of the list on this priority | 2425 | * requeue this task to the end of the list on this priority |
2426 | * level, which is in essence a round-robin of tasks with | 2426 | * level, which is in essence a round-robin of tasks with |
2427 | * equal priority. | 2427 | * equal priority. |
2428 | * | 2428 | * |
2429 | * This only applies to tasks in the interactive | 2429 | * This only applies to tasks in the interactive |
2430 | * delta range with at least TIMESLICE_GRANULARITY to requeue. | 2430 | * delta range with at least TIMESLICE_GRANULARITY to requeue. |
2431 | */ | 2431 | */ |
2432 | if (TASK_INTERACTIVE(p) && !((task_timeslice(p) - | 2432 | if (TASK_INTERACTIVE(p) && !((task_timeslice(p) - |
2433 | p->time_slice) % TIMESLICE_GRANULARITY(p)) && | 2433 | p->time_slice) % TIMESLICE_GRANULARITY(p)) && |
2434 | (p->time_slice >= TIMESLICE_GRANULARITY(p)) && | 2434 | (p->time_slice >= TIMESLICE_GRANULARITY(p)) && |
2435 | (p->array == rq->active)) { | 2435 | (p->array == rq->active)) { |
2436 | 2436 | ||
2437 | requeue_task(p, rq->active); | 2437 | requeue_task(p, rq->active); |
2438 | set_tsk_need_resched(p); | 2438 | set_tsk_need_resched(p); |
2439 | } | 2439 | } |
2440 | } | 2440 | } |
2441 | out_unlock: | 2441 | out_unlock: |
2442 | spin_unlock(&rq->lock); | 2442 | spin_unlock(&rq->lock); |
2443 | out: | 2443 | out: |
2444 | rebalance_tick(cpu, rq, NOT_IDLE); | 2444 | rebalance_tick(cpu, rq, NOT_IDLE); |
2445 | } | 2445 | } |
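The SCHED_RR branch of scheduler_tick() above is the whole of round-robin timeslice management: decrement the slice on every tick, refill it and requeue the task at the tail of its priority list when it runs out. A minimal user-space sketch of that logic, with made-up names and a made-up slice length, purely for illustration (FIFO tasks skip this entirely, since their time_slice is never consulted):

/* Sketch of the SCHED_RR timeslice handling shown above.  All names
 * here (fake_task, DEF_TIMESLICE_TICKS) are illustrative, not kernel
 * symbols. */
#include <stdio.h>

#define DEF_TIMESLICE_TICKS 100	/* assumed timeslice length, in ticks */

struct fake_task {
	int time_slice;
	int need_resched;
};

/* What one scheduler tick does to a round-robin task. */
static void rr_tick(struct fake_task *p)
{
	if (!--p->time_slice) {
		p->time_slice = DEF_TIMESLICE_TICKS;	/* refill the slice */
		p->need_resched = 1;			/* requeue at the tail */
	}
}

int main(void)
{
	struct fake_task t = { .time_slice = 3, .need_resched = 0 };
	int tick;

	for (tick = 1; tick <= 3; tick++)
		rr_tick(&t);
	/* After 3 ticks the slice is used up: refilled and marked to resched. */
	printf("time_slice=%d need_resched=%d\n", t.time_slice, t.need_resched);
	return 0;
}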
2446 | 2446 | ||
2447 | #ifdef CONFIG_SCHED_SMT | 2447 | #ifdef CONFIG_SCHED_SMT |
2448 | static inline void wake_sleeping_dependent(int this_cpu, runqueue_t *this_rq) | 2448 | static inline void wake_sleeping_dependent(int this_cpu, runqueue_t *this_rq) |
2449 | { | 2449 | { |
2450 | struct sched_domain *sd = this_rq->sd; | 2450 | struct sched_domain *sd = this_rq->sd; |
2451 | cpumask_t sibling_map; | 2451 | cpumask_t sibling_map; |
2452 | int i; | 2452 | int i; |
2453 | 2453 | ||
2454 | if (!(sd->flags & SD_SHARE_CPUPOWER)) | 2454 | if (!(sd->flags & SD_SHARE_CPUPOWER)) |
2455 | return; | 2455 | return; |
2456 | 2456 | ||
2457 | /* | 2457 | /* |
2458 | * Unlock the current runqueue because we have to lock in | 2458 | * Unlock the current runqueue because we have to lock in |
2459 | * CPU order to avoid deadlocks. Caller knows that we might | 2459 | * CPU order to avoid deadlocks. Caller knows that we might |
2460 | * unlock. We keep IRQs disabled. | 2460 | * unlock. We keep IRQs disabled. |
2461 | */ | 2461 | */ |
2462 | spin_unlock(&this_rq->lock); | 2462 | spin_unlock(&this_rq->lock); |
2463 | 2463 | ||
2464 | sibling_map = sd->span; | 2464 | sibling_map = sd->span; |
2465 | 2465 | ||
2466 | for_each_cpu_mask(i, sibling_map) | 2466 | for_each_cpu_mask(i, sibling_map) |
2467 | spin_lock(&cpu_rq(i)->lock); | 2467 | spin_lock(&cpu_rq(i)->lock); |
2468 | /* | 2468 | /* |
2469 | * We clear this CPU from the mask. This both simplifies the | 2469 | * We clear this CPU from the mask. This both simplifies the |
2470 | * inner loop and keeps this_rq locked when we exit: | 2470 | * inner loop and keeps this_rq locked when we exit: |
2471 | */ | 2471 | */ |
2472 | cpu_clear(this_cpu, sibling_map); | 2472 | cpu_clear(this_cpu, sibling_map); |
2473 | 2473 | ||
2474 | for_each_cpu_mask(i, sibling_map) { | 2474 | for_each_cpu_mask(i, sibling_map) { |
2475 | runqueue_t *smt_rq = cpu_rq(i); | 2475 | runqueue_t *smt_rq = cpu_rq(i); |
2476 | 2476 | ||
2477 | /* | 2477 | /* |
2478 | * If an SMT sibling task is sleeping due to priority | 2478 | * If an SMT sibling task is sleeping due to priority |
2479 | * reasons, wake it up now. | 2479 | * reasons, wake it up now. |
2480 | */ | 2480 | */ |
2481 | if (smt_rq->curr == smt_rq->idle && smt_rq->nr_running) | 2481 | if (smt_rq->curr == smt_rq->idle && smt_rq->nr_running) |
2482 | resched_task(smt_rq->idle); | 2482 | resched_task(smt_rq->idle); |
2483 | } | 2483 | } |
2484 | 2484 | ||
2485 | for_each_cpu_mask(i, sibling_map) | 2485 | for_each_cpu_mask(i, sibling_map) |
2486 | spin_unlock(&cpu_rq(i)->lock); | 2486 | spin_unlock(&cpu_rq(i)->lock); |
2487 | /* | 2487 | /* |
2488 | * We exit with this_cpu's rq still held and IRQs | 2488 | * We exit with this_cpu's rq still held and IRQs |
2489 | * still disabled: | 2489 | * still disabled: |
2490 | */ | 2490 | */ |
2491 | } | 2491 | } |
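wake_sleeping_dependent() drops its own runqueue lock and then takes every sibling runqueue lock in CPU order; the fixed ordering is what rules out an ABBA deadlock when two siblings run this path against each other. A stand-alone pthreads sketch of the same rule, with entirely hypothetical names, only to show why a fixed order cannot deadlock:

#include <pthread.h>
#include <stdio.h>

#define NR_FAKE_CPUS 2

static pthread_mutex_t fake_rq_lock[NR_FAKE_CPUS] = {
	PTHREAD_MUTEX_INITIALIZER, PTHREAD_MUTEX_INITIALIZER
};

/* Take both "runqueue" locks, always lowest CPU number first. */
static void lock_both(int a, int b)
{
	int lo = a < b ? a : b;
	int hi = a < b ? b : a;

	pthread_mutex_lock(&fake_rq_lock[lo]);
	pthread_mutex_lock(&fake_rq_lock[hi]);
}

static void unlock_both(int a, int b)
{
	pthread_mutex_unlock(&fake_rq_lock[a]);
	pthread_mutex_unlock(&fake_rq_lock[b]);
}

static void *sibling(void *arg)
{
	int me = (int)(long)arg;

	lock_both(me, 1 - me);	/* both threads end up locking 0 then 1 */
	unlock_both(me, 1 - me);
	return NULL;
}

int main(void)
{
	pthread_t t0, t1;

	pthread_create(&t0, NULL, sibling, (void *)0L);
	pthread_create(&t1, NULL, sibling, (void *)1L);
	pthread_join(t0, NULL);
	pthread_join(t1, NULL);
	puts("no deadlock: both threads honoured the same lock order");
	return 0;
}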
2492 | 2492 | ||
2493 | static inline int dependent_sleeper(int this_cpu, runqueue_t *this_rq) | 2493 | static inline int dependent_sleeper(int this_cpu, runqueue_t *this_rq) |
2494 | { | 2494 | { |
2495 | struct sched_domain *sd = this_rq->sd; | 2495 | struct sched_domain *sd = this_rq->sd; |
2496 | cpumask_t sibling_map; | 2496 | cpumask_t sibling_map; |
2497 | prio_array_t *array; | 2497 | prio_array_t *array; |
2498 | int ret = 0, i; | 2498 | int ret = 0, i; |
2499 | task_t *p; | 2499 | task_t *p; |
2500 | 2500 | ||
2501 | if (!(sd->flags & SD_SHARE_CPUPOWER)) | 2501 | if (!(sd->flags & SD_SHARE_CPUPOWER)) |
2502 | return 0; | 2502 | return 0; |
2503 | 2503 | ||
2504 | /* | 2504 | /* |
2505 | * The same locking rules and details apply as for | 2505 | * The same locking rules and details apply as for |
2506 | * wake_sleeping_dependent(): | 2506 | * wake_sleeping_dependent(): |
2507 | */ | 2507 | */ |
2508 | spin_unlock(&this_rq->lock); | 2508 | spin_unlock(&this_rq->lock); |
2509 | sibling_map = sd->span; | 2509 | sibling_map = sd->span; |
2510 | for_each_cpu_mask(i, sibling_map) | 2510 | for_each_cpu_mask(i, sibling_map) |
2511 | spin_lock(&cpu_rq(i)->lock); | 2511 | spin_lock(&cpu_rq(i)->lock); |
2512 | cpu_clear(this_cpu, sibling_map); | 2512 | cpu_clear(this_cpu, sibling_map); |
2513 | 2513 | ||
2514 | /* | 2514 | /* |
2515 | * Establish next task to be run - it might have gone away because | 2515 | * Establish next task to be run - it might have gone away because |
2516 | * we released the runqueue lock above: | 2516 | * we released the runqueue lock above: |
2517 | */ | 2517 | */ |
2518 | if (!this_rq->nr_running) | 2518 | if (!this_rq->nr_running) |
2519 | goto out_unlock; | 2519 | goto out_unlock; |
2520 | array = this_rq->active; | 2520 | array = this_rq->active; |
2521 | if (!array->nr_active) | 2521 | if (!array->nr_active) |
2522 | array = this_rq->expired; | 2522 | array = this_rq->expired; |
2523 | BUG_ON(!array->nr_active); | 2523 | BUG_ON(!array->nr_active); |
2524 | 2524 | ||
2525 | p = list_entry(array->queue[sched_find_first_bit(array->bitmap)].next, | 2525 | p = list_entry(array->queue[sched_find_first_bit(array->bitmap)].next, |
2526 | task_t, run_list); | 2526 | task_t, run_list); |
2527 | 2527 | ||
2528 | for_each_cpu_mask(i, sibling_map) { | 2528 | for_each_cpu_mask(i, sibling_map) { |
2529 | runqueue_t *smt_rq = cpu_rq(i); | 2529 | runqueue_t *smt_rq = cpu_rq(i); |
2530 | task_t *smt_curr = smt_rq->curr; | 2530 | task_t *smt_curr = smt_rq->curr; |
2531 | 2531 | ||
2532 | /* | 2532 | /* |
2533 | * If a user task with lower static priority than the | 2533 | * If a user task with lower static priority than the |
2534 | * running task on the SMT sibling is trying to schedule, | 2534 | * running task on the SMT sibling is trying to schedule, |
2535 | * delay it till there is proportionately less timeslice | 2535 | * delay it till there is proportionately less timeslice |
2536 | * left of the sibling task to prevent a lower priority | 2536 | * left of the sibling task to prevent a lower priority |
2537 | * task from using an unfair proportion of the | 2537 | * task from using an unfair proportion of the |
2538 | * physical cpu's resources. -ck | 2538 | * physical cpu's resources. -ck |
2539 | */ | 2539 | */ |
2540 | if (((smt_curr->time_slice * (100 - sd->per_cpu_gain) / 100) > | 2540 | if (((smt_curr->time_slice * (100 - sd->per_cpu_gain) / 100) > |
2541 | task_timeslice(p) || rt_task(smt_curr)) && | 2541 | task_timeslice(p) || rt_task(smt_curr)) && |
2542 | p->mm && smt_curr->mm && !rt_task(p)) | 2542 | p->mm && smt_curr->mm && !rt_task(p)) |
2543 | ret = 1; | 2543 | ret = 1; |
2544 | 2544 | ||
2545 | /* | 2545 | /* |
2546 | * Reschedule a lower priority task on the SMT sibling, | 2546 | * Reschedule a lower priority task on the SMT sibling, |
2547 | * or wake it up if it has been put to sleep for priority | 2547 | * or wake it up if it has been put to sleep for priority |
2548 | * reasons. | 2548 | * reasons. |
2549 | */ | 2549 | */ |
2550 | if ((((p->time_slice * (100 - sd->per_cpu_gain) / 100) > | 2550 | if ((((p->time_slice * (100 - sd->per_cpu_gain) / 100) > |
2551 | task_timeslice(smt_curr) || rt_task(p)) && | 2551 | task_timeslice(smt_curr) || rt_task(p)) && |
2552 | smt_curr->mm && p->mm && !rt_task(smt_curr)) || | 2552 | smt_curr->mm && p->mm && !rt_task(smt_curr)) || |
2553 | (smt_curr == smt_rq->idle && smt_rq->nr_running)) | 2553 | (smt_curr == smt_rq->idle && smt_rq->nr_running)) |
2554 | resched_task(smt_curr); | 2554 | resched_task(smt_curr); |
2555 | } | 2555 | } |
2556 | out_unlock: | 2556 | out_unlock: |
2557 | for_each_cpu_mask(i, sibling_map) | 2557 | for_each_cpu_mask(i, sibling_map) |
2558 | spin_unlock(&cpu_rq(i)->lock); | 2558 | spin_unlock(&cpu_rq(i)->lock); |
2559 | return ret; | 2559 | return ret; |
2560 | } | 2560 | } |
2561 | #else | 2561 | #else |
2562 | static inline void wake_sleeping_dependent(int this_cpu, runqueue_t *this_rq) | 2562 | static inline void wake_sleeping_dependent(int this_cpu, runqueue_t *this_rq) |
2563 | { | 2563 | { |
2564 | } | 2564 | } |
2565 | 2565 | ||
2566 | static inline int dependent_sleeper(int this_cpu, runqueue_t *this_rq) | 2566 | static inline int dependent_sleeper(int this_cpu, runqueue_t *this_rq) |
2567 | { | 2567 | { |
2568 | return 0; | 2568 | return 0; |
2569 | } | 2569 | } |
2570 | #endif | 2570 | #endif |
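To make the per_cpu_gain throttle in dependent_sleeper() concrete, here is the arithmetic worked through once, assuming an SMT-domain gain of 25 (an illustrative value, not necessarily the configured default):

#include <stdio.h>

int main(void)
{
	int per_cpu_gain = 25;		/* assumed SMT value, illustrative   */
	int sibling_slice_left = 80;	/* time_slice left on the sibling    */
	int newcomer_slice = 50;	/* task_timeslice() of the newcomer  */
	int scaled = sibling_slice_left * (100 - per_cpu_gain) / 100;

	/* 80 * 75 / 100 = 60 > 50, so the newcomer is held back (ret = 1). */
	printf("scaled=%d -> %s\n", scaled,
	       scaled > newcomer_slice ? "delay the newcomer" : "let it run");
	return 0;
}

Once the sibling has burned its slice down far enough that the scaled value drops below task_timeslice(p), the newcomer is allowed onto the CPU.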
2571 | 2571 | ||
2572 | #if defined(CONFIG_PREEMPT) && defined(CONFIG_DEBUG_PREEMPT) | 2572 | #if defined(CONFIG_PREEMPT) && defined(CONFIG_DEBUG_PREEMPT) |
2573 | 2573 | ||
2574 | void fastcall add_preempt_count(int val) | 2574 | void fastcall add_preempt_count(int val) |
2575 | { | 2575 | { |
2576 | /* | 2576 | /* |
2577 | * Underflow? | 2577 | * Underflow? |
2578 | */ | 2578 | */ |
2579 | BUG_ON(((int)preempt_count() < 0)); | 2579 | BUG_ON(((int)preempt_count() < 0)); |
2580 | preempt_count() += val; | 2580 | preempt_count() += val; |
2581 | /* | 2581 | /* |
2582 | * Spinlock count overflowing soon? | 2582 | * Spinlock count overflowing soon? |
2583 | */ | 2583 | */ |
2584 | BUG_ON((preempt_count() & PREEMPT_MASK) >= PREEMPT_MASK-10); | 2584 | BUG_ON((preempt_count() & PREEMPT_MASK) >= PREEMPT_MASK-10); |
2585 | } | 2585 | } |
2586 | EXPORT_SYMBOL(add_preempt_count); | 2586 | EXPORT_SYMBOL(add_preempt_count); |
2587 | 2587 | ||
2588 | void fastcall sub_preempt_count(int val) | 2588 | void fastcall sub_preempt_count(int val) |
2589 | { | 2589 | { |
2590 | /* | 2590 | /* |
2591 | * Underflow? | 2591 | * Underflow? |
2592 | */ | 2592 | */ |
2593 | BUG_ON(val > preempt_count()); | 2593 | BUG_ON(val > preempt_count()); |
2594 | /* | 2594 | /* |
2595 | * Is the spinlock portion underflowing? | 2595 | * Is the spinlock portion underflowing? |
2596 | */ | 2596 | */ |
2597 | BUG_ON((val < PREEMPT_MASK) && !(preempt_count() & PREEMPT_MASK)); | 2597 | BUG_ON((val < PREEMPT_MASK) && !(preempt_count() & PREEMPT_MASK)); |
2598 | preempt_count() -= val; | 2598 | preempt_count() -= val; |
2599 | } | 2599 | } |
2600 | EXPORT_SYMBOL(sub_preempt_count); | 2600 | EXPORT_SYMBOL(sub_preempt_count); |
2601 | 2601 | ||
2602 | #endif | 2602 | #endif |
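The two checks above only fire when preempt_disable()/preempt_enable() calls are unbalanced. A kernel-style sketch of the balanced pattern they expect, including a smp_processor_id() read that is only stable while preemption is off (illustrative code, not taken from any particular driver):

#include <linux/preempt.h>
#include <linux/smp.h>

/* example_per_cpu_touch is a hypothetical function. */
static void example_per_cpu_touch(void)
{
	int cpu;

	preempt_disable();		/* roughly: add_preempt_count(1)        */
	cpu = smp_processor_id();	/* stable: the task cannot migrate now  */
	/* ... touch per-CPU data belonging to 'cpu' ... */
	preempt_enable();		/* roughly: sub_preempt_count(1) + resched check */
}

/*
 * One preempt_enable() too many would call sub_preempt_count() with the
 * count already at zero and trip the underflow BUG_ON(); leaking enough
 * nested preempt_disable()s would eventually hit the "overflowing soon"
 * check instead.
 */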
2603 | 2603 | ||
2604 | /* | 2604 | /* |
2605 | * schedule() is the main scheduler function. | 2605 | * schedule() is the main scheduler function. |
2606 | */ | 2606 | */ |
2607 | asmlinkage void __sched schedule(void) | 2607 | asmlinkage void __sched schedule(void) |
2608 | { | 2608 | { |
2609 | long *switch_count; | 2609 | long *switch_count; |
2610 | task_t *prev, *next; | 2610 | task_t *prev, *next; |
2611 | runqueue_t *rq; | 2611 | runqueue_t *rq; |
2612 | prio_array_t *array; | 2612 | prio_array_t *array; |
2613 | struct list_head *queue; | 2613 | struct list_head *queue; |
2614 | unsigned long long now; | 2614 | unsigned long long now; |
2615 | unsigned long run_time; | 2615 | unsigned long run_time; |
2616 | int cpu, idx; | 2616 | int cpu, idx; |
2617 | 2617 | ||
2618 | /* | 2618 | /* |
2619 | * Test if we are atomic. Since do_exit() needs to call into | 2619 | * Test if we are atomic. Since do_exit() needs to call into |
2620 | * schedule() atomically, we ignore that path for now. | 2620 | * schedule() atomically, we ignore that path for now. |
2621 | * Otherwise, whine if we are scheduling when we should not be. | 2621 | * Otherwise, whine if we are scheduling when we should not be. |
2622 | */ | 2622 | */ |
2623 | if (likely(!current->exit_state)) { | 2623 | if (likely(!current->exit_state)) { |
2624 | if (unlikely(in_atomic())) { | 2624 | if (unlikely(in_atomic())) { |
2625 | printk(KERN_ERR "scheduling while atomic: " | 2625 | printk(KERN_ERR "scheduling while atomic: " |
2626 | "%s/0x%08x/%d\n", | 2626 | "%s/0x%08x/%d\n", |
2627 | current->comm, preempt_count(), current->pid); | 2627 | current->comm, preempt_count(), current->pid); |
2628 | dump_stack(); | 2628 | dump_stack(); |
2629 | } | 2629 | } |
2630 | } | 2630 | } |
2631 | profile_hit(SCHED_PROFILING, __builtin_return_address(0)); | 2631 | profile_hit(SCHED_PROFILING, __builtin_return_address(0)); |
2632 | 2632 | ||
2633 | need_resched: | 2633 | need_resched: |
2634 | preempt_disable(); | 2634 | preempt_disable(); |
2635 | prev = current; | 2635 | prev = current; |
2636 | release_kernel_lock(prev); | 2636 | release_kernel_lock(prev); |
2637 | need_resched_nonpreemptible: | 2637 | need_resched_nonpreemptible: |
2638 | rq = this_rq(); | 2638 | rq = this_rq(); |
2639 | 2639 | ||
2640 | /* | 2640 | /* |
2641 | * The idle thread is not allowed to schedule! | 2641 | * The idle thread is not allowed to schedule! |
2642 | * Remove this check after it has been exercised a bit. | 2642 | * Remove this check after it has been exercised a bit. |
2643 | */ | 2643 | */ |
2644 | if (unlikely(prev == rq->idle) && prev->state != TASK_RUNNING) { | 2644 | if (unlikely(prev == rq->idle) && prev->state != TASK_RUNNING) { |
2645 | printk(KERN_ERR "bad: scheduling from the idle thread!\n"); | 2645 | printk(KERN_ERR "bad: scheduling from the idle thread!\n"); |
2646 | dump_stack(); | 2646 | dump_stack(); |
2647 | } | 2647 | } |
2648 | 2648 | ||
2649 | schedstat_inc(rq, sched_cnt); | 2649 | schedstat_inc(rq, sched_cnt); |
2650 | now = sched_clock(); | 2650 | now = sched_clock(); |
2651 | if (likely((long long)(now - prev->timestamp) < NS_MAX_SLEEP_AVG)) { | 2651 | if (likely((long long)(now - prev->timestamp) < NS_MAX_SLEEP_AVG)) { |
2652 | run_time = now - prev->timestamp; | 2652 | run_time = now - prev->timestamp; |
2653 | if (unlikely((long long)(now - prev->timestamp) < 0)) | 2653 | if (unlikely((long long)(now - prev->timestamp) < 0)) |
2654 | run_time = 0; | 2654 | run_time = 0; |
2655 | } else | 2655 | } else |
2656 | run_time = NS_MAX_SLEEP_AVG; | 2656 | run_time = NS_MAX_SLEEP_AVG; |
2657 | 2657 | ||
2658 | /* | 2658 | /* |
2659 | * Tasks are charged proportionately less run_time at high sleep_avg to | 2659 | * Tasks are charged proportionately less run_time at high sleep_avg to |
2660 | * delay them losing their interactive status | 2660 | * delay them losing their interactive status |
2661 | */ | 2661 | */ |
2662 | run_time /= (CURRENT_BONUS(prev) ? : 1); | 2662 | run_time /= (CURRENT_BONUS(prev) ? : 1); |
2663 | 2663 | ||
2664 | spin_lock_irq(&rq->lock); | 2664 | spin_lock_irq(&rq->lock); |
2665 | 2665 | ||
2666 | if (unlikely(prev->flags & PF_DEAD)) | 2666 | if (unlikely(prev->flags & PF_DEAD)) |
2667 | prev->state = EXIT_DEAD; | 2667 | prev->state = EXIT_DEAD; |
2668 | 2668 | ||
2669 | switch_count = &prev->nivcsw; | 2669 | switch_count = &prev->nivcsw; |
2670 | if (prev->state && !(preempt_count() & PREEMPT_ACTIVE)) { | 2670 | if (prev->state && !(preempt_count() & PREEMPT_ACTIVE)) { |
2671 | switch_count = &prev->nvcsw; | 2671 | switch_count = &prev->nvcsw; |
2672 | if (unlikely((prev->state & TASK_INTERRUPTIBLE) && | 2672 | if (unlikely((prev->state & TASK_INTERRUPTIBLE) && |
2673 | unlikely(signal_pending(prev)))) | 2673 | unlikely(signal_pending(prev)))) |
2674 | prev->state = TASK_RUNNING; | 2674 | prev->state = TASK_RUNNING; |
2675 | else { | 2675 | else { |
2676 | if (prev->state == TASK_UNINTERRUPTIBLE) | 2676 | if (prev->state == TASK_UNINTERRUPTIBLE) |
2677 | rq->nr_uninterruptible++; | 2677 | rq->nr_uninterruptible++; |
2678 | deactivate_task(prev, rq); | 2678 | deactivate_task(prev, rq); |
2679 | } | 2679 | } |
2680 | } | 2680 | } |
2681 | 2681 | ||
2682 | cpu = smp_processor_id(); | 2682 | cpu = smp_processor_id(); |
2683 | if (unlikely(!rq->nr_running)) { | 2683 | if (unlikely(!rq->nr_running)) { |
2684 | go_idle: | 2684 | go_idle: |
2685 | idle_balance(cpu, rq); | 2685 | idle_balance(cpu, rq); |
2686 | if (!rq->nr_running) { | 2686 | if (!rq->nr_running) { |
2687 | next = rq->idle; | 2687 | next = rq->idle; |
2688 | rq->expired_timestamp = 0; | 2688 | rq->expired_timestamp = 0; |
2689 | wake_sleeping_dependent(cpu, rq); | 2689 | wake_sleeping_dependent(cpu, rq); |
2690 | /* | 2690 | /* |
2691 | * wake_sleeping_dependent() might have released | 2691 | * wake_sleeping_dependent() might have released |
2692 | * the runqueue, so break out if we got new | 2692 | * the runqueue, so break out if we got new |
2693 | * tasks meanwhile: | 2693 | * tasks meanwhile: |
2694 | */ | 2694 | */ |
2695 | if (!rq->nr_running) | 2695 | if (!rq->nr_running) |
2696 | goto switch_tasks; | 2696 | goto switch_tasks; |
2697 | } | 2697 | } |
2698 | } else { | 2698 | } else { |
2699 | if (dependent_sleeper(cpu, rq)) { | 2699 | if (dependent_sleeper(cpu, rq)) { |
2700 | next = rq->idle; | 2700 | next = rq->idle; |
2701 | goto switch_tasks; | 2701 | goto switch_tasks; |
2702 | } | 2702 | } |
2703 | /* | 2703 | /* |
2704 | * dependent_sleeper() releases and reacquires the runqueue | 2704 | * dependent_sleeper() releases and reacquires the runqueue |
2705 | * lock, hence go into the idle loop if the rq went | 2705 | * lock, hence go into the idle loop if the rq went |
2706 | * empty meanwhile: | 2706 | * empty meanwhile: |
2707 | */ | 2707 | */ |
2708 | if (unlikely(!rq->nr_running)) | 2708 | if (unlikely(!rq->nr_running)) |
2709 | goto go_idle; | 2709 | goto go_idle; |
2710 | } | 2710 | } |
2711 | 2711 | ||
2712 | array = rq->active; | 2712 | array = rq->active; |
2713 | if (unlikely(!array->nr_active)) { | 2713 | if (unlikely(!array->nr_active)) { |
2714 | /* | 2714 | /* |
2715 | * Switch the active and expired arrays. | 2715 | * Switch the active and expired arrays. |
2716 | */ | 2716 | */ |
2717 | schedstat_inc(rq, sched_switch); | 2717 | schedstat_inc(rq, sched_switch); |
2718 | rq->active = rq->expired; | 2718 | rq->active = rq->expired; |
2719 | rq->expired = array; | 2719 | rq->expired = array; |
2720 | array = rq->active; | 2720 | array = rq->active; |
2721 | rq->expired_timestamp = 0; | 2721 | rq->expired_timestamp = 0; |
2722 | rq->best_expired_prio = MAX_PRIO; | 2722 | rq->best_expired_prio = MAX_PRIO; |
2723 | } | 2723 | } |
2724 | 2724 | ||
2725 | idx = sched_find_first_bit(array->bitmap); | 2725 | idx = sched_find_first_bit(array->bitmap); |
2726 | queue = array->queue + idx; | 2726 | queue = array->queue + idx; |
2727 | next = list_entry(queue->next, task_t, run_list); | 2727 | next = list_entry(queue->next, task_t, run_list); |
2728 | 2728 | ||
2729 | if (!rt_task(next) && next->activated > 0) { | 2729 | if (!rt_task(next) && next->activated > 0) { |
2730 | unsigned long long delta = now - next->timestamp; | 2730 | unsigned long long delta = now - next->timestamp; |
2731 | if (unlikely((long long)(now - next->timestamp) < 0)) | 2731 | if (unlikely((long long)(now - next->timestamp) < 0)) |
2732 | delta = 0; | 2732 | delta = 0; |
2733 | 2733 | ||
2734 | if (next->activated == 1) | 2734 | if (next->activated == 1) |
2735 | delta = delta * (ON_RUNQUEUE_WEIGHT * 128 / 100) / 128; | 2735 | delta = delta * (ON_RUNQUEUE_WEIGHT * 128 / 100) / 128; |
2736 | 2736 | ||
2737 | array = next->array; | 2737 | array = next->array; |
2738 | dequeue_task(next, array); | 2738 | dequeue_task(next, array); |
2739 | recalc_task_prio(next, next->timestamp + delta); | 2739 | recalc_task_prio(next, next->timestamp + delta); |
2740 | enqueue_task(next, array); | 2740 | enqueue_task(next, array); |
2741 | } | 2741 | } |
2742 | next->activated = 0; | 2742 | next->activated = 0; |
2743 | switch_tasks: | 2743 | switch_tasks: |
2744 | if (next == rq->idle) | 2744 | if (next == rq->idle) |
2745 | schedstat_inc(rq, sched_goidle); | 2745 | schedstat_inc(rq, sched_goidle); |
2746 | prefetch(next); | 2746 | prefetch(next); |
2747 | clear_tsk_need_resched(prev); | 2747 | clear_tsk_need_resched(prev); |
2748 | rcu_qsctr_inc(task_cpu(prev)); | 2748 | rcu_qsctr_inc(task_cpu(prev)); |
2749 | 2749 | ||
2750 | update_cpu_clock(prev, rq, now); | 2750 | update_cpu_clock(prev, rq, now); |
2751 | 2751 | ||
2752 | prev->sleep_avg -= run_time; | 2752 | prev->sleep_avg -= run_time; |
2753 | if ((long)prev->sleep_avg <= 0) | 2753 | if ((long)prev->sleep_avg <= 0) |
2754 | prev->sleep_avg = 0; | 2754 | prev->sleep_avg = 0; |
2755 | prev->timestamp = prev->last_ran = now; | 2755 | prev->timestamp = prev->last_ran = now; |
2756 | 2756 | ||
2757 | sched_info_switch(prev, next); | 2757 | sched_info_switch(prev, next); |
2758 | if (likely(prev != next)) { | 2758 | if (likely(prev != next)) { |
2759 | next->timestamp = now; | 2759 | next->timestamp = now; |
2760 | rq->nr_switches++; | 2760 | rq->nr_switches++; |
2761 | rq->curr = next; | 2761 | rq->curr = next; |
2762 | ++*switch_count; | 2762 | ++*switch_count; |
2763 | 2763 | ||
2764 | prepare_arch_switch(rq, next); | 2764 | prepare_arch_switch(rq, next); |
2765 | prev = context_switch(rq, prev, next); | 2765 | prev = context_switch(rq, prev, next); |
2766 | barrier(); | 2766 | barrier(); |
2767 | 2767 | ||
2768 | finish_task_switch(prev); | 2768 | finish_task_switch(prev); |
2769 | } else | 2769 | } else |
2770 | spin_unlock_irq(&rq->lock); | 2770 | spin_unlock_irq(&rq->lock); |
2771 | 2771 | ||
2772 | prev = current; | 2772 | prev = current; |
2773 | if (unlikely(reacquire_kernel_lock(prev) < 0)) | 2773 | if (unlikely(reacquire_kernel_lock(prev) < 0)) |
2774 | goto need_resched_nonpreemptible; | 2774 | goto need_resched_nonpreemptible; |
2775 | preempt_enable_no_resched(); | 2775 | preempt_enable_no_resched(); |
2776 | if (unlikely(test_thread_flag(TIF_NEED_RESCHED))) | 2776 | if (unlikely(test_thread_flag(TIF_NEED_RESCHED))) |
2777 | goto need_resched; | 2777 | goto need_resched; |
2778 | } | 2778 | } |
2779 | 2779 | ||
2780 | EXPORT_SYMBOL(schedule); | 2780 | EXPORT_SYMBOL(schedule); |
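The "scheduling while atomic" whine near the top of schedule() catches callers that sleep with an elevated preempt count, for example under a spinlock. A hedged sketch of the legal pattern: finish the critical section first, then set the task state and sleep (example_lock and the delay are made up):

#include <linux/sched.h>
#include <linux/spinlock.h>

static spinlock_t example_lock = SPIN_LOCK_UNLOCKED;

static void example_sleep_politely(void)
{
	spin_lock(&example_lock);
	/* ... short, non-sleeping critical section ... */
	spin_unlock(&example_lock);

	/* Only with no spinlocks held and preemption enabled may we sleep: */
	set_current_state(TASK_INTERRUPTIBLE);
	schedule_timeout(HZ);		/* sleep for roughly one second */
}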
2781 | 2781 | ||
2782 | #ifdef CONFIG_PREEMPT | 2782 | #ifdef CONFIG_PREEMPT |
2783 | /* | 2783 | /* |
2784 | * this is the entry point to schedule() from in-kernel preemption | 2784 | * this is the entry point to schedule() from in-kernel preemption |
2785 | * off of preempt_enable. Kernel preemption off the return-from-interrupt | 2785 | * off of preempt_enable. Kernel preemption off the return-from-interrupt |
2786 | * path goes through preempt_schedule_irq() and calls schedule() directly. | 2786 | * path goes through preempt_schedule_irq() and calls schedule() directly. |
2787 | */ | 2787 | */ |
2788 | asmlinkage void __sched preempt_schedule(void) | 2788 | asmlinkage void __sched preempt_schedule(void) |
2789 | { | 2789 | { |
2790 | struct thread_info *ti = current_thread_info(); | 2790 | struct thread_info *ti = current_thread_info(); |
2791 | #ifdef CONFIG_PREEMPT_BKL | 2791 | #ifdef CONFIG_PREEMPT_BKL |
2792 | struct task_struct *task = current; | 2792 | struct task_struct *task = current; |
2793 | int saved_lock_depth; | 2793 | int saved_lock_depth; |
2794 | #endif | 2794 | #endif |
2795 | /* | 2795 | /* |
2796 | * If there is a non-zero preempt_count or interrupts are disabled, | 2796 | * If there is a non-zero preempt_count or interrupts are disabled, |
2797 | * we do not want to preempt the current task. Just return.. | 2797 | * we do not want to preempt the current task. Just return.. |
2798 | */ | 2798 | */ |
2799 | if (unlikely(ti->preempt_count || irqs_disabled())) | 2799 | if (unlikely(ti->preempt_count || irqs_disabled())) |
2800 | return; | 2800 | return; |
2801 | 2801 | ||
2802 | need_resched: | 2802 | need_resched: |
2803 | add_preempt_count(PREEMPT_ACTIVE); | 2803 | add_preempt_count(PREEMPT_ACTIVE); |
2804 | /* | 2804 | /* |
2805 | * We keep the big kernel semaphore locked, but we | 2805 | * We keep the big kernel semaphore locked, but we |
2806 | * clear ->lock_depth so that schedule() doesn't | 2806 | * clear ->lock_depth so that schedule() doesn't |
2807 | * auto-release the semaphore: | 2807 | * auto-release the semaphore: |
2808 | */ | 2808 | */ |
2809 | #ifdef CONFIG_PREEMPT_BKL | 2809 | #ifdef CONFIG_PREEMPT_BKL |
2810 | saved_lock_depth = task->lock_depth; | 2810 | saved_lock_depth = task->lock_depth; |
2811 | task->lock_depth = -1; | 2811 | task->lock_depth = -1; |
2812 | #endif | 2812 | #endif |
2813 | schedule(); | 2813 | schedule(); |
2814 | #ifdef CONFIG_PREEMPT_BKL | 2814 | #ifdef CONFIG_PREEMPT_BKL |
2815 | task->lock_depth = saved_lock_depth; | 2815 | task->lock_depth = saved_lock_depth; |
2816 | #endif | 2816 | #endif |
2817 | sub_preempt_count(PREEMPT_ACTIVE); | 2817 | sub_preempt_count(PREEMPT_ACTIVE); |
2818 | 2818 | ||
2819 | /* we could miss a preemption opportunity between schedule and now */ | 2819 | /* we could miss a preemption opportunity between schedule and now */ |
2820 | barrier(); | 2820 | barrier(); |
2821 | if (unlikely(test_thread_flag(TIF_NEED_RESCHED))) | 2821 | if (unlikely(test_thread_flag(TIF_NEED_RESCHED))) |
2822 | goto need_resched; | 2822 | goto need_resched; |
2823 | } | 2823 | } |
2824 | 2824 | ||
2825 | EXPORT_SYMBOL(preempt_schedule); | 2825 | EXPORT_SYMBOL(preempt_schedule); |
2826 | 2826 | ||
2827 | /* | 2827 | /* |
2828 | * this is the entry point to schedule() from kernel preemption | 2828 | * this is the entry point to schedule() from kernel preemption |
2829 | * off of irq context. | 2829 | * off of irq context. |
2830 | * Note that this is called and returns with irqs disabled. This will | 2830 | * Note that this is called and returns with irqs disabled. This will |
2831 | * protect us against recursive calling from irq. | 2831 | * protect us against recursive calling from irq. |
2832 | */ | 2832 | */ |
2833 | asmlinkage void __sched preempt_schedule_irq(void) | 2833 | asmlinkage void __sched preempt_schedule_irq(void) |
2834 | { | 2834 | { |
2835 | struct thread_info *ti = current_thread_info(); | 2835 | struct thread_info *ti = current_thread_info(); |
2836 | #ifdef CONFIG_PREEMPT_BKL | 2836 | #ifdef CONFIG_PREEMPT_BKL |
2837 | struct task_struct *task = current; | 2837 | struct task_struct *task = current; |
2838 | int saved_lock_depth; | 2838 | int saved_lock_depth; |
2839 | #endif | 2839 | #endif |
2840 | /* Catch callers which need to be fixed*/ | 2840 | /* Catch callers which need to be fixed*/ |
2841 | BUG_ON(ti->preempt_count || !irqs_disabled()); | 2841 | BUG_ON(ti->preempt_count || !irqs_disabled()); |
2842 | 2842 | ||
2843 | need_resched: | 2843 | need_resched: |
2844 | add_preempt_count(PREEMPT_ACTIVE); | 2844 | add_preempt_count(PREEMPT_ACTIVE); |
2845 | /* | 2845 | /* |
2846 | * We keep the big kernel semaphore locked, but we | 2846 | * We keep the big kernel semaphore locked, but we |
2847 | * clear ->lock_depth so that schedule() doesn't | 2847 | * clear ->lock_depth so that schedule() doesn't |
2848 | * auto-release the semaphore: | 2848 | * auto-release the semaphore: |
2849 | */ | 2849 | */ |
2850 | #ifdef CONFIG_PREEMPT_BKL | 2850 | #ifdef CONFIG_PREEMPT_BKL |
2851 | saved_lock_depth = task->lock_depth; | 2851 | saved_lock_depth = task->lock_depth; |
2852 | task->lock_depth = -1; | 2852 | task->lock_depth = -1; |
2853 | #endif | 2853 | #endif |
2854 | local_irq_enable(); | 2854 | local_irq_enable(); |
2855 | schedule(); | 2855 | schedule(); |
2856 | local_irq_disable(); | 2856 | local_irq_disable(); |
2857 | #ifdef CONFIG_PREEMPT_BKL | 2857 | #ifdef CONFIG_PREEMPT_BKL |
2858 | task->lock_depth = saved_lock_depth; | 2858 | task->lock_depth = saved_lock_depth; |
2859 | #endif | 2859 | #endif |
2860 | sub_preempt_count(PREEMPT_ACTIVE); | 2860 | sub_preempt_count(PREEMPT_ACTIVE); |
2861 | 2861 | ||
2862 | /* we could miss a preemption opportunity between schedule and now */ | 2862 | /* we could miss a preemption opportunity between schedule and now */ |
2863 | barrier(); | 2863 | barrier(); |
2864 | if (unlikely(test_thread_flag(TIF_NEED_RESCHED))) | 2864 | if (unlikely(test_thread_flag(TIF_NEED_RESCHED))) |
2865 | goto need_resched; | 2865 | goto need_resched; |
2866 | } | 2866 | } |
2867 | 2867 | ||
2868 | #endif /* CONFIG_PREEMPT */ | 2868 | #endif /* CONFIG_PREEMPT */ |
2869 | 2869 | ||
2870 | int default_wake_function(wait_queue_t *curr, unsigned mode, int sync, void *key) | 2870 | int default_wake_function(wait_queue_t *curr, unsigned mode, int sync, void *key) |
2871 | { | 2871 | { |
2872 | task_t *p = curr->task; | 2872 | task_t *p = curr->task; |
2873 | return try_to_wake_up(p, mode, sync); | 2873 | return try_to_wake_up(p, mode, sync); |
2874 | } | 2874 | } |
2875 | 2875 | ||
2876 | EXPORT_SYMBOL(default_wake_function); | 2876 | EXPORT_SYMBOL(default_wake_function); |
2877 | 2877 | ||
2878 | /* | 2878 | /* |
2879 | * The core wakeup function. Non-exclusive wakeups (nr_exclusive == 0) just | 2879 | * The core wakeup function. Non-exclusive wakeups (nr_exclusive == 0) just |
2880 | * wake everything up. If it's an exclusive wakeup (nr_exclusive == small +ve | 2880 | * wake everything up. If it's an exclusive wakeup (nr_exclusive == small +ve |
2881 | * number) then we wake all the non-exclusive tasks and one exclusive task. | 2881 | * number) then we wake all the non-exclusive tasks and one exclusive task. |
2882 | * | 2882 | * |
2883 | * There are circumstances in which we can try to wake a task which has already | 2883 | * There are circumstances in which we can try to wake a task which has already |
2884 | * started to run but is not in state TASK_RUNNING. try_to_wake_up() returns | 2884 | * started to run but is not in state TASK_RUNNING. try_to_wake_up() returns |
2885 | * zero in this (rare) case, and we handle it by continuing to scan the queue. | 2885 | * zero in this (rare) case, and we handle it by continuing to scan the queue. |
2886 | */ | 2886 | */ |
2887 | static void __wake_up_common(wait_queue_head_t *q, unsigned int mode, | 2887 | static void __wake_up_common(wait_queue_head_t *q, unsigned int mode, |
2888 | int nr_exclusive, int sync, void *key) | 2888 | int nr_exclusive, int sync, void *key) |
2889 | { | 2889 | { |
2890 | struct list_head *tmp, *next; | 2890 | struct list_head *tmp, *next; |
2891 | 2891 | ||
2892 | list_for_each_safe(tmp, next, &q->task_list) { | 2892 | list_for_each_safe(tmp, next, &q->task_list) { |
2893 | wait_queue_t *curr; | 2893 | wait_queue_t *curr; |
2894 | unsigned flags; | 2894 | unsigned flags; |
2895 | curr = list_entry(tmp, wait_queue_t, task_list); | 2895 | curr = list_entry(tmp, wait_queue_t, task_list); |
2896 | flags = curr->flags; | 2896 | flags = curr->flags; |
2897 | if (curr->func(curr, mode, sync, key) && | 2897 | if (curr->func(curr, mode, sync, key) && |
2898 | (flags & WQ_FLAG_EXCLUSIVE) && | 2898 | (flags & WQ_FLAG_EXCLUSIVE) && |
2899 | !--nr_exclusive) | 2899 | !--nr_exclusive) |
2900 | break; | 2900 | break; |
2901 | } | 2901 | } |
2902 | } | 2902 | } |
2903 | 2903 | ||
2904 | /** | 2904 | /** |
2905 | * __wake_up - wake up threads blocked on a waitqueue. | 2905 | * __wake_up - wake up threads blocked on a waitqueue. |
2906 | * @q: the waitqueue | 2906 | * @q: the waitqueue |
2907 | * @mode: which threads | 2907 | * @mode: which threads |
2908 | * @nr_exclusive: how many wake-one or wake-many threads to wake up | 2908 | * @nr_exclusive: how many wake-one or wake-many threads to wake up |
2909 | * @key: is directly passed to the wakeup function | 2909 | * @key: is directly passed to the wakeup function |
2910 | */ | 2910 | */ |
2911 | void fastcall __wake_up(wait_queue_head_t *q, unsigned int mode, | 2911 | void fastcall __wake_up(wait_queue_head_t *q, unsigned int mode, |
2912 | int nr_exclusive, void *key) | 2912 | int nr_exclusive, void *key) |
2913 | { | 2913 | { |
2914 | unsigned long flags; | 2914 | unsigned long flags; |
2915 | 2915 | ||
2916 | spin_lock_irqsave(&q->lock, flags); | 2916 | spin_lock_irqsave(&q->lock, flags); |
2917 | __wake_up_common(q, mode, nr_exclusive, 0, key); | 2917 | __wake_up_common(q, mode, nr_exclusive, 0, key); |
2918 | spin_unlock_irqrestore(&q->lock, flags); | 2918 | spin_unlock_irqrestore(&q->lock, flags); |
2919 | } | 2919 | } |
2920 | 2920 | ||
2921 | EXPORT_SYMBOL(__wake_up); | 2921 | EXPORT_SYMBOL(__wake_up); |
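__wake_up() is normally reached through the wake_up*() wrappers, paired with a waiter queued on the same wait queue head. A minimal, illustrative waiter/waker pair (data_wq and data_ready are made up); the exclusive flag on the wait entry is what decides whether a single wake_up() stops after the first woken task:

#include <linux/sched.h>
#include <linux/wait.h>

static DECLARE_WAIT_QUEUE_HEAD(data_wq);
static int data_ready;

/* Waiter side: wait_event_interruptible() queues a non-exclusive entry. */
static int consumer(void)
{
	return wait_event_interruptible(data_wq, data_ready);
}

/* Waker side: wake_up() wakes every non-exclusive waiter and at most
 * one exclusive waiter, exactly the nr_exclusive == 1 case above.      */
static void producer(void)
{
	data_ready = 1;
	wake_up(&data_wq);
}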
2922 | 2922 | ||
2923 | /* | 2923 | /* |
2924 | * Same as __wake_up but called with the spinlock in wait_queue_head_t held. | 2924 | * Same as __wake_up but called with the spinlock in wait_queue_head_t held. |
2925 | */ | 2925 | */ |
2926 | void fastcall __wake_up_locked(wait_queue_head_t *q, unsigned int mode) | 2926 | void fastcall __wake_up_locked(wait_queue_head_t *q, unsigned int mode) |
2927 | { | 2927 | { |
2928 | __wake_up_common(q, mode, 1, 0, NULL); | 2928 | __wake_up_common(q, mode, 1, 0, NULL); |
2929 | } | 2929 | } |
2930 | 2930 | ||
2931 | /** | 2931 | /** |
2932 | * __wake_up_sync - wake up threads blocked on a waitqueue. | 2932 | * __wake_up_sync - wake up threads blocked on a waitqueue. |
2933 | * @q: the waitqueue | 2933 | * @q: the waitqueue |
2934 | * @mode: which threads | 2934 | * @mode: which threads |
2935 | * @nr_exclusive: how many wake-one or wake-many threads to wake up | 2935 | * @nr_exclusive: how many wake-one or wake-many threads to wake up |
2936 | * | 2936 | * |
2937 | * The sync wakeup differs in that the waker knows it will schedule | 2937 | * The sync wakeup differs in that the waker knows it will schedule |
2938 | * away soon, so while the target thread will be woken up, it will not | 2938 | * away soon, so while the target thread will be woken up, it will not |
2939 | * be migrated to another CPU - ie. the two threads are 'synchronized' | 2939 | * be migrated to another CPU - ie. the two threads are 'synchronized' |
2940 | * with each other. This can prevent needless bouncing between CPUs. | 2940 | * with each other. This can prevent needless bouncing between CPUs. |
2941 | * | 2941 | * |
2942 | * On UP it can prevent extra preemption. | 2942 | * On UP it can prevent extra preemption. |
2943 | */ | 2943 | */ |
2944 | void fastcall __wake_up_sync(wait_queue_head_t *q, unsigned int mode, int nr_exclusive) | 2944 | void fastcall __wake_up_sync(wait_queue_head_t *q, unsigned int mode, int nr_exclusive) |
2945 | { | 2945 | { |
2946 | unsigned long flags; | 2946 | unsigned long flags; |
2947 | int sync = 1; | 2947 | int sync = 1; |
2948 | 2948 | ||
2949 | if (unlikely(!q)) | 2949 | if (unlikely(!q)) |
2950 | return; | 2950 | return; |
2951 | 2951 | ||
2952 | if (unlikely(!nr_exclusive)) | 2952 | if (unlikely(!nr_exclusive)) |
2953 | sync = 0; | 2953 | sync = 0; |
2954 | 2954 | ||
2955 | spin_lock_irqsave(&q->lock, flags); | 2955 | spin_lock_irqsave(&q->lock, flags); |
2956 | __wake_up_common(q, mode, nr_exclusive, sync, NULL); | 2956 | __wake_up_common(q, mode, nr_exclusive, sync, NULL); |
2957 | spin_unlock_irqrestore(&q->lock, flags); | 2957 | spin_unlock_irqrestore(&q->lock, flags); |
2958 | } | 2958 | } |
2959 | EXPORT_SYMBOL_GPL(__wake_up_sync); /* For internal use only */ | 2959 | EXPORT_SYMBOL_GPL(__wake_up_sync); /* For internal use only */ |
2960 | 2960 | ||
2961 | void fastcall complete(struct completion *x) | 2961 | void fastcall complete(struct completion *x) |
2962 | { | 2962 | { |
2963 | unsigned long flags; | 2963 | unsigned long flags; |
2964 | 2964 | ||
2965 | spin_lock_irqsave(&x->wait.lock, flags); | 2965 | spin_lock_irqsave(&x->wait.lock, flags); |
2966 | x->done++; | 2966 | x->done++; |
2967 | __wake_up_common(&x->wait, TASK_UNINTERRUPTIBLE | TASK_INTERRUPTIBLE, | 2967 | __wake_up_common(&x->wait, TASK_UNINTERRUPTIBLE | TASK_INTERRUPTIBLE, |
2968 | 1, 0, NULL); | 2968 | 1, 0, NULL); |
2969 | spin_unlock_irqrestore(&x->wait.lock, flags); | 2969 | spin_unlock_irqrestore(&x->wait.lock, flags); |
2970 | } | 2970 | } |
2971 | EXPORT_SYMBOL(complete); | 2971 | EXPORT_SYMBOL(complete); |
2972 | 2972 | ||
2973 | void fastcall complete_all(struct completion *x) | 2973 | void fastcall complete_all(struct completion *x) |
2974 | { | 2974 | { |
2975 | unsigned long flags; | 2975 | unsigned long flags; |
2976 | 2976 | ||
2977 | spin_lock_irqsave(&x->wait.lock, flags); | 2977 | spin_lock_irqsave(&x->wait.lock, flags); |
2978 | x->done += UINT_MAX/2; | 2978 | x->done += UINT_MAX/2; |
2979 | __wake_up_common(&x->wait, TASK_UNINTERRUPTIBLE | TASK_INTERRUPTIBLE, | 2979 | __wake_up_common(&x->wait, TASK_UNINTERRUPTIBLE | TASK_INTERRUPTIBLE, |
2980 | 0, 0, NULL); | 2980 | 0, 0, NULL); |
2981 | spin_unlock_irqrestore(&x->wait.lock, flags); | 2981 | spin_unlock_irqrestore(&x->wait.lock, flags); |
2982 | } | 2982 | } |
2983 | EXPORT_SYMBOL(complete_all); | 2983 | EXPORT_SYMBOL(complete_all); |
2984 | 2984 | ||
2985 | void fastcall __sched wait_for_completion(struct completion *x) | 2985 | void fastcall __sched wait_for_completion(struct completion *x) |
2986 | { | 2986 | { |
2987 | might_sleep(); | 2987 | might_sleep(); |
2988 | spin_lock_irq(&x->wait.lock); | 2988 | spin_lock_irq(&x->wait.lock); |
2989 | if (!x->done) { | 2989 | if (!x->done) { |
2990 | DECLARE_WAITQUEUE(wait, current); | 2990 | DECLARE_WAITQUEUE(wait, current); |
2991 | 2991 | ||
2992 | wait.flags |= WQ_FLAG_EXCLUSIVE; | 2992 | wait.flags |= WQ_FLAG_EXCLUSIVE; |
2993 | __add_wait_queue_tail(&x->wait, &wait); | 2993 | __add_wait_queue_tail(&x->wait, &wait); |
2994 | do { | 2994 | do { |
2995 | __set_current_state(TASK_UNINTERRUPTIBLE); | 2995 | __set_current_state(TASK_UNINTERRUPTIBLE); |
2996 | spin_unlock_irq(&x->wait.lock); | 2996 | spin_unlock_irq(&x->wait.lock); |
2997 | schedule(); | 2997 | schedule(); |
2998 | spin_lock_irq(&x->wait.lock); | 2998 | spin_lock_irq(&x->wait.lock); |
2999 | } while (!x->done); | 2999 | } while (!x->done); |
3000 | __remove_wait_queue(&x->wait, &wait); | 3000 | __remove_wait_queue(&x->wait, &wait); |
3001 | } | 3001 | } |
3002 | x->done--; | 3002 | x->done--; |
3003 | spin_unlock_irq(&x->wait.lock); | 3003 | spin_unlock_irq(&x->wait.lock); |
3004 | } | 3004 | } |
3005 | EXPORT_SYMBOL(wait_for_completion); | 3005 | EXPORT_SYMBOL(wait_for_completion); |
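A minimal usage sketch for the completion API exported above: one side blocks in wait_for_completion(), the other signals with complete(), which is safe from atomic context because it only takes the completion's spinlock with IRQs saved (setup_done is a made-up name):

#include <linux/completion.h>

static DECLARE_COMPLETION(setup_done);

/* Sleeper: blocks uninterruptibly until the event is signalled. */
static void wait_side(void)
{
	wait_for_completion(&setup_done);
}

/* Signaller: may run in atomic context. */
static void signal_side(void)
{
	complete(&setup_done);
}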
3006 | 3006 | ||
3007 | unsigned long fastcall __sched | 3007 | unsigned long fastcall __sched |
3008 | wait_for_completion_timeout(struct completion *x, unsigned long timeout) | 3008 | wait_for_completion_timeout(struct completion *x, unsigned long timeout) |
3009 | { | 3009 | { |
3010 | might_sleep(); | 3010 | might_sleep(); |
3011 | 3011 | ||
3012 | spin_lock_irq(&x->wait.lock); | 3012 | spin_lock_irq(&x->wait.lock); |
3013 | if (!x->done) { | 3013 | if (!x->done) { |
3014 | DECLARE_WAITQUEUE(wait, current); | 3014 | DECLARE_WAITQUEUE(wait, current); |
3015 | 3015 | ||
3016 | wait.flags |= WQ_FLAG_EXCLUSIVE; | 3016 | wait.flags |= WQ_FLAG_EXCLUSIVE; |
3017 | __add_wait_queue_tail(&x->wait, &wait); | 3017 | __add_wait_queue_tail(&x->wait, &wait); |
3018 | do { | 3018 | do { |
3019 | __set_current_state(TASK_UNINTERRUPTIBLE); | 3019 | __set_current_state(TASK_UNINTERRUPTIBLE); |
3020 | spin_unlock_irq(&x->wait.lock); | 3020 | spin_unlock_irq(&x->wait.lock); |
3021 | timeout = schedule_timeout(timeout); | 3021 | timeout = schedule_timeout(timeout); |
3022 | spin_lock_irq(&x->wait.lock); | 3022 | spin_lock_irq(&x->wait.lock); |
3023 | if (!timeout) { | 3023 | if (!timeout) { |
3024 | __remove_wait_queue(&x->wait, &wait); | 3024 | __remove_wait_queue(&x->wait, &wait); |
3025 | goto out; | 3025 | goto out; |
3026 | } | 3026 | } |
3027 | } while (!x->done); | 3027 | } while (!x->done); |
3028 | __remove_wait_queue(&x->wait, &wait); | 3028 | __remove_wait_queue(&x->wait, &wait); |
3029 | } | 3029 | } |
3030 | x->done--; | 3030 | x->done--; |
3031 | out: | 3031 | out: |
3032 | spin_unlock_irq(&x->wait.lock); | 3032 | spin_unlock_irq(&x->wait.lock); |
3033 | return timeout; | 3033 | return timeout; |
3034 | } | 3034 | } |
3035 | EXPORT_SYMBOL(wait_for_completion_timeout); | 3035 | EXPORT_SYMBOL(wait_for_completion_timeout); |
3036 | 3036 | ||
3037 | int fastcall __sched wait_for_completion_interruptible(struct completion *x) | 3037 | int fastcall __sched wait_for_completion_interruptible(struct completion *x) |
3038 | { | 3038 | { |
3039 | int ret = 0; | 3039 | int ret = 0; |
3040 | 3040 | ||
3041 | might_sleep(); | 3041 | might_sleep(); |
3042 | 3042 | ||
3043 | spin_lock_irq(&x->wait.lock); | 3043 | spin_lock_irq(&x->wait.lock); |
3044 | if (!x->done) { | 3044 | if (!x->done) { |
3045 | DECLARE_WAITQUEUE(wait, current); | 3045 | DECLARE_WAITQUEUE(wait, current); |
3046 | 3046 | ||
3047 | wait.flags |= WQ_FLAG_EXCLUSIVE; | 3047 | wait.flags |= WQ_FLAG_EXCLUSIVE; |
3048 | __add_wait_queue_tail(&x->wait, &wait); | 3048 | __add_wait_queue_tail(&x->wait, &wait); |
3049 | do { | 3049 | do { |
3050 | if (signal_pending(current)) { | 3050 | if (signal_pending(current)) { |
3051 | ret = -ERESTARTSYS; | 3051 | ret = -ERESTARTSYS; |
3052 | __remove_wait_queue(&x->wait, &wait); | 3052 | __remove_wait_queue(&x->wait, &wait); |
3053 | goto out; | 3053 | goto out; |
3054 | } | 3054 | } |
3055 | __set_current_state(TASK_INTERRUPTIBLE); | 3055 | __set_current_state(TASK_INTERRUPTIBLE); |
3056 | spin_unlock_irq(&x->wait.lock); | 3056 | spin_unlock_irq(&x->wait.lock); |
3057 | schedule(); | 3057 | schedule(); |
3058 | spin_lock_irq(&x->wait.lock); | 3058 | spin_lock_irq(&x->wait.lock); |
3059 | } while (!x->done); | 3059 | } while (!x->done); |
3060 | __remove_wait_queue(&x->wait, &wait); | 3060 | __remove_wait_queue(&x->wait, &wait); |
3061 | } | 3061 | } |
3062 | x->done--; | 3062 | x->done--; |
3063 | out: | 3063 | out: |
3064 | spin_unlock_irq(&x->wait.lock); | 3064 | spin_unlock_irq(&x->wait.lock); |
3065 | 3065 | ||
3066 | return ret; | 3066 | return ret; |
3067 | } | 3067 | } |
3068 | EXPORT_SYMBOL(wait_for_completion_interruptible); | 3068 | EXPORT_SYMBOL(wait_for_completion_interruptible); |
3069 | 3069 | ||
3070 | unsigned long fastcall __sched | 3070 | unsigned long fastcall __sched |
3071 | wait_for_completion_interruptible_timeout(struct completion *x, | 3071 | wait_for_completion_interruptible_timeout(struct completion *x, |
3072 | unsigned long timeout) | 3072 | unsigned long timeout) |
3073 | { | 3073 | { |
3074 | might_sleep(); | 3074 | might_sleep(); |
3075 | 3075 | ||
3076 | spin_lock_irq(&x->wait.lock); | 3076 | spin_lock_irq(&x->wait.lock); |
3077 | if (!x->done) { | 3077 | if (!x->done) { |
3078 | DECLARE_WAITQUEUE(wait, current); | 3078 | DECLARE_WAITQUEUE(wait, current); |
3079 | 3079 | ||
3080 | wait.flags |= WQ_FLAG_EXCLUSIVE; | 3080 | wait.flags |= WQ_FLAG_EXCLUSIVE; |
3081 | __add_wait_queue_tail(&x->wait, &wait); | 3081 | __add_wait_queue_tail(&x->wait, &wait); |
3082 | do { | 3082 | do { |
3083 | if (signal_pending(current)) { | 3083 | if (signal_pending(current)) { |
3084 | timeout = -ERESTARTSYS; | 3084 | timeout = -ERESTARTSYS; |
3085 | __remove_wait_queue(&x->wait, &wait); | 3085 | __remove_wait_queue(&x->wait, &wait); |
3086 | goto out; | 3086 | goto out; |
3087 | } | 3087 | } |
3088 | __set_current_state(TASK_INTERRUPTIBLE); | 3088 | __set_current_state(TASK_INTERRUPTIBLE); |
3089 | spin_unlock_irq(&x->wait.lock); | 3089 | spin_unlock_irq(&x->wait.lock); |
3090 | timeout = schedule_timeout(timeout); | 3090 | timeout = schedule_timeout(timeout); |
3091 | spin_lock_irq(&x->wait.lock); | 3091 | spin_lock_irq(&x->wait.lock); |
3092 | if (!timeout) { | 3092 | if (!timeout) { |
3093 | __remove_wait_queue(&x->wait, &wait); | 3093 | __remove_wait_queue(&x->wait, &wait); |
3094 | goto out; | 3094 | goto out; |
3095 | } | 3095 | } |
3096 | } while (!x->done); | 3096 | } while (!x->done); |
3097 | __remove_wait_queue(&x->wait, &wait); | 3097 | __remove_wait_queue(&x->wait, &wait); |
3098 | } | 3098 | } |
3099 | x->done--; | 3099 | x->done--; |
3100 | out: | 3100 | out: |
3101 | spin_unlock_irq(&x->wait.lock); | 3101 | spin_unlock_irq(&x->wait.lock); |
3102 | return timeout; | 3102 | return timeout; |
3103 | } | 3103 | } |
3104 | EXPORT_SYMBOL(wait_for_completion_interruptible_timeout); | 3104 | EXPORT_SYMBOL(wait_for_completion_interruptible_timeout); |
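The return conventions of the timeout and interruptible variants differ subtly: 0 means the wait timed out, a negative value (carried through an unsigned long in the *_timeout versions) means a signal arrived, and anything else is the remaining timeout. A hedged sketch of how a caller might decode wait_for_completion_interruptible_timeout() (example_wait is a made-up helper):

#include <linux/completion.h>
#include <linux/err.h>
#include <linux/errno.h>
#include <linux/sched.h>

static int example_wait(struct completion *x)
{
	unsigned long t;

	t = wait_for_completion_interruptible_timeout(x, HZ);
	if (t == 0)
		return -ETIMEDOUT;	/* the full HZ jiffies elapsed      */
	if (IS_ERR_VALUE(t))
		return (int)t;		/* -ERESTARTSYS: a signal arrived   */
	return 0;			/* completed with 't' jiffies spare */
}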
3105 | 3105 | ||
3106 | 3106 | ||
3107 | #define SLEEP_ON_VAR \ | 3107 | #define SLEEP_ON_VAR \ |
3108 | unsigned long flags; \ | 3108 | unsigned long flags; \ |
3109 | wait_queue_t wait; \ | 3109 | wait_queue_t wait; \ |
3110 | init_waitqueue_entry(&wait, current); | 3110 | init_waitqueue_entry(&wait, current); |
3111 | 3111 | ||
3112 | #define SLEEP_ON_HEAD \ | 3112 | #define SLEEP_ON_HEAD \ |
3113 | spin_lock_irqsave(&q->lock,flags); \ | 3113 | spin_lock_irqsave(&q->lock,flags); \ |
3114 | __add_wait_queue(q, &wait); \ | 3114 | __add_wait_queue(q, &wait); \ |
3115 | spin_unlock(&q->lock); | 3115 | spin_unlock(&q->lock); |
3116 | 3116 | ||
3117 | #define SLEEP_ON_TAIL \ | 3117 | #define SLEEP_ON_TAIL \ |
3118 | spin_lock_irq(&q->lock); \ | 3118 | spin_lock_irq(&q->lock); \ |
3119 | __remove_wait_queue(q, &wait); \ | 3119 | __remove_wait_queue(q, &wait); \ |
3120 | spin_unlock_irqrestore(&q->lock, flags); | 3120 | spin_unlock_irqrestore(&q->lock, flags); |
3121 | 3121 | ||
3122 | void fastcall __sched interruptible_sleep_on(wait_queue_head_t *q) | 3122 | void fastcall __sched interruptible_sleep_on(wait_queue_head_t *q) |
3123 | { | 3123 | { |
3124 | SLEEP_ON_VAR | 3124 | SLEEP_ON_VAR |
3125 | 3125 | ||
3126 | current->state = TASK_INTERRUPTIBLE; | 3126 | current->state = TASK_INTERRUPTIBLE; |
3127 | 3127 | ||
3128 | SLEEP_ON_HEAD | 3128 | SLEEP_ON_HEAD |
3129 | schedule(); | 3129 | schedule(); |
3130 | SLEEP_ON_TAIL | 3130 | SLEEP_ON_TAIL |
3131 | } | 3131 | } |
3132 | 3132 | ||
3133 | EXPORT_SYMBOL(interruptible_sleep_on); | 3133 | EXPORT_SYMBOL(interruptible_sleep_on); |
3134 | 3134 | ||
3135 | long fastcall __sched interruptible_sleep_on_timeout(wait_queue_head_t *q, long timeout) | 3135 | long fastcall __sched interruptible_sleep_on_timeout(wait_queue_head_t *q, long timeout) |
3136 | { | 3136 | { |
3137 | SLEEP_ON_VAR | 3137 | SLEEP_ON_VAR |
3138 | 3138 | ||
3139 | current->state = TASK_INTERRUPTIBLE; | 3139 | current->state = TASK_INTERRUPTIBLE; |
3140 | 3140 | ||
3141 | SLEEP_ON_HEAD | 3141 | SLEEP_ON_HEAD |
3142 | timeout = schedule_timeout(timeout); | 3142 | timeout = schedule_timeout(timeout); |
3143 | SLEEP_ON_TAIL | 3143 | SLEEP_ON_TAIL |
3144 | 3144 | ||
3145 | return timeout; | 3145 | return timeout; |
3146 | } | 3146 | } |
3147 | 3147 | ||
3148 | EXPORT_SYMBOL(interruptible_sleep_on_timeout); | 3148 | EXPORT_SYMBOL(interruptible_sleep_on_timeout); |
3149 | 3149 | ||
3150 | void fastcall __sched sleep_on(wait_queue_head_t *q) | 3150 | void fastcall __sched sleep_on(wait_queue_head_t *q) |
3151 | { | 3151 | { |
3152 | SLEEP_ON_VAR | 3152 | SLEEP_ON_VAR |
3153 | 3153 | ||
3154 | current->state = TASK_UNINTERRUPTIBLE; | 3154 | current->state = TASK_UNINTERRUPTIBLE; |
3155 | 3155 | ||
3156 | SLEEP_ON_HEAD | 3156 | SLEEP_ON_HEAD |
3157 | schedule(); | 3157 | schedule(); |
3158 | SLEEP_ON_TAIL | 3158 | SLEEP_ON_TAIL |
3159 | } | 3159 | } |
3160 | 3160 | ||
3161 | EXPORT_SYMBOL(sleep_on); | 3161 | EXPORT_SYMBOL(sleep_on); |
3162 | 3162 | ||
3163 | long fastcall __sched sleep_on_timeout(wait_queue_head_t *q, long timeout) | 3163 | long fastcall __sched sleep_on_timeout(wait_queue_head_t *q, long timeout) |
3164 | { | 3164 | { |
3165 | SLEEP_ON_VAR | 3165 | SLEEP_ON_VAR |
3166 | 3166 | ||
3167 | current->state = TASK_UNINTERRUPTIBLE; | 3167 | current->state = TASK_UNINTERRUPTIBLE; |
3168 | 3168 | ||
3169 | SLEEP_ON_HEAD | 3169 | SLEEP_ON_HEAD |
3170 | timeout = schedule_timeout(timeout); | 3170 | timeout = schedule_timeout(timeout); |
3171 | SLEEP_ON_TAIL | 3171 | SLEEP_ON_TAIL |
3172 | 3172 | ||
3173 | return timeout; | 3173 | return timeout; |
3174 | } | 3174 | } |
3175 | 3175 | ||
3176 | EXPORT_SYMBOL(sleep_on_timeout); | 3176 | EXPORT_SYMBOL(sleep_on_timeout); |
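The sleep_on() family above is kept for historical callers; the window between the caller's own condition test and SLEEP_ON_HEAD queueing the task can lose a wakeup. A sketch of the race and of the wait_event() style that re-tests the condition after queueing (legacy_wq and legacy_cond are illustrative names):

/*
 * The lost-wakeup window with sleep_on() (sketch):
 *
 *	sleeper				waker
 *	-------				-----
 *	if (!legacy_cond)
 *					legacy_cond = 1;
 *					wake_up(&legacy_wq);   <- queue still empty
 *		sleep_on(&legacy_wq);  <- sleeps after the wakeup, may hang
 *
 * wait_event() queues the task and sets its state before re-testing the
 * condition, so the wakeup cannot fall into that gap:
 */
#include <linux/sched.h>
#include <linux/wait.h>

static DECLARE_WAIT_QUEUE_HEAD(legacy_wq);
static int legacy_cond;

static void example_wait_for_cond(void)
{
	wait_event(legacy_wq, legacy_cond);	/* preferred over sleep_on() */
}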
3177 | 3177 | ||
3178 | void set_user_nice(task_t *p, long nice) | 3178 | void set_user_nice(task_t *p, long nice) |
3179 | { | 3179 | { |
3180 | unsigned long flags; | 3180 | unsigned long flags; |
3181 | prio_array_t *array; | 3181 | prio_array_t *array; |
3182 | runqueue_t *rq; | 3182 | runqueue_t *rq; |
3183 | int old_prio, new_prio, delta; | 3183 | int old_prio, new_prio, delta; |
3184 | 3184 | ||
3185 | if (TASK_NICE(p) == nice || nice < -20 || nice > 19) | 3185 | if (TASK_NICE(p) == nice || nice < -20 || nice > 19) |
3186 | return; | 3186 | return; |
3187 | /* | 3187 | /* |
3188 | * We have to be careful, if called from sys_setpriority(), | 3188 | * We have to be careful, if called from sys_setpriority(), |
3189 | * the task might be in the middle of scheduling on another CPU. | 3189 | * the task might be in the middle of scheduling on another CPU. |
3190 | */ | 3190 | */ |
3191 | rq = task_rq_lock(p, &flags); | 3191 | rq = task_rq_lock(p, &flags); |
3192 | /* | 3192 | /* |
3193 | * The RT priorities are set via sched_setscheduler(), but we still | 3193 | * The RT priorities are set via sched_setscheduler(), but we still |
3194 | * allow the 'normal' nice value to be set - but as expected | 3194 | * allow the 'normal' nice value to be set - but as expected |
3195 | * it won't have any effect on scheduling until the task | 3195 | * it won't have any effect on scheduling until the task |
3196 | * becomes SCHED_NORMAL again: | 3196 | * becomes SCHED_NORMAL again: |
3197 | */ | 3197 | */ |
3198 | if (rt_task(p)) { | 3198 | if (rt_task(p)) { |
3199 | p->static_prio = NICE_TO_PRIO(nice); | 3199 | p->static_prio = NICE_TO_PRIO(nice); |
3200 | goto out_unlock; | 3200 | goto out_unlock; |
3201 | } | 3201 | } |
3202 | array = p->array; | 3202 | array = p->array; |
3203 | if (array) | 3203 | if (array) |
3204 | dequeue_task(p, array); | 3204 | dequeue_task(p, array); |
3205 | 3205 | ||
3206 | old_prio = p->prio; | 3206 | old_prio = p->prio; |
3207 | new_prio = NICE_TO_PRIO(nice); | 3207 | new_prio = NICE_TO_PRIO(nice); |
3208 | delta = new_prio - old_prio; | 3208 | delta = new_prio - old_prio; |
3209 | p->static_prio = NICE_TO_PRIO(nice); | 3209 | p->static_prio = NICE_TO_PRIO(nice); |
3210 | p->prio += delta; | 3210 | p->prio += delta; |
3211 | 3211 | ||
3212 | if (array) { | 3212 | if (array) { |
3213 | enqueue_task(p, array); | 3213 | enqueue_task(p, array); |
3214 | /* | 3214 | /* |
3215 | * If the task increased its priority or is running and | 3215 | * If the task increased its priority or is running and |
3216 | * lowered its priority, then reschedule its CPU: | 3216 | * lowered its priority, then reschedule its CPU: |
3217 | */ | 3217 | */ |
3218 | if (delta < 0 || (delta > 0 && task_running(rq, p))) | 3218 | if (delta < 0 || (delta > 0 && task_running(rq, p))) |
3219 | resched_task(rq->curr); | 3219 | resched_task(rq->curr); |
3220 | } | 3220 | } |
3221 | out_unlock: | 3221 | out_unlock: |
3222 | task_rq_unlock(rq, &flags); | 3222 | task_rq_unlock(rq, &flags); |
3223 | } | 3223 | } |
3224 | 3224 | ||
3225 | EXPORT_SYMBOL(set_user_nice); | 3225 | EXPORT_SYMBOL(set_user_nice); |
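To see the delta computation in set_user_nice() with numbers, assume the usual NICE_TO_PRIO(nice) = MAX_RT_PRIO + nice + 20 mapping with MAX_RT_PRIO taken as 100 (illustrative values here, not quoted from the headers). Re-nicing a task from 0 to -5 then looks like this stand-alone sketch:

#include <stdio.h>

#define FAKE_MAX_RT_PRIO	100
#define FAKE_NICE_TO_PRIO(n)	(FAKE_MAX_RT_PRIO + (n) + 20)

int main(void)
{
	int old_nice = 0, new_nice = -5;
	int old_prio = FAKE_NICE_TO_PRIO(old_nice);	/* 120 */
	int new_prio = FAKE_NICE_TO_PRIO(new_nice);	/* 115 */
	int delta = new_prio - old_prio;		/* -5: priority raised */

	printf("old=%d new=%d delta=%d -> %s\n", old_prio, new_prio, delta,
	       delta < 0 ? "resched the runqueue's current task" : "no immediate resched");
	return 0;
}

Because delta is negative the task's priority improved, which is why the code above reschedules the runqueue's current task and lets the re-niced task compete immediately.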
3226 | 3226 | ||
3227 | /* | 3227 | /* |
3228 | * can_nice - check if a task can reduce its nice value | 3228 | * can_nice - check if a task can reduce its nice value |
3229 | * @p: task | 3229 | * @p: task |
3230 | * @nice: nice value | 3230 | * @nice: nice value |
3231 | */ | 3231 | */ |
3232 | int can_nice(const task_t *p, const int nice) | 3232 | int can_nice(const task_t *p, const int nice) |
3233 | { | 3233 | { |
3234 | /* convert nice value [19,-20] to rlimit style value [0,39] */ | 3234 | /* convert nice value [19,-20] to rlimit style value [0,39] */ |
3235 | int nice_rlim = 19 - nice; | 3235 | int nice_rlim = 19 - nice; |
3236 | return (nice_rlim <= p->signal->rlim[RLIMIT_NICE].rlim_cur || | 3236 | return (nice_rlim <= p->signal->rlim[RLIMIT_NICE].rlim_cur || |
3237 | capable(CAP_SYS_NICE)); | 3237 | capable(CAP_SYS_NICE)); |
3238 | } | 3238 | } |
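A quick worked example of the rlimit conversion in can_nice(), using an assumed unprivileged soft limit of 0 for RLIMIT_NICE:

/* Requesting nice -5: nice_rlim = 19 - (-5) = 24.  With an assumed
 * RLIMIT_NICE soft limit of 0, "24 <= 0" is false, so the request is
 * only granted to tasks with CAP_SYS_NICE. */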
3239 | 3239 | ||
3240 | #ifdef __ARCH_WANT_SYS_NICE | 3240 | #ifdef __ARCH_WANT_SYS_NICE |
3241 | 3241 | ||
3242 | /* | 3242 | /* |
3243 | * sys_nice - change the priority of the current process. | 3243 | * sys_nice - change the priority of the current process. |
3244 | * @increment: priority increment | 3244 | * @increment: priority increment |
3245 | * | 3245 | * |
3246 | * sys_setpriority is a more generic, but much slower function that | 3246 | * sys_setpriority is a more generic, but much slower function that |
3247 | * does similar things. | 3247 | * does similar things. |
3248 | */ | 3248 | */ |
3249 | asmlinkage long sys_nice(int increment) | 3249 | asmlinkage long sys_nice(int increment) |
3250 | { | 3250 | { |
3251 | int retval; | 3251 | int retval; |
3252 | long nice; | 3252 | long nice; |
3253 | 3253 | ||
3254 | /* | 3254 | /* |
3255 | * Setpriority might change our priority at the same moment. | 3255 | * Setpriority might change our priority at the same moment. |
3256 | * We don't have to worry. Conceptually one call occurs first | 3256 | * We don't have to worry. Conceptually one call occurs first |
3257 | * and we have a single winner. | 3257 | * and we have a single winner. |
3258 | */ | 3258 | */ |
3259 | if (increment < -40) | 3259 | if (increment < -40) |
3260 | increment = -40; | 3260 | increment = -40; |
3261 | if (increment > 40) | 3261 | if (increment > 40) |
3262 | increment = 40; | 3262 | increment = 40; |
3263 | 3263 | ||
3264 | nice = PRIO_TO_NICE(current->static_prio) + increment; | 3264 | nice = PRIO_TO_NICE(current->static_prio) + increment; |
3265 | if (nice < -20) | 3265 | if (nice < -20) |
3266 | nice = -20; | 3266 | nice = -20; |
3267 | if (nice > 19) | 3267 | if (nice > 19) |
3268 | nice = 19; | 3268 | nice = 19; |
3269 | 3269 | ||
3270 | if (increment < 0 && !can_nice(current, nice)) | 3270 | if (increment < 0 && !can_nice(current, nice)) |
3271 | return -EPERM; | 3271 | return -EPERM; |
3272 | 3272 | ||
3273 | retval = security_task_setnice(current, nice); | 3273 | retval = security_task_setnice(current, nice); |
3274 | if (retval) | 3274 | if (retval) |
3275 | return retval; | 3275 | return retval; |
3276 | 3276 | ||
3277 | set_user_nice(current, nice); | 3277 | set_user_nice(current, nice); |
3278 | return 0; | 3278 | return 0; |
3279 | } | 3279 | } |
3280 | 3280 | ||
3281 | #endif | 3281 | #endif |
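A hedged userspace sketch (not part of this patch) that exercises the sys_nice() path above through the glibc nice() wrapper; the increment is clamped so the resulting nice value stays within [-20, 19], and lowering it without privilege fails with EPERM:

#include <errno.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

int main(void)
{
        int ret;

        errno = 0;
        ret = nice(5);          /* raising nice is always allowed */
        printf("nice(5)   -> %d\n", ret);

        errno = 0;
        ret = nice(-10);        /* lowering it usually needs CAP_SYS_NICE */
        if (ret == -1 && errno)
                printf("nice(-10) failed: %s\n", strerror(errno));
        else
                printf("nice(-10) -> %d\n", ret);
        return 0;
}

(nice() returns the new nice value, which can legitimately be -1, hence the errno-clearing idiom.)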
3282 | 3282 | ||
3283 | /** | 3283 | /** |
3284 | * task_prio - return the priority value of a given task. | 3284 | * task_prio - return the priority value of a given task. |
3285 | * @p: the task in question. | 3285 | * @p: the task in question. |
3286 | * | 3286 | * |
3287 | * This is the priority value as seen by users in /proc. | 3287 | * This is the priority value as seen by users in /proc. |
3288 | * RT tasks are offset by -200. Normal tasks are centered | 3288 | * RT tasks are offset by -200. Normal tasks are centered |
3289 | * around 0, value goes from -16 to +15. | 3289 | * around 0, value goes from -16 to +15. |
3290 | */ | 3290 | */ |
3291 | int task_prio(const task_t *p) | 3291 | int task_prio(const task_t *p) |
3292 | { | 3292 | { |
3293 | return p->prio - MAX_RT_PRIO; | 3293 | return p->prio - MAX_RT_PRIO; |
3294 | } | 3294 | } |
3295 | 3295 | ||
3296 | /** | 3296 | /** |
3297 | * task_nice - return the nice value of a given task. | 3297 | * task_nice - return the nice value of a given task. |
3298 | * @p: the task in question. | 3298 | * @p: the task in question. |
3299 | */ | 3299 | */ |
3300 | int task_nice(const task_t *p) | 3300 | int task_nice(const task_t *p) |
3301 | { | 3301 | { |
3302 | return TASK_NICE(p); | 3302 | return TASK_NICE(p); |
3303 | } | 3303 | } |
3304 | 3304 | ||
3305 | /* | 3305 | /* |
3306 | * The only users of task_nice are binfmt_elf and binfmt_elf32. | 3306 | * The only users of task_nice are binfmt_elf and binfmt_elf32. |
3307 | * binfmt_elf is no longer modular, but binfmt_elf32 still is. | 3307 | * binfmt_elf is no longer modular, but binfmt_elf32 still is. |
3308 | * Therefore, task_nice is needed if there is a compat_mode. | 3308 | * Therefore, task_nice is needed if there is a compat_mode. |
3309 | */ | 3309 | */ |
3310 | #ifdef CONFIG_COMPAT | 3310 | #ifdef CONFIG_COMPAT |
3311 | EXPORT_SYMBOL_GPL(task_nice); | 3311 | EXPORT_SYMBOL_GPL(task_nice); |
3312 | #endif | 3312 | #endif |
3313 | 3313 | ||
3314 | /** | 3314 | /** |
3315 | * idle_cpu - is a given cpu idle currently? | 3315 | * idle_cpu - is a given cpu idle currently? |
3316 | * @cpu: the processor in question. | 3316 | * @cpu: the processor in question. |
3317 | */ | 3317 | */ |
3318 | int idle_cpu(int cpu) | 3318 | int idle_cpu(int cpu) |
3319 | { | 3319 | { |
3320 | return cpu_curr(cpu) == cpu_rq(cpu)->idle; | 3320 | return cpu_curr(cpu) == cpu_rq(cpu)->idle; |
3321 | } | 3321 | } |
3322 | 3322 | ||
3323 | EXPORT_SYMBOL_GPL(idle_cpu); | 3323 | EXPORT_SYMBOL_GPL(idle_cpu); |
3324 | 3324 | ||
3325 | /** | 3325 | /** |
3326 | * idle_task - return the idle task for a given cpu. | 3326 | * idle_task - return the idle task for a given cpu. |
3327 | * @cpu: the processor in question. | 3327 | * @cpu: the processor in question. |
3328 | */ | 3328 | */ |
3329 | task_t *idle_task(int cpu) | 3329 | task_t *idle_task(int cpu) |
3330 | { | 3330 | { |
3331 | return cpu_rq(cpu)->idle; | 3331 | return cpu_rq(cpu)->idle; |
3332 | } | 3332 | } |
3333 | 3333 | ||
3334 | /** | 3334 | /** |
3335 | * find_process_by_pid - find a process with a matching PID value. | 3335 | * find_process_by_pid - find a process with a matching PID value. |
3336 | * @pid: the pid in question. | 3336 | * @pid: the pid in question. |
3337 | */ | 3337 | */ |
3338 | static inline task_t *find_process_by_pid(pid_t pid) | 3338 | static inline task_t *find_process_by_pid(pid_t pid) |
3339 | { | 3339 | { |
3340 | return pid ? find_task_by_pid(pid) : current; | 3340 | return pid ? find_task_by_pid(pid) : current; |
3341 | } | 3341 | } |
3342 | 3342 | ||
3343 | /* Actually do priority change: must hold rq lock. */ | 3343 | /* Actually do priority change: must hold rq lock. */ |
3344 | static void __setscheduler(struct task_struct *p, int policy, int prio) | 3344 | static void __setscheduler(struct task_struct *p, int policy, int prio) |
3345 | { | 3345 | { |
3346 | BUG_ON(p->array); | 3346 | BUG_ON(p->array); |
3347 | p->policy = policy; | 3347 | p->policy = policy; |
3348 | p->rt_priority = prio; | 3348 | p->rt_priority = prio; |
3349 | if (policy != SCHED_NORMAL) | 3349 | if (policy != SCHED_NORMAL) |
3350 | p->prio = MAX_USER_RT_PRIO-1 - p->rt_priority; | 3350 | p->prio = MAX_USER_RT_PRIO-1 - p->rt_priority; |
3351 | else | 3351 | else |
3352 | p->prio = p->static_prio; | 3352 | p->prio = p->static_prio; |
3353 | } | 3353 | } |
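A hedged illustration (not part of this patch) of the priority mapping __setscheduler() performs, assuming the usual MAX_USER_RT_PRIO of 100: a higher rt_priority yields a numerically lower, i.e. stronger, p->prio, while SCHED_NORMAL falls back to static_prio.

#include <stdio.h>

#define MAX_USER_RT_PRIO 100    /* assumption: matches the kernel's value */

int main(void)
{
        int rt_priority;

        for (rt_priority = 1; rt_priority <= 99; rt_priority += 49)
                printf("rt_priority %2d -> prio %d\n",
                       rt_priority, MAX_USER_RT_PRIO - 1 - rt_priority);
        return 0;
}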
3354 | 3354 | ||
3355 | /** | 3355 | /** |
3356 | * sched_setscheduler - change the scheduling policy and/or RT priority of | 3356 | * sched_setscheduler - change the scheduling policy and/or RT priority of |
3357 | * a thread. | 3357 | * a thread. |
3358 | * @p: the task in question. | 3358 | * @p: the task in question. |
3359 | * @policy: new policy. | 3359 | * @policy: new policy. |
3360 | * @param: structure containing the new RT priority. | 3360 | * @param: structure containing the new RT priority. |
3361 | */ | 3361 | */ |
3362 | int sched_setscheduler(struct task_struct *p, int policy, struct sched_param *param) | 3362 | int sched_setscheduler(struct task_struct *p, int policy, struct sched_param *param) |
3363 | { | 3363 | { |
3364 | int retval; | 3364 | int retval; |
3365 | int oldprio, oldpolicy = -1; | 3365 | int oldprio, oldpolicy = -1; |
3366 | prio_array_t *array; | 3366 | prio_array_t *array; |
3367 | unsigned long flags; | 3367 | unsigned long flags; |
3368 | runqueue_t *rq; | 3368 | runqueue_t *rq; |
3369 | 3369 | ||
3370 | recheck: | 3370 | recheck: |
3371 | /* double check policy once rq lock held */ | 3371 | /* double check policy once rq lock held */ |
3372 | if (policy < 0) | 3372 | if (policy < 0) |
3373 | policy = oldpolicy = p->policy; | 3373 | policy = oldpolicy = p->policy; |
3374 | else if (policy != SCHED_FIFO && policy != SCHED_RR && | 3374 | else if (policy != SCHED_FIFO && policy != SCHED_RR && |
3375 | policy != SCHED_NORMAL) | 3375 | policy != SCHED_NORMAL) |
3376 | return -EINVAL; | 3376 | return -EINVAL; |
3377 | /* | 3377 | /* |
3378 | * Valid priorities for SCHED_FIFO and SCHED_RR are | 3378 | * Valid priorities for SCHED_FIFO and SCHED_RR are |
3379 | * 1..MAX_USER_RT_PRIO-1, valid priority for SCHED_NORMAL is 0. | 3379 | * 1..MAX_USER_RT_PRIO-1, valid priority for SCHED_NORMAL is 0. |
3380 | */ | 3380 | */ |
3381 | if (param->sched_priority < 0 || | 3381 | if (param->sched_priority < 0 || |
3382 | param->sched_priority > MAX_USER_RT_PRIO-1) | 3382 | param->sched_priority > MAX_USER_RT_PRIO-1) |
3383 | return -EINVAL; | 3383 | return -EINVAL; |
3384 | if ((policy == SCHED_NORMAL) != (param->sched_priority == 0)) | 3384 | if ((policy == SCHED_NORMAL) != (param->sched_priority == 0)) |
3385 | return -EINVAL; | 3385 | return -EINVAL; |
3386 | 3386 | ||
3387 | if ((policy == SCHED_FIFO || policy == SCHED_RR) && | 3387 | if ((policy == SCHED_FIFO || policy == SCHED_RR) && |
3388 | param->sched_priority > p->signal->rlim[RLIMIT_RTPRIO].rlim_cur && | 3388 | param->sched_priority > p->signal->rlim[RLIMIT_RTPRIO].rlim_cur && |
3389 | !capable(CAP_SYS_NICE)) | 3389 | !capable(CAP_SYS_NICE)) |
3390 | return -EPERM; | 3390 | return -EPERM; |
3391 | if ((current->euid != p->euid) && (current->euid != p->uid) && | 3391 | if ((current->euid != p->euid) && (current->euid != p->uid) && |
3392 | !capable(CAP_SYS_NICE)) | 3392 | !capable(CAP_SYS_NICE)) |
3393 | return -EPERM; | 3393 | return -EPERM; |
3394 | 3394 | ||
3395 | retval = security_task_setscheduler(p, policy, param); | 3395 | retval = security_task_setscheduler(p, policy, param); |
3396 | if (retval) | 3396 | if (retval) |
3397 | return retval; | 3397 | return retval; |
3398 | /* | 3398 | /* |
3399 | * To be able to change p->policy safely, the appropriate | 3399 | * To be able to change p->policy safely, the appropriate |
3400 | * runqueue lock must be held. | 3400 | * runqueue lock must be held. |
3401 | */ | 3401 | */ |
3402 | rq = task_rq_lock(p, &flags); | 3402 | rq = task_rq_lock(p, &flags); |
3403 | /* recheck policy now with rq lock held */ | 3403 | /* recheck policy now with rq lock held */ |
3404 | if (unlikely(oldpolicy != -1 && oldpolicy != p->policy)) { | 3404 | if (unlikely(oldpolicy != -1 && oldpolicy != p->policy)) { |
3405 | policy = oldpolicy = -1; | 3405 | policy = oldpolicy = -1; |
3406 | task_rq_unlock(rq, &flags); | 3406 | task_rq_unlock(rq, &flags); |
3407 | goto recheck; | 3407 | goto recheck; |
3408 | } | 3408 | } |
3409 | array = p->array; | 3409 | array = p->array; |
3410 | if (array) | 3410 | if (array) |
3411 | deactivate_task(p, rq); | 3411 | deactivate_task(p, rq); |
3412 | oldprio = p->prio; | 3412 | oldprio = p->prio; |
3413 | __setscheduler(p, policy, param->sched_priority); | 3413 | __setscheduler(p, policy, param->sched_priority); |
3414 | if (array) { | 3414 | if (array) { |
3415 | __activate_task(p, rq); | 3415 | __activate_task(p, rq); |
3416 | /* | 3416 | /* |
3417 | * Reschedule if we are currently running on this runqueue and | 3417 | * Reschedule if we are currently running on this runqueue and |
3418 | * our priority decreased, or if we are not currently running on | 3418 | * our priority decreased, or if we are not currently running on |
3419 | * this runqueue and our priority is higher than the current's | 3419 | * this runqueue and our priority is higher than the current's |
3420 | */ | 3420 | */ |
3421 | if (task_running(rq, p)) { | 3421 | if (task_running(rq, p)) { |
3422 | if (p->prio > oldprio) | 3422 | if (p->prio > oldprio) |
3423 | resched_task(rq->curr); | 3423 | resched_task(rq->curr); |
3424 | } else if (TASK_PREEMPTS_CURR(p, rq)) | 3424 | } else if (TASK_PREEMPTS_CURR(p, rq)) |
3425 | resched_task(rq->curr); | 3425 | resched_task(rq->curr); |
3426 | } | 3426 | } |
3427 | task_rq_unlock(rq, &flags); | 3427 | task_rq_unlock(rq, &flags); |
3428 | return 0; | 3428 | return 0; |
3429 | } | 3429 | } |
3430 | EXPORT_SYMBOL_GPL(sched_setscheduler); | 3430 | EXPORT_SYMBOL_GPL(sched_setscheduler); |
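A hedged userspace sketch (not part of this patch) showing the syscall view of sched_setscheduler() above via the glibc wrapper; switching to an RT policy needs CAP_SYS_NICE or a sufficient RLIMIT_RTPRIO, per the permission checks above:

#include <errno.h>
#include <sched.h>
#include <stdio.h>
#include <string.h>

int main(void)
{
        struct sched_param sp = { .sched_priority = 10 };

        /* pid 0 means "the calling process" */
        if (sched_setscheduler(0, SCHED_FIFO, &sp) == -1) {
                printf("SCHED_FIFO/10 rejected: %s\n", strerror(errno));
                return 1;
        }
        printf("now SCHED_FIFO, rt priority 10\n");
        return 0;
}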
3431 | 3431 | ||
3432 | static int do_sched_setscheduler(pid_t pid, int policy, struct sched_param __user *param) | 3432 | static int do_sched_setscheduler(pid_t pid, int policy, struct sched_param __user *param) |
3433 | { | 3433 | { |
3434 | int retval; | 3434 | int retval; |
3435 | struct sched_param lparam; | 3435 | struct sched_param lparam; |
3436 | struct task_struct *p; | 3436 | struct task_struct *p; |
3437 | 3437 | ||
3438 | if (!param || pid < 0) | 3438 | if (!param || pid < 0) |
3439 | return -EINVAL; | 3439 | return -EINVAL; |
3440 | if (copy_from_user(&lparam, param, sizeof(struct sched_param))) | 3440 | if (copy_from_user(&lparam, param, sizeof(struct sched_param))) |
3441 | return -EFAULT; | 3441 | return -EFAULT; |
3442 | read_lock_irq(&tasklist_lock); | 3442 | read_lock_irq(&tasklist_lock); |
3443 | p = find_process_by_pid(pid); | 3443 | p = find_process_by_pid(pid); |
3444 | if (!p) { | 3444 | if (!p) { |
3445 | read_unlock_irq(&tasklist_lock); | 3445 | read_unlock_irq(&tasklist_lock); |
3446 | return -ESRCH; | 3446 | return -ESRCH; |
3447 | } | 3447 | } |
3448 | retval = sched_setscheduler(p, policy, &lparam); | 3448 | retval = sched_setscheduler(p, policy, &lparam); |
3449 | read_unlock_irq(&tasklist_lock); | 3449 | read_unlock_irq(&tasklist_lock); |
3450 | return retval; | 3450 | return retval; |
3451 | } | 3451 | } |
3452 | 3452 | ||
3453 | /** | 3453 | /** |
3454 | * sys_sched_setscheduler - set/change the scheduler policy and RT priority | 3454 | * sys_sched_setscheduler - set/change the scheduler policy and RT priority |
3455 | * @pid: the pid in question. | 3455 | * @pid: the pid in question. |
3456 | * @policy: new policy. | 3456 | * @policy: new policy. |
3457 | * @param: structure containing the new RT priority. | 3457 | * @param: structure containing the new RT priority. |
3458 | */ | 3458 | */ |
3459 | asmlinkage long sys_sched_setscheduler(pid_t pid, int policy, | 3459 | asmlinkage long sys_sched_setscheduler(pid_t pid, int policy, |
3460 | struct sched_param __user *param) | 3460 | struct sched_param __user *param) |
3461 | { | 3461 | { |
3462 | return do_sched_setscheduler(pid, policy, param); | 3462 | return do_sched_setscheduler(pid, policy, param); |
3463 | } | 3463 | } |
3464 | 3464 | ||
3465 | /** | 3465 | /** |
3466 | * sys_sched_setparam - set/change the RT priority of a thread | 3466 | * sys_sched_setparam - set/change the RT priority of a thread |
3467 | * @pid: the pid in question. | 3467 | * @pid: the pid in question. |
3468 | * @param: structure containing the new RT priority. | 3468 | * @param: structure containing the new RT priority. |
3469 | */ | 3469 | */ |
3470 | asmlinkage long sys_sched_setparam(pid_t pid, struct sched_param __user *param) | 3470 | asmlinkage long sys_sched_setparam(pid_t pid, struct sched_param __user *param) |
3471 | { | 3471 | { |
3472 | return do_sched_setscheduler(pid, -1, param); | 3472 | return do_sched_setscheduler(pid, -1, param); |
3473 | } | 3473 | } |
3474 | 3474 | ||
3475 | /** | 3475 | /** |
3476 | * sys_sched_getscheduler - get the policy (scheduling class) of a thread | 3476 | * sys_sched_getscheduler - get the policy (scheduling class) of a thread |
3477 | * @pid: the pid in question. | 3477 | * @pid: the pid in question. |
3478 | */ | 3478 | */ |
3479 | asmlinkage long sys_sched_getscheduler(pid_t pid) | 3479 | asmlinkage long sys_sched_getscheduler(pid_t pid) |
3480 | { | 3480 | { |
3481 | int retval = -EINVAL; | 3481 | int retval = -EINVAL; |
3482 | task_t *p; | 3482 | task_t *p; |
3483 | 3483 | ||
3484 | if (pid < 0) | 3484 | if (pid < 0) |
3485 | goto out_nounlock; | 3485 | goto out_nounlock; |
3486 | 3486 | ||
3487 | retval = -ESRCH; | 3487 | retval = -ESRCH; |
3488 | read_lock(&tasklist_lock); | 3488 | read_lock(&tasklist_lock); |
3489 | p = find_process_by_pid(pid); | 3489 | p = find_process_by_pid(pid); |
3490 | if (p) { | 3490 | if (p) { |
3491 | retval = security_task_getscheduler(p); | 3491 | retval = security_task_getscheduler(p); |
3492 | if (!retval) | 3492 | if (!retval) |
3493 | retval = p->policy; | 3493 | retval = p->policy; |
3494 | } | 3494 | } |
3495 | read_unlock(&tasklist_lock); | 3495 | read_unlock(&tasklist_lock); |
3496 | 3496 | ||
3497 | out_nounlock: | 3497 | out_nounlock: |
3498 | return retval; | 3498 | return retval; |
3499 | } | 3499 | } |
3500 | 3500 | ||
3501 | /** | 3501 | /** |
3502 | * sys_sched_getparam - get the RT priority of a thread | 3502 | * sys_sched_getparam - get the RT priority of a thread |
3503 | * @pid: the pid in question. | 3503 | * @pid: the pid in question. |
3504 | * @param: structure containing the RT priority. | 3504 | * @param: structure containing the RT priority. |
3505 | */ | 3505 | */ |
3506 | asmlinkage long sys_sched_getparam(pid_t pid, struct sched_param __user *param) | 3506 | asmlinkage long sys_sched_getparam(pid_t pid, struct sched_param __user *param) |
3507 | { | 3507 | { |
3508 | struct sched_param lp; | 3508 | struct sched_param lp; |
3509 | int retval = -EINVAL; | 3509 | int retval = -EINVAL; |
3510 | task_t *p; | 3510 | task_t *p; |
3511 | 3511 | ||
3512 | if (!param || pid < 0) | 3512 | if (!param || pid < 0) |
3513 | goto out_nounlock; | 3513 | goto out_nounlock; |
3514 | 3514 | ||
3515 | read_lock(&tasklist_lock); | 3515 | read_lock(&tasklist_lock); |
3516 | p = find_process_by_pid(pid); | 3516 | p = find_process_by_pid(pid); |
3517 | retval = -ESRCH; | 3517 | retval = -ESRCH; |
3518 | if (!p) | 3518 | if (!p) |
3519 | goto out_unlock; | 3519 | goto out_unlock; |
3520 | 3520 | ||
3521 | retval = security_task_getscheduler(p); | 3521 | retval = security_task_getscheduler(p); |
3522 | if (retval) | 3522 | if (retval) |
3523 | goto out_unlock; | 3523 | goto out_unlock; |
3524 | 3524 | ||
3525 | lp.sched_priority = p->rt_priority; | 3525 | lp.sched_priority = p->rt_priority; |
3526 | read_unlock(&tasklist_lock); | 3526 | read_unlock(&tasklist_lock); |
3527 | 3527 | ||
3528 | /* | 3528 | /* |
3529 | * This one might sleep, we cannot do it with a spinlock held ... | 3529 | * This one might sleep, we cannot do it with a spinlock held ... |
3530 | */ | 3530 | */ |
3531 | retval = copy_to_user(param, &lp, sizeof(*param)) ? -EFAULT : 0; | 3531 | retval = copy_to_user(param, &lp, sizeof(*param)) ? -EFAULT : 0; |
3532 | 3532 | ||
3533 | out_nounlock: | 3533 | out_nounlock: |
3534 | return retval; | 3534 | return retval; |
3535 | 3535 | ||
3536 | out_unlock: | 3536 | out_unlock: |
3537 | read_unlock(&tasklist_lock); | 3537 | read_unlock(&tasklist_lock); |
3538 | return retval; | 3538 | return retval; |
3539 | } | 3539 | } |
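A hedged userspace sketch (not part of this patch) that reads the policy and RT priority back through the two syscalls above, using the glibc sched_getscheduler()/sched_getparam() wrappers (SCHED_OTHER is the userspace name for SCHED_NORMAL):

#include <sched.h>
#include <stdio.h>

int main(void)
{
        struct sched_param sp;
        int policy = sched_getscheduler(0);     /* 0 == current process */

        if (policy == -1 || sched_getparam(0, &sp) == -1) {
                perror("sched_get*");
                return 1;
        }
        printf("policy %d (%s), rt priority %d\n", policy,
               policy == SCHED_FIFO ? "SCHED_FIFO" :
               policy == SCHED_RR   ? "SCHED_RR"   : "SCHED_OTHER",
               sp.sched_priority);
        return 0;
}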
3540 | 3540 | ||
3541 | long sched_setaffinity(pid_t pid, cpumask_t new_mask) | 3541 | long sched_setaffinity(pid_t pid, cpumask_t new_mask) |
3542 | { | 3542 | { |
3543 | task_t *p; | 3543 | task_t *p; |
3544 | int retval; | 3544 | int retval; |
3545 | cpumask_t cpus_allowed; | 3545 | cpumask_t cpus_allowed; |
3546 | 3546 | ||
3547 | lock_cpu_hotplug(); | 3547 | lock_cpu_hotplug(); |
3548 | read_lock(&tasklist_lock); | 3548 | read_lock(&tasklist_lock); |
3549 | 3549 | ||
3550 | p = find_process_by_pid(pid); | 3550 | p = find_process_by_pid(pid); |
3551 | if (!p) { | 3551 | if (!p) { |
3552 | read_unlock(&tasklist_lock); | 3552 | read_unlock(&tasklist_lock); |
3553 | unlock_cpu_hotplug(); | 3553 | unlock_cpu_hotplug(); |
3554 | return -ESRCH; | 3554 | return -ESRCH; |
3555 | } | 3555 | } |
3556 | 3556 | ||
3557 | /* | 3557 | /* |
3558 | * It is not safe to call set_cpus_allowed with the | 3558 | * It is not safe to call set_cpus_allowed with the |
3559 | * tasklist_lock held. We will bump the task_struct's | 3559 | * tasklist_lock held. We will bump the task_struct's |
3560 | * usage count and then drop tasklist_lock. | 3560 | * usage count and then drop tasklist_lock. |
3561 | */ | 3561 | */ |
3562 | get_task_struct(p); | 3562 | get_task_struct(p); |
3563 | read_unlock(&tasklist_lock); | 3563 | read_unlock(&tasklist_lock); |
3564 | 3564 | ||
3565 | retval = -EPERM; | 3565 | retval = -EPERM; |
3566 | if ((current->euid != p->euid) && (current->euid != p->uid) && | 3566 | if ((current->euid != p->euid) && (current->euid != p->uid) && |
3567 | !capable(CAP_SYS_NICE)) | 3567 | !capable(CAP_SYS_NICE)) |
3568 | goto out_unlock; | 3568 | goto out_unlock; |
3569 | 3569 | ||
3570 | cpus_allowed = cpuset_cpus_allowed(p); | 3570 | cpus_allowed = cpuset_cpus_allowed(p); |
3571 | cpus_and(new_mask, new_mask, cpus_allowed); | 3571 | cpus_and(new_mask, new_mask, cpus_allowed); |
3572 | retval = set_cpus_allowed(p, new_mask); | 3572 | retval = set_cpus_allowed(p, new_mask); |
3573 | 3573 | ||
3574 | out_unlock: | 3574 | out_unlock: |
3575 | put_task_struct(p); | 3575 | put_task_struct(p); |
3576 | unlock_cpu_hotplug(); | 3576 | unlock_cpu_hotplug(); |
3577 | return retval; | 3577 | return retval; |
3578 | } | 3578 | } |
3579 | 3579 | ||
3580 | static int get_user_cpu_mask(unsigned long __user *user_mask_ptr, unsigned len, | 3580 | static int get_user_cpu_mask(unsigned long __user *user_mask_ptr, unsigned len, |
3581 | cpumask_t *new_mask) | 3581 | cpumask_t *new_mask) |
3582 | { | 3582 | { |
3583 | if (len < sizeof(cpumask_t)) { | 3583 | if (len < sizeof(cpumask_t)) { |
3584 | memset(new_mask, 0, sizeof(cpumask_t)); | 3584 | memset(new_mask, 0, sizeof(cpumask_t)); |
3585 | } else if (len > sizeof(cpumask_t)) { | 3585 | } else if (len > sizeof(cpumask_t)) { |
3586 | len = sizeof(cpumask_t); | 3586 | len = sizeof(cpumask_t); |
3587 | } | 3587 | } |
3588 | return copy_from_user(new_mask, user_mask_ptr, len) ? -EFAULT : 0; | 3588 | return copy_from_user(new_mask, user_mask_ptr, len) ? -EFAULT : 0; |
3589 | } | 3589 | } |
3590 | 3590 | ||
3591 | /** | 3591 | /** |
3592 | * sys_sched_setaffinity - set the cpu affinity of a process | 3592 | * sys_sched_setaffinity - set the cpu affinity of a process |
3593 | * @pid: pid of the process | 3593 | * @pid: pid of the process |
3594 | * @len: length in bytes of the bitmask pointed to by user_mask_ptr | 3594 | * @len: length in bytes of the bitmask pointed to by user_mask_ptr |
3595 | * @user_mask_ptr: user-space pointer to the new cpu mask | 3595 | * @user_mask_ptr: user-space pointer to the new cpu mask |
3596 | */ | 3596 | */ |
3597 | asmlinkage long sys_sched_setaffinity(pid_t pid, unsigned int len, | 3597 | asmlinkage long sys_sched_setaffinity(pid_t pid, unsigned int len, |
3598 | unsigned long __user *user_mask_ptr) | 3598 | unsigned long __user *user_mask_ptr) |
3599 | { | 3599 | { |
3600 | cpumask_t new_mask; | 3600 | cpumask_t new_mask; |
3601 | int retval; | 3601 | int retval; |
3602 | 3602 | ||
3603 | retval = get_user_cpu_mask(user_mask_ptr, len, &new_mask); | 3603 | retval = get_user_cpu_mask(user_mask_ptr, len, &new_mask); |
3604 | if (retval) | 3604 | if (retval) |
3605 | return retval; | 3605 | return retval; |
3606 | 3606 | ||
3607 | return sched_setaffinity(pid, new_mask); | 3607 | return sched_setaffinity(pid, new_mask); |
3608 | } | 3608 | } |
3609 | 3609 | ||
3610 | /* | 3610 | /* |
3611 | * Represents all CPUs present in the system. | 3611 | * Represents all CPUs present in the system. |
3612 | * In systems capable of hotplug, this map could dynamically grow | 3612 | * In systems capable of hotplug, this map could dynamically grow |
3613 | * as new CPUs are detected in the system via any platform-specific | 3613 | * as new CPUs are detected in the system via any platform-specific |
3614 | * method, e.g. ACPI. | 3614 | * method, e.g. ACPI. |
3615 | */ | 3615 | */ |
3616 | 3616 | ||
3617 | cpumask_t cpu_present_map; | 3617 | cpumask_t cpu_present_map; |
3618 | EXPORT_SYMBOL(cpu_present_map); | 3618 | EXPORT_SYMBOL(cpu_present_map); |
3619 | 3619 | ||
3620 | #ifndef CONFIG_SMP | 3620 | #ifndef CONFIG_SMP |
3621 | cpumask_t cpu_online_map = CPU_MASK_ALL; | 3621 | cpumask_t cpu_online_map = CPU_MASK_ALL; |
3622 | cpumask_t cpu_possible_map = CPU_MASK_ALL; | 3622 | cpumask_t cpu_possible_map = CPU_MASK_ALL; |
3623 | #endif | 3623 | #endif |
3624 | 3624 | ||
3625 | long sched_getaffinity(pid_t pid, cpumask_t *mask) | 3625 | long sched_getaffinity(pid_t pid, cpumask_t *mask) |
3626 | { | 3626 | { |
3627 | int retval; | 3627 | int retval; |
3628 | task_t *p; | 3628 | task_t *p; |
3629 | 3629 | ||
3630 | lock_cpu_hotplug(); | 3630 | lock_cpu_hotplug(); |
3631 | read_lock(&tasklist_lock); | 3631 | read_lock(&tasklist_lock); |
3632 | 3632 | ||
3633 | retval = -ESRCH; | 3633 | retval = -ESRCH; |
3634 | p = find_process_by_pid(pid); | 3634 | p = find_process_by_pid(pid); |
3635 | if (!p) | 3635 | if (!p) |
3636 | goto out_unlock; | 3636 | goto out_unlock; |
3637 | 3637 | ||
3638 | retval = 0; | 3638 | retval = 0; |
3639 | cpus_and(*mask, p->cpus_allowed, cpu_possible_map); | 3639 | cpus_and(*mask, p->cpus_allowed, cpu_possible_map); |
3640 | 3640 | ||
3641 | out_unlock: | 3641 | out_unlock: |
3642 | read_unlock(&tasklist_lock); | 3642 | read_unlock(&tasklist_lock); |
3643 | unlock_cpu_hotplug(); | 3643 | unlock_cpu_hotplug(); |
3644 | if (retval) | 3644 | if (retval) |
3645 | return retval; | 3645 | return retval; |
3646 | 3646 | ||
3647 | return 0; | 3647 | return 0; |
3648 | } | 3648 | } |
3649 | 3649 | ||
3650 | /** | 3650 | /** |
3651 | * sys_sched_getaffinity - get the cpu affinity of a process | 3651 | * sys_sched_getaffinity - get the cpu affinity of a process |
3652 | * @pid: pid of the process | 3652 | * @pid: pid of the process |
3653 | * @len: length in bytes of the bitmask pointed to by user_mask_ptr | 3653 | * @len: length in bytes of the bitmask pointed to by user_mask_ptr |
3654 | * @user_mask_ptr: user-space pointer to hold the current cpu mask | 3654 | * @user_mask_ptr: user-space pointer to hold the current cpu mask |
3655 | */ | 3655 | */ |
3656 | asmlinkage long sys_sched_getaffinity(pid_t pid, unsigned int len, | 3656 | asmlinkage long sys_sched_getaffinity(pid_t pid, unsigned int len, |
3657 | unsigned long __user *user_mask_ptr) | 3657 | unsigned long __user *user_mask_ptr) |
3658 | { | 3658 | { |
3659 | int ret; | 3659 | int ret; |
3660 | cpumask_t mask; | 3660 | cpumask_t mask; |
3661 | 3661 | ||
3662 | if (len < sizeof(cpumask_t)) | 3662 | if (len < sizeof(cpumask_t)) |
3663 | return -EINVAL; | 3663 | return -EINVAL; |
3664 | 3664 | ||
3665 | ret = sched_getaffinity(pid, &mask); | 3665 | ret = sched_getaffinity(pid, &mask); |
3666 | if (ret < 0) | 3666 | if (ret < 0) |
3667 | return ret; | 3667 | return ret; |
3668 | 3668 | ||
3669 | if (copy_to_user(user_mask_ptr, &mask, sizeof(cpumask_t))) | 3669 | if (copy_to_user(user_mask_ptr, &mask, sizeof(cpumask_t))) |
3670 | return -EFAULT; | 3670 | return -EFAULT; |
3671 | 3671 | ||
3672 | return sizeof(cpumask_t); | 3672 | return sizeof(cpumask_t); |
3673 | } | 3673 | } |
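A hedged userspace sketch (not part of this patch) driving sys_sched_setaffinity()/sys_sched_getaffinity() above through the glibc wrappers: pin the caller to CPU 0, then read the mask back. _GNU_SOURCE is needed for cpu_set_t and the CPU_* macros.

#define _GNU_SOURCE
#include <sched.h>
#include <stdio.h>

int main(void)
{
        cpu_set_t mask;
        int cpu;

        CPU_ZERO(&mask);
        CPU_SET(0, &mask);
        if (sched_setaffinity(0, sizeof(mask), &mask) == -1) {
                perror("sched_setaffinity");
                return 1;
        }

        CPU_ZERO(&mask);
        if (sched_getaffinity(0, sizeof(mask), &mask) == -1) {
                perror("sched_getaffinity");
                return 1;
        }
        for (cpu = 0; cpu < CPU_SETSIZE; cpu++)
                if (CPU_ISSET(cpu, &mask))
                        printf("allowed on CPU %d\n", cpu);
        return 0;
}

Note that the raw syscall returns sizeof(cpumask_t) on success, as seen in sys_sched_getaffinity() above, while the glibc wrapper normalizes this to 0.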
3674 | 3674 | ||
3675 | /** | 3675 | /** |
3676 | * sys_sched_yield - yield the current processor to other threads. | 3676 | * sys_sched_yield - yield the current processor to other threads. |
3677 | * | 3677 | * |
3678 | * this function yields the current CPU by moving the calling thread | 3678 | * this function yields the current CPU by moving the calling thread |
3679 | * to the expired array. If there are no other threads running on this | 3679 | * to the expired array. If there are no other threads running on this |
3680 | * CPU then this function will return. | 3680 | * CPU then this function will return. |
3681 | */ | 3681 | */ |
3682 | asmlinkage long sys_sched_yield(void) | 3682 | asmlinkage long sys_sched_yield(void) |
3683 | { | 3683 | { |
3684 | runqueue_t *rq = this_rq_lock(); | 3684 | runqueue_t *rq = this_rq_lock(); |
3685 | prio_array_t *array = current->array; | 3685 | prio_array_t *array = current->array; |
3686 | prio_array_t *target = rq->expired; | 3686 | prio_array_t *target = rq->expired; |
3687 | 3687 | ||
3688 | schedstat_inc(rq, yld_cnt); | 3688 | schedstat_inc(rq, yld_cnt); |
3689 | /* | 3689 | /* |
3690 | * We implement yielding by moving the task into the expired | 3690 | * We implement yielding by moving the task into the expired |
3691 | * queue. | 3691 | * queue. |
3692 | * | 3692 | * |
3693 | * (special rule: RT tasks will just roundrobin in the active | 3693 | * (special rule: RT tasks will just roundrobin in the active |
3694 | * array.) | 3694 | * array.) |
3695 | */ | 3695 | */ |
3696 | if (rt_task(current)) | 3696 | if (rt_task(current)) |
3697 | target = rq->active; | 3697 | target = rq->active; |
3698 | 3698 | ||
3699 | if (current->array->nr_active == 1) { | 3699 | if (current->array->nr_active == 1) { |
3700 | schedstat_inc(rq, yld_act_empty); | 3700 | schedstat_inc(rq, yld_act_empty); |
3701 | if (!rq->expired->nr_active) | 3701 | if (!rq->expired->nr_active) |
3702 | schedstat_inc(rq, yld_both_empty); | 3702 | schedstat_inc(rq, yld_both_empty); |
3703 | } else if (!rq->expired->nr_active) | 3703 | } else if (!rq->expired->nr_active) |
3704 | schedstat_inc(rq, yld_exp_empty); | 3704 | schedstat_inc(rq, yld_exp_empty); |
3705 | 3705 | ||
3706 | if (array != target) { | 3706 | if (array != target) { |
3707 | dequeue_task(current, array); | 3707 | dequeue_task(current, array); |
3708 | enqueue_task(current, target); | 3708 | enqueue_task(current, target); |
3709 | } else | 3709 | } else |
3710 | /* | 3710 | /* |
3711 | * requeue_task is cheaper so perform that if possible. | 3711 | * requeue_task is cheaper so perform that if possible. |
3712 | */ | 3712 | */ |
3713 | requeue_task(current, array); | 3713 | requeue_task(current, array); |
3714 | 3714 | ||
3715 | /* | 3715 | /* |
3716 | * Since we are going to call schedule() anyway, there's | 3716 | * Since we are going to call schedule() anyway, there's |
3717 | * no need to preempt or enable interrupts: | 3717 | * no need to preempt or enable interrupts: |
3718 | */ | 3718 | */ |
3719 | __release(rq->lock); | 3719 | __release(rq->lock); |
3720 | _raw_spin_unlock(&rq->lock); | 3720 | _raw_spin_unlock(&rq->lock); |
3721 | preempt_enable_no_resched(); | 3721 | preempt_enable_no_resched(); |
3722 | 3722 | ||
3723 | schedule(); | 3723 | schedule(); |
3724 | 3724 | ||
3725 | return 0; | 3725 | return 0; |
3726 | } | 3726 | } |
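A hedged userspace sketch (not part of this patch) of sys_sched_yield() in its typical role: a thread that must busy-wait yields the CPU on each iteration instead of spinning flat out (build with -lpthread; the plain volatile flag is a deliberate simplification):

#define _GNU_SOURCE
#include <pthread.h>
#include <sched.h>
#include <stdio.h>

static volatile int flag;

static void *setter(void *arg)
{
        (void)arg;
        flag = 1;               /* release the main thread */
        return NULL;
}

int main(void)
{
        pthread_t t;

        pthread_create(&t, NULL, setter, NULL);
        while (!flag)
                sched_yield();  /* sys_sched_yield(): requeue, run someone else */
        pthread_join(t, NULL);
        printf("done\n");
        return 0;
}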
3727 | 3727 | ||
3728 | static inline void __cond_resched(void) | 3728 | static inline void __cond_resched(void) |
3729 | { | 3729 | { |
3730 | do { | 3730 | do { |
3731 | add_preempt_count(PREEMPT_ACTIVE); | 3731 | add_preempt_count(PREEMPT_ACTIVE); |
3732 | schedule(); | 3732 | schedule(); |
3733 | sub_preempt_count(PREEMPT_ACTIVE); | 3733 | sub_preempt_count(PREEMPT_ACTIVE); |
3734 | } while (need_resched()); | 3734 | } while (need_resched()); |
3735 | } | 3735 | } |
3736 | 3736 | ||
3737 | int __sched cond_resched(void) | 3737 | int __sched cond_resched(void) |
3738 | { | 3738 | { |
3739 | if (need_resched()) { | 3739 | if (need_resched()) { |
3740 | __cond_resched(); | 3740 | __cond_resched(); |
3741 | return 1; | 3741 | return 1; |
3742 | } | 3742 | } |
3743 | return 0; | 3743 | return 0; |
3744 | } | 3744 | } |
3745 | 3745 | ||
3746 | EXPORT_SYMBOL(cond_resched); | 3746 | EXPORT_SYMBOL(cond_resched); |
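A hedged kernel-context sketch (not part of this patch, and not buildable on its own) of the pattern cond_resched() exists for: a long-running loop in process context offers to reschedule on every iteration so that !CONFIG_PREEMPT kernels keep reasonable latency. clear_highpage() here is just an illustrative stand-in for "real work per item".

static void clear_many_pages(struct page **pages, int nr)
{
        int i;

        for (i = 0; i < nr; i++) {
                clear_highpage(pages[i]);       /* the actual work */
                cond_resched();                 /* reschedule if needed */
        }
}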
3747 | 3747 | ||
3748 | /* | 3748 | /* |
3749 | * cond_resched_lock() - if a reschedule is pending, drop the given lock, | 3749 | * cond_resched_lock() - if a reschedule is pending, drop the given lock, |
3750 | * call schedule, and on return reacquire the lock. | 3750 | * call schedule, and on return reacquire the lock. |
3751 | * | 3751 | * |
3752 | * This works OK both with and without CONFIG_PREEMPT. We do strange low-level | 3752 | * This works OK both with and without CONFIG_PREEMPT. We do strange low-level |
3753 | * operations here to prevent schedule() from being called twice (once via | 3753 | * operations here to prevent schedule() from being called twice (once via |
3754 | * spin_unlock(), once by hand). | 3754 | * spin_unlock(), once by hand). |
3755 | */ | 3755 | */ |
3756 | int cond_resched_lock(spinlock_t * lock) | 3756 | int cond_resched_lock(spinlock_t * lock) |
3757 | { | 3757 | { |
3758 | int ret = 0; | 3758 | int ret = 0; |
3759 | 3759 | ||
3760 | if (need_lockbreak(lock)) { | 3760 | if (need_lockbreak(lock)) { |
3761 | spin_unlock(lock); | 3761 | spin_unlock(lock); |
3762 | cpu_relax(); | 3762 | cpu_relax(); |
3763 | ret = 1; | 3763 | ret = 1; |
3764 | spin_lock(lock); | 3764 | spin_lock(lock); |
3765 | } | 3765 | } |
3766 | if (need_resched()) { | 3766 | if (need_resched()) { |
3767 | _raw_spin_unlock(lock); | 3767 | _raw_spin_unlock(lock); |
3768 | preempt_enable_no_resched(); | 3768 | preempt_enable_no_resched(); |
3769 | __cond_resched(); | 3769 | __cond_resched(); |
3770 | ret = 1; | 3770 | ret = 1; |
3771 | spin_lock(lock); | 3771 | spin_lock(lock); |
3772 | } | 3772 | } |
3773 | return ret; | 3773 | return ret; |
3774 | } | 3774 | } |
3775 | 3775 | ||
3776 | EXPORT_SYMBOL(cond_resched_lock); | 3776 | EXPORT_SYMBOL(cond_resched_lock); |
3777 | 3777 | ||
3778 | int __sched cond_resched_softirq(void) | 3778 | int __sched cond_resched_softirq(void) |
3779 | { | 3779 | { |
3780 | BUG_ON(!in_softirq()); | 3780 | BUG_ON(!in_softirq()); |
3781 | 3781 | ||
3782 | if (need_resched()) { | 3782 | if (need_resched()) { |
3783 | __local_bh_enable(); | 3783 | __local_bh_enable(); |
3784 | __cond_resched(); | 3784 | __cond_resched(); |
3785 | local_bh_disable(); | 3785 | local_bh_disable(); |
3786 | return 1; | 3786 | return 1; |
3787 | } | 3787 | } |
3788 | return 0; | 3788 | return 0; |
3789 | } | 3789 | } |
3790 | 3790 | ||
3791 | EXPORT_SYMBOL(cond_resched_softirq); | 3791 | EXPORT_SYMBOL(cond_resched_softirq); |
3792 | 3792 | ||
3793 | 3793 | ||
3794 | /** | 3794 | /** |
3795 | * yield - yield the current processor to other threads. | 3795 | * yield - yield the current processor to other threads. |
3796 | * | 3796 | * |
3797 | * this is a shortcut for kernel-space yielding - it marks the | 3797 | * this is a shortcut for kernel-space yielding - it marks the |
3798 | * thread runnable and calls sys_sched_yield(). | 3798 | * thread runnable and calls sys_sched_yield(). |
3799 | */ | 3799 | */ |
3800 | void __sched yield(void) | 3800 | void __sched yield(void) |
3801 | { | 3801 | { |
3802 | set_current_state(TASK_RUNNING); | 3802 | set_current_state(TASK_RUNNING); |
3803 | sys_sched_yield(); | 3803 | sys_sched_yield(); |
3804 | } | 3804 | } |
3805 | 3805 | ||
3806 | EXPORT_SYMBOL(yield); | 3806 | EXPORT_SYMBOL(yield); |
3807 | 3807 | ||
3808 | /* | 3808 | /* |
3809 | * This task is about to go to sleep on IO. Increment rq->nr_iowait so | 3809 | * This task is about to go to sleep on IO. Increment rq->nr_iowait so |
3810 | * that process accounting knows that this is a task in IO wait state. | 3810 | * that process accounting knows that this is a task in IO wait state. |
3811 | * | 3811 | * |
3812 | * But don't do that if it is a deliberate, throttling IO wait (this task | 3812 | * But don't do that if it is a deliberate, throttling IO wait (this task |
3813 | * has set its backing_dev_info: the queue against which it should throttle) | 3813 | * has set its backing_dev_info: the queue against which it should throttle) |
3814 | */ | 3814 | */ |
3815 | void __sched io_schedule(void) | 3815 | void __sched io_schedule(void) |
3816 | { | 3816 | { |
3817 | struct runqueue *rq = &per_cpu(runqueues, _smp_processor_id()); | 3817 | struct runqueue *rq = &per_cpu(runqueues, raw_smp_processor_id()); |
3818 | 3818 | ||
3819 | atomic_inc(&rq->nr_iowait); | 3819 | atomic_inc(&rq->nr_iowait); |
3820 | schedule(); | 3820 | schedule(); |
3821 | atomic_dec(&rq->nr_iowait); | 3821 | atomic_dec(&rq->nr_iowait); |
3822 | } | 3822 | } |
3823 | 3823 | ||
3824 | EXPORT_SYMBOL(io_schedule); | 3824 | EXPORT_SYMBOL(io_schedule); |
3825 | 3825 | ||
3826 | long __sched io_schedule_timeout(long timeout) | 3826 | long __sched io_schedule_timeout(long timeout) |
3827 | { | 3827 | { |
3828 | struct runqueue *rq = &per_cpu(runqueues, _smp_processor_id()); | 3828 | struct runqueue *rq = &per_cpu(runqueues, raw_smp_processor_id()); |
3829 | long ret; | 3829 | long ret; |
3830 | 3830 | ||
3831 | atomic_inc(&rq->nr_iowait); | 3831 | atomic_inc(&rq->nr_iowait); |
3832 | ret = schedule_timeout(timeout); | 3832 | ret = schedule_timeout(timeout); |
3833 | atomic_dec(&rq->nr_iowait); | 3833 | atomic_dec(&rq->nr_iowait); |
3834 | return ret; | 3834 | return ret; |
3835 | } | 3835 | } |
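The two hunks above carry this file's part of the cleanup: io_schedule() and io_schedule_timeout() run in preemptible context, so the CPU number they read may be stale by the time it is used; that is tolerable because rq->nr_iowait is only an accounting counter, which is exactly the case the raw_ variant documents. A hedged kernel-context fragment (not buildable on its own) contrasting the two:

        /* debug variant: with CONFIG_DEBUG_PREEMPT this warns when the
           caller is preemptible, as io_schedule() is */
        rq = &per_cpu(runqueues, smp_processor_id());

        /* nondebug variant: states explicitly that the caller tolerates
           a racy CPU number */
        rq = &per_cpu(runqueues, raw_smp_processor_id());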
3836 | 3836 | ||
3837 | /** | 3837 | /** |
3838 | * sys_sched_get_priority_max - return maximum RT priority. | 3838 | * sys_sched_get_priority_max - return maximum RT priority. |
3839 | * @policy: scheduling class. | 3839 | * @policy: scheduling class. |
3840 | * | 3840 | * |
3841 | * this syscall returns the maximum rt_priority that can be used | 3841 | * this syscall returns the maximum rt_priority that can be used |
3842 | * by a given scheduling class. | 3842 | * by a given scheduling class. |
3843 | */ | 3843 | */ |
3844 | asmlinkage long sys_sched_get_priority_max(int policy) | 3844 | asmlinkage long sys_sched_get_priority_max(int policy) |
3845 | { | 3845 | { |
3846 | int ret = -EINVAL; | 3846 | int ret = -EINVAL; |
3847 | 3847 | ||
3848 | switch (policy) { | 3848 | switch (policy) { |
3849 | case SCHED_FIFO: | 3849 | case SCHED_FIFO: |
3850 | case SCHED_RR: | 3850 | case SCHED_RR: |
3851 | ret = MAX_USER_RT_PRIO-1; | 3851 | ret = MAX_USER_RT_PRIO-1; |
3852 | break; | 3852 | break; |
3853 | case SCHED_NORMAL: | 3853 | case SCHED_NORMAL: |
3854 | ret = 0; | 3854 | ret = 0; |
3855 | break; | 3855 | break; |
3856 | } | 3856 | } |
3857 | return ret; | 3857 | return ret; |
3858 | } | 3858 | } |
3859 | 3859 | ||
3860 | /** | 3860 | /** |
3861 | * sys_sched_get_priority_min - return minimum RT priority. | 3861 | * sys_sched_get_priority_min - return minimum RT priority. |
3862 | * @policy: scheduling class. | 3862 | * @policy: scheduling class. |
3863 | * | 3863 | * |
3864 | * this syscall returns the minimum rt_priority that can be used | 3864 | * this syscall returns the minimum rt_priority that can be used |
3865 | * by a given scheduling class. | 3865 | * by a given scheduling class. |
3866 | */ | 3866 | */ |
3867 | asmlinkage long sys_sched_get_priority_min(int policy) | 3867 | asmlinkage long sys_sched_get_priority_min(int policy) |
3868 | { | 3868 | { |
3869 | int ret = -EINVAL; | 3869 | int ret = -EINVAL; |
3870 | 3870 | ||
3871 | switch (policy) { | 3871 | switch (policy) { |
3872 | case SCHED_FIFO: | 3872 | case SCHED_FIFO: |
3873 | case SCHED_RR: | 3873 | case SCHED_RR: |
3874 | ret = 1; | 3874 | ret = 1; |
3875 | break; | 3875 | break; |
3876 | case SCHED_NORMAL: | 3876 | case SCHED_NORMAL: |
3877 | ret = 0; | 3877 | ret = 0; |
3878 | } | 3878 | } |
3879 | return ret; | 3879 | return ret; |
3880 | } | 3880 | } |
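A hedged userspace sketch (not part of this patch) querying the ranges returned by the two syscalls above via the glibc wrappers; with the usual MAX_USER_RT_PRIO of 100 this prints 1..99 for the RT policies and 0..0 for the normal one:

#include <sched.h>
#include <stdio.h>

int main(void)
{
        printf("SCHED_FIFO:  %d..%d\n",
               sched_get_priority_min(SCHED_FIFO),
               sched_get_priority_max(SCHED_FIFO));
        printf("SCHED_RR:    %d..%d\n",
               sched_get_priority_min(SCHED_RR),
               sched_get_priority_max(SCHED_RR));
        printf("SCHED_OTHER: %d..%d\n",
               sched_get_priority_min(SCHED_OTHER),
               sched_get_priority_max(SCHED_OTHER));
        return 0;
}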
3881 | 3881 | ||
3882 | /** | 3882 | /** |
3883 | * sys_sched_rr_get_interval - return the default timeslice of a process. | 3883 | * sys_sched_rr_get_interval - return the default timeslice of a process. |
3884 | * @pid: pid of the process. | 3884 | * @pid: pid of the process. |
3885 | * @interval: userspace pointer to the timeslice value. | 3885 | * @interval: userspace pointer to the timeslice value. |
3886 | * | 3886 | * |
3887 | * this syscall writes the default timeslice value of a given process | 3887 | * this syscall writes the default timeslice value of a given process |
3888 | * into the user-space timespec buffer. A value of '0' means infinity. | 3888 | * into the user-space timespec buffer. A value of '0' means infinity. |
3889 | */ | 3889 | */ |
3890 | asmlinkage | 3890 | asmlinkage |
3891 | long sys_sched_rr_get_interval(pid_t pid, struct timespec __user *interval) | 3891 | long sys_sched_rr_get_interval(pid_t pid, struct timespec __user *interval) |
3892 | { | 3892 | { |
3893 | int retval = -EINVAL; | 3893 | int retval = -EINVAL; |
3894 | struct timespec t; | 3894 | struct timespec t; |
3895 | task_t *p; | 3895 | task_t *p; |
3896 | 3896 | ||
3897 | if (pid < 0) | 3897 | if (pid < 0) |
3898 | goto out_nounlock; | 3898 | goto out_nounlock; |
3899 | 3899 | ||
3900 | retval = -ESRCH; | 3900 | retval = -ESRCH; |
3901 | read_lock(&tasklist_lock); | 3901 | read_lock(&tasklist_lock); |
3902 | p = find_process_by_pid(pid); | 3902 | p = find_process_by_pid(pid); |
3903 | if (!p) | 3903 | if (!p) |
3904 | goto out_unlock; | 3904 | goto out_unlock; |
3905 | 3905 | ||
3906 | retval = security_task_getscheduler(p); | 3906 | retval = security_task_getscheduler(p); |
3907 | if (retval) | 3907 | if (retval) |
3908 | goto out_unlock; | 3908 | goto out_unlock; |
3909 | 3909 | ||
3910 | jiffies_to_timespec(p->policy & SCHED_FIFO ? | 3910 | jiffies_to_timespec(p->policy & SCHED_FIFO ? |
3911 | 0 : task_timeslice(p), &t); | 3911 | 0 : task_timeslice(p), &t); |
3912 | read_unlock(&tasklist_lock); | 3912 | read_unlock(&tasklist_lock); |
3913 | retval = copy_to_user(interval, &t, sizeof(t)) ? -EFAULT : 0; | 3913 | retval = copy_to_user(interval, &t, sizeof(t)) ? -EFAULT : 0; |
3914 | out_nounlock: | 3914 | out_nounlock: |
3915 | return retval; | 3915 | return retval; |
3916 | out_unlock: | 3916 | out_unlock: |
3917 | read_unlock(&tasklist_lock); | 3917 | read_unlock(&tasklist_lock); |
3918 | return retval; | 3918 | return retval; |
3919 | } | 3919 | } |
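A hedged userspace sketch (not part of this patch) reading the default timeslice reported by sys_sched_rr_get_interval() above; as the comment there says, an all-zero timespec means "infinity" (SCHED_FIFO tasks are not timesliced):

#include <sched.h>
#include <stdio.h>
#include <time.h>

int main(void)
{
        struct timespec ts;

        if (sched_rr_get_interval(0, &ts) == -1) {      /* 0 == current */
                perror("sched_rr_get_interval");
                return 1;
        }
        printf("timeslice: %ld.%09ld s\n", (long)ts.tv_sec, ts.tv_nsec);
        return 0;
}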
3920 | 3920 | ||
3921 | static inline struct task_struct *eldest_child(struct task_struct *p) | 3921 | static inline struct task_struct *eldest_child(struct task_struct *p) |
3922 | { | 3922 | { |
3923 | if (list_empty(&p->children)) return NULL; | 3923 | if (list_empty(&p->children)) return NULL; |
3924 | return list_entry(p->children.next,struct task_struct,sibling); | 3924 | return list_entry(p->children.next,struct task_struct,sibling); |
3925 | } | 3925 | } |
3926 | 3926 | ||
3927 | static inline struct task_struct *older_sibling(struct task_struct *p) | 3927 | static inline struct task_struct *older_sibling(struct task_struct *p) |
3928 | { | 3928 | { |
3929 | if (p->sibling.prev==&p->parent->children) return NULL; | 3929 | if (p->sibling.prev==&p->parent->children) return NULL; |
3930 | return list_entry(p->sibling.prev,struct task_struct,sibling); | 3930 | return list_entry(p->sibling.prev,struct task_struct,sibling); |
3931 | } | 3931 | } |
3932 | 3932 | ||
3933 | static inline struct task_struct *younger_sibling(struct task_struct *p) | 3933 | static inline struct task_struct *younger_sibling(struct task_struct *p) |
3934 | { | 3934 | { |
3935 | if (p->sibling.next==&p->parent->children) return NULL; | 3935 | if (p->sibling.next==&p->parent->children) return NULL; |
3936 | return list_entry(p->sibling.next,struct task_struct,sibling); | 3936 | return list_entry(p->sibling.next,struct task_struct,sibling); |
3937 | } | 3937 | } |
3938 | 3938 | ||
3939 | static void show_task(task_t * p) | 3939 | static void show_task(task_t * p) |
3940 | { | 3940 | { |
3941 | task_t *relative; | 3941 | task_t *relative; |
3942 | unsigned state; | 3942 | unsigned state; |
3943 | unsigned long free = 0; | 3943 | unsigned long free = 0; |
3944 | static const char *stat_nam[] = { "R", "S", "D", "T", "t", "Z", "X" }; | 3944 | static const char *stat_nam[] = { "R", "S", "D", "T", "t", "Z", "X" }; |
3945 | 3945 | ||
3946 | printk("%-13.13s ", p->comm); | 3946 | printk("%-13.13s ", p->comm); |
3947 | state = p->state ? __ffs(p->state) + 1 : 0; | 3947 | state = p->state ? __ffs(p->state) + 1 : 0; |
3948 | if (state < ARRAY_SIZE(stat_nam)) | 3948 | if (state < ARRAY_SIZE(stat_nam)) |
3949 | printk(stat_nam[state]); | 3949 | printk(stat_nam[state]); |
3950 | else | 3950 | else |
3951 | printk("?"); | 3951 | printk("?"); |
3952 | #if (BITS_PER_LONG == 32) | 3952 | #if (BITS_PER_LONG == 32) |
3953 | if (state == TASK_RUNNING) | 3953 | if (state == TASK_RUNNING) |
3954 | printk(" running "); | 3954 | printk(" running "); |
3955 | else | 3955 | else |
3956 | printk(" %08lX ", thread_saved_pc(p)); | 3956 | printk(" %08lX ", thread_saved_pc(p)); |
3957 | #else | 3957 | #else |
3958 | if (state == TASK_RUNNING) | 3958 | if (state == TASK_RUNNING) |
3959 | printk(" running task "); | 3959 | printk(" running task "); |
3960 | else | 3960 | else |
3961 | printk(" %016lx ", thread_saved_pc(p)); | 3961 | printk(" %016lx ", thread_saved_pc(p)); |
3962 | #endif | 3962 | #endif |
3963 | #ifdef CONFIG_DEBUG_STACK_USAGE | 3963 | #ifdef CONFIG_DEBUG_STACK_USAGE |
3964 | { | 3964 | { |
3965 | unsigned long * n = (unsigned long *) (p->thread_info+1); | 3965 | unsigned long * n = (unsigned long *) (p->thread_info+1); |
3966 | while (!*n) | 3966 | while (!*n) |
3967 | n++; | 3967 | n++; |
3968 | free = (unsigned long) n - (unsigned long)(p->thread_info+1); | 3968 | free = (unsigned long) n - (unsigned long)(p->thread_info+1); |
3969 | } | 3969 | } |
3970 | #endif | 3970 | #endif |
3971 | printk("%5lu %5d %6d ", free, p->pid, p->parent->pid); | 3971 | printk("%5lu %5d %6d ", free, p->pid, p->parent->pid); |
3972 | if ((relative = eldest_child(p))) | 3972 | if ((relative = eldest_child(p))) |
3973 | printk("%5d ", relative->pid); | 3973 | printk("%5d ", relative->pid); |
3974 | else | 3974 | else |
3975 | printk(" "); | 3975 | printk(" "); |
3976 | if ((relative = younger_sibling(p))) | 3976 | if ((relative = younger_sibling(p))) |
3977 | printk("%7d", relative->pid); | 3977 | printk("%7d", relative->pid); |
3978 | else | 3978 | else |
3979 | printk(" "); | 3979 | printk(" "); |
3980 | if ((relative = older_sibling(p))) | 3980 | if ((relative = older_sibling(p))) |
3981 | printk(" %5d", relative->pid); | 3981 | printk(" %5d", relative->pid); |
3982 | else | 3982 | else |
3983 | printk(" "); | 3983 | printk(" "); |
3984 | if (!p->mm) | 3984 | if (!p->mm) |
3985 | printk(" (L-TLB)\n"); | 3985 | printk(" (L-TLB)\n"); |
3986 | else | 3986 | else |
3987 | printk(" (NOTLB)\n"); | 3987 | printk(" (NOTLB)\n"); |
3988 | 3988 | ||
3989 | if (state != TASK_RUNNING) | 3989 | if (state != TASK_RUNNING) |
3990 | show_stack(p, NULL); | 3990 | show_stack(p, NULL); |
3991 | } | 3991 | } |
3992 | 3992 | ||
3993 | void show_state(void) | 3993 | void show_state(void) |
3994 | { | 3994 | { |
3995 | task_t *g, *p; | 3995 | task_t *g, *p; |
3996 | 3996 | ||
3997 | #if (BITS_PER_LONG == 32) | 3997 | #if (BITS_PER_LONG == 32) |
3998 | printk("\n" | 3998 | printk("\n" |
3999 | " sibling\n"); | 3999 | " sibling\n"); |
4000 | printk(" task PC pid father child younger older\n"); | 4000 | printk(" task PC pid father child younger older\n"); |
4001 | #else | 4001 | #else |
4002 | printk("\n" | 4002 | printk("\n" |
4003 | " sibling\n"); | 4003 | " sibling\n"); |
4004 | printk(" task PC pid father child younger older\n"); | 4004 | printk(" task PC pid father child younger older\n"); |
4005 | #endif | 4005 | #endif |
4006 | read_lock(&tasklist_lock); | 4006 | read_lock(&tasklist_lock); |
4007 | do_each_thread(g, p) { | 4007 | do_each_thread(g, p) { |
4008 | /* | 4008 | /* |
4009 | * reset the NMI-timeout, listing all files on a slow | 4009 | * reset the NMI-timeout, listing all files on a slow |
4010 | * console might take a lot of time: | 4010 | * console might take a lot of time: |
4011 | */ | 4011 | */ |
4012 | touch_nmi_watchdog(); | 4012 | touch_nmi_watchdog(); |
4013 | show_task(p); | 4013 | show_task(p); |
4014 | } while_each_thread(g, p); | 4014 | } while_each_thread(g, p); |
4015 | 4015 | ||
4016 | read_unlock(&tasklist_lock); | 4016 | read_unlock(&tasklist_lock); |
4017 | } | 4017 | } |
4018 | 4018 | ||
4019 | void __devinit init_idle(task_t *idle, int cpu) | 4019 | void __devinit init_idle(task_t *idle, int cpu) |
4020 | { | 4020 | { |
4021 | runqueue_t *rq = cpu_rq(cpu); | 4021 | runqueue_t *rq = cpu_rq(cpu); |
4022 | unsigned long flags; | 4022 | unsigned long flags; |
4023 | 4023 | ||
4024 | idle->sleep_avg = 0; | 4024 | idle->sleep_avg = 0; |
4025 | idle->array = NULL; | 4025 | idle->array = NULL; |
4026 | idle->prio = MAX_PRIO; | 4026 | idle->prio = MAX_PRIO; |
4027 | idle->state = TASK_RUNNING; | 4027 | idle->state = TASK_RUNNING; |
4028 | idle->cpus_allowed = cpumask_of_cpu(cpu); | 4028 | idle->cpus_allowed = cpumask_of_cpu(cpu); |
4029 | set_task_cpu(idle, cpu); | 4029 | set_task_cpu(idle, cpu); |
4030 | 4030 | ||
4031 | spin_lock_irqsave(&rq->lock, flags); | 4031 | spin_lock_irqsave(&rq->lock, flags); |
4032 | rq->curr = rq->idle = idle; | 4032 | rq->curr = rq->idle = idle; |
4033 | set_tsk_need_resched(idle); | 4033 | set_tsk_need_resched(idle); |
4034 | spin_unlock_irqrestore(&rq->lock, flags); | 4034 | spin_unlock_irqrestore(&rq->lock, flags); |
4035 | 4035 | ||
4036 | /* Set the preempt count _outside_ the spinlocks! */ | 4036 | /* Set the preempt count _outside_ the spinlocks! */ |
4037 | #if defined(CONFIG_PREEMPT) && !defined(CONFIG_PREEMPT_BKL) | 4037 | #if defined(CONFIG_PREEMPT) && !defined(CONFIG_PREEMPT_BKL) |
4038 | idle->thread_info->preempt_count = (idle->lock_depth >= 0); | 4038 | idle->thread_info->preempt_count = (idle->lock_depth >= 0); |
4039 | #else | 4039 | #else |
4040 | idle->thread_info->preempt_count = 0; | 4040 | idle->thread_info->preempt_count = 0; |
4041 | #endif | 4041 | #endif |
4042 | } | 4042 | } |
4043 | 4043 | ||
4044 | /* | 4044 | /* |
4045 | * In a system that switches off the HZ timer nohz_cpu_mask | 4045 | * In a system that switches off the HZ timer nohz_cpu_mask |
4046 | * indicates which cpus entered this state. This is used | 4046 | * indicates which cpus entered this state. This is used |
4047 | * in the rcu update to wait only for active cpus. For system | 4047 | * in the rcu update to wait only for active cpus. For system |
4048 | * which do not switch off the HZ timer nohz_cpu_mask should | 4048 | * which do not switch off the HZ timer nohz_cpu_mask should |
4049 | * always be CPU_MASK_NONE. | 4049 | * always be CPU_MASK_NONE. |
4050 | */ | 4050 | */ |
4051 | cpumask_t nohz_cpu_mask = CPU_MASK_NONE; | 4051 | cpumask_t nohz_cpu_mask = CPU_MASK_NONE; |
4052 | 4052 | ||
4053 | #ifdef CONFIG_SMP | 4053 | #ifdef CONFIG_SMP |
4054 | /* | 4054 | /* |
4055 | * This is how migration works: | 4055 | * This is how migration works: |
4056 | * | 4056 | * |
4057 | * 1) we queue a migration_req_t structure in the source CPU's | 4057 | * 1) we queue a migration_req_t structure in the source CPU's |
4058 | * runqueue and wake up that CPU's migration thread. | 4058 | * runqueue and wake up that CPU's migration thread. |
4059 | * 2) we down() the locked semaphore => thread blocks. | 4059 | * 2) we down() the locked semaphore => thread blocks. |
4060 | * 3) migration thread wakes up (implicitly it forces the migrated | 4060 | * 3) migration thread wakes up (implicitly it forces the migrated |
4061 | * thread off the CPU) | 4061 | * thread off the CPU) |
4062 | * 4) it gets the migration request and checks whether the migrated | 4062 | * 4) it gets the migration request and checks whether the migrated |
4063 | * task is still in the wrong runqueue. | 4063 | * task is still in the wrong runqueue. |
4064 | * 5) if it's in the wrong runqueue then the migration thread removes | 4064 | * 5) if it's in the wrong runqueue then the migration thread removes |
4065 | * it and puts it into the right queue. | 4065 | * it and puts it into the right queue. |
4066 | * 6) migration thread up()s the semaphore. | 4066 | * 6) migration thread up()s the semaphore. |
4067 | * 7) we wake up and the migration is done. | 4067 | * 7) we wake up and the migration is done. |
4068 | */ | 4068 | */ |
4069 | 4069 | ||
4070 | /* | 4070 | /* |
4071 | * Change a given task's CPU affinity. Migrate the thread to a | 4071 | * Change a given task's CPU affinity. Migrate the thread to a |
4072 | * proper CPU and schedule it away if the CPU it's executing on | 4072 | * proper CPU and schedule it away if the CPU it's executing on |
4073 | * is removed from the allowed bitmask. | 4073 | * is removed from the allowed bitmask. |
4074 | * | 4074 | * |
4075 | * NOTE: the caller must have a valid reference to the task, the | 4075 | * NOTE: the caller must have a valid reference to the task, the |
4076 | * task must not exit() & deallocate itself prematurely. The | 4076 | * task must not exit() & deallocate itself prematurely. The |
4077 | * call is not atomic; no spinlocks may be held. | 4077 | * call is not atomic; no spinlocks may be held. |
4078 | */ | 4078 | */ |
4079 | int set_cpus_allowed(task_t *p, cpumask_t new_mask) | 4079 | int set_cpus_allowed(task_t *p, cpumask_t new_mask) |
4080 | { | 4080 | { |
4081 | unsigned long flags; | 4081 | unsigned long flags; |
4082 | int ret = 0; | 4082 | int ret = 0; |
4083 | migration_req_t req; | 4083 | migration_req_t req; |
4084 | runqueue_t *rq; | 4084 | runqueue_t *rq; |
4085 | 4085 | ||
4086 | rq = task_rq_lock(p, &flags); | 4086 | rq = task_rq_lock(p, &flags); |
4087 | if (!cpus_intersects(new_mask, cpu_online_map)) { | 4087 | if (!cpus_intersects(new_mask, cpu_online_map)) { |
4088 | ret = -EINVAL; | 4088 | ret = -EINVAL; |
4089 | goto out; | 4089 | goto out; |
4090 | } | 4090 | } |
4091 | 4091 | ||
4092 | p->cpus_allowed = new_mask; | 4092 | p->cpus_allowed = new_mask; |
4093 | /* Can the task run on the task's current CPU? If so, we're done */ | 4093 | /* Can the task run on the task's current CPU? If so, we're done */ |
4094 | if (cpu_isset(task_cpu(p), new_mask)) | 4094 | if (cpu_isset(task_cpu(p), new_mask)) |
4095 | goto out; | 4095 | goto out; |
4096 | 4096 | ||
4097 | if (migrate_task(p, any_online_cpu(new_mask), &req)) { | 4097 | if (migrate_task(p, any_online_cpu(new_mask), &req)) { |
4098 | /* Need help from migration thread: drop lock and wait. */ | 4098 | /* Need help from migration thread: drop lock and wait. */ |
4099 | task_rq_unlock(rq, &flags); | 4099 | task_rq_unlock(rq, &flags); |
4100 | wake_up_process(rq->migration_thread); | 4100 | wake_up_process(rq->migration_thread); |
4101 | wait_for_completion(&req.done); | 4101 | wait_for_completion(&req.done); |
4102 | tlb_migrate_finish(p->mm); | 4102 | tlb_migrate_finish(p->mm); |
4103 | return 0; | 4103 | return 0; |
4104 | } | 4104 | } |
4105 | out: | 4105 | out: |
4106 | task_rq_unlock(rq, &flags); | 4106 | task_rq_unlock(rq, &flags); |
4107 | return ret; | 4107 | return ret; |
4108 | } | 4108 | } |
4109 | 4109 | ||
4110 | EXPORT_SYMBOL_GPL(set_cpus_allowed); | 4110 | EXPORT_SYMBOL_GPL(set_cpus_allowed); |
4111 | 4111 | ||
4112 | /* | 4112 | /* |
4113 | * Move (not current) task off this cpu, onto dest cpu. We're doing | 4113 | * Move (not current) task off this cpu, onto dest cpu. We're doing |
4114 | * this because either it can't run here any more (set_cpus_allowed() | 4114 | * this because either it can't run here any more (set_cpus_allowed() |
4115 | * away from this CPU, or CPU going down), or because we're | 4115 | * away from this CPU, or CPU going down), or because we're |
4116 | * attempting to rebalance this task on exec (sched_exec). | 4116 | * attempting to rebalance this task on exec (sched_exec). |
4117 | * | 4117 | * |
4118 | * So we race with normal scheduler movements, but that's OK, as long | 4118 | * So we race with normal scheduler movements, but that's OK, as long |
4119 | * as the task is no longer on this CPU. | 4119 | * as the task is no longer on this CPU. |
4120 | */ | 4120 | */ |
4121 | static void __migrate_task(struct task_struct *p, int src_cpu, int dest_cpu) | 4121 | static void __migrate_task(struct task_struct *p, int src_cpu, int dest_cpu) |
4122 | { | 4122 | { |
4123 | runqueue_t *rq_dest, *rq_src; | 4123 | runqueue_t *rq_dest, *rq_src; |
4124 | 4124 | ||
4125 | if (unlikely(cpu_is_offline(dest_cpu))) | 4125 | if (unlikely(cpu_is_offline(dest_cpu))) |
4126 | return; | 4126 | return; |
4127 | 4127 | ||
4128 | rq_src = cpu_rq(src_cpu); | 4128 | rq_src = cpu_rq(src_cpu); |
4129 | rq_dest = cpu_rq(dest_cpu); | 4129 | rq_dest = cpu_rq(dest_cpu); |
4130 | 4130 | ||
4131 | double_rq_lock(rq_src, rq_dest); | 4131 | double_rq_lock(rq_src, rq_dest); |
4132 | /* Already moved. */ | 4132 | /* Already moved. */ |
4133 | if (task_cpu(p) != src_cpu) | 4133 | if (task_cpu(p) != src_cpu) |
4134 | goto out; | 4134 | goto out; |
4135 | /* Affinity changed (again). */ | 4135 | /* Affinity changed (again). */ |
4136 | if (!cpu_isset(dest_cpu, p->cpus_allowed)) | 4136 | if (!cpu_isset(dest_cpu, p->cpus_allowed)) |
4137 | goto out; | 4137 | goto out; |
4138 | 4138 | ||
4139 | set_task_cpu(p, dest_cpu); | 4139 | set_task_cpu(p, dest_cpu); |
4140 | if (p->array) { | 4140 | if (p->array) { |
4141 | /* | 4141 | /* |
4142 | * Sync timestamp with rq_dest's before activating. | 4142 | * Sync timestamp with rq_dest's before activating. |
4143 | * The same thing could be achieved by doing this step | 4143 | * The same thing could be achieved by doing this step |
4144 | * afterwards, and pretending it was a local activate. | 4144 | * afterwards, and pretending it was a local activate. |
4145 | * This way is cleaner and logically correct. | 4145 | * This way is cleaner and logically correct. |
4146 | */ | 4146 | */ |
4147 | p->timestamp = p->timestamp - rq_src->timestamp_last_tick | 4147 | p->timestamp = p->timestamp - rq_src->timestamp_last_tick |
4148 | + rq_dest->timestamp_last_tick; | 4148 | + rq_dest->timestamp_last_tick; |
4149 | deactivate_task(p, rq_src); | 4149 | deactivate_task(p, rq_src); |
4150 | activate_task(p, rq_dest, 0); | 4150 | activate_task(p, rq_dest, 0); |
4151 | if (TASK_PREEMPTS_CURR(p, rq_dest)) | 4151 | if (TASK_PREEMPTS_CURR(p, rq_dest)) |
4152 | resched_task(rq_dest->curr); | 4152 | resched_task(rq_dest->curr); |
4153 | } | 4153 | } |
4154 | 4154 | ||
4155 | out: | 4155 | out: |
4156 | double_rq_unlock(rq_src, rq_dest); | 4156 | double_rq_unlock(rq_src, rq_dest); |
4157 | } | 4157 | } |
4158 | 4158 | ||
4159 | /* | 4159 | /* |
4160 | * migration_thread - this is a highprio system thread that performs | 4160 | * migration_thread - this is a highprio system thread that performs |
4161 | * thread migration by bumping thread off CPU then 'pushing' onto | 4161 | * thread migration by bumping thread off CPU then 'pushing' onto |
4162 | * another runqueue. | 4162 | * another runqueue. |
4163 | */ | 4163 | */ |
4164 | static int migration_thread(void * data) | 4164 | static int migration_thread(void * data) |
4165 | { | 4165 | { |
4166 | runqueue_t *rq; | 4166 | runqueue_t *rq; |
4167 | int cpu = (long)data; | 4167 | int cpu = (long)data; |
4168 | 4168 | ||
4169 | rq = cpu_rq(cpu); | 4169 | rq = cpu_rq(cpu); |
4170 | BUG_ON(rq->migration_thread != current); | 4170 | BUG_ON(rq->migration_thread != current); |
4171 | 4171 | ||
4172 | set_current_state(TASK_INTERRUPTIBLE); | 4172 | set_current_state(TASK_INTERRUPTIBLE); |
4173 | while (!kthread_should_stop()) { | 4173 | while (!kthread_should_stop()) { |
4174 | struct list_head *head; | 4174 | struct list_head *head; |
4175 | migration_req_t *req; | 4175 | migration_req_t *req; |
4176 | 4176 | ||
4177 | if (current->flags & PF_FREEZE) | 4177 | if (current->flags & PF_FREEZE) |
4178 | refrigerator(PF_FREEZE); | 4178 | refrigerator(PF_FREEZE); |
4179 | 4179 | ||
4180 | spin_lock_irq(&rq->lock); | 4180 | spin_lock_irq(&rq->lock); |
4181 | 4181 | ||
4182 | if (cpu_is_offline(cpu)) { | 4182 | if (cpu_is_offline(cpu)) { |
4183 | spin_unlock_irq(&rq->lock); | 4183 | spin_unlock_irq(&rq->lock); |
4184 | goto wait_to_die; | 4184 | goto wait_to_die; |
4185 | } | 4185 | } |
4186 | 4186 | ||
4187 | if (rq->active_balance) { | 4187 | if (rq->active_balance) { |
4188 | active_load_balance(rq, cpu); | 4188 | active_load_balance(rq, cpu); |
4189 | rq->active_balance = 0; | 4189 | rq->active_balance = 0; |
4190 | } | 4190 | } |
4191 | 4191 | ||
4192 | head = &rq->migration_queue; | 4192 | head = &rq->migration_queue; |
4193 | 4193 | ||
4194 | if (list_empty(head)) { | 4194 | if (list_empty(head)) { |
4195 | spin_unlock_irq(&rq->lock); | 4195 | spin_unlock_irq(&rq->lock); |
4196 | schedule(); | 4196 | schedule(); |
4197 | set_current_state(TASK_INTERRUPTIBLE); | 4197 | set_current_state(TASK_INTERRUPTIBLE); |
4198 | continue; | 4198 | continue; |
4199 | } | 4199 | } |
4200 | req = list_entry(head->next, migration_req_t, list); | 4200 | req = list_entry(head->next, migration_req_t, list); |
4201 | list_del_init(head->next); | 4201 | list_del_init(head->next); |
4202 | 4202 | ||
4203 | if (req->type == REQ_MOVE_TASK) { | 4203 | if (req->type == REQ_MOVE_TASK) { |
4204 | spin_unlock(&rq->lock); | 4204 | spin_unlock(&rq->lock); |
4205 | __migrate_task(req->task, cpu, req->dest_cpu); | 4205 | __migrate_task(req->task, cpu, req->dest_cpu); |
4206 | local_irq_enable(); | 4206 | local_irq_enable(); |
4207 | } else if (req->type == REQ_SET_DOMAIN) { | 4207 | } else if (req->type == REQ_SET_DOMAIN) { |
4208 | rq->sd = req->sd; | 4208 | rq->sd = req->sd; |
4209 | spin_unlock_irq(&rq->lock); | 4209 | spin_unlock_irq(&rq->lock); |
4210 | } else { | 4210 | } else { |
4211 | spin_unlock_irq(&rq->lock); | 4211 | spin_unlock_irq(&rq->lock); |
4212 | WARN_ON(1); | 4212 | WARN_ON(1); |
4213 | } | 4213 | } |
4214 | 4214 | ||
4215 | complete(&req->done); | 4215 | complete(&req->done); |
4216 | } | 4216 | } |
4217 | __set_current_state(TASK_RUNNING); | 4217 | __set_current_state(TASK_RUNNING); |
4218 | return 0; | 4218 | return 0; |
4219 | 4219 | ||
4220 | wait_to_die: | 4220 | wait_to_die: |
4221 | /* Wait for kthread_stop */ | 4221 | /* Wait for kthread_stop */ |
4222 | set_current_state(TASK_INTERRUPTIBLE); | 4222 | set_current_state(TASK_INTERRUPTIBLE); |
4223 | while (!kthread_should_stop()) { | 4223 | while (!kthread_should_stop()) { |
4224 | schedule(); | 4224 | schedule(); |
4225 | set_current_state(TASK_INTERRUPTIBLE); | 4225 | set_current_state(TASK_INTERRUPTIBLE); |
4226 | } | 4226 | } |
4227 | __set_current_state(TASK_RUNNING); | 4227 | __set_current_state(TASK_RUNNING); |
4228 | return 0; | 4228 | return 0; |
4229 | } | 4229 | } |
4230 | 4230 | ||
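For illustration, the queue drained above is filled by callers such as set_cpus_allowed() and sched_exec(); the sketch below shows roughly what that caller side looks like, inferred from the REQ_MOVE_TASK handling above and from the REQ_SET_DOMAIN request built in cpu_attach_domain() later in this file. The helper name queue_move_request() is hypothetical, not a kernel symbol.

/* Hypothetical caller-side sketch: hand a task to migration_thread and
 * wait for it to be moved. Mirrors the REQ_SET_DOMAIN sequence used by
 * cpu_attach_domain() below. Illustration only, not kernel code. */
static void queue_move_request(runqueue_t *rq, struct task_struct *p, int dest_cpu)
{
        migration_req_t req;

        init_completion(&req.done);
        req.type = REQ_MOVE_TASK;
        req.task = p;
        req.dest_cpu = dest_cpu;

        spin_lock_irq(&rq->lock);
        list_add(&req.list, &rq->migration_queue);
        spin_unlock_irq(&rq->lock);

        wake_up_process(rq->migration_thread);
        wait_for_completion(&req.done);         /* completed via complete(&req->done) above */
}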
4231 | #ifdef CONFIG_HOTPLUG_CPU | 4231 | #ifdef CONFIG_HOTPLUG_CPU |
4232 | /* Figure out where task on dead CPU should go; use force if necessary. */ | 4232 | /* Figure out where task on dead CPU should go; use force if necessary. */ |
4233 | static void move_task_off_dead_cpu(int dead_cpu, struct task_struct *tsk) | 4233 | static void move_task_off_dead_cpu(int dead_cpu, struct task_struct *tsk) |
4234 | { | 4234 | { |
4235 | int dest_cpu; | 4235 | int dest_cpu; |
4236 | cpumask_t mask; | 4236 | cpumask_t mask; |
4237 | 4237 | ||
4238 | /* On same node? */ | 4238 | /* On same node? */ |
4239 | mask = node_to_cpumask(cpu_to_node(dead_cpu)); | 4239 | mask = node_to_cpumask(cpu_to_node(dead_cpu)); |
4240 | cpus_and(mask, mask, tsk->cpus_allowed); | 4240 | cpus_and(mask, mask, tsk->cpus_allowed); |
4241 | dest_cpu = any_online_cpu(mask); | 4241 | dest_cpu = any_online_cpu(mask); |
4242 | 4242 | ||
4243 | /* On any allowed CPU? */ | 4243 | /* On any allowed CPU? */ |
4244 | if (dest_cpu == NR_CPUS) | 4244 | if (dest_cpu == NR_CPUS) |
4245 | dest_cpu = any_online_cpu(tsk->cpus_allowed); | 4245 | dest_cpu = any_online_cpu(tsk->cpus_allowed); |
4246 | 4246 | ||
4247 | /* No more Mr. Nice Guy. */ | 4247 | /* No more Mr. Nice Guy. */ |
4248 | if (dest_cpu == NR_CPUS) { | 4248 | if (dest_cpu == NR_CPUS) { |
4249 | cpus_setall(tsk->cpus_allowed); | 4249 | cpus_setall(tsk->cpus_allowed); |
4250 | dest_cpu = any_online_cpu(tsk->cpus_allowed); | 4250 | dest_cpu = any_online_cpu(tsk->cpus_allowed); |
4251 | 4251 | ||
4252 | /* | 4252 | /* |
4253 | * Don't tell them about moving exiting tasks or | 4253 | * Don't tell them about moving exiting tasks or |
4254 | * kernel threads (both mm NULL), since they never | 4254 | * kernel threads (both mm NULL), since they never |
4255 | * leave the kernel. | 4255 | * leave the kernel. |
4256 | */ | 4256 | */ |
4257 | if (tsk->mm && printk_ratelimit()) | 4257 | if (tsk->mm && printk_ratelimit()) |
4258 | printk(KERN_INFO "process %d (%s) no " | 4258 | printk(KERN_INFO "process %d (%s) no " |
4259 | "longer affine to cpu%d\n", | 4259 | "longer affine to cpu%d\n", |
4260 | tsk->pid, tsk->comm, dead_cpu); | 4260 | tsk->pid, tsk->comm, dead_cpu); |
4261 | } | 4261 | } |
4262 | __migrate_task(tsk, dead_cpu, dest_cpu); | 4262 | __migrate_task(tsk, dead_cpu, dest_cpu); |
4263 | } | 4263 | } |
4264 | 4264 | ||
4265 | /* | 4265 | /* |
4266 | * While a dead CPU has no uninterruptible tasks queued at this point, | 4266 | * While a dead CPU has no uninterruptible tasks queued at this point, |
4267 | * it might still have a nonzero ->nr_uninterruptible counter, because | 4267 | * it might still have a nonzero ->nr_uninterruptible counter, because |
4268 | * for performance reasons the counter is not strictly tracking tasks to | 4268 | * for performance reasons the counter is not strictly tracking tasks to |
4269 | * their home CPUs. So we just add the counter to another CPU's counter, | 4269 | * their home CPUs. So we just add the counter to another CPU's counter, |
4270 | * to keep the global sum constant after CPU-down: | 4270 | * to keep the global sum constant after CPU-down: |
4271 | */ | 4271 | */ |
4272 | static void migrate_nr_uninterruptible(runqueue_t *rq_src) | 4272 | static void migrate_nr_uninterruptible(runqueue_t *rq_src) |
4273 | { | 4273 | { |
4274 | runqueue_t *rq_dest = cpu_rq(any_online_cpu(CPU_MASK_ALL)); | 4274 | runqueue_t *rq_dest = cpu_rq(any_online_cpu(CPU_MASK_ALL)); |
4275 | unsigned long flags; | 4275 | unsigned long flags; |
4276 | 4276 | ||
4277 | local_irq_save(flags); | 4277 | local_irq_save(flags); |
4278 | double_rq_lock(rq_src, rq_dest); | 4278 | double_rq_lock(rq_src, rq_dest); |
4279 | rq_dest->nr_uninterruptible += rq_src->nr_uninterruptible; | 4279 | rq_dest->nr_uninterruptible += rq_src->nr_uninterruptible; |
4280 | rq_src->nr_uninterruptible = 0; | 4280 | rq_src->nr_uninterruptible = 0; |
4281 | double_rq_unlock(rq_src, rq_dest); | 4281 | double_rq_unlock(rq_src, rq_dest); |
4282 | local_irq_restore(flags); | 4282 | local_irq_restore(flags); |
4283 | } | 4283 | } |
4284 | 4284 | ||
4285 | /* Run through task list and migrate tasks from the dead cpu. */ | 4285 | /* Run through task list and migrate tasks from the dead cpu. */ |
4286 | static void migrate_live_tasks(int src_cpu) | 4286 | static void migrate_live_tasks(int src_cpu) |
4287 | { | 4287 | { |
4288 | struct task_struct *tsk, *t; | 4288 | struct task_struct *tsk, *t; |
4289 | 4289 | ||
4290 | write_lock_irq(&tasklist_lock); | 4290 | write_lock_irq(&tasklist_lock); |
4291 | 4291 | ||
4292 | do_each_thread(t, tsk) { | 4292 | do_each_thread(t, tsk) { |
4293 | if (tsk == current) | 4293 | if (tsk == current) |
4294 | continue; | 4294 | continue; |
4295 | 4295 | ||
4296 | if (task_cpu(tsk) == src_cpu) | 4296 | if (task_cpu(tsk) == src_cpu) |
4297 | move_task_off_dead_cpu(src_cpu, tsk); | 4297 | move_task_off_dead_cpu(src_cpu, tsk); |
4298 | } while_each_thread(t, tsk); | 4298 | } while_each_thread(t, tsk); |
4299 | 4299 | ||
4300 | write_unlock_irq(&tasklist_lock); | 4300 | write_unlock_irq(&tasklist_lock); |
4301 | } | 4301 | } |
4302 | 4302 | ||
4303 | /* Schedules idle task to be the next runnable task on current CPU. | 4303 | /* Schedules idle task to be the next runnable task on current CPU. |
4304 | * It does so by boosting its priority to highest possible and adding it to | 4304 | * It does so by boosting its priority to highest possible and adding it to |
4305 | * the _front_ of runqueue. Used by CPU offline code. | 4305 | * the _front_ of runqueue. Used by CPU offline code. |
4306 | */ | 4306 | */ |
4307 | void sched_idle_next(void) | 4307 | void sched_idle_next(void) |
4308 | { | 4308 | { |
4309 | int cpu = smp_processor_id(); | 4309 | int cpu = smp_processor_id(); |
4310 | runqueue_t *rq = this_rq(); | 4310 | runqueue_t *rq = this_rq(); |
4311 | struct task_struct *p = rq->idle; | 4311 | struct task_struct *p = rq->idle; |
4312 | unsigned long flags; | 4312 | unsigned long flags; |
4313 | 4313 | ||
4314 | /* cpu has to be offline */ | 4314 | /* cpu has to be offline */ |
4315 | BUG_ON(cpu_online(cpu)); | 4315 | BUG_ON(cpu_online(cpu)); |
4316 | 4316 | ||
4317 | /* Strictly not necessary since rest of the CPUs are stopped by now | 4317 | /* Strictly not necessary since rest of the CPUs are stopped by now |
4318 | * and interrupts disabled on current cpu. | 4318 | * and interrupts disabled on current cpu. |
4319 | */ | 4319 | */ |
4320 | spin_lock_irqsave(&rq->lock, flags); | 4320 | spin_lock_irqsave(&rq->lock, flags); |
4321 | 4321 | ||
4322 | __setscheduler(p, SCHED_FIFO, MAX_RT_PRIO-1); | 4322 | __setscheduler(p, SCHED_FIFO, MAX_RT_PRIO-1); |
4323 | /* Add idle task to _front_ of its priority queue */ | 4323 | /* Add idle task to _front_ of its priority queue */ |
4324 | __activate_idle_task(p, rq); | 4324 | __activate_idle_task(p, rq); |
4325 | 4325 | ||
4326 | spin_unlock_irqrestore(&rq->lock, flags); | 4326 | spin_unlock_irqrestore(&rq->lock, flags); |
4327 | } | 4327 | } |
4328 | 4328 | ||
4329 | /* Ensures that the idle task is using init_mm right before its cpu goes | 4329 | /* Ensures that the idle task is using init_mm right before its cpu goes |
4330 | * offline. | 4330 | * offline. |
4331 | */ | 4331 | */ |
4332 | void idle_task_exit(void) | 4332 | void idle_task_exit(void) |
4333 | { | 4333 | { |
4334 | struct mm_struct *mm = current->active_mm; | 4334 | struct mm_struct *mm = current->active_mm; |
4335 | 4335 | ||
4336 | BUG_ON(cpu_online(smp_processor_id())); | 4336 | BUG_ON(cpu_online(smp_processor_id())); |
4337 | 4337 | ||
4338 | if (mm != &init_mm) | 4338 | if (mm != &init_mm) |
4339 | switch_mm(mm, &init_mm, current); | 4339 | switch_mm(mm, &init_mm, current); |
4340 | mmdrop(mm); | 4340 | mmdrop(mm); |
4341 | } | 4341 | } |
4342 | 4342 | ||
4343 | static void migrate_dead(unsigned int dead_cpu, task_t *tsk) | 4343 | static void migrate_dead(unsigned int dead_cpu, task_t *tsk) |
4344 | { | 4344 | { |
4345 | struct runqueue *rq = cpu_rq(dead_cpu); | 4345 | struct runqueue *rq = cpu_rq(dead_cpu); |
4346 | 4346 | ||
4347 | /* Must be exiting, otherwise would be on tasklist. */ | 4347 | /* Must be exiting, otherwise would be on tasklist. */ |
4348 | BUG_ON(tsk->exit_state != EXIT_ZOMBIE && tsk->exit_state != EXIT_DEAD); | 4348 | BUG_ON(tsk->exit_state != EXIT_ZOMBIE && tsk->exit_state != EXIT_DEAD); |
4349 | 4349 | ||
4350 | /* Cannot have done final schedule yet: would have vanished. */ | 4350 | /* Cannot have done final schedule yet: would have vanished. */ |
4351 | BUG_ON(tsk->flags & PF_DEAD); | 4351 | BUG_ON(tsk->flags & PF_DEAD); |
4352 | 4352 | ||
4353 | get_task_struct(tsk); | 4353 | get_task_struct(tsk); |
4354 | 4354 | ||
4355 | /* | 4355 | /* |
4356 | * Drop lock around migration; if someone else moves it, | 4356 | * Drop lock around migration; if someone else moves it, |
4357 | * that's OK. No task can be added to this CPU, so iteration is | 4357 | * that's OK. No task can be added to this CPU, so iteration is |
4358 | * fine. | 4358 | * fine. |
4359 | */ | 4359 | */ |
4360 | spin_unlock_irq(&rq->lock); | 4360 | spin_unlock_irq(&rq->lock); |
4361 | move_task_off_dead_cpu(dead_cpu, tsk); | 4361 | move_task_off_dead_cpu(dead_cpu, tsk); |
4362 | spin_lock_irq(&rq->lock); | 4362 | spin_lock_irq(&rq->lock); |
4363 | 4363 | ||
4364 | put_task_struct(tsk); | 4364 | put_task_struct(tsk); |
4365 | } | 4365 | } |
4366 | 4366 | ||
4367 | /* release_task() removes task from tasklist, so we won't find dead tasks. */ | 4367 | /* release_task() removes task from tasklist, so we won't find dead tasks. */ |
4368 | static void migrate_dead_tasks(unsigned int dead_cpu) | 4368 | static void migrate_dead_tasks(unsigned int dead_cpu) |
4369 | { | 4369 | { |
4370 | unsigned arr, i; | 4370 | unsigned arr, i; |
4371 | struct runqueue *rq = cpu_rq(dead_cpu); | 4371 | struct runqueue *rq = cpu_rq(dead_cpu); |
4372 | 4372 | ||
4373 | for (arr = 0; arr < 2; arr++) { | 4373 | for (arr = 0; arr < 2; arr++) { |
4374 | for (i = 0; i < MAX_PRIO; i++) { | 4374 | for (i = 0; i < MAX_PRIO; i++) { |
4375 | struct list_head *list = &rq->arrays[arr].queue[i]; | 4375 | struct list_head *list = &rq->arrays[arr].queue[i]; |
4376 | while (!list_empty(list)) | 4376 | while (!list_empty(list)) |
4377 | migrate_dead(dead_cpu, | 4377 | migrate_dead(dead_cpu, |
4378 | list_entry(list->next, task_t, | 4378 | list_entry(list->next, task_t, |
4379 | run_list)); | 4379 | run_list)); |
4380 | } | 4380 | } |
4381 | } | 4381 | } |
4382 | } | 4382 | } |
4383 | #endif /* CONFIG_HOTPLUG_CPU */ | 4383 | #endif /* CONFIG_HOTPLUG_CPU */ |
4384 | 4384 | ||
4385 | /* | 4385 | /* |
4386 | * migration_call - callback that gets triggered when a CPU is added. | 4386 | * migration_call - callback that gets triggered when a CPU is added. |
4387 | * Here we can start up the necessary migration thread for the new CPU. | 4387 | * Here we can start up the necessary migration thread for the new CPU. |
4388 | */ | 4388 | */ |
4389 | static int migration_call(struct notifier_block *nfb, unsigned long action, | 4389 | static int migration_call(struct notifier_block *nfb, unsigned long action, |
4390 | void *hcpu) | 4390 | void *hcpu) |
4391 | { | 4391 | { |
4392 | int cpu = (long)hcpu; | 4392 | int cpu = (long)hcpu; |
4393 | struct task_struct *p; | 4393 | struct task_struct *p; |
4394 | struct runqueue *rq; | 4394 | struct runqueue *rq; |
4395 | unsigned long flags; | 4395 | unsigned long flags; |
4396 | 4396 | ||
4397 | switch (action) { | 4397 | switch (action) { |
4398 | case CPU_UP_PREPARE: | 4398 | case CPU_UP_PREPARE: |
4399 | p = kthread_create(migration_thread, hcpu, "migration/%d",cpu); | 4399 | p = kthread_create(migration_thread, hcpu, "migration/%d",cpu); |
4400 | if (IS_ERR(p)) | 4400 | if (IS_ERR(p)) |
4401 | return NOTIFY_BAD; | 4401 | return NOTIFY_BAD; |
4402 | p->flags |= PF_NOFREEZE; | 4402 | p->flags |= PF_NOFREEZE; |
4403 | kthread_bind(p, cpu); | 4403 | kthread_bind(p, cpu); |
4404 | /* Must be high prio: stop_machine expects to yield to it. */ | 4404 | /* Must be high prio: stop_machine expects to yield to it. */ |
4405 | rq = task_rq_lock(p, &flags); | 4405 | rq = task_rq_lock(p, &flags); |
4406 | __setscheduler(p, SCHED_FIFO, MAX_RT_PRIO-1); | 4406 | __setscheduler(p, SCHED_FIFO, MAX_RT_PRIO-1); |
4407 | task_rq_unlock(rq, &flags); | 4407 | task_rq_unlock(rq, &flags); |
4408 | cpu_rq(cpu)->migration_thread = p; | 4408 | cpu_rq(cpu)->migration_thread = p; |
4409 | break; | 4409 | break; |
4410 | case CPU_ONLINE: | 4410 | case CPU_ONLINE: |
4411 | /* Strictly unnecessary, as the first user will wake it. */ | 4411 | /* Strictly unnecessary, as the first user will wake it. */ |
4412 | wake_up_process(cpu_rq(cpu)->migration_thread); | 4412 | wake_up_process(cpu_rq(cpu)->migration_thread); |
4413 | break; | 4413 | break; |
4414 | #ifdef CONFIG_HOTPLUG_CPU | 4414 | #ifdef CONFIG_HOTPLUG_CPU |
4415 | case CPU_UP_CANCELED: | 4415 | case CPU_UP_CANCELED: |
4416 | /* Unbind it from offline cpu so it can run. Fall thru. */ | 4416 | /* Unbind it from offline cpu so it can run. Fall thru. */ |
4417 | kthread_bind(cpu_rq(cpu)->migration_thread,smp_processor_id()); | 4417 | kthread_bind(cpu_rq(cpu)->migration_thread,smp_processor_id()); |
4418 | kthread_stop(cpu_rq(cpu)->migration_thread); | 4418 | kthread_stop(cpu_rq(cpu)->migration_thread); |
4419 | cpu_rq(cpu)->migration_thread = NULL; | 4419 | cpu_rq(cpu)->migration_thread = NULL; |
4420 | break; | 4420 | break; |
4421 | case CPU_DEAD: | 4421 | case CPU_DEAD: |
4422 | migrate_live_tasks(cpu); | 4422 | migrate_live_tasks(cpu); |
4423 | rq = cpu_rq(cpu); | 4423 | rq = cpu_rq(cpu); |
4424 | kthread_stop(rq->migration_thread); | 4424 | kthread_stop(rq->migration_thread); |
4425 | rq->migration_thread = NULL; | 4425 | rq->migration_thread = NULL; |
4426 | /* Idle task back to normal (off runqueue, low prio) */ | 4426 | /* Idle task back to normal (off runqueue, low prio) */ |
4427 | rq = task_rq_lock(rq->idle, &flags); | 4427 | rq = task_rq_lock(rq->idle, &flags); |
4428 | deactivate_task(rq->idle, rq); | 4428 | deactivate_task(rq->idle, rq); |
4429 | rq->idle->static_prio = MAX_PRIO; | 4429 | rq->idle->static_prio = MAX_PRIO; |
4430 | __setscheduler(rq->idle, SCHED_NORMAL, 0); | 4430 | __setscheduler(rq->idle, SCHED_NORMAL, 0); |
4431 | migrate_dead_tasks(cpu); | 4431 | migrate_dead_tasks(cpu); |
4432 | task_rq_unlock(rq, &flags); | 4432 | task_rq_unlock(rq, &flags); |
4433 | migrate_nr_uninterruptible(rq); | 4433 | migrate_nr_uninterruptible(rq); |
4434 | BUG_ON(rq->nr_running != 0); | 4434 | BUG_ON(rq->nr_running != 0); |
4435 | 4435 | ||
4436 | /* No need to migrate the tasks: it was best-effort if | 4436 | /* No need to migrate the tasks: it was best-effort if |
4437 | * they didn't do lock_cpu_hotplug(). Just wake up | 4437 | * they didn't do lock_cpu_hotplug(). Just wake up |
4438 | * the requestors. */ | 4438 | * the requestors. */ |
4439 | spin_lock_irq(&rq->lock); | 4439 | spin_lock_irq(&rq->lock); |
4440 | while (!list_empty(&rq->migration_queue)) { | 4440 | while (!list_empty(&rq->migration_queue)) { |
4441 | migration_req_t *req; | 4441 | migration_req_t *req; |
4442 | req = list_entry(rq->migration_queue.next, | 4442 | req = list_entry(rq->migration_queue.next, |
4443 | migration_req_t, list); | 4443 | migration_req_t, list); |
4444 | BUG_ON(req->type != REQ_MOVE_TASK); | 4444 | BUG_ON(req->type != REQ_MOVE_TASK); |
4445 | list_del_init(&req->list); | 4445 | list_del_init(&req->list); |
4446 | complete(&req->done); | 4446 | complete(&req->done); |
4447 | } | 4447 | } |
4448 | spin_unlock_irq(&rq->lock); | 4448 | spin_unlock_irq(&rq->lock); |
4449 | break; | 4449 | break; |
4450 | #endif | 4450 | #endif |
4451 | } | 4451 | } |
4452 | return NOTIFY_OK; | 4452 | return NOTIFY_OK; |
4453 | } | 4453 | } |
4454 | 4454 | ||
4455 | /* Register at highest priority so that task migration (migrate_all_tasks) | 4455 | /* Register at highest priority so that task migration (migrate_all_tasks) |
4456 | * happens before everything else. | 4456 | * happens before everything else. |
4457 | */ | 4457 | */ |
4458 | static struct notifier_block __devinitdata migration_notifier = { | 4458 | static struct notifier_block __devinitdata migration_notifier = { |
4459 | .notifier_call = migration_call, | 4459 | .notifier_call = migration_call, |
4460 | .priority = 10 | 4460 | .priority = 10 |
4461 | }; | 4461 | }; |
4462 | 4462 | ||
4463 | int __init migration_init(void) | 4463 | int __init migration_init(void) |
4464 | { | 4464 | { |
4465 | void *cpu = (void *)(long)smp_processor_id(); | 4465 | void *cpu = (void *)(long)smp_processor_id(); |
4466 | /* Start one for boot CPU. */ | 4466 | /* Start one for boot CPU. */ |
4467 | migration_call(&migration_notifier, CPU_UP_PREPARE, cpu); | 4467 | migration_call(&migration_notifier, CPU_UP_PREPARE, cpu); |
4468 | migration_call(&migration_notifier, CPU_ONLINE, cpu); | 4468 | migration_call(&migration_notifier, CPU_ONLINE, cpu); |
4469 | register_cpu_notifier(&migration_notifier); | 4469 | register_cpu_notifier(&migration_notifier); |
4470 | return 0; | 4470 | return 0; |
4471 | } | 4471 | } |
4472 | #endif | 4472 | #endif |
4473 | 4473 | ||
4474 | #ifdef CONFIG_SMP | 4474 | #ifdef CONFIG_SMP |
4475 | #define SCHED_DOMAIN_DEBUG | 4475 | #define SCHED_DOMAIN_DEBUG |
4476 | #ifdef SCHED_DOMAIN_DEBUG | 4476 | #ifdef SCHED_DOMAIN_DEBUG |
4477 | static void sched_domain_debug(struct sched_domain *sd, int cpu) | 4477 | static void sched_domain_debug(struct sched_domain *sd, int cpu) |
4478 | { | 4478 | { |
4479 | int level = 0; | 4479 | int level = 0; |
4480 | 4480 | ||
4481 | printk(KERN_DEBUG "CPU%d attaching sched-domain:\n", cpu); | 4481 | printk(KERN_DEBUG "CPU%d attaching sched-domain:\n", cpu); |
4482 | 4482 | ||
4483 | do { | 4483 | do { |
4484 | int i; | 4484 | int i; |
4485 | char str[NR_CPUS]; | 4485 | char str[NR_CPUS]; |
4486 | struct sched_group *group = sd->groups; | 4486 | struct sched_group *group = sd->groups; |
4487 | cpumask_t groupmask; | 4487 | cpumask_t groupmask; |
4488 | 4488 | ||
4489 | cpumask_scnprintf(str, NR_CPUS, sd->span); | 4489 | cpumask_scnprintf(str, NR_CPUS, sd->span); |
4490 | cpus_clear(groupmask); | 4490 | cpus_clear(groupmask); |
4491 | 4491 | ||
4492 | printk(KERN_DEBUG); | 4492 | printk(KERN_DEBUG); |
4493 | for (i = 0; i < level + 1; i++) | 4493 | for (i = 0; i < level + 1; i++) |
4494 | printk(" "); | 4494 | printk(" "); |
4495 | printk("domain %d: ", level); | 4495 | printk("domain %d: ", level); |
4496 | 4496 | ||
4497 | if (!(sd->flags & SD_LOAD_BALANCE)) { | 4497 | if (!(sd->flags & SD_LOAD_BALANCE)) { |
4498 | printk("does not load-balance\n"); | 4498 | printk("does not load-balance\n"); |
4499 | if (sd->parent) | 4499 | if (sd->parent) |
4500 | printk(KERN_ERR "ERROR: !SD_LOAD_BALANCE domain has parent"); | 4500 | printk(KERN_ERR "ERROR: !SD_LOAD_BALANCE domain has parent"); |
4501 | break; | 4501 | break; |
4502 | } | 4502 | } |
4503 | 4503 | ||
4504 | printk("span %s\n", str); | 4504 | printk("span %s\n", str); |
4505 | 4505 | ||
4506 | if (!cpu_isset(cpu, sd->span)) | 4506 | if (!cpu_isset(cpu, sd->span)) |
4507 | printk(KERN_ERR "ERROR: domain->span does not contain CPU%d\n", cpu); | 4507 | printk(KERN_ERR "ERROR: domain->span does not contain CPU%d\n", cpu); |
4508 | if (!cpu_isset(cpu, group->cpumask)) | 4508 | if (!cpu_isset(cpu, group->cpumask)) |
4509 | printk(KERN_ERR "ERROR: domain->groups does not contain CPU%d\n", cpu); | 4509 | printk(KERN_ERR "ERROR: domain->groups does not contain CPU%d\n", cpu); |
4510 | 4510 | ||
4511 | printk(KERN_DEBUG); | 4511 | printk(KERN_DEBUG); |
4512 | for (i = 0; i < level + 2; i++) | 4512 | for (i = 0; i < level + 2; i++) |
4513 | printk(" "); | 4513 | printk(" "); |
4514 | printk("groups:"); | 4514 | printk("groups:"); |
4515 | do { | 4515 | do { |
4516 | if (!group) { | 4516 | if (!group) { |
4517 | printk("\n"); | 4517 | printk("\n"); |
4518 | printk(KERN_ERR "ERROR: group is NULL\n"); | 4518 | printk(KERN_ERR "ERROR: group is NULL\n"); |
4519 | break; | 4519 | break; |
4520 | } | 4520 | } |
4521 | 4521 | ||
4522 | if (!group->cpu_power) { | 4522 | if (!group->cpu_power) { |
4523 | printk("\n"); | 4523 | printk("\n"); |
4524 | printk(KERN_ERR "ERROR: domain->cpu_power not set\n"); | 4524 | printk(KERN_ERR "ERROR: domain->cpu_power not set\n"); |
4525 | } | 4525 | } |
4526 | 4526 | ||
4527 | if (!cpus_weight(group->cpumask)) { | 4527 | if (!cpus_weight(group->cpumask)) { |
4528 | printk("\n"); | 4528 | printk("\n"); |
4529 | printk(KERN_ERR "ERROR: empty group\n"); | 4529 | printk(KERN_ERR "ERROR: empty group\n"); |
4530 | } | 4530 | } |
4531 | 4531 | ||
4532 | if (cpus_intersects(groupmask, group->cpumask)) { | 4532 | if (cpus_intersects(groupmask, group->cpumask)) { |
4533 | printk("\n"); | 4533 | printk("\n"); |
4534 | printk(KERN_ERR "ERROR: repeated CPUs\n"); | 4534 | printk(KERN_ERR "ERROR: repeated CPUs\n"); |
4535 | } | 4535 | } |
4536 | 4536 | ||
4537 | cpus_or(groupmask, groupmask, group->cpumask); | 4537 | cpus_or(groupmask, groupmask, group->cpumask); |
4538 | 4538 | ||
4539 | cpumask_scnprintf(str, NR_CPUS, group->cpumask); | 4539 | cpumask_scnprintf(str, NR_CPUS, group->cpumask); |
4540 | printk(" %s", str); | 4540 | printk(" %s", str); |
4541 | 4541 | ||
4542 | group = group->next; | 4542 | group = group->next; |
4543 | } while (group != sd->groups); | 4543 | } while (group != sd->groups); |
4544 | printk("\n"); | 4544 | printk("\n"); |
4545 | 4545 | ||
4546 | if (!cpus_equal(sd->span, groupmask)) | 4546 | if (!cpus_equal(sd->span, groupmask)) |
4547 | printk(KERN_ERR "ERROR: groups don't span domain->span\n"); | 4547 | printk(KERN_ERR "ERROR: groups don't span domain->span\n"); |
4548 | 4548 | ||
4549 | level++; | 4549 | level++; |
4550 | sd = sd->parent; | 4550 | sd = sd->parent; |
4551 | 4551 | ||
4552 | if (sd) { | 4552 | if (sd) { |
4553 | if (!cpus_subset(groupmask, sd->span)) | 4553 | if (!cpus_subset(groupmask, sd->span)) |
4554 | printk(KERN_ERR "ERROR: parent span is not a superset of domain->span\n"); | 4554 | printk(KERN_ERR "ERROR: parent span is not a superset of domain->span\n"); |
4555 | } | 4555 | } |
4556 | 4556 | ||
4557 | } while (sd); | 4557 | } while (sd); |
4558 | } | 4558 | } |
4559 | #else | 4559 | #else |
4560 | #define sched_domain_debug(sd, cpu) {} | 4560 | #define sched_domain_debug(sd, cpu) {} |
4561 | #endif | 4561 | #endif |
4562 | 4562 | ||
4563 | /* | 4563 | /* |
4564 | * Attach the domain 'sd' to 'cpu' as its base domain. Callers must | 4564 | * Attach the domain 'sd' to 'cpu' as its base domain. Callers must |
4565 | * hold the hotplug lock. | 4565 | * hold the hotplug lock. |
4566 | */ | 4566 | */ |
4567 | void __devinit cpu_attach_domain(struct sched_domain *sd, int cpu) | 4567 | void __devinit cpu_attach_domain(struct sched_domain *sd, int cpu) |
4568 | { | 4568 | { |
4569 | migration_req_t req; | 4569 | migration_req_t req; |
4570 | unsigned long flags; | 4570 | unsigned long flags; |
4571 | runqueue_t *rq = cpu_rq(cpu); | 4571 | runqueue_t *rq = cpu_rq(cpu); |
4572 | int local = 1; | 4572 | int local = 1; |
4573 | 4573 | ||
4574 | sched_domain_debug(sd, cpu); | 4574 | sched_domain_debug(sd, cpu); |
4575 | 4575 | ||
4576 | spin_lock_irqsave(&rq->lock, flags); | 4576 | spin_lock_irqsave(&rq->lock, flags); |
4577 | 4577 | ||
4578 | if (cpu == smp_processor_id() || !cpu_online(cpu)) { | 4578 | if (cpu == smp_processor_id() || !cpu_online(cpu)) { |
4579 | rq->sd = sd; | 4579 | rq->sd = sd; |
4580 | } else { | 4580 | } else { |
4581 | init_completion(&req.done); | 4581 | init_completion(&req.done); |
4582 | req.type = REQ_SET_DOMAIN; | 4582 | req.type = REQ_SET_DOMAIN; |
4583 | req.sd = sd; | 4583 | req.sd = sd; |
4584 | list_add(&req.list, &rq->migration_queue); | 4584 | list_add(&req.list, &rq->migration_queue); |
4585 | local = 0; | 4585 | local = 0; |
4586 | } | 4586 | } |
4587 | 4587 | ||
4588 | spin_unlock_irqrestore(&rq->lock, flags); | 4588 | spin_unlock_irqrestore(&rq->lock, flags); |
4589 | 4589 | ||
4590 | if (!local) { | 4590 | if (!local) { |
4591 | wake_up_process(rq->migration_thread); | 4591 | wake_up_process(rq->migration_thread); |
4592 | wait_for_completion(&req.done); | 4592 | wait_for_completion(&req.done); |
4593 | } | 4593 | } |
4594 | } | 4594 | } |
4595 | 4595 | ||
4596 | /* cpus with isolated domains */ | 4596 | /* cpus with isolated domains */ |
4597 | cpumask_t __devinitdata cpu_isolated_map = CPU_MASK_NONE; | 4597 | cpumask_t __devinitdata cpu_isolated_map = CPU_MASK_NONE; |
4598 | 4598 | ||
4599 | /* Setup the mask of cpus configured for isolated domains */ | 4599 | /* Setup the mask of cpus configured for isolated domains */ |
4600 | static int __init isolated_cpu_setup(char *str) | 4600 | static int __init isolated_cpu_setup(char *str) |
4601 | { | 4601 | { |
4602 | int ints[NR_CPUS], i; | 4602 | int ints[NR_CPUS], i; |
4603 | 4603 | ||
4604 | str = get_options(str, ARRAY_SIZE(ints), ints); | 4604 | str = get_options(str, ARRAY_SIZE(ints), ints); |
4605 | cpus_clear(cpu_isolated_map); | 4605 | cpus_clear(cpu_isolated_map); |
4606 | for (i = 1; i <= ints[0]; i++) | 4606 | for (i = 1; i <= ints[0]; i++) |
4607 | if (ints[i] < NR_CPUS) | 4607 | if (ints[i] < NR_CPUS) |
4608 | cpu_set(ints[i], cpu_isolated_map); | 4608 | cpu_set(ints[i], cpu_isolated_map); |
4609 | return 1; | 4609 | return 1; |
4610 | } | 4610 | } |
4611 | 4611 | ||
4612 | __setup ("isolcpus=", isolated_cpu_setup); | 4612 | __setup ("isolcpus=", isolated_cpu_setup); |
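For illustration, the loop in isolated_cpu_setup() relies on the get_options() convention that ints[0] holds the number of integers parsed, with the values following from ints[1]; booting with, say, isolcpus=2,3 therefore keeps CPUs 2 and 3 on the dummy domain. The snippet below is a standalone userspace sketch of that ints[] layout only, not the kernel's get_options().

/* Userspace illustration of the ints[] convention used by isolated_cpu_setup().
 * Only the layout is modelled here, not the kernel parser itself. */
#include <stdio.h>
#include <stdlib.h>

static void parse_cpu_list(const char *str, int *ints, int max)
{
        int n = 0;
        char *end;

        while (*str && n < max - 1) {
                ints[++n] = (int)strtol(str, &end, 10); /* values start at ints[1] */
                if (*end != ',')
                        break;
                str = end + 1;
        }
        ints[0] = n;                                    /* count goes in ints[0] */
}

int main(void)
{
        int ints[8], i;

        parse_cpu_list("2,3", ints, 8);         /* as if booted with isolcpus=2,3 */
        for (i = 1; i <= ints[0]; i++)          /* same loop shape as isolated_cpu_setup() */
                printf("isolating cpu %d\n", ints[i]);
        return 0;
}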
4613 | 4613 | ||
4614 | /* | 4614 | /* |
4615 | * init_sched_build_groups takes an array of groups, the cpumask we wish | 4615 | * init_sched_build_groups takes an array of groups, the cpumask we wish |
4616 | * to span, and a pointer to a function which identifies what group a CPU | 4616 | * to span, and a pointer to a function which identifies what group a CPU |
4617 | * belongs to. The return value of group_fn must be a valid index into the | 4617 | * belongs to. The return value of group_fn must be a valid index into the |
4618 | * groups[] array, and must be >= 0 and < NR_CPUS (because we keep track | 4618 | * groups[] array, and must be >= 0 and < NR_CPUS (because we keep track |
4619 | * of the groups covered with a cpumask_t). | 4619 | * of the groups covered with a cpumask_t). |
4620 | * | 4620 | * |
4621 | * init_sched_build_groups will build a circular linked list of the groups | 4621 | * init_sched_build_groups will build a circular linked list of the groups |
4622 | * covered by the given span, and will set each group's ->cpumask correctly, | 4622 | * covered by the given span, and will set each group's ->cpumask correctly, |
4623 | * and ->cpu_power to 0. | 4623 | * and ->cpu_power to 0. |
4624 | */ | 4624 | */ |
4625 | void __devinit init_sched_build_groups(struct sched_group groups[], | 4625 | void __devinit init_sched_build_groups(struct sched_group groups[], |
4626 | cpumask_t span, int (*group_fn)(int cpu)) | 4626 | cpumask_t span, int (*group_fn)(int cpu)) |
4627 | { | 4627 | { |
4628 | struct sched_group *first = NULL, *last = NULL; | 4628 | struct sched_group *first = NULL, *last = NULL; |
4629 | cpumask_t covered = CPU_MASK_NONE; | 4629 | cpumask_t covered = CPU_MASK_NONE; |
4630 | int i; | 4630 | int i; |
4631 | 4631 | ||
4632 | for_each_cpu_mask(i, span) { | 4632 | for_each_cpu_mask(i, span) { |
4633 | int group = group_fn(i); | 4633 | int group = group_fn(i); |
4634 | struct sched_group *sg = &groups[group]; | 4634 | struct sched_group *sg = &groups[group]; |
4635 | int j; | 4635 | int j; |
4636 | 4636 | ||
4637 | if (cpu_isset(i, covered)) | 4637 | if (cpu_isset(i, covered)) |
4638 | continue; | 4638 | continue; |
4639 | 4639 | ||
4640 | sg->cpumask = CPU_MASK_NONE; | 4640 | sg->cpumask = CPU_MASK_NONE; |
4641 | sg->cpu_power = 0; | 4641 | sg->cpu_power = 0; |
4642 | 4642 | ||
4643 | for_each_cpu_mask(j, span) { | 4643 | for_each_cpu_mask(j, span) { |
4644 | if (group_fn(j) != group) | 4644 | if (group_fn(j) != group) |
4645 | continue; | 4645 | continue; |
4646 | 4646 | ||
4647 | cpu_set(j, covered); | 4647 | cpu_set(j, covered); |
4648 | cpu_set(j, sg->cpumask); | 4648 | cpu_set(j, sg->cpumask); |
4649 | } | 4649 | } |
4650 | if (!first) | 4650 | if (!first) |
4651 | first = sg; | 4651 | first = sg; |
4652 | if (last) | 4652 | if (last) |
4653 | last->next = sg; | 4653 | last->next = sg; |
4654 | last = sg; | 4654 | last = sg; |
4655 | } | 4655 | } |
4656 | last->next = first; | 4656 | last->next = first; |
4657 | } | 4657 | } |
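For illustration, the last->next = first assignment above closes the group list into a ring, which is why sched_domain_debug() earlier in this file can walk the groups with a do/while loop that stops on returning to sd->groups. A minimal traversal sketch (the helper name is hypothetical):

/* Count the members of the circular group list built by
 * init_sched_build_groups(). Hypothetical helper, shown only to
 * illustrate the ->next wrap-around. */
static int count_sched_groups(struct sched_group *first)
{
        struct sched_group *sg = first;
        int n = 0;

        do {
                n++;
                sg = sg->next;
        } while (sg != first);

        return n;
}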
4658 | 4658 | ||
4659 | 4659 | ||
4660 | #ifdef ARCH_HAS_SCHED_DOMAIN | 4660 | #ifdef ARCH_HAS_SCHED_DOMAIN |
4661 | extern void __devinit arch_init_sched_domains(void); | 4661 | extern void __devinit arch_init_sched_domains(void); |
4662 | extern void __devinit arch_destroy_sched_domains(void); | 4662 | extern void __devinit arch_destroy_sched_domains(void); |
4663 | #else | 4663 | #else |
4664 | #ifdef CONFIG_SCHED_SMT | 4664 | #ifdef CONFIG_SCHED_SMT |
4665 | static DEFINE_PER_CPU(struct sched_domain, cpu_domains); | 4665 | static DEFINE_PER_CPU(struct sched_domain, cpu_domains); |
4666 | static struct sched_group sched_group_cpus[NR_CPUS]; | 4666 | static struct sched_group sched_group_cpus[NR_CPUS]; |
4667 | static int __devinit cpu_to_cpu_group(int cpu) | 4667 | static int __devinit cpu_to_cpu_group(int cpu) |
4668 | { | 4668 | { |
4669 | return cpu; | 4669 | return cpu; |
4670 | } | 4670 | } |
4671 | #endif | 4671 | #endif |
4672 | 4672 | ||
4673 | static DEFINE_PER_CPU(struct sched_domain, phys_domains); | 4673 | static DEFINE_PER_CPU(struct sched_domain, phys_domains); |
4674 | static struct sched_group sched_group_phys[NR_CPUS]; | 4674 | static struct sched_group sched_group_phys[NR_CPUS]; |
4675 | static int __devinit cpu_to_phys_group(int cpu) | 4675 | static int __devinit cpu_to_phys_group(int cpu) |
4676 | { | 4676 | { |
4677 | #ifdef CONFIG_SCHED_SMT | 4677 | #ifdef CONFIG_SCHED_SMT |
4678 | return first_cpu(cpu_sibling_map[cpu]); | 4678 | return first_cpu(cpu_sibling_map[cpu]); |
4679 | #else | 4679 | #else |
4680 | return cpu; | 4680 | return cpu; |
4681 | #endif | 4681 | #endif |
4682 | } | 4682 | } |
4683 | 4683 | ||
4684 | #ifdef CONFIG_NUMA | 4684 | #ifdef CONFIG_NUMA |
4685 | 4685 | ||
4686 | static DEFINE_PER_CPU(struct sched_domain, node_domains); | 4686 | static DEFINE_PER_CPU(struct sched_domain, node_domains); |
4687 | static struct sched_group sched_group_nodes[MAX_NUMNODES]; | 4687 | static struct sched_group sched_group_nodes[MAX_NUMNODES]; |
4688 | static int __devinit cpu_to_node_group(int cpu) | 4688 | static int __devinit cpu_to_node_group(int cpu) |
4689 | { | 4689 | { |
4690 | return cpu_to_node(cpu); | 4690 | return cpu_to_node(cpu); |
4691 | } | 4691 | } |
4692 | #endif | 4692 | #endif |
4693 | 4693 | ||
4694 | #if defined(CONFIG_SCHED_SMT) && defined(CONFIG_NUMA) | 4694 | #if defined(CONFIG_SCHED_SMT) && defined(CONFIG_NUMA) |
4695 | /* | 4695 | /* |
4696 | * The domains setup code relies on siblings not spanning | 4696 | * The domains setup code relies on siblings not spanning |
4697 | * multiple nodes. Make sure the architecture has a proper | 4697 | * multiple nodes. Make sure the architecture has a proper |
4698 | * siblings map: | 4698 | * siblings map: |
4699 | */ | 4699 | */ |
4700 | static void check_sibling_maps(void) | 4700 | static void check_sibling_maps(void) |
4701 | { | 4701 | { |
4702 | int i, j; | 4702 | int i, j; |
4703 | 4703 | ||
4704 | for_each_online_cpu(i) { | 4704 | for_each_online_cpu(i) { |
4705 | for_each_cpu_mask(j, cpu_sibling_map[i]) { | 4705 | for_each_cpu_mask(j, cpu_sibling_map[i]) { |
4706 | if (cpu_to_node(i) != cpu_to_node(j)) { | 4706 | if (cpu_to_node(i) != cpu_to_node(j)) { |
4707 | printk(KERN_INFO "warning: CPU %d siblings map " | 4707 | printk(KERN_INFO "warning: CPU %d siblings map " |
4708 | "to different node - isolating " | 4708 | "to different node - isolating " |
4709 | "them.\n", i); | 4709 | "them.\n", i); |
4710 | cpu_sibling_map[i] = cpumask_of_cpu(i); | 4710 | cpu_sibling_map[i] = cpumask_of_cpu(i); |
4711 | break; | 4711 | break; |
4712 | } | 4712 | } |
4713 | } | 4713 | } |
4714 | } | 4714 | } |
4715 | } | 4715 | } |
4716 | #endif | 4716 | #endif |
4717 | 4717 | ||
4718 | /* | 4718 | /* |
4719 | * Set up scheduler domains and groups. Callers must hold the hotplug lock. | 4719 | * Set up scheduler domains and groups. Callers must hold the hotplug lock. |
4720 | */ | 4720 | */ |
4721 | static void __devinit arch_init_sched_domains(void) | 4721 | static void __devinit arch_init_sched_domains(void) |
4722 | { | 4722 | { |
4723 | int i; | 4723 | int i; |
4724 | cpumask_t cpu_default_map; | 4724 | cpumask_t cpu_default_map; |
4725 | 4725 | ||
4726 | #if defined(CONFIG_SCHED_SMT) && defined(CONFIG_NUMA) | 4726 | #if defined(CONFIG_SCHED_SMT) && defined(CONFIG_NUMA) |
4727 | check_sibling_maps(); | 4727 | check_sibling_maps(); |
4728 | #endif | 4728 | #endif |
4729 | /* | 4729 | /* |
4730 | * Setup mask for cpus without special case scheduling requirements. | 4730 | * Setup mask for cpus without special case scheduling requirements. |
4731 | * For now this just excludes isolated cpus, but could be used to | 4731 | * For now this just excludes isolated cpus, but could be used to |
4732 | * exclude other special cases in the future. | 4732 | * exclude other special cases in the future. |
4733 | */ | 4733 | */ |
4734 | cpus_complement(cpu_default_map, cpu_isolated_map); | 4734 | cpus_complement(cpu_default_map, cpu_isolated_map); |
4735 | cpus_and(cpu_default_map, cpu_default_map, cpu_online_map); | 4735 | cpus_and(cpu_default_map, cpu_default_map, cpu_online_map); |
4736 | 4736 | ||
4737 | /* | 4737 | /* |
4738 | * Set up domains. Isolated domains just stay on the dummy domain. | 4738 | * Set up domains. Isolated domains just stay on the dummy domain. |
4739 | */ | 4739 | */ |
4740 | for_each_cpu_mask(i, cpu_default_map) { | 4740 | for_each_cpu_mask(i, cpu_default_map) { |
4741 | int group; | 4741 | int group; |
4742 | struct sched_domain *sd = NULL, *p; | 4742 | struct sched_domain *sd = NULL, *p; |
4743 | cpumask_t nodemask = node_to_cpumask(cpu_to_node(i)); | 4743 | cpumask_t nodemask = node_to_cpumask(cpu_to_node(i)); |
4744 | 4744 | ||
4745 | cpus_and(nodemask, nodemask, cpu_default_map); | 4745 | cpus_and(nodemask, nodemask, cpu_default_map); |
4746 | 4746 | ||
4747 | #ifdef CONFIG_NUMA | 4747 | #ifdef CONFIG_NUMA |
4748 | sd = &per_cpu(node_domains, i); | 4748 | sd = &per_cpu(node_domains, i); |
4749 | group = cpu_to_node_group(i); | 4749 | group = cpu_to_node_group(i); |
4750 | *sd = SD_NODE_INIT; | 4750 | *sd = SD_NODE_INIT; |
4751 | sd->span = cpu_default_map; | 4751 | sd->span = cpu_default_map; |
4752 | sd->groups = &sched_group_nodes[group]; | 4752 | sd->groups = &sched_group_nodes[group]; |
4753 | #endif | 4753 | #endif |
4754 | 4754 | ||
4755 | p = sd; | 4755 | p = sd; |
4756 | sd = &per_cpu(phys_domains, i); | 4756 | sd = &per_cpu(phys_domains, i); |
4757 | group = cpu_to_phys_group(i); | 4757 | group = cpu_to_phys_group(i); |
4758 | *sd = SD_CPU_INIT; | 4758 | *sd = SD_CPU_INIT; |
4759 | sd->span = nodemask; | 4759 | sd->span = nodemask; |
4760 | sd->parent = p; | 4760 | sd->parent = p; |
4761 | sd->groups = &sched_group_phys[group]; | 4761 | sd->groups = &sched_group_phys[group]; |
4762 | 4762 | ||
4763 | #ifdef CONFIG_SCHED_SMT | 4763 | #ifdef CONFIG_SCHED_SMT |
4764 | p = sd; | 4764 | p = sd; |
4765 | sd = &per_cpu(cpu_domains, i); | 4765 | sd = &per_cpu(cpu_domains, i); |
4766 | group = cpu_to_cpu_group(i); | 4766 | group = cpu_to_cpu_group(i); |
4767 | *sd = SD_SIBLING_INIT; | 4767 | *sd = SD_SIBLING_INIT; |
4768 | sd->span = cpu_sibling_map[i]; | 4768 | sd->span = cpu_sibling_map[i]; |
4769 | cpus_and(sd->span, sd->span, cpu_default_map); | 4769 | cpus_and(sd->span, sd->span, cpu_default_map); |
4770 | sd->parent = p; | 4770 | sd->parent = p; |
4771 | sd->groups = &sched_group_cpus[group]; | 4771 | sd->groups = &sched_group_cpus[group]; |
4772 | #endif | 4772 | #endif |
4773 | } | 4773 | } |
4774 | 4774 | ||
4775 | #ifdef CONFIG_SCHED_SMT | 4775 | #ifdef CONFIG_SCHED_SMT |
4776 | /* Set up CPU (sibling) groups */ | 4776 | /* Set up CPU (sibling) groups */ |
4777 | for_each_online_cpu(i) { | 4777 | for_each_online_cpu(i) { |
4778 | cpumask_t this_sibling_map = cpu_sibling_map[i]; | 4778 | cpumask_t this_sibling_map = cpu_sibling_map[i]; |
4779 | cpus_and(this_sibling_map, this_sibling_map, cpu_default_map); | 4779 | cpus_and(this_sibling_map, this_sibling_map, cpu_default_map); |
4780 | if (i != first_cpu(this_sibling_map)) | 4780 | if (i != first_cpu(this_sibling_map)) |
4781 | continue; | 4781 | continue; |
4782 | 4782 | ||
4783 | init_sched_build_groups(sched_group_cpus, this_sibling_map, | 4783 | init_sched_build_groups(sched_group_cpus, this_sibling_map, |
4784 | &cpu_to_cpu_group); | 4784 | &cpu_to_cpu_group); |
4785 | } | 4785 | } |
4786 | #endif | 4786 | #endif |
4787 | 4787 | ||
4788 | /* Set up physical groups */ | 4788 | /* Set up physical groups */ |
4789 | for (i = 0; i < MAX_NUMNODES; i++) { | 4789 | for (i = 0; i < MAX_NUMNODES; i++) { |
4790 | cpumask_t nodemask = node_to_cpumask(i); | 4790 | cpumask_t nodemask = node_to_cpumask(i); |
4791 | 4791 | ||
4792 | cpus_and(nodemask, nodemask, cpu_default_map); | 4792 | cpus_and(nodemask, nodemask, cpu_default_map); |
4793 | if (cpus_empty(nodemask)) | 4793 | if (cpus_empty(nodemask)) |
4794 | continue; | 4794 | continue; |
4795 | 4795 | ||
4796 | init_sched_build_groups(sched_group_phys, nodemask, | 4796 | init_sched_build_groups(sched_group_phys, nodemask, |
4797 | &cpu_to_phys_group); | 4797 | &cpu_to_phys_group); |
4798 | } | 4798 | } |
4799 | 4799 | ||
4800 | #ifdef CONFIG_NUMA | 4800 | #ifdef CONFIG_NUMA |
4801 | /* Set up node groups */ | 4801 | /* Set up node groups */ |
4802 | init_sched_build_groups(sched_group_nodes, cpu_default_map, | 4802 | init_sched_build_groups(sched_group_nodes, cpu_default_map, |
4803 | &cpu_to_node_group); | 4803 | &cpu_to_node_group); |
4804 | #endif | 4804 | #endif |
4805 | 4805 | ||
4806 | /* Calculate CPU power for physical packages and nodes */ | 4806 | /* Calculate CPU power for physical packages and nodes */ |
4807 | for_each_cpu_mask(i, cpu_default_map) { | 4807 | for_each_cpu_mask(i, cpu_default_map) { |
4808 | int power; | 4808 | int power; |
4809 | struct sched_domain *sd; | 4809 | struct sched_domain *sd; |
4810 | #ifdef CONFIG_SCHED_SMT | 4810 | #ifdef CONFIG_SCHED_SMT |
4811 | sd = &per_cpu(cpu_domains, i); | 4811 | sd = &per_cpu(cpu_domains, i); |
4812 | power = SCHED_LOAD_SCALE; | 4812 | power = SCHED_LOAD_SCALE; |
4813 | sd->groups->cpu_power = power; | 4813 | sd->groups->cpu_power = power; |
4814 | #endif | 4814 | #endif |
4815 | 4815 | ||
4816 | sd = &per_cpu(phys_domains, i); | 4816 | sd = &per_cpu(phys_domains, i); |
4817 | power = SCHED_LOAD_SCALE + SCHED_LOAD_SCALE * | 4817 | power = SCHED_LOAD_SCALE + SCHED_LOAD_SCALE * |
4818 | (cpus_weight(sd->groups->cpumask)-1) / 10; | 4818 | (cpus_weight(sd->groups->cpumask)-1) / 10; |
4819 | sd->groups->cpu_power = power; | 4819 | sd->groups->cpu_power = power; |
4820 | 4820 | ||
4821 | #ifdef CONFIG_NUMA | 4821 | #ifdef CONFIG_NUMA |
4822 | if (i == first_cpu(sd->groups->cpumask)) { | 4822 | if (i == first_cpu(sd->groups->cpumask)) { |
4823 | /* Only add "power" once for each physical package. */ | 4823 | /* Only add "power" once for each physical package. */ |
4824 | sd = &per_cpu(node_domains, i); | 4824 | sd = &per_cpu(node_domains, i); |
4825 | sd->groups->cpu_power += power; | 4825 | sd->groups->cpu_power += power; |
4826 | } | 4826 | } |
4827 | #endif | 4827 | #endif |
4828 | } | 4828 | } |
4829 | 4829 | ||
4830 | /* Attach the domains */ | 4830 | /* Attach the domains */ |
4831 | for_each_online_cpu(i) { | 4831 | for_each_online_cpu(i) { |
4832 | struct sched_domain *sd; | 4832 | struct sched_domain *sd; |
4833 | #ifdef CONFIG_SCHED_SMT | 4833 | #ifdef CONFIG_SCHED_SMT |
4834 | sd = &per_cpu(cpu_domains, i); | 4834 | sd = &per_cpu(cpu_domains, i); |
4835 | #else | 4835 | #else |
4836 | sd = &per_cpu(phys_domains, i); | 4836 | sd = &per_cpu(phys_domains, i); |
4837 | #endif | 4837 | #endif |
4838 | cpu_attach_domain(sd, i); | 4838 | cpu_attach_domain(sd, i); |
4839 | } | 4839 | } |
4840 | } | 4840 | } |
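For illustration, a worked reading of the cpu_power arithmetic above, assuming SCHED_LOAD_SCALE is 128 (its usual value in kernels of this vintage): a physical package whose group spans two siblings gets 128 + 128 * (2 - 1) / 10 = 140, i.e. a little more than one CPU's worth of capacity rather than two, which is the SMT discount the formula encodes. A standalone check:

/* Standalone check of the cpu_power formula, assuming SCHED_LOAD_SCALE == 128. */
#include <stdio.h>

#define SCHED_LOAD_SCALE 128UL          /* assumed value, matches 2.6-era kernels */

int main(void)
{
        unsigned long siblings, power;

        for (siblings = 1; siblings <= 4; siblings++) {
                power = SCHED_LOAD_SCALE +
                        SCHED_LOAD_SCALE * (siblings - 1) / 10;
                printf("%lu sibling(s) per package -> cpu_power %lu\n",
                        siblings, power);
        }
        return 0;
}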
4841 | 4841 | ||
4842 | #ifdef CONFIG_HOTPLUG_CPU | 4842 | #ifdef CONFIG_HOTPLUG_CPU |
4843 | static void __devinit arch_destroy_sched_domains(void) | 4843 | static void __devinit arch_destroy_sched_domains(void) |
4844 | { | 4844 | { |
4845 | /* Do nothing: everything is statically allocated. */ | 4845 | /* Do nothing: everything is statically allocated. */ |
4846 | } | 4846 | } |
4847 | #endif | 4847 | #endif |
4848 | 4848 | ||
4849 | #endif /* ARCH_HAS_SCHED_DOMAIN */ | 4849 | #endif /* ARCH_HAS_SCHED_DOMAIN */ |
4850 | 4850 | ||
4851 | /* | 4851 | /* |
4852 | * Initial dummy domain for early boot and for hotplug cpu. Being static, | 4852 | * Initial dummy domain for early boot and for hotplug cpu. Being static, |
4853 | * it is initialized to zero, so all balancing flags are cleared which is | 4853 | * it is initialized to zero, so all balancing flags are cleared which is |
4854 | * what we want. | 4854 | * what we want. |
4855 | */ | 4855 | */ |
4856 | static struct sched_domain sched_domain_dummy; | 4856 | static struct sched_domain sched_domain_dummy; |
4857 | 4857 | ||
4858 | #ifdef CONFIG_HOTPLUG_CPU | 4858 | #ifdef CONFIG_HOTPLUG_CPU |
4859 | /* | 4859 | /* |
4860 | * Force a reinitialization of the sched domains hierarchy. The domains | 4860 | * Force a reinitialization of the sched domains hierarchy. The domains |
4861 | * and groups cannot be updated in place without racing with the balancing | 4861 | * and groups cannot be updated in place without racing with the balancing |
4862 | * code, so we temporarily attach all running cpus to a "dummy" domain | 4862 | * code, so we temporarily attach all running cpus to a "dummy" domain |
4863 | * which will prevent rebalancing while the sched domains are recalculated. | 4863 | * which will prevent rebalancing while the sched domains are recalculated. |
4864 | */ | 4864 | */ |
4865 | static int update_sched_domains(struct notifier_block *nfb, | 4865 | static int update_sched_domains(struct notifier_block *nfb, |
4866 | unsigned long action, void *hcpu) | 4866 | unsigned long action, void *hcpu) |
4867 | { | 4867 | { |
4868 | int i; | 4868 | int i; |
4869 | 4869 | ||
4870 | switch (action) { | 4870 | switch (action) { |
4871 | case CPU_UP_PREPARE: | 4871 | case CPU_UP_PREPARE: |
4872 | case CPU_DOWN_PREPARE: | 4872 | case CPU_DOWN_PREPARE: |
4873 | for_each_online_cpu(i) | 4873 | for_each_online_cpu(i) |
4874 | cpu_attach_domain(&sched_domain_dummy, i); | 4874 | cpu_attach_domain(&sched_domain_dummy, i); |
4875 | arch_destroy_sched_domains(); | 4875 | arch_destroy_sched_domains(); |
4876 | return NOTIFY_OK; | 4876 | return NOTIFY_OK; |
4877 | 4877 | ||
4878 | case CPU_UP_CANCELED: | 4878 | case CPU_UP_CANCELED: |
4879 | case CPU_DOWN_FAILED: | 4879 | case CPU_DOWN_FAILED: |
4880 | case CPU_ONLINE: | 4880 | case CPU_ONLINE: |
4881 | case CPU_DEAD: | 4881 | case CPU_DEAD: |
4882 | /* | 4882 | /* |
4883 | * Fall through and re-initialise the domains. | 4883 | * Fall through and re-initialise the domains. |
4884 | */ | 4884 | */ |
4885 | break; | 4885 | break; |
4886 | default: | 4886 | default: |
4887 | return NOTIFY_DONE; | 4887 | return NOTIFY_DONE; |
4888 | } | 4888 | } |
4889 | 4889 | ||
4890 | /* The hotplug lock is already held by cpu_up/cpu_down */ | 4890 | /* The hotplug lock is already held by cpu_up/cpu_down */ |
4891 | arch_init_sched_domains(); | 4891 | arch_init_sched_domains(); |
4892 | 4892 | ||
4893 | return NOTIFY_OK; | 4893 | return NOTIFY_OK; |
4894 | } | 4894 | } |
4895 | #endif | 4895 | #endif |
4896 | 4896 | ||
4897 | void __init sched_init_smp(void) | 4897 | void __init sched_init_smp(void) |
4898 | { | 4898 | { |
4899 | lock_cpu_hotplug(); | 4899 | lock_cpu_hotplug(); |
4900 | arch_init_sched_domains(); | 4900 | arch_init_sched_domains(); |
4901 | unlock_cpu_hotplug(); | 4901 | unlock_cpu_hotplug(); |
4902 | /* XXX: Theoretical race here - CPU may be hotplugged now */ | 4902 | /* XXX: Theoretical race here - CPU may be hotplugged now */ |
4903 | hotcpu_notifier(update_sched_domains, 0); | 4903 | hotcpu_notifier(update_sched_domains, 0); |
4904 | } | 4904 | } |
4905 | #else | 4905 | #else |
4906 | void __init sched_init_smp(void) | 4906 | void __init sched_init_smp(void) |
4907 | { | 4907 | { |
4908 | } | 4908 | } |
4909 | #endif /* CONFIG_SMP */ | 4909 | #endif /* CONFIG_SMP */ |
4910 | 4910 | ||
4911 | int in_sched_functions(unsigned long addr) | 4911 | int in_sched_functions(unsigned long addr) |
4912 | { | 4912 | { |
4913 | /* Linker adds these: start and end of __sched functions */ | 4913 | /* Linker adds these: start and end of __sched functions */ |
4914 | extern char __sched_text_start[], __sched_text_end[]; | 4914 | extern char __sched_text_start[], __sched_text_end[]; |
4915 | return in_lock_functions(addr) || | 4915 | return in_lock_functions(addr) || |
4916 | (addr >= (unsigned long)__sched_text_start | 4916 | (addr >= (unsigned long)__sched_text_start |
4917 | && addr < (unsigned long)__sched_text_end); | 4917 | && addr < (unsigned long)__sched_text_end); |
4918 | } | 4918 | } |
4919 | 4919 | ||
4920 | void __init sched_init(void) | 4920 | void __init sched_init(void) |
4921 | { | 4921 | { |
4922 | runqueue_t *rq; | 4922 | runqueue_t *rq; |
4923 | int i, j, k; | 4923 | int i, j, k; |
4924 | 4924 | ||
4925 | for (i = 0; i < NR_CPUS; i++) { | 4925 | for (i = 0; i < NR_CPUS; i++) { |
4926 | prio_array_t *array; | 4926 | prio_array_t *array; |
4927 | 4927 | ||
4928 | rq = cpu_rq(i); | 4928 | rq = cpu_rq(i); |
4929 | spin_lock_init(&rq->lock); | 4929 | spin_lock_init(&rq->lock); |
4930 | rq->active = rq->arrays; | 4930 | rq->active = rq->arrays; |
4931 | rq->expired = rq->arrays + 1; | 4931 | rq->expired = rq->arrays + 1; |
4932 | rq->best_expired_prio = MAX_PRIO; | 4932 | rq->best_expired_prio = MAX_PRIO; |
4933 | 4933 | ||
4934 | #ifdef CONFIG_SMP | 4934 | #ifdef CONFIG_SMP |
4935 | rq->sd = &sched_domain_dummy; | 4935 | rq->sd = &sched_domain_dummy; |
4936 | rq->cpu_load = 0; | 4936 | rq->cpu_load = 0; |
4937 | rq->active_balance = 0; | 4937 | rq->active_balance = 0; |
4938 | rq->push_cpu = 0; | 4938 | rq->push_cpu = 0; |
4939 | rq->migration_thread = NULL; | 4939 | rq->migration_thread = NULL; |
4940 | INIT_LIST_HEAD(&rq->migration_queue); | 4940 | INIT_LIST_HEAD(&rq->migration_queue); |
4941 | #endif | 4941 | #endif |
4942 | atomic_set(&rq->nr_iowait, 0); | 4942 | atomic_set(&rq->nr_iowait, 0); |
4943 | 4943 | ||
4944 | for (j = 0; j < 2; j++) { | 4944 | for (j = 0; j < 2; j++) { |
4945 | array = rq->arrays + j; | 4945 | array = rq->arrays + j; |
4946 | for (k = 0; k < MAX_PRIO; k++) { | 4946 | for (k = 0; k < MAX_PRIO; k++) { |
4947 | INIT_LIST_HEAD(array->queue + k); | 4947 | INIT_LIST_HEAD(array->queue + k); |
4948 | __clear_bit(k, array->bitmap); | 4948 | __clear_bit(k, array->bitmap); |
4949 | } | 4949 | } |
4950 | // delimiter for bitsearch | 4950 | // delimiter for bitsearch |
4951 | __set_bit(MAX_PRIO, array->bitmap); | 4951 | __set_bit(MAX_PRIO, array->bitmap); |
4952 | } | 4952 | } |
4953 | } | 4953 | } |
4954 | 4954 | ||
4955 | /* | 4955 | /* |
4956 | * The boot idle thread does lazy MMU switching as well: | 4956 | * The boot idle thread does lazy MMU switching as well: |
4957 | */ | 4957 | */ |
4958 | atomic_inc(&init_mm.mm_count); | 4958 | atomic_inc(&init_mm.mm_count); |
4959 | enter_lazy_tlb(&init_mm, current); | 4959 | enter_lazy_tlb(&init_mm, current); |
4960 | 4960 | ||
4961 | /* | 4961 | /* |
4962 | * Make us the idle thread. Technically, schedule() should not be | 4962 | * Make us the idle thread. Technically, schedule() should not be |
4963 | * called from this thread; however, somewhere below it might be, | 4963 | * called from this thread; however, somewhere below it might be, |
4964 | * but because we are the idle thread, we just pick up running again | 4964 | * but because we are the idle thread, we just pick up running again |
4965 | * when this runqueue becomes "idle". | 4965 | * when this runqueue becomes "idle". |
4966 | */ | 4966 | */ |
4967 | init_idle(current, smp_processor_id()); | 4967 | init_idle(current, smp_processor_id()); |
4968 | } | 4968 | } |
4969 | 4969 | ||
4970 | #ifdef CONFIG_DEBUG_SPINLOCK_SLEEP | 4970 | #ifdef CONFIG_DEBUG_SPINLOCK_SLEEP |
4971 | void __might_sleep(char *file, int line) | 4971 | void __might_sleep(char *file, int line) |
4972 | { | 4972 | { |
4973 | #if defined(in_atomic) | 4973 | #if defined(in_atomic) |
4974 | static unsigned long prev_jiffy; /* ratelimiting */ | 4974 | static unsigned long prev_jiffy; /* ratelimiting */ |
4975 | 4975 | ||
4976 | if ((in_atomic() || irqs_disabled()) && | 4976 | if ((in_atomic() || irqs_disabled()) && |
4977 | system_state == SYSTEM_RUNNING && !oops_in_progress) { | 4977 | system_state == SYSTEM_RUNNING && !oops_in_progress) { |
4978 | if (time_before(jiffies, prev_jiffy + HZ) && prev_jiffy) | 4978 | if (time_before(jiffies, prev_jiffy + HZ) && prev_jiffy) |
4979 | return; | 4979 | return; |
4980 | prev_jiffy = jiffies; | 4980 | prev_jiffy = jiffies; |
4981 | printk(KERN_ERR "Debug: sleeping function called from invalid" | 4981 | printk(KERN_ERR "Debug: sleeping function called from invalid" |
4982 | " context at %s:%d\n", file, line); | 4982 | " context at %s:%d\n", file, line); |
4983 | printk("in_atomic():%d, irqs_disabled():%d\n", | 4983 | printk("in_atomic():%d, irqs_disabled():%d\n", |
4984 | in_atomic(), irqs_disabled()); | 4984 | in_atomic(), irqs_disabled()); |
4985 | dump_stack(); | 4985 | dump_stack(); |
4986 | } | 4986 | } |
4987 | #endif | 4987 | #endif |
4988 | } | 4988 | } |
4989 | EXPORT_SYMBOL(__might_sleep); | 4989 | EXPORT_SYMBOL(__might_sleep); |
4990 | #endif | 4990 | #endif |
4991 | 4991 | ||
4992 | #ifdef CONFIG_MAGIC_SYSRQ | 4992 | #ifdef CONFIG_MAGIC_SYSRQ |
4993 | void normalize_rt_tasks(void) | 4993 | void normalize_rt_tasks(void) |
4994 | { | 4994 | { |
4995 | struct task_struct *p; | 4995 | struct task_struct *p; |
4996 | prio_array_t *array; | 4996 | prio_array_t *array; |
4997 | unsigned long flags; | 4997 | unsigned long flags; |
4998 | runqueue_t *rq; | 4998 | runqueue_t *rq; |
4999 | 4999 | ||
5000 | read_lock_irq(&tasklist_lock); | 5000 | read_lock_irq(&tasklist_lock); |
5001 | for_each_process (p) { | 5001 | for_each_process (p) { |
5002 | if (!rt_task(p)) | 5002 | if (!rt_task(p)) |
5003 | continue; | 5003 | continue; |
5004 | 5004 | ||
5005 | rq = task_rq_lock(p, &flags); | 5005 | rq = task_rq_lock(p, &flags); |
5006 | 5006 | ||
5007 | array = p->array; | 5007 | array = p->array; |
5008 | if (array) | 5008 | if (array) |
5009 | deactivate_task(p, task_rq(p)); | 5009 | deactivate_task(p, task_rq(p)); |
5010 | __setscheduler(p, SCHED_NORMAL, 0); | 5010 | __setscheduler(p, SCHED_NORMAL, 0); |
5011 | if (array) { | 5011 | if (array) { |
5012 | __activate_task(p, task_rq(p)); | 5012 | __activate_task(p, task_rq(p)); |
5013 | resched_task(rq->curr); | 5013 | resched_task(rq->curr); |
5014 | } | 5014 | } |
5015 | 5015 | ||
5016 | task_rq_unlock(rq, &flags); | 5016 | task_rq_unlock(rq, &flags); |
5017 | } | 5017 | } |
5018 | read_unlock_irq(&tasklist_lock); | 5018 | read_unlock_irq(&tasklist_lock); |
5019 | } | 5019 | } |
5020 | 5020 | ||
5021 | #endif /* CONFIG_MAGIC_SYSRQ */ | 5021 | #endif /* CONFIG_MAGIC_SYSRQ */ |
5022 | 5022 |
kernel/stop_machine.c
1 | #include <linux/stop_machine.h> | 1 | #include <linux/stop_machine.h> |
2 | #include <linux/kthread.h> | 2 | #include <linux/kthread.h> |
3 | #include <linux/sched.h> | 3 | #include <linux/sched.h> |
4 | #include <linux/cpu.h> | 4 | #include <linux/cpu.h> |
5 | #include <linux/err.h> | 5 | #include <linux/err.h> |
6 | #include <linux/syscalls.h> | 6 | #include <linux/syscalls.h> |
7 | #include <asm/atomic.h> | 7 | #include <asm/atomic.h> |
8 | #include <asm/semaphore.h> | 8 | #include <asm/semaphore.h> |
9 | #include <asm/uaccess.h> | 9 | #include <asm/uaccess.h> |
10 | 10 | ||
11 | /* Since we affect priority and affinity (both of which are visible | 11 | /* Since we affect priority and affinity (both of which are visible |
12 | * to, and settable by, outside processes), we do indirection via a | 12 | * to, and settable by, outside processes), we do indirection via a |
13 | * kthread. */ | 13 | * kthread. */ |
14 | 14 | ||
15 | /* Thread to stop each CPU in user context. */ | 15 | /* Thread to stop each CPU in user context. */ |
16 | enum stopmachine_state { | 16 | enum stopmachine_state { |
17 | STOPMACHINE_WAIT, | 17 | STOPMACHINE_WAIT, |
18 | STOPMACHINE_PREPARE, | 18 | STOPMACHINE_PREPARE, |
19 | STOPMACHINE_DISABLE_IRQ, | 19 | STOPMACHINE_DISABLE_IRQ, |
20 | STOPMACHINE_EXIT, | 20 | STOPMACHINE_EXIT, |
21 | }; | 21 | }; |
22 | 22 | ||
23 | static enum stopmachine_state stopmachine_state; | 23 | static enum stopmachine_state stopmachine_state; |
24 | static unsigned int stopmachine_num_threads; | 24 | static unsigned int stopmachine_num_threads; |
25 | static atomic_t stopmachine_thread_ack; | 25 | static atomic_t stopmachine_thread_ack; |
26 | static DECLARE_MUTEX(stopmachine_mutex); | 26 | static DECLARE_MUTEX(stopmachine_mutex); |
27 | 27 | ||
28 | static int stopmachine(void *cpu) | 28 | static int stopmachine(void *cpu) |
29 | { | 29 | { |
30 | int irqs_disabled = 0; | 30 | int irqs_disabled = 0; |
31 | int prepared = 0; | 31 | int prepared = 0; |
32 | 32 | ||
33 | set_cpus_allowed(current, cpumask_of_cpu((int)(long)cpu)); | 33 | set_cpus_allowed(current, cpumask_of_cpu((int)(long)cpu)); |
34 | 34 | ||
35 | /* Ack: we are alive */ | 35 | /* Ack: we are alive */ |
36 | smp_mb(); /* Theoretically the ack = 0 might not be on this CPU yet. */ | 36 | smp_mb(); /* Theoretically the ack = 0 might not be on this CPU yet. */ |
37 | atomic_inc(&stopmachine_thread_ack); | 37 | atomic_inc(&stopmachine_thread_ack); |
38 | 38 | ||
39 | /* Simple state machine */ | 39 | /* Simple state machine */ |
40 | while (stopmachine_state != STOPMACHINE_EXIT) { | 40 | while (stopmachine_state != STOPMACHINE_EXIT) { |
41 | if (stopmachine_state == STOPMACHINE_DISABLE_IRQ | 41 | if (stopmachine_state == STOPMACHINE_DISABLE_IRQ |
42 | && !irqs_disabled) { | 42 | && !irqs_disabled) { |
43 | local_irq_disable(); | 43 | local_irq_disable(); |
44 | irqs_disabled = 1; | 44 | irqs_disabled = 1; |
45 | /* Ack: irqs disabled. */ | 45 | /* Ack: irqs disabled. */ |
46 | smp_mb(); /* Must read state first. */ | 46 | smp_mb(); /* Must read state first. */ |
47 | atomic_inc(&stopmachine_thread_ack); | 47 | atomic_inc(&stopmachine_thread_ack); |
48 | } else if (stopmachine_state == STOPMACHINE_PREPARE | 48 | } else if (stopmachine_state == STOPMACHINE_PREPARE |
49 | && !prepared) { | 49 | && !prepared) { |
50 | /* Everyone is in place, hold CPU. */ | 50 | /* Everyone is in place, hold CPU. */ |
51 | preempt_disable(); | 51 | preempt_disable(); |
52 | prepared = 1; | 52 | prepared = 1; |
53 | smp_mb(); /* Must read state first. */ | 53 | smp_mb(); /* Must read state first. */ |
54 | atomic_inc(&stopmachine_thread_ack); | 54 | atomic_inc(&stopmachine_thread_ack); |
55 | } | 55 | } |
56 | /* Yield in first stage: migration threads need to | 56 | /* Yield in first stage: migration threads need to |
57 | * help our sisters onto their CPUs. */ | 57 | * help our sisters onto their CPUs. */ |
58 | if (!prepared && !irqs_disabled) | 58 | if (!prepared && !irqs_disabled) |
59 | yield(); | 59 | yield(); |
60 | else | 60 | else |
61 | cpu_relax(); | 61 | cpu_relax(); |
62 | } | 62 | } |
63 | 63 | ||
64 | /* Ack: we are exiting. */ | 64 | /* Ack: we are exiting. */ |
65 | smp_mb(); /* Must read state first. */ | 65 | smp_mb(); /* Must read state first. */ |
66 | atomic_inc(&stopmachine_thread_ack); | 66 | atomic_inc(&stopmachine_thread_ack); |
67 | 67 | ||
68 | if (irqs_disabled) | 68 | if (irqs_disabled) |
69 | local_irq_enable(); | 69 | local_irq_enable(); |
70 | if (prepared) | 70 | if (prepared) |
71 | preempt_enable(); | 71 | preempt_enable(); |
72 | 72 | ||
73 | return 0; | 73 | return 0; |
74 | } | 74 | } |
75 | 75 | ||
76 | /* Change the thread state */ | 76 | /* Change the thread state */ |
77 | static void stopmachine_set_state(enum stopmachine_state state) | 77 | static void stopmachine_set_state(enum stopmachine_state state) |
78 | { | 78 | { |
79 | atomic_set(&stopmachine_thread_ack, 0); | 79 | atomic_set(&stopmachine_thread_ack, 0); |
80 | smp_wmb(); | 80 | smp_wmb(); |
81 | stopmachine_state = state; | 81 | stopmachine_state = state; |
82 | while (atomic_read(&stopmachine_thread_ack) != stopmachine_num_threads) | 82 | while (atomic_read(&stopmachine_thread_ack) != stopmachine_num_threads) |
83 | cpu_relax(); | 83 | cpu_relax(); |
84 | } | 84 | } |
85 | 85 | ||
86 | static int stop_machine(void) | 86 | static int stop_machine(void) |
87 | { | 87 | { |
88 | int i, ret = 0; | 88 | int i, ret = 0; |
89 | struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 }; | 89 | struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 }; |
90 | mm_segment_t old_fs = get_fs(); | 90 | mm_segment_t old_fs = get_fs(); |
91 | 91 | ||
92 | /* One high-prio thread per cpu. We'll do this one. */ | 92 | /* One high-prio thread per cpu. We'll do this one. */ |
93 | set_fs(KERNEL_DS); | 93 | set_fs(KERNEL_DS); |
94 | sys_sched_setscheduler(current->pid, SCHED_FIFO, | 94 | sys_sched_setscheduler(current->pid, SCHED_FIFO, |
95 | (struct sched_param __user *)&param); | 95 | (struct sched_param __user *)&param); |
96 | set_fs(old_fs); | 96 | set_fs(old_fs); |
97 | 97 | ||
98 | atomic_set(&stopmachine_thread_ack, 0); | 98 | atomic_set(&stopmachine_thread_ack, 0); |
99 | stopmachine_num_threads = 0; | 99 | stopmachine_num_threads = 0; |
100 | stopmachine_state = STOPMACHINE_WAIT; | 100 | stopmachine_state = STOPMACHINE_WAIT; |
101 | 101 | ||
102 | for_each_online_cpu(i) { | 102 | for_each_online_cpu(i) { |
103 | if (i == _smp_processor_id()) | 103 | if (i == raw_smp_processor_id()) |
104 | continue; | 104 | continue; |
105 | ret = kernel_thread(stopmachine, (void *)(long)i,CLONE_KERNEL); | 105 | ret = kernel_thread(stopmachine, (void *)(long)i,CLONE_KERNEL); |
106 | if (ret < 0) | 106 | if (ret < 0) |
107 | break; | 107 | break; |
108 | stopmachine_num_threads++; | 108 | stopmachine_num_threads++; |
109 | } | 109 | } |
110 | 110 | ||
111 | /* Wait for them all to come to life. */ | 111 | /* Wait for them all to come to life. */ |
112 | while (atomic_read(&stopmachine_thread_ack) != stopmachine_num_threads) | 112 | while (atomic_read(&stopmachine_thread_ack) != stopmachine_num_threads) |
113 | yield(); | 113 | yield(); |
114 | 114 | ||
115 | /* If some failed, kill them all. */ | 115 | /* If some failed, kill them all. */ |
116 | if (ret < 0) { | 116 | if (ret < 0) { |
117 | stopmachine_set_state(STOPMACHINE_EXIT); | 117 | stopmachine_set_state(STOPMACHINE_EXIT); |
118 | up(&stopmachine_mutex); | 118 | up(&stopmachine_mutex); |
119 | return ret; | 119 | return ret; |
120 | } | 120 | } |
121 | 121 | ||
122 | /* Don't schedule us away at this point, please. */ | 122 | /* Don't schedule us away at this point, please. */ |
123 | local_irq_disable(); | 123 | local_irq_disable(); |
124 | 124 | ||
125 | /* Now they are all started, make them hold the CPUs, ready. */ | 125 | /* Now they are all started, make them hold the CPUs, ready. */ |
126 | stopmachine_set_state(STOPMACHINE_PREPARE); | 126 | stopmachine_set_state(STOPMACHINE_PREPARE); |
127 | 127 | ||
128 | /* Make them disable irqs. */ | 128 | /* Make them disable irqs. */ |
129 | stopmachine_set_state(STOPMACHINE_DISABLE_IRQ); | 129 | stopmachine_set_state(STOPMACHINE_DISABLE_IRQ); |
130 | 130 | ||
131 | return 0; | 131 | return 0; |
132 | } | 132 | } |
133 | 133 | ||
134 | static void restart_machine(void) | 134 | static void restart_machine(void) |
135 | { | 135 | { |
136 | stopmachine_set_state(STOPMACHINE_EXIT); | 136 | stopmachine_set_state(STOPMACHINE_EXIT); |
137 | local_irq_enable(); | 137 | local_irq_enable(); |
138 | } | 138 | } |
139 | 139 | ||
140 | struct stop_machine_data | 140 | struct stop_machine_data |
141 | { | 141 | { |
142 | int (*fn)(void *); | 142 | int (*fn)(void *); |
143 | void *data; | 143 | void *data; |
144 | struct completion done; | 144 | struct completion done; |
145 | }; | 145 | }; |
146 | 146 | ||
147 | static int do_stop(void *_smdata) | 147 | static int do_stop(void *_smdata) |
148 | { | 148 | { |
149 | struct stop_machine_data *smdata = _smdata; | 149 | struct stop_machine_data *smdata = _smdata; |
150 | int ret; | 150 | int ret; |
151 | 151 | ||
152 | ret = stop_machine(); | 152 | ret = stop_machine(); |
153 | if (ret == 0) { | 153 | if (ret == 0) { |
154 | ret = smdata->fn(smdata->data); | 154 | ret = smdata->fn(smdata->data); |
155 | restart_machine(); | 155 | restart_machine(); |
156 | } | 156 | } |
157 | 157 | ||
158 | /* We're done: you can kthread_stop us now */ | 158 | /* We're done: you can kthread_stop us now */ |
159 | complete(&smdata->done); | 159 | complete(&smdata->done); |
160 | 160 | ||
161 | /* Wait for kthread_stop */ | 161 | /* Wait for kthread_stop */ |
162 | set_current_state(TASK_INTERRUPTIBLE); | 162 | set_current_state(TASK_INTERRUPTIBLE); |
163 | while (!kthread_should_stop()) { | 163 | while (!kthread_should_stop()) { |
164 | schedule(); | 164 | schedule(); |
165 | set_current_state(TASK_INTERRUPTIBLE); | 165 | set_current_state(TASK_INTERRUPTIBLE); |
166 | } | 166 | } |
167 | __set_current_state(TASK_RUNNING); | 167 | __set_current_state(TASK_RUNNING); |
168 | return ret; | 168 | return ret; |
169 | } | 169 | } |
170 | 170 | ||
171 | struct task_struct *__stop_machine_run(int (*fn)(void *), void *data, | 171 | struct task_struct *__stop_machine_run(int (*fn)(void *), void *data, |
172 | unsigned int cpu) | 172 | unsigned int cpu) |
173 | { | 173 | { |
174 | struct stop_machine_data smdata; | 174 | struct stop_machine_data smdata; |
175 | struct task_struct *p; | 175 | struct task_struct *p; |
176 | 176 | ||
177 | smdata.fn = fn; | 177 | smdata.fn = fn; |
178 | smdata.data = data; | 178 | smdata.data = data; |
179 | init_completion(&smdata.done); | 179 | init_completion(&smdata.done); |
180 | 180 | ||
181 | down(&stopmachine_mutex); | 181 | down(&stopmachine_mutex); |
182 | 182 | ||
183 | /* If they don't care which CPU fn runs on, bind to any online one. */ | 183 | /* If they don't care which CPU fn runs on, bind to any online one. */ |
184 | if (cpu == NR_CPUS) | 184 | if (cpu == NR_CPUS) |
185 | cpu = _smp_processor_id(); | 185 | cpu = raw_smp_processor_id(); |
186 | 186 | ||
187 | p = kthread_create(do_stop, &smdata, "kstopmachine"); | 187 | p = kthread_create(do_stop, &smdata, "kstopmachine"); |
188 | if (!IS_ERR(p)) { | 188 | if (!IS_ERR(p)) { |
189 | kthread_bind(p, cpu); | 189 | kthread_bind(p, cpu); |
190 | wake_up_process(p); | 190 | wake_up_process(p); |
191 | wait_for_completion(&smdata.done); | 191 | wait_for_completion(&smdata.done); |
192 | } | 192 | } |
193 | up(&stopmachine_mutex); | 193 | up(&stopmachine_mutex); |
194 | return p; | 194 | return p; |
195 | } | 195 | } |
196 | 196 | ||
197 | int stop_machine_run(int (*fn)(void *), void *data, unsigned int cpu) | 197 | int stop_machine_run(int (*fn)(void *), void *data, unsigned int cpu) |
198 | { | 198 | { |
199 | struct task_struct *p; | 199 | struct task_struct *p; |
200 | int ret; | 200 | int ret; |
201 | 201 | ||
202 | /* No CPUs can come up or down during this. */ | 202 | /* No CPUs can come up or down during this. */ |
203 | lock_cpu_hotplug(); | 203 | lock_cpu_hotplug(); |
204 | p = __stop_machine_run(fn, data, cpu); | 204 | p = __stop_machine_run(fn, data, cpu); |
205 | if (!IS_ERR(p)) | 205 | if (!IS_ERR(p)) |
206 | ret = kthread_stop(p); | 206 | ret = kthread_stop(p); |
207 | else | 207 | else |
208 | ret = PTR_ERR(p); | 208 | ret = PTR_ERR(p); |
209 | unlock_cpu_hotplug(); | 209 | unlock_cpu_hotplug(); |
210 | 210 | ||
211 | return ret; | 211 | return ret; |
212 | } | 212 | } |
213 | 213 |
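
For readers unfamiliar with this API: stop_machine_run() is the public entry point, and passing NR_CPUS as the cpu argument means "run fn on any online CPU" (see __stop_machine_run() above). A minimal, hypothetical caller might look like the sketch below; the function and variable names (apply_update, freeze_and_update, value) are illustrative only and are not part of this patch.

#include <linux/stop_machine.h>
#include <linux/threads.h>

static int apply_update(void *data)
{
        /* Runs on the chosen CPU while every other online CPU is held
         * in the stopmachine() loop with interrupts disabled. */
        int *value = data;

        *value = 1;
        return 0;
}

static int freeze_and_update(int *value)
{
        /* NR_CPUS == "don't care which CPU fn runs on". */
        return stop_machine_run(apply_update, value, NR_CPUS);
}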
lib/Makefile
1 | # | 1 | # |
2 | # Makefile for some libs needed in the kernel. | 2 | # Makefile for some libs needed in the kernel. |
3 | # | 3 | # |
4 | 4 | ||
5 | lib-y := errno.o ctype.o string.o vsprintf.o cmdline.o \ | 5 | lib-y := errno.o ctype.o string.o vsprintf.o cmdline.o \ |
6 | bust_spinlocks.o rbtree.o radix-tree.o dump_stack.o \ | 6 | bust_spinlocks.o rbtree.o radix-tree.o dump_stack.o \ |
7 | idr.o div64.o int_sqrt.o bitmap.o extable.o prio_tree.o \ | 7 | idr.o div64.o int_sqrt.o bitmap.o extable.o prio_tree.o \ |
8 | sha1.o halfmd4.o | 8 | sha1.o halfmd4.o |
9 | 9 | ||
10 | lib-y += kobject.o kref.o kobject_uevent.o klist.o | 10 | lib-y += kobject.o kref.o kobject_uevent.o klist.o |
11 | 11 | ||
12 | obj-y += sort.o parser.o | 12 | obj-y += sort.o parser.o |
13 | 13 | ||
14 | ifeq ($(CONFIG_DEBUG_KOBJECT),y) | 14 | ifeq ($(CONFIG_DEBUG_KOBJECT),y) |
15 | CFLAGS_kobject.o += -DDEBUG | 15 | CFLAGS_kobject.o += -DDEBUG |
16 | CFLAGS_kobject_uevent.o += -DDEBUG | 16 | CFLAGS_kobject_uevent.o += -DDEBUG |
17 | endif | 17 | endif |
18 | 18 | ||
19 | lib-$(CONFIG_RWSEM_GENERIC_SPINLOCK) += rwsem-spinlock.o | 19 | lib-$(CONFIG_RWSEM_GENERIC_SPINLOCK) += rwsem-spinlock.o |
20 | lib-$(CONFIG_RWSEM_XCHGADD_ALGORITHM) += rwsem.o | 20 | lib-$(CONFIG_RWSEM_XCHGADD_ALGORITHM) += rwsem.o |
21 | lib-$(CONFIG_GENERIC_FIND_NEXT_BIT) += find_next_bit.o | 21 | lib-$(CONFIG_GENERIC_FIND_NEXT_BIT) += find_next_bit.o |
22 | obj-$(CONFIG_LOCK_KERNEL) += kernel_lock.o | 22 | obj-$(CONFIG_LOCK_KERNEL) += kernel_lock.o |
23 | obj-$(CONFIG_DEBUG_PREEMPT) += smp_processor_id.o | ||
23 | 24 | ||
24 | ifneq ($(CONFIG_HAVE_DEC_LOCK),y) | 25 | ifneq ($(CONFIG_HAVE_DEC_LOCK),y) |
25 | lib-y += dec_and_lock.o | 26 | lib-y += dec_and_lock.o |
26 | endif | 27 | endif |
27 | 28 | ||
28 | obj-$(CONFIG_CRC_CCITT) += crc-ccitt.o | 29 | obj-$(CONFIG_CRC_CCITT) += crc-ccitt.o |
29 | obj-$(CONFIG_CRC32) += crc32.o | 30 | obj-$(CONFIG_CRC32) += crc32.o |
30 | obj-$(CONFIG_LIBCRC32C) += libcrc32c.o | 31 | obj-$(CONFIG_LIBCRC32C) += libcrc32c.o |
31 | obj-$(CONFIG_GENERIC_IOMAP) += iomap.o | 32 | obj-$(CONFIG_GENERIC_IOMAP) += iomap.o |
32 | 33 | ||
33 | obj-$(CONFIG_ZLIB_INFLATE) += zlib_inflate/ | 34 | obj-$(CONFIG_ZLIB_INFLATE) += zlib_inflate/ |
34 | obj-$(CONFIG_ZLIB_DEFLATE) += zlib_deflate/ | 35 | obj-$(CONFIG_ZLIB_DEFLATE) += zlib_deflate/ |
35 | obj-$(CONFIG_REED_SOLOMON) += reed_solomon/ | 36 | obj-$(CONFIG_REED_SOLOMON) += reed_solomon/ |
36 | 37 | ||
37 | hostprogs-y := gen_crc32table | 38 | hostprogs-y := gen_crc32table |
38 | clean-files := crc32table.h | 39 | clean-files := crc32table.h |
39 | 40 | ||
40 | $(obj)/crc32.o: $(obj)/crc32table.h | 41 | $(obj)/crc32.o: $(obj)/crc32table.h |
41 | 42 | ||
42 | quiet_cmd_crc32 = GEN $@ | 43 | quiet_cmd_crc32 = GEN $@ |
43 | cmd_crc32 = $< > $@ | 44 | cmd_crc32 = $< > $@ |
44 | 45 | ||
45 | $(obj)/crc32table.h: $(obj)/gen_crc32table | 46 | $(obj)/crc32table.h: $(obj)/gen_crc32table |
46 | $(call cmd,crc32) | 47 | $(call cmd,crc32) |
47 | 48 |
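
The new object is linked only when CONFIG_DEBUG_PREEMPT=y; in all other configurations smp_processor_id() must collapse to the raw variant at compile time. The generic wiring lives in include/linux/smp.h, which is changed earlier in this patch; that hunk is not reproduced here, but the intended mapping is roughly the following sketch (treat it as an approximation, not the authoritative hunk).

/* Sketch of the expected include/linux/smp.h wiring under this patch. */
#ifdef CONFIG_DEBUG_PREEMPT
  extern unsigned int debug_smp_processor_id(void);
# define smp_processor_id() debug_smp_processor_id()
#else
# define smp_processor_id() raw_smp_processor_id()
#endif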
lib/kernel_lock.c
1 | /* | 1 | /* |
2 | * lib/kernel_lock.c | 2 | * lib/kernel_lock.c |
3 | * | 3 | * |
4 | * This is the traditional BKL - big kernel lock. Largely | 4 | * This is the traditional BKL - big kernel lock. Largely |
5 | * relegated to obsolescense, but used by various less | 5 | * relegated to obsolescense, but used by various less |
6 | * important (or lazy) subsystems. | 6 | * important (or lazy) subsystems. |
7 | */ | 7 | */ |
8 | #include <linux/smp_lock.h> | 8 | #include <linux/smp_lock.h> |
9 | #include <linux/module.h> | 9 | #include <linux/module.h> |
10 | #include <linux/kallsyms.h> | 10 | #include <linux/kallsyms.h> |
11 | 11 | ||
12 | #if defined(CONFIG_PREEMPT) && defined(__smp_processor_id) && \ | ||
13 | defined(CONFIG_DEBUG_PREEMPT) | ||
14 | |||
15 | /* | ||
16 | * Debugging check. | ||
17 | */ | ||
18 | unsigned int smp_processor_id(void) | ||
19 | { | ||
20 | unsigned long preempt_count = preempt_count(); | ||
21 | int this_cpu = __smp_processor_id(); | ||
22 | cpumask_t this_mask; | ||
23 | |||
24 | if (likely(preempt_count)) | ||
25 | goto out; | ||
26 | |||
27 | if (irqs_disabled()) | ||
28 | goto out; | ||
29 | |||
30 | /* | ||
31 | * Kernel threads bound to a single CPU can safely use | ||
32 | * smp_processor_id(): | ||
33 | */ | ||
34 | this_mask = cpumask_of_cpu(this_cpu); | ||
35 | |||
36 | if (cpus_equal(current->cpus_allowed, this_mask)) | ||
37 | goto out; | ||
38 | |||
39 | /* | ||
40 | * It is valid to assume CPU-locality during early bootup: | ||
41 | */ | ||
42 | if (system_state != SYSTEM_RUNNING) | ||
43 | goto out; | ||
44 | |||
45 | /* | ||
46 | * Avoid recursion: | ||
47 | */ | ||
48 | preempt_disable(); | ||
49 | |||
50 | if (!printk_ratelimit()) | ||
51 | goto out_enable; | ||
52 | |||
53 | printk(KERN_ERR "BUG: using smp_processor_id() in preemptible [%08x] code: %s/%d\n", preempt_count(), current->comm, current->pid); | ||
54 | print_symbol("caller is %s\n", (long)__builtin_return_address(0)); | ||
55 | dump_stack(); | ||
56 | |||
57 | out_enable: | ||
58 | preempt_enable_no_resched(); | ||
59 | out: | ||
60 | return this_cpu; | ||
61 | } | ||
62 | |||
63 | EXPORT_SYMBOL(smp_processor_id); | ||
64 | |||
65 | #endif /* PREEMPT && __smp_processor_id && DEBUG_PREEMPT */ | ||
66 | |||
67 | #ifdef CONFIG_PREEMPT_BKL | 12 | #ifdef CONFIG_PREEMPT_BKL |
68 | /* | 13 | /* |
69 | * The 'big kernel semaphore' | 14 | * The 'big kernel semaphore' |
70 | * | 15 | * |
71 | * This mutex is taken and released recursively by lock_kernel() | 16 | * This mutex is taken and released recursively by lock_kernel() |
72 | * and unlock_kernel(). It is transparently dropped and reacquired | 17 | * and unlock_kernel(). It is transparently dropped and reacquired |
73 | * over schedule(). It is used to protect legacy code that hasn't | 18 | * over schedule(). It is used to protect legacy code that hasn't |
74 | * been migrated to a proper locking design yet. | 19 | * been migrated to a proper locking design yet. |
75 | * | 20 | * |
76 | * Note: code locked by this semaphore will only be serialized against | 21 | * Note: code locked by this semaphore will only be serialized against |
77 | * other code using the same locking facility. The code guarantees that | 22 | * other code using the same locking facility. The code guarantees that |
78 | * the task remains on the same CPU. | 23 | * the task remains on the same CPU. |
79 | * | 24 | * |
80 | * Don't use in new code. | 25 | * Don't use in new code. |
81 | */ | 26 | */ |
82 | static DECLARE_MUTEX(kernel_sem); | 27 | static DECLARE_MUTEX(kernel_sem); |
83 | 28 | ||
84 | /* | 29 | /* |
85 | * Re-acquire the kernel semaphore. | 30 | * Re-acquire the kernel semaphore. |
86 | * | 31 | * |
87 | * This function is called with preemption off. | 32 | * This function is called with preemption off. |
88 | * | 33 | * |
89 | * We are executing in schedule() so the code must be extremely careful | 34 | * We are executing in schedule() so the code must be extremely careful |
90 | * about recursion, both due to the down() and due to the enabling of | 35 | * about recursion, both due to the down() and due to the enabling of |
91 | * preemption. schedule() will re-check the preemption flag after | 36 | * preemption. schedule() will re-check the preemption flag after |
92 | * reacquiring the semaphore. | 37 | * reacquiring the semaphore. |
93 | */ | 38 | */ |
94 | int __lockfunc __reacquire_kernel_lock(void) | 39 | int __lockfunc __reacquire_kernel_lock(void) |
95 | { | 40 | { |
96 | struct task_struct *task = current; | 41 | struct task_struct *task = current; |
97 | int saved_lock_depth = task->lock_depth; | 42 | int saved_lock_depth = task->lock_depth; |
98 | 43 | ||
99 | BUG_ON(saved_lock_depth < 0); | 44 | BUG_ON(saved_lock_depth < 0); |
100 | 45 | ||
101 | task->lock_depth = -1; | 46 | task->lock_depth = -1; |
102 | preempt_enable_no_resched(); | 47 | preempt_enable_no_resched(); |
103 | 48 | ||
104 | down(&kernel_sem); | 49 | down(&kernel_sem); |
105 | 50 | ||
106 | preempt_disable(); | 51 | preempt_disable(); |
107 | task->lock_depth = saved_lock_depth; | 52 | task->lock_depth = saved_lock_depth; |
108 | 53 | ||
109 | return 0; | 54 | return 0; |
110 | } | 55 | } |
111 | 56 | ||
112 | void __lockfunc __release_kernel_lock(void) | 57 | void __lockfunc __release_kernel_lock(void) |
113 | { | 58 | { |
114 | up(&kernel_sem); | 59 | up(&kernel_sem); |
115 | } | 60 | } |
116 | 61 | ||
117 | /* | 62 | /* |
118 | * Getting the big kernel semaphore. | 63 | * Getting the big kernel semaphore. |
119 | */ | 64 | */ |
120 | void __lockfunc lock_kernel(void) | 65 | void __lockfunc lock_kernel(void) |
121 | { | 66 | { |
122 | struct task_struct *task = current; | 67 | struct task_struct *task = current; |
123 | int depth = task->lock_depth + 1; | 68 | int depth = task->lock_depth + 1; |
124 | 69 | ||
125 | if (likely(!depth)) | 70 | if (likely(!depth)) |
126 | /* | 71 | /* |
127 | * No recursion worries - we set up lock_depth _after_ | 72 | * No recursion worries - we set up lock_depth _after_ |
128 | */ | 73 | */ |
129 | down(&kernel_sem); | 74 | down(&kernel_sem); |
130 | 75 | ||
131 | task->lock_depth = depth; | 76 | task->lock_depth = depth; |
132 | } | 77 | } |
133 | 78 | ||
134 | void __lockfunc unlock_kernel(void) | 79 | void __lockfunc unlock_kernel(void) |
135 | { | 80 | { |
136 | struct task_struct *task = current; | 81 | struct task_struct *task = current; |
137 | 82 | ||
138 | BUG_ON(task->lock_depth < 0); | 83 | BUG_ON(task->lock_depth < 0); |
139 | 84 | ||
140 | if (likely(--task->lock_depth < 0)) | 85 | if (likely(--task->lock_depth < 0)) |
141 | up(&kernel_sem); | 86 | up(&kernel_sem); |
142 | } | 87 | } |
143 | 88 | ||
144 | #else | 89 | #else |
145 | 90 | ||
146 | /* | 91 | /* |
147 | * The 'big kernel lock' | 92 | * The 'big kernel lock' |
148 | * | 93 | * |
149 | * This spinlock is taken and released recursively by lock_kernel() | 94 | * This spinlock is taken and released recursively by lock_kernel() |
150 | * and unlock_kernel(). It is transparently dropped and reacquired | 95 | * and unlock_kernel(). It is transparently dropped and reacquired |
151 | * over schedule(). It is used to protect legacy code that hasn't | 96 | * over schedule(). It is used to protect legacy code that hasn't |
152 | * been migrated to a proper locking design yet. | 97 | * been migrated to a proper locking design yet. |
153 | * | 98 | * |
154 | * Don't use in new code. | 99 | * Don't use in new code. |
155 | */ | 100 | */ |
156 | static __cacheline_aligned_in_smp DEFINE_SPINLOCK(kernel_flag); | 101 | static __cacheline_aligned_in_smp DEFINE_SPINLOCK(kernel_flag); |
157 | 102 | ||
158 | 103 | ||
159 | /* | 104 | /* |
160 | * Acquire/release the underlying lock from the scheduler. | 105 | * Acquire/release the underlying lock from the scheduler. |
161 | * | 106 | * |
162 | * This is called with preemption disabled, and should | 107 | * This is called with preemption disabled, and should |
163 | * return an error value if it cannot get the lock and | 108 | * return an error value if it cannot get the lock and |
164 | * TIF_NEED_RESCHED gets set. | 109 | * TIF_NEED_RESCHED gets set. |
165 | * | 110 | * |
166 | * If it successfully gets the lock, it should increment | 111 | * If it successfully gets the lock, it should increment |
167 | * the preemption count like any spinlock does. | 112 | * the preemption count like any spinlock does. |
168 | * | 113 | * |
169 | * (This works on UP too - _raw_spin_trylock will never | 114 | * (This works on UP too - _raw_spin_trylock will never |
170 | * return false in that case) | 115 | * return false in that case) |
171 | */ | 116 | */ |
172 | int __lockfunc __reacquire_kernel_lock(void) | 117 | int __lockfunc __reacquire_kernel_lock(void) |
173 | { | 118 | { |
174 | while (!_raw_spin_trylock(&kernel_flag)) { | 119 | while (!_raw_spin_trylock(&kernel_flag)) { |
175 | if (test_thread_flag(TIF_NEED_RESCHED)) | 120 | if (test_thread_flag(TIF_NEED_RESCHED)) |
176 | return -EAGAIN; | 121 | return -EAGAIN; |
177 | cpu_relax(); | 122 | cpu_relax(); |
178 | } | 123 | } |
179 | preempt_disable(); | 124 | preempt_disable(); |
180 | return 0; | 125 | return 0; |
181 | } | 126 | } |
182 | 127 | ||
183 | void __lockfunc __release_kernel_lock(void) | 128 | void __lockfunc __release_kernel_lock(void) |
184 | { | 129 | { |
185 | _raw_spin_unlock(&kernel_flag); | 130 | _raw_spin_unlock(&kernel_flag); |
186 | preempt_enable_no_resched(); | 131 | preempt_enable_no_resched(); |
187 | } | 132 | } |
188 | 133 | ||
189 | /* | 134 | /* |
190 | * These are the BKL spinlocks - we try to be polite about preemption. | 135 | * These are the BKL spinlocks - we try to be polite about preemption. |
191 | * If SMP is not on (ie UP preemption), this all goes away because the | 136 | * If SMP is not on (ie UP preemption), this all goes away because the |
192 | * _raw_spin_trylock() will always succeed. | 137 | * _raw_spin_trylock() will always succeed. |
193 | */ | 138 | */ |
194 | #ifdef CONFIG_PREEMPT | 139 | #ifdef CONFIG_PREEMPT |
195 | static inline void __lock_kernel(void) | 140 | static inline void __lock_kernel(void) |
196 | { | 141 | { |
197 | preempt_disable(); | 142 | preempt_disable(); |
198 | if (unlikely(!_raw_spin_trylock(&kernel_flag))) { | 143 | if (unlikely(!_raw_spin_trylock(&kernel_flag))) { |
199 | /* | 144 | /* |
200 | * If preemption was disabled even before this | 145 | * If preemption was disabled even before this |
201 | * was called, there's nothing we can be polite | 146 | * was called, there's nothing we can be polite |
202 | * about - just spin. | 147 | * about - just spin. |
203 | */ | 148 | */ |
204 | if (preempt_count() > 1) { | 149 | if (preempt_count() > 1) { |
205 | _raw_spin_lock(&kernel_flag); | 150 | _raw_spin_lock(&kernel_flag); |
206 | return; | 151 | return; |
207 | } | 152 | } |
208 | 153 | ||
209 | /* | 154 | /* |
210 | * Otherwise, let's wait for the kernel lock | 155 | * Otherwise, let's wait for the kernel lock |
211 | * with preemption enabled.. | 156 | * with preemption enabled.. |
212 | */ | 157 | */ |
213 | do { | 158 | do { |
214 | preempt_enable(); | 159 | preempt_enable(); |
215 | while (spin_is_locked(&kernel_flag)) | 160 | while (spin_is_locked(&kernel_flag)) |
216 | cpu_relax(); | 161 | cpu_relax(); |
217 | preempt_disable(); | 162 | preempt_disable(); |
218 | } while (!_raw_spin_trylock(&kernel_flag)); | 163 | } while (!_raw_spin_trylock(&kernel_flag)); |
219 | } | 164 | } |
220 | } | 165 | } |
221 | 166 | ||
222 | #else | 167 | #else |
223 | 168 | ||
224 | /* | 169 | /* |
225 | * Non-preemption case - just get the spinlock | 170 | * Non-preemption case - just get the spinlock |
226 | */ | 171 | */ |
227 | static inline void __lock_kernel(void) | 172 | static inline void __lock_kernel(void) |
228 | { | 173 | { |
229 | _raw_spin_lock(&kernel_flag); | 174 | _raw_spin_lock(&kernel_flag); |
230 | } | 175 | } |
231 | #endif | 176 | #endif |
232 | 177 | ||
233 | static inline void __unlock_kernel(void) | 178 | static inline void __unlock_kernel(void) |
234 | { | 179 | { |
235 | _raw_spin_unlock(&kernel_flag); | 180 | _raw_spin_unlock(&kernel_flag); |
236 | preempt_enable(); | 181 | preempt_enable(); |
237 | } | 182 | } |
238 | 183 | ||
239 | /* | 184 | /* |
240 | * Getting the big kernel lock. | 185 | * Getting the big kernel lock. |
241 | * | 186 | * |
242 | * This cannot happen asynchronously, so we only need to | 187 | * This cannot happen asynchronously, so we only need to |
243 | * worry about other CPUs. | 188 | * worry about other CPUs. |
244 | */ | 189 | */ |
245 | void __lockfunc lock_kernel(void) | 190 | void __lockfunc lock_kernel(void) |
246 | { | 191 | { |
247 | int depth = current->lock_depth+1; | 192 | int depth = current->lock_depth+1; |
248 | if (likely(!depth)) | 193 | if (likely(!depth)) |
249 | __lock_kernel(); | 194 | __lock_kernel(); |
250 | current->lock_depth = depth; | 195 | current->lock_depth = depth; |
251 | } | 196 | } |
252 | 197 | ||
253 | void __lockfunc unlock_kernel(void) | 198 | void __lockfunc unlock_kernel(void) |
254 | { | 199 | { |
255 | BUG_ON(current->lock_depth < 0); | 200 | BUG_ON(current->lock_depth < 0); |
256 | if (likely(--current->lock_depth < 0)) | 201 | if (likely(--current->lock_depth < 0)) |
257 | __unlock_kernel(); | 202 | __unlock_kernel(); |
258 | } | 203 | } |
259 | 204 | ||
260 | #endif | 205 | #endif |
261 | 206 | ||
262 | EXPORT_SYMBOL(lock_kernel); | 207 | EXPORT_SYMBOL(lock_kernel); |
263 | EXPORT_SYMBOL(unlock_kernel); | 208 | EXPORT_SYMBOL(unlock_kernel); |
264 | 209 | ||
265 | 210 |
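
Both BKL flavours above rely on current->lock_depth to make lock_kernel()/unlock_kernel() re-entrant: only the transition from depth -1 to 0 (and back) touches the underlying semaphore or spinlock. A hypothetical legacy call chain, purely for illustration and not part of this patch (legacy_helper and legacy_entry_point are made-up names):

#include <linux/smp_lock.h>

static void legacy_helper(void)
{
        lock_kernel();          /* depth 0 -> 1: already held, no re-acquire */
        /* ... touch BKL-protected legacy state ... */
        unlock_kernel();        /* depth 1 -> 0: still held by the caller */
}

static void legacy_entry_point(void)
{
        lock_kernel();          /* depth -1 -> 0: actually takes the BKL */
        legacy_helper();
        unlock_kernel();        /* depth 0 -> -1: actually drops the BKL */
}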
lib/smp_processor_id.c
File was created | 1 | /* | |
2 | * lib/smp_processor_id.c | ||
3 | * | ||
4 | * DEBUG_PREEMPT variant of smp_processor_id(). | ||
5 | */ | ||
6 | #include <linux/module.h> | ||
7 | #include <linux/kallsyms.h> | ||
8 | |||
9 | unsigned int debug_smp_processor_id(void) | ||
10 | { | ||
11 | unsigned long preempt_count = preempt_count(); | ||
12 | int this_cpu = raw_smp_processor_id(); | ||
13 | cpumask_t this_mask; | ||
14 | |||
15 | if (likely(preempt_count)) | ||
16 | goto out; | ||
17 | |||
18 | if (irqs_disabled()) | ||
19 | goto out; | ||
20 | |||
21 | /* | ||
22 | * Kernel threads bound to a single CPU can safely use | ||
23 | * smp_processor_id(): | ||
24 | */ | ||
25 | this_mask = cpumask_of_cpu(this_cpu); | ||
26 | |||
27 | if (cpus_equal(current->cpus_allowed, this_mask)) | ||
28 | goto out; | ||
29 | |||
30 | /* | ||
31 | * It is valid to assume CPU-locality during early bootup: | ||
32 | */ | ||
33 | if (system_state != SYSTEM_RUNNING) | ||
34 | goto out; | ||
35 | |||
36 | /* | ||
37 | * Avoid recursion: | ||
38 | */ | ||
39 | preempt_disable(); | ||
40 | |||
41 | if (!printk_ratelimit()) | ||
42 | goto out_enable; | ||
43 | |||
44 | printk(KERN_ERR "BUG: using smp_processor_id() in preemptible [%08x] code: %s/%d\n", preempt_count(), current->comm, current->pid); | ||
45 | print_symbol("caller is %s\n", (long)__builtin_return_address(0)); | ||
46 | dump_stack(); | ||
47 | |||
48 | out_enable: | ||
49 | preempt_enable_no_resched(); | ||
50 | out: | ||
51 | return this_cpu; | ||
52 | } | ||
53 | |||
54 | EXPORT_SYMBOL(debug_smp_processor_id); | ||
55 | |||
56 |
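
To make the checks above concrete, here is a hypothetical snippet (not part of this patch; cpu_id_examples is an invented name) showing which callers debug_smp_processor_id() warns about on a CONFIG_PREEMPT + CONFIG_DEBUG_PREEMPT kernel, where smp_processor_id() is expected to map to the debug variant, and which it lets through silently:

#include <linux/smp.h>
#include <linux/preempt.h>

static void cpu_id_examples(void)
{
        int cpu;

        /* Preemptible context, IRQs on, not bound to a single CPU:
         * triggers the rate-limited "BUG: using smp_processor_id()
         * in preemptible code" report above. */
        cpu = smp_processor_id();

        /* preempt_count() != 0, so the first check short-circuits. */
        preempt_disable();
        cpu = smp_processor_id();
        preempt_enable();

        /* Explicitly unchecked: the caller accepts that the value may
         * be stale by the time it is used. */
        cpu = raw_smp_processor_id();

        (void)cpu;
}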