Commit 5ab551d662396f8437ec5aba12210b7a67eb492b

Authored by Linus Torvalds

Merge branch 'sched-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull scheduler fixes from Ingo Molnar:
 "Misc fixes: group scheduling corner case fix, two deadline scheduler
  fixes, effective_load() overflow fix, nested sleep fix, 6144 CPUs
  system fix"

* 'sched-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  sched/fair: Fix RCU stall upon -ENOMEM in sched_create_group()
  sched/deadline: Avoid double-accounting in case of missed deadlines
  sched/deadline: Fix migration of SCHED_DEADLINE tasks
  sched: Fix odd values in effective_load() calculations
  sched, fanotify: Deal with nested sleeps
  sched: Fix KMALLOC_MAX_SIZE overflow during cpumask allocation

Showing 4 changed files

fs/notify/fanotify/fanotify_user.c
... ... @@ -259,16 +259,15 @@
259 259 struct fsnotify_event *kevent;
260 260 char __user *start;
261 261 int ret;
262   - DEFINE_WAIT(wait);
  262 + DEFINE_WAIT_FUNC(wait, woken_wake_function);
263 263  
264 264 start = buf;
265 265 group = file->private_data;
266 266  
267 267 pr_debug("%s: group=%p\n", __func__, group);
268 268  
  269 + add_wait_queue(&group->notification_waitq, &wait);
269 270 while (1) {
270   - prepare_to_wait(&group->notification_waitq, &wait, TASK_INTERRUPTIBLE);
271   -
272 271 mutex_lock(&group->notification_mutex);
273 272 kevent = get_one_event(group, count);
274 273 mutex_unlock(&group->notification_mutex);
... ... @@ -289,7 +288,8 @@
289 288  
290 289 if (start != buf)
291 290 break;
292   - schedule();
  291 +
  292 + wait_woken(&wait, TASK_INTERRUPTIBLE, MAX_SCHEDULE_TIMEOUT);
293 293 continue;
294 294 }
295 295  
296 296  
... ... @@ -318,8 +318,8 @@
318 318 buf += ret;
319 319 count -= ret;
320 320 }
  321 + remove_wait_queue(&group->notification_waitq, &wait);
321 322  
322   - finish_wait(&group->notification_waitq, &wait);
323 323 if (start != buf && ret != -EFAULT)
324 324 ret = buf - start;
325 325 return ret;
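
The fanotify hunks above replace the prepare_to_wait()/schedule()/finish_wait() loop with the wait_woken() pattern: the task is queued once, stays in TASK_RUNNING while it takes the notification mutex and copies events out, and only wait_woken() itself sleeps, so the nested-sleep debugging warning ("do not call blocking ops when !TASK_RUNNING") no longer fires. Below is a minimal sketch of that pattern, assuming a kernel-module context; my_wait_for_event() and cond() are illustrative names, not part of the patch.

#include <linux/wait.h>
#include <linux/sched.h>
#include <linux/errno.h>

static int my_wait_for_event(wait_queue_head_t *wq, bool (*cond)(void))
{
        DEFINE_WAIT_FUNC(wait, woken_wake_function);
        int ret = 0;

        /* Queue once, outside the loop: the task remains TASK_RUNNING
         * here, so taking mutexes in the loop body is fine. */
        add_wait_queue(wq, &wait);
        while (!cond()) {
                if (signal_pending(current)) {
                        ret = -ERESTARTSYS;
                        break;
                }
                /* Only this call sleeps.  A wakeup racing with the
                 * condition check is not lost: woken_wake_function()
                 * records it in the wait entry, and wait_woken()
                 * checks that flag before scheduling. */
                wait_woken(&wait, TASK_INTERRUPTIBLE, MAX_SCHEDULE_TIMEOUT);
        }
        remove_wait_queue(wq, &wait);
        return ret;
}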
kernel/sched/core.c
... ... @@ -7113,9 +7113,6 @@
7113 7113 #ifdef CONFIG_RT_GROUP_SCHED
7114 7114 alloc_size += 2 * nr_cpu_ids * sizeof(void **);
7115 7115 #endif
7116   -#ifdef CONFIG_CPUMASK_OFFSTACK
7117   - alloc_size += num_possible_cpus() * cpumask_size();
7118   -#endif
7119 7116 if (alloc_size) {
7120 7117 ptr = (unsigned long)kzalloc(alloc_size, GFP_NOWAIT);
7121 7118  
7122 7119  
7123 7120  
... ... @@ -7135,13 +7132,13 @@
7135 7132 ptr += nr_cpu_ids * sizeof(void **);
7136 7133  
7137 7134 #endif /* CONFIG_RT_GROUP_SCHED */
  7135 + }
7138 7136 #ifdef CONFIG_CPUMASK_OFFSTACK
7139   - for_each_possible_cpu(i) {
7140   - per_cpu(load_balance_mask, i) = (void *)ptr;
7141   - ptr += cpumask_size();
7142   - }
7143   -#endif /* CONFIG_CPUMASK_OFFSTACK */
  7137 + for_each_possible_cpu(i) {
  7138 + per_cpu(load_balance_mask, i) = (cpumask_var_t)kzalloc_node(
  7139 + cpumask_size(), GFP_KERNEL, cpu_to_node(i));
7144 7140 }
  7141 +#endif /* CONFIG_CPUMASK_OFFSTACK */
7145 7142  
7146 7143 init_rt_bandwidth(&def_rt_bandwidth,
7147 7144 global_rt_period(), global_rt_runtime());
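
With CONFIG_CPUMASK_OFFSTACK the removed lines folded every CPU's load_balance_mask into the single kzalloc() in sched_init(), and on a 6144-CPU system that one allocation exceeds KMALLOC_MAX_SIZE; the replacement allocates each mask separately with kzalloc_node() on the owning CPU's node. A back-of-the-envelope check follows (standalone userspace C; the 4 MB KMALLOC_MAX_SIZE is an assumed typical value, the real limit depends on the slab allocator and page size).

#include <stdio.h>

int main(void)
{
        unsigned long nr_cpus = 6144;
        unsigned long mask_bytes = nr_cpus / 8;          /* cpumask_size(): 768 bytes     */
        unsigned long total = nr_cpus * mask_bytes;      /* one mask per possible CPU     */
        unsigned long kmalloc_max = 4UL << 20;           /* assumed 4 MB KMALLOC_MAX_SIZE */

        printf("combined cpumask allocation: %lu bytes\n", total);   /* 4718592 (~4.5 MB) */
        printf("exceeds assumed KMALLOC_MAX_SIZE of %lu? %s\n",
               kmalloc_max, total > kmalloc_max ? "yes" : "no");     /* yes */
        return 0;
}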
kernel/sched/deadline.c
... ... @@ -570,24 +570,7 @@
570 570 static
571 571 int dl_runtime_exceeded(struct rq *rq, struct sched_dl_entity *dl_se)
572 572 {
573   - int dmiss = dl_time_before(dl_se->deadline, rq_clock(rq));
574   - int rorun = dl_se->runtime <= 0;
575   -
576   - if (!rorun && !dmiss)
577   - return 0;
578   -
579   - /*
580   - * If we are beyond our current deadline and we are still
581   - * executing, then we have already used some of the runtime of
582   - * the next instance. Thus, if we do not account that, we are
583   - * stealing bandwidth from the system at each deadline miss!
584   - */
585   - if (dmiss) {
586   - dl_se->runtime = rorun ? dl_se->runtime : 0;
587   - dl_se->runtime -= rq_clock(rq) - dl_se->deadline;
588   - }
589   -
590   - return 1;
  573 + return (dl_se->runtime <= 0);
591 574 }
592 575  
593 576 extern bool sched_rt_bandwidth_account(struct rt_rq *rt_rq);
594 577  
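
The removed dl_runtime_exceeded() branch subtracted the deadline overshoot (rq_clock(rq) - dl_se->deadline) from a runtime that the regular runtime accounting keeps decrementing while the task runs past its deadline, so the overrun could be charged twice, which appears to be the double-accounting the subject line refers to; the check now only reports whether the runtime is exhausted. A rough illustration of the removed arithmetic with made-up numbers (standalone userspace C, not kernel code):

#include <stdio.h>

int main(void)
{
        /* Made-up numbers, in nanoseconds. */
        long long runtime  = -200000;      /* already negative: the overrun was charged */
        long long now      = 10200000;     /* rq clock                                  */
        long long deadline = 10000000;     /* missed by 200 us                          */

        /* The removed branch charged the overshoot again on top of
         * the already-depleted runtime. */
        long long old_runtime = runtime - (now - deadline);

        printf("old logic: %lld ns (overrun charged twice)\n", old_runtime);  /* -400000 */
        printf("new logic: %lld ns\n", runtime);                              /* -200000 */
        return 0;
}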
... ... @@ -826,10 +809,10 @@
826 809 * parameters of the task might need updating. Otherwise,
827 810 * we want a replenishment of its runtime.
828 811 */
829   - if (!dl_se->dl_new && flags & ENQUEUE_REPLENISH)
830   - replenish_dl_entity(dl_se, pi_se);
831   - else
  812 + if (dl_se->dl_new || flags & ENQUEUE_WAKEUP)
832 813 update_dl_entity(dl_se, pi_se);
  814 + else if (flags & ENQUEUE_REPLENISH)
  815 + replenish_dl_entity(dl_se, pi_se);
833 816  
834 817 __enqueue_dl_entity(dl_se);
835 818 }
kernel/sched/fair.c
... ... @@ -4005,6 +4005,10 @@
4005 4005  
4006 4006 static void destroy_cfs_bandwidth(struct cfs_bandwidth *cfs_b)
4007 4007 {
  4008 + /* init_cfs_bandwidth() was not called */
  4009 + if (!cfs_b->throttled_cfs_rq.next)
  4010 + return;
  4011 +
4008 4012 hrtimer_cancel(&cfs_b->period_timer);
4009 4013 hrtimer_cancel(&cfs_b->slack_timer);
4010 4014 }
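
The new guard is the RCU-stall fix from the subject list: when sched_create_group() fails with -ENOMEM, the cleanup path reaches destroy_cfs_bandwidth() on a cfs_bandwidth whose init_cfs_bandwidth() never ran, and cancelling hrtimers that were never set up can hang there. Because the task group is kzalloc()'d, an uninitialized throttled_cfs_rq list head still has a NULL ->next pointer, which the guard uses as an "init ran" sentinel. A minimal userspace sketch of that sentinel idea; the struct and function names mirror the kernel's but are illustrative only.

#include <stdio.h>
#include <stdlib.h>

/* Userspace stand-ins for the kernel's list head and INIT_LIST_HEAD(). */
struct list_head { struct list_head *next, *prev; };

static void init_list_head(struct list_head *h)
{
        h->next = h;
        h->prev = h;
}

struct bandwidth {
        struct list_head throttled_rq;
        /* the period/slack timers would live here in the kernel struct */
};

static void destroy_bandwidth(struct bandwidth *b)
{
        /* Zero-filled and never initialized: nothing was armed, bail out. */
        if (!b->throttled_rq.next) {
                puts("init never ran: skipping timer teardown");
                return;
        }
        puts("cancelling timers...");
}

int main(void)
{
        struct bandwidth *b = calloc(1, sizeof(*b));   /* like kzalloc() */

        if (!b)
                return 1;
        destroy_bandwidth(b);                  /* error path before init: safe */
        init_list_head(&b->throttled_rq);
        destroy_bandwidth(b);                  /* normal teardown after init   */
        free(b);
        return 0;
}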
... ... @@ -4424,7 +4428,7 @@
4424 4428 * wl = S * s'_i; see (2)
4425 4429 */
4426 4430 if (W > 0 && w < W)
4427   - wl = (w * tg->shares) / W;
  4431 + wl = (w * (long)tg->shares) / W;
4428 4432 else
4429 4433 wl = tg->shares;
4430 4434
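
In effective_load(), w is a signed long while tg->shares is an unsigned long, so with a negative w the multiplication is carried out in unsigned arithmetic and wl comes out as a huge bogus value; casting shares to long keeps the whole expression signed. A standalone demonstration of that integer promotion (plain C with illustrative numbers):

#include <stdio.h>

int main(void)
{
        long w = -1024;                /* a negative weight delta        */
        unsigned long shares = 2048;   /* tg->shares is an unsigned long */
        long W = 4096;

        /* Mixed signed/unsigned arithmetic: w is converted to unsigned long,
         * so the product (and the final result) is a huge positive number. */
        long bad = (w * shares) / W;

        /* Casting shares to long keeps the whole computation signed. */
        long good = (w * (long)shares) / W;

        printf("without cast: %ld\n", bad);    /* 4503599627369984 on LP64 */
        printf("with cast:    %ld\n", good);   /* -512, as intended        */
        return 0;
}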