Eric Lee / smarc-ti-linux-kernel

1

/*

1

/*

2

* Deadline Scheduling Class (SCHED_DEADLINE)

2

* Deadline Scheduling Class (SCHED_DEADLINE)

3

*

3

*

4

* Earliest Deadline First (EDF) + Constant Bandwidth Server (CBS).

4

* Earliest Deadline First (EDF) + Constant Bandwidth Server (CBS).

5

*

5

*

6

* Tasks that periodically execute their instances for less than their

7

* runtime won't miss any of their deadlines.

7

* runtime won't miss any of their deadlines.

8

* Tasks that are not periodic or sporadic or that try to execute more

9

* than their reserved bandwidth will be slowed down (and may potentially

9

* than their reserved bandwidth will be slowed down (and may potentially

10

* miss some of their deadlines), and won't affect any other task.

10

* miss some of their deadlines), and won't affect any other task.

11

*

11

*

12

13

* Juri Lelli <juri.lelli@gmail.com>,

13

* Juri Lelli <juri.lelli@gmail.com>,

14

* Michael Trimarchi <michael@amarulasolutions.com>,

14

* Michael Trimarchi <michael@amarulasolutions.com>,

15

* Fabio Checconi <fchecconi@gmail.com>

15

* Fabio Checconi <fchecconi@gmail.com>

16

*/

16

*/

17

#include "sched.h"

17

#include "sched.h"

18

19

#include <linux/slab.h>

19

#include <linux/slab.h>

20

21

/*
 * File-scope dl_bandwidth instance. init_dl_bw() holds its dl_runtime_lock
 * while it samples the global RT period/runtime.
 */
struct dl_bandwidth def_dl_bandwidth;

22

23

static inline struct task_struct *dl_task_of(struct sched_dl_entity *dl_se)

23

static inline struct task_struct *dl_task_of(struct sched_dl_entity *dl_se)

24

{

24

{

25

return container_of(dl_se, struct task_struct, dl);

25

return container_of(dl_se, struct task_struct, dl);

26

}

26

}

27

28

static inline struct rq *rq_of_dl_rq(struct dl_rq *dl_rq)

28

static inline struct rq *rq_of_dl_rq(struct dl_rq *dl_rq)

29

{

29

{

30

return container_of(dl_rq, struct rq, dl);

30

return container_of(dl_rq, struct rq, dl);

31

}

31

}

32

33

static inline struct dl_rq *dl_rq_of_se(struct sched_dl_entity *dl_se)

33

static inline struct dl_rq *dl_rq_of_se(struct sched_dl_entity *dl_se)

34

{

34

{

35

struct task_struct *p = dl_task_of(dl_se);

35

struct task_struct *p = dl_task_of(dl_se);

36

struct rq *rq = task_rq(p);

36

struct rq *rq = task_rq(p);

37

38

return &rq->dl;

38

return &rq->dl;

39

}

39

}

40

41

static inline int on_dl_rq(struct sched_dl_entity *dl_se)

41

static inline int on_dl_rq(struct sched_dl_entity *dl_se)

42

{

42

{

43

return !RB_EMPTY_NODE(&dl_se->rb_node);

43

return !RB_EMPTY_NODE(&dl_se->rb_node);

44

}

44

}

45

46

static inline int is_leftmost(struct task_struct *p, struct dl_rq *dl_rq)

46

static inline int is_leftmost(struct task_struct *p, struct dl_rq *dl_rq)

47

{

47

{

48

struct sched_dl_entity *dl_se = &p->dl;

48

struct sched_dl_entity *dl_se = &p->dl;

49

50

return dl_rq->rb_leftmost == &dl_se->rb_node;

50

return dl_rq->rb_leftmost == &dl_se->rb_node;

51

}

51

}

52

53

/*
 * Initialise a dl_bandwidth descriptor: set up its runtime lock and
 * record the given @period and @runtime.
 */
void init_dl_bandwidth(struct dl_bandwidth *dl_b, u64 period, u64 runtime)
{
	raw_spin_lock_init(&dl_b->dl_runtime_lock);
	dl_b->dl_period = period;
	dl_b->dl_runtime = runtime;
}

59

60

extern unsigned long to_ratio(u64 period, u64 runtime);

60

extern unsigned long to_ratio(u64 period, u64 runtime);

61

62

void init_dl_bw(struct dl_bw *dl_b)

62

void init_dl_bw(struct dl_bw *dl_b)

63

{

63

{

64

raw_spin_lock_init(&dl_b->lock);

64

raw_spin_lock_init(&dl_b->lock);

65

raw_spin_lock(&def_dl_bandwidth.dl_runtime_lock);

65

raw_spin_lock(&def_dl_bandwidth.dl_runtime_lock);

66

if (global_rt_runtime() == RUNTIME_INF)

66

if (global_rt_runtime() == RUNTIME_INF)

67

dl_b->bw = -1;

67

dl_b->bw = -1;

68

else

68

else

69

dl_b->bw = to_ratio(global_rt_period(), global_rt_runtime());

69

dl_b->bw = to_ratio(global_rt_period(), global_rt_runtime());

70

raw_spin_unlock(&def_dl_bandwidth.dl_runtime_lock);

70

raw_spin_unlock(&def_dl_bandwidth.dl_runtime_lock);

71

dl_b->total_bw = 0;

71

dl_b->total_bw = 0;

72

}

72

}

73

74

void init_dl_rq(struct dl_rq *dl_rq, struct rq *rq)

74

void init_dl_rq(struct dl_rq *dl_rq, struct rq *rq)

75

{

75

{

76

dl_rq->rb_root = RB_ROOT;

76

dl_rq->rb_root = RB_ROOT;

77

78

#ifdef CONFIG_SMP

78

#ifdef CONFIG_SMP

79

/* zero means no -deadline tasks */

79

/* zero means no -deadline tasks */

80

dl_rq->earliest_dl.curr = dl_rq->earliest_dl.next = 0;

80

dl_rq->earliest_dl.curr = dl_rq->earliest_dl.next = 0;

81

82

dl_rq->dl_nr_migratory = 0;

82

dl_rq->dl_nr_migratory = 0;

83

dl_rq->overloaded = 0;

83

dl_rq->overloaded = 0;

84

dl_rq->pushable_dl_tasks_root = RB_ROOT;

84

dl_rq->pushable_dl_tasks_root = RB_ROOT;

85

#else

85

#else

86

init_dl_bw(&dl_rq->dl_bw);

86

init_dl_bw(&dl_rq->dl_bw);

87

#endif

87

#endif

88

}

88

}

89

90

#ifdef CONFIG_SMP

90

#ifdef CONFIG_SMP

91

92

static inline int dl_overloaded(struct rq *rq)

92

static inline int dl_overloaded(struct rq *rq)

93

{

93

{

94

return atomic_read(&rq->rd->dlo_count);

94

return atomic_read(&rq->rd->dlo_count);

95

}

95

}

96

97

static inline void dl_set_overload(struct rq *rq)

97

static inline void dl_set_overload(struct rq *rq)

98

{

98

{

99

if (!rq->online)

99

if (!rq->online)

100

return;

100

return;

101

102

cpumask_set_cpu(rq->cpu, rq->rd->dlo_mask);

102

cpumask_set_cpu(rq->cpu, rq->rd->dlo_mask);

103

/*

103

/*

104

* Must be visible before the overload count is

104

* Must be visible before the overload count is

105

* set (as in sched_rt.c).

105

* set (as in sched_rt.c).

106

*

106

*

107

* Matched by the barrier in pull_dl_task().

107

* Matched by the barrier in pull_dl_task().

108

*/

108

*/

109

smp_wmb();

109

smp_wmb();

110

atomic_inc(&rq->rd->dlo_count);

110

atomic_inc(&rq->rd->dlo_count);

111

}

111

}

112

113

static inline void dl_clear_overload(struct rq *rq)

113

static inline void dl_clear_overload(struct rq *rq)

114

{

114

{

115

if (!rq->online)

115

if (!rq->online)

116

return;

116

return;

117

118

atomic_dec(&rq->rd->dlo_count);

118

atomic_dec(&rq->rd->dlo_count);

119

cpumask_clear_cpu(rq->cpu, rq->rd->dlo_mask);

119

cpumask_clear_cpu(rq->cpu, rq->rd->dlo_mask);

120

}

120

}

121

122

static void update_dl_migration(struct dl_rq *dl_rq)

122

static void update_dl_migration(struct dl_rq *dl_rq)

123

{

123

{

124

if (dl_rq->dl_nr_migratory && dl_rq->dl_nr_running > 1) {

124

if (dl_rq->dl_nr_migratory && dl_rq->dl_nr_running > 1) {

125

if (!dl_rq->overloaded) {

125

if (!dl_rq->overloaded) {

126

dl_set_overload(rq_of_dl_rq(dl_rq));

126

dl_set_overload(rq_of_dl_rq(dl_rq));

127

dl_rq->overloaded = 1;

127

dl_rq->overloaded = 1;

128

}

128

}

129

} else if (dl_rq->overloaded) {

129

} else if (dl_rq->overloaded) {

130

dl_clear_overload(rq_of_dl_rq(dl_rq));

130

dl_clear_overload(rq_of_dl_rq(dl_rq));

131

dl_rq->overloaded = 0;

131

dl_rq->overloaded = 0;

132

}

132

}

133

}

133

}

134

135

static void inc_dl_migration(struct sched_dl_entity *dl_se, struct dl_rq *dl_rq)

135

static void inc_dl_migration(struct sched_dl_entity *dl_se, struct dl_rq *dl_rq)

136

{

136

{

137

struct task_struct *p = dl_task_of(dl_se);

137

struct task_struct *p = dl_task_of(dl_se);

138

139

if (p->nr_cpus_allowed > 1)

139

if (p->nr_cpus_allowed > 1)

140

dl_rq->dl_nr_migratory++;

140

dl_rq->dl_nr_migratory++;

141

142

update_dl_migration(dl_rq);

142

update_dl_migration(dl_rq);

143

}

143

}

144

145

static void dec_dl_migration(struct sched_dl_entity *dl_se, struct dl_rq *dl_rq)

145

static void dec_dl_migration(struct sched_dl_entity *dl_se, struct dl_rq *dl_rq)

146

{

146

{

147

struct task_struct *p = dl_task_of(dl_se);

147

struct task_struct *p = dl_task_of(dl_se);

148

149

if (p->nr_cpus_allowed > 1)

149

if (p->nr_cpus_allowed > 1)

150

dl_rq->dl_nr_migratory--;

150

dl_rq->dl_nr_migratory--;

151

152

update_dl_migration(dl_rq);

152

update_dl_migration(dl_rq);

153

}

153

}

154

155

/*

155

/*

156

* The list of pushable -deadline task is not a plist, like in

156

* The list of pushable -deadline task is not a plist, like in

157

* sched_rt.c, it is an rb-tree with tasks ordered by deadline.

157

* sched_rt.c, it is an rb-tree with tasks ordered by deadline.

158

*/

158

*/

159

static void enqueue_pushable_dl_task(struct rq *rq, struct task_struct *p)

159

static void enqueue_pushable_dl_task(struct rq *rq, struct task_struct *p)

160

{

160

{

161

struct dl_rq *dl_rq = &rq->dl;

161

struct dl_rq *dl_rq = &rq->dl;

162

struct rb_node **link = &dl_rq->pushable_dl_tasks_root.rb_node;

162

struct rb_node **link = &dl_rq->pushable_dl_tasks_root.rb_node;

163

struct rb_node *parent = NULL;

163

struct rb_node *parent = NULL;

164

struct task_struct *entry;

164

struct task_struct *entry;

165

int leftmost = 1;

165

int leftmost = 1;

166

167

BUG_ON(!RB_EMPTY_NODE(&p->pushable_dl_tasks));

167

BUG_ON(!RB_EMPTY_NODE(&p->pushable_dl_tasks));

168

169

while (*link) {

169

while (*link) {

170

parent = *link;

170

parent = *link;

171

entry = rb_entry(parent, struct task_struct,

171

entry = rb_entry(parent, struct task_struct,

172

pushable_dl_tasks);

172

pushable_dl_tasks);

173

if (dl_entity_preempt(&p->dl, &entry->dl))

173

if (dl_entity_preempt(&p->dl, &entry->dl))

174

link = &parent->rb_left;

174

link = &parent->rb_left;

175

else {

175

else {

176

link = &parent->rb_right;

176

link = &parent->rb_right;

177

leftmost = 0;

177

leftmost = 0;

178

}

178

}

179

}

179

}

180

181

if (leftmost)

181

if (leftmost)

182

dl_rq->pushable_dl_tasks_leftmost = &p->pushable_dl_tasks;

182

dl_rq->pushable_dl_tasks_leftmost = &p->pushable_dl_tasks;

183

184

rb_link_node(&p->pushable_dl_tasks, parent, link);

184

rb_link_node(&p->pushable_dl_tasks, parent, link);

185

rb_insert_color(&p->pushable_dl_tasks, &dl_rq->pushable_dl_tasks_root);

185

rb_insert_color(&p->pushable_dl_tasks, &dl_rq->pushable_dl_tasks_root);

186

}

186

}

187

188

static void dequeue_pushable_dl_task(struct rq *rq, struct task_struct *p)

188

static void dequeue_pushable_dl_task(struct rq *rq, struct task_struct *p)

189

{

189

{

190

struct dl_rq *dl_rq = &rq->dl;

190

struct dl_rq *dl_rq = &rq->dl;

191

192

if (RB_EMPTY_NODE(&p->pushable_dl_tasks))

192

if (RB_EMPTY_NODE(&p->pushable_dl_tasks))

193

return;

193

return;

194

195

if (dl_rq->pushable_dl_tasks_leftmost == &p->pushable_dl_tasks) {

195

if (dl_rq->pushable_dl_tasks_leftmost == &p->pushable_dl_tasks) {

196

struct rb_node *next_node;

196

struct rb_node *next_node;

197

198

next_node = rb_next(&p->pushable_dl_tasks);

198

next_node = rb_next(&p->pushable_dl_tasks);

199

dl_rq->pushable_dl_tasks_leftmost = next_node;

199

dl_rq->pushable_dl_tasks_leftmost = next_node;

200

}

200

}

201

202

rb_erase(&p->pushable_dl_tasks, &dl_rq->pushable_dl_tasks_root);

202

rb_erase(&p->pushable_dl_tasks, &dl_rq->pushable_dl_tasks_root);

203

RB_CLEAR_NODE(&p->pushable_dl_tasks);

203

RB_CLEAR_NODE(&p->pushable_dl_tasks);

204

}

204

}

205

206

static inline int has_pushable_dl_tasks(struct rq *rq)

206

static inline int has_pushable_dl_tasks(struct rq *rq)

207

{

207

{

208

return !RB_EMPTY_ROOT(&rq->dl.pushable_dl_tasks_root);

208

return !RB_EMPTY_ROOT(&rq->dl.pushable_dl_tasks_root);

209

}

209

}

210

211

static int push_dl_task(struct rq *rq);

211

static int push_dl_task(struct rq *rq);

212

213

/*
 * SMP variant: a pull is considered needed exactly when @prev is a
 * -deadline task. @rq is not consulted.
 */
static inline bool need_pull_dl_task(struct rq *rq, struct task_struct *prev)
{
	return dl_task(prev);
}

217

218

static inline void set_post_schedule(struct rq *rq)

218

static inline void set_post_schedule(struct rq *rq)

219

{

219

{

220

rq->post_schedule = has_pushable_dl_tasks(rq);

220

rq->post_schedule = has_pushable_dl_tasks(rq);

221

}

221

}

222

223

#else

223

#else

224

225

/* !CONFIG_SMP stub: there is no pushable tree to maintain. */
static inline
void enqueue_pushable_dl_task(struct rq *rq, struct task_struct *p)
{
}

229

230

/* !CONFIG_SMP stub: there is no pushable tree to maintain. */
static inline
void dequeue_pushable_dl_task(struct rq *rq, struct task_struct *p)
{
}

234

235

/* !CONFIG_SMP stub: no migration accounting is performed. */
static inline
void inc_dl_migration(struct sched_dl_entity *dl_se, struct dl_rq *dl_rq)
{
}

239

240

/* !CONFIG_SMP stub: no migration accounting is performed. */
static inline
void dec_dl_migration(struct sched_dl_entity *dl_se, struct dl_rq *dl_rq)
{
}

244

245

/* !CONFIG_SMP stub: pulling is never needed; always returns false. */
static inline bool need_pull_dl_task(struct rq *rq, struct task_struct *prev)
{
	return false;
}

249

250

/* !CONFIG_SMP stub: nothing can be pulled; always returns 0. */
static inline int pull_dl_task(struct rq *rq)
{
	return 0;
}

254

255

/* !CONFIG_SMP stub: no post-schedule work is ever requested. */
static inline void set_post_schedule(struct rq *rq)
{
}

258

#endif /* CONFIG_SMP */

258

#endif /* CONFIG_SMP */

259

260

static void enqueue_task_dl(struct rq *rq, struct task_struct *p, int flags);

260

static void enqueue_task_dl(struct rq *rq, struct task_struct *p, int flags);

261

static void __dequeue_task_dl(struct rq *rq, struct task_struct *p, int flags);

261

static void __dequeue_task_dl(struct rq *rq, struct task_struct *p, int flags);

262

static void check_preempt_curr_dl(struct rq *rq, struct task_struct *p,

262

static void check_preempt_curr_dl(struct rq *rq, struct task_struct *p,

263

int flags);

263

int flags);

264

265

/*

265

/*

266

* We are being explicitly informed that a new instance is starting,

266

* We are being explicitly informed that a new instance is starting,

267

* and this means that:

267

* and this means that:

268

* - the absolute deadline of the entity has to be placed at

268

* - the absolute deadline of the entity has to be placed at

269

* current time + relative deadline;

269

* current time + relative deadline;

270

* - the runtime of the entity has to be set to the maximum value.

270

* - the runtime of the entity has to be set to the maximum value.

271

*

271

*

272

* The capability of specifying such event is useful whenever a -deadline

272

* The capability of specifying such event is useful whenever a -deadline

273

* entity wants to (try to!) synchronize its behaviour with the scheduler's

273

* entity wants to (try to!) synchronize its behaviour with the scheduler's

274

* one, and to (try to!) reconcile itself with its own scheduling

274

* one, and to (try to!) reconcile itself with its own scheduling

275

* parameters.

275

* parameters.

276

*/

276

*/

277

static inline void setup_new_dl_entity(struct sched_dl_entity *dl_se,

277

static inline void setup_new_dl_entity(struct sched_dl_entity *dl_se,

278

struct sched_dl_entity *pi_se)

278

struct sched_dl_entity *pi_se)

279

{

279

{

280

struct dl_rq *dl_rq = dl_rq_of_se(dl_se);

280

struct dl_rq *dl_rq = dl_rq_of_se(dl_se);

281

struct rq *rq = rq_of_dl_rq(dl_rq);

281

struct rq *rq = rq_of_dl_rq(dl_rq);

282

283

WARN_ON(!dl_se->dl_new || dl_se->dl_throttled);

283

WARN_ON(!dl_se->dl_new || dl_se->dl_throttled);

284

285

/*

285

/*

286

* We use the regular wall clock time to set deadlines in the

286

* We use the regular wall clock time to set deadlines in the

287

* future; in fact, we must consider execution overheads (time

287

* future; in fact, we must consider execution overheads (time

288

* spent on hardirq context, etc.).

288

* spent on hardirq context, etc.).

289

*/

289

*/

290

dl_se->deadline = rq_clock(rq) + pi_se->dl_deadline;

290

dl_se->deadline = rq_clock(rq) + pi_se->dl_deadline;

291

dl_se->runtime = pi_se->dl_runtime;

291

dl_se->runtime = pi_se->dl_runtime;

292

dl_se->dl_new = 0;

292

dl_se->dl_new = 0;

293

}

293

}

294

295

/*

295

/*

296

* Pure Earliest Deadline First (EDF) scheduling does not deal with the

296

* Pure Earliest Deadline First (EDF) scheduling does not deal with the

297

* possibility of a entity lasting more than what it declared, and thus

297

* possibility of a entity lasting more than what it declared, and thus

298

* exhausting its runtime.

298

* exhausting its runtime.

299

*

299

*

300

* Here we are interested in making runtime overrun possible, but we do

300

* Here we are interested in making runtime overrun possible, but we do

301

* not want a entity which is misbehaving to affect the scheduling of all

301

* not want a entity which is misbehaving to affect the scheduling of all

302

* other entities.

302

* other entities.

303

* Therefore, a budgeting strategy called Constant Bandwidth Server (CBS)

303

* Therefore, a budgeting strategy called Constant Bandwidth Server (CBS)

304

* is used, in order to confine each entity within its own bandwidth.

304

* is used, in order to confine each entity within its own bandwidth.

305

*

305

*

306

* This function deals exactly with that, and ensures that when the runtime

306

* This function deals exactly with that, and ensures that when the runtime

307

* of a entity is replenished, its deadline is also postponed. That ensures

307

* of a entity is replenished, its deadline is also postponed. That ensures

308

* the overrunning entity can't interfere with other entity in the system and

308

* the overrunning entity can't interfere with other entity in the system and

309

* can't make them miss their deadlines. Reasons why this kind of overruns

309

* can't make them miss their deadlines. Reasons why this kind of overruns

310

* could happen are, typically, a entity voluntarily trying to overcome its

310

* could happen are, typically, a entity voluntarily trying to overcome its

311

* runtime, or it just underestimated it during sched_setscheduler_ex().

311

* runtime, or it just underestimated it during sched_setscheduler_ex().

312

*/

312

*/

313

static void replenish_dl_entity(struct sched_dl_entity *dl_se,

313

static void replenish_dl_entity(struct sched_dl_entity *dl_se,

314

struct sched_dl_entity *pi_se)

314

struct sched_dl_entity *pi_se)

315

{

315

{

316

struct dl_rq *dl_rq = dl_rq_of_se(dl_se);

316

struct dl_rq *dl_rq = dl_rq_of_se(dl_se);

317

struct rq *rq = rq_of_dl_rq(dl_rq);

317

struct rq *rq = rq_of_dl_rq(dl_rq);

318

319

BUG_ON(pi_se->dl_runtime <= 0);

319

BUG_ON(pi_se->dl_runtime <= 0);

320

321

/*

321

/*

322

* This could be the case for a !-dl task that is boosted.

322

* This could be the case for a !-dl task that is boosted.

323

* Just go with full inherited parameters.

323

* Just go with full inherited parameters.

324

*/

324

*/

325

if (dl_se->dl_deadline == 0) {

325

if (dl_se->dl_deadline == 0) {

326

dl_se->deadline = rq_clock(rq) + pi_se->dl_deadline;

326

dl_se->deadline = rq_clock(rq) + pi_se->dl_deadline;

327

dl_se->runtime = pi_se->dl_runtime;

327

dl_se->runtime = pi_se->dl_runtime;

328

}

328

}

329

330

/*

330

/*

331

* We keep moving the deadline away until we get some

331

* We keep moving the deadline away until we get some

332

* available runtime for the entity. This ensures correct

332

* available runtime for the entity. This ensures correct

333

* handling of situations where the runtime overrun is

333

* handling of situations where the runtime overrun is

334

* arbitrary large.

334

* arbitrary large.

335

*/

335

*/

336

while (dl_se->runtime <= 0) {

336

while (dl_se->runtime <= 0) {

337

dl_se->deadline += pi_se->dl_period;

337

dl_se->deadline += pi_se->dl_period;

338

dl_se->runtime += pi_se->dl_runtime;

338

dl_se->runtime += pi_se->dl_runtime;

339

}

339

}

340

341

/*

341

/*

342

* At this point, the deadline really should be "in

342

* At this point, the deadline really should be "in

343

* the future" with respect to rq->clock. If it's

343

* the future" with respect to rq->clock. If it's

344

* not, we are, for some reason, lagging too much!

344

* not, we are, for some reason, lagging too much!

345

* Anyway, after having warn userspace abut that,

345

* Anyway, after having warn userspace abut that,

346

* we still try to keep the things running by

346

* we still try to keep the things running by

347

* resetting the deadline and the budget of the

347

* resetting the deadline and the budget of the

348

* entity.

348

* entity.

349

*/

349

*/

350

if (dl_time_before(dl_se->deadline, rq_clock(rq))) {

350

if (dl_time_before(dl_se->deadline, rq_clock(rq))) {

351

static bool lag_once = false;

351

static bool lag_once = false;

352

353

if (!lag_once) {

353

if (!lag_once) {

354

lag_once = true;

354

lag_once = true;

355

printk_sched("sched: DL replenish lagged to much\n");

355

printk_sched("sched: DL replenish lagged to much\n");

356

}

356

}

357

dl_se->deadline = rq_clock(rq) + pi_se->dl_deadline;

357

dl_se->deadline = rq_clock(rq) + pi_se->dl_deadline;

358

dl_se->runtime = pi_se->dl_runtime;

358

dl_se->runtime = pi_se->dl_runtime;

359

}

359

}

360

}

360

}

361

362

/*

362

/*

363

* Here we check if --at time t-- an entity (which is probably being

363

* Here we check if --at time t-- an entity (which is probably being

364

* [re]activated or, in general, enqueued) can use its remaining runtime

364

* [re]activated or, in general, enqueued) can use its remaining runtime

365

* and its current deadline _without_ exceeding the bandwidth it is

365

* and its current deadline _without_ exceeding the bandwidth it is

366

* assigned (function returns true if it can't). We are in fact applying

366

* assigned (function returns true if it can't). We are in fact applying

367

* one of the CBS rules: when a task wakes up, if the residual runtime

367

* one of the CBS rules: when a task wakes up, if the residual runtime

368

* over residual deadline fits within the allocated bandwidth, then we

368

* over residual deadline fits within the allocated bandwidth, then we

369

* can keep the current (absolute) deadline and residual budget without

369

* can keep the current (absolute) deadline and residual budget without

370

* disrupting the schedulability of the system. Otherwise, we should

370

* disrupting the schedulability of the system. Otherwise, we should

371

* refill the runtime and set the deadline a period in the future,

371

* refill the runtime and set the deadline a period in the future,

372

* because keeping the current (absolute) deadline of the task would

372

* because keeping the current (absolute) deadline of the task would

373

* result in breaking guarantees promised to other tasks (refer to

373

* result in breaking guarantees promised to other tasks (refer to

374

* Documentation/scheduler/sched-deadline.txt for more informations).

374

* Documentation/scheduler/sched-deadline.txt for more informations).

375

*

375

*

376

* This function returns true if:

376

* This function returns true if:

377

*

377

*

378

* runtime / (deadline - t) > dl_runtime / dl_period ,

378

* runtime / (deadline - t) > dl_runtime / dl_period ,

379

*

379

*

380

* IOW we can't recycle current parameters.

380

* IOW we can't recycle current parameters.

381

*

381

*

382

* Notice that the bandwidth check is done against the period. For

382

* Notice that the bandwidth check is done against the period. For

383

* task with deadline equal to period this is the same of using

383

* task with deadline equal to period this is the same of using

384

* dl_deadline instead of dl_period in the equation above.

384

* dl_deadline instead of dl_period in the equation above.

385

*/

385

*/

386

static bool dl_entity_overflow(struct sched_dl_entity *dl_se,

386

static bool dl_entity_overflow(struct sched_dl_entity *dl_se,

387

struct sched_dl_entity *pi_se, u64 t)

387

struct sched_dl_entity *pi_se, u64 t)

388

{

388

{

389

u64 left, right;

389

u64 left, right;

390

391

/*

391

/*

392

* left and right are the two sides of the equation above,

392

* left and right are the two sides of the equation above,

393

* after a bit of shuffling to use multiplications instead

393

* after a bit of shuffling to use multiplications instead

394

* of divisions.

394

* of divisions.

395

*

395

*

396

* Note that none of the time values involved in the two

396

* Note that none of the time values involved in the two

397

* multiplications are absolute: dl_deadline and dl_runtime

397

* multiplications are absolute: dl_deadline and dl_runtime

398

* are the relative deadline and the maximum runtime of each

398

* are the relative deadline and the maximum runtime of each

399

* instance, runtime is the runtime left for the last instance

399

* instance, runtime is the runtime left for the last instance

400

* and (deadline - t), since t is rq->clock, is the time left

400

* and (deadline - t), since t is rq->clock, is the time left

401

* to the (absolute) deadline. Even if overflowing the u64 type

401

* to the (absolute) deadline. Even if overflowing the u64 type

402

* is very unlikely to occur in both cases, here we scale down

402

* is very unlikely to occur in both cases, here we scale down

403

* as we want to avoid that risk at all. Scaling down by 10

403

* as we want to avoid that risk at all. Scaling down by 10

404

* means that we reduce granularity to 1us. We are fine with it,

404

* means that we reduce granularity to 1us. We are fine with it,

405

* since this is only a true/false check and, anyway, thinking

405

* since this is only a true/false check and, anyway, thinking

406

* of anything below microseconds resolution is actually fiction

406

* of anything below microseconds resolution is actually fiction

407

* (but still we want to give the user that illusion >;).

407

* (but still we want to give the user that illusion >;).

408

*/

408

*/

409

left = (pi_se->dl_period >> DL_SCALE) * (dl_se->runtime >> DL_SCALE);

409

left = (pi_se->dl_period >> DL_SCALE) * (dl_se->runtime >> DL_SCALE);

410

right = ((dl_se->deadline - t) >> DL_SCALE) *

410

right = ((dl_se->deadline - t) >> DL_SCALE) *

411

(pi_se->dl_runtime >> DL_SCALE);

411

(pi_se->dl_runtime >> DL_SCALE);

412

413

return dl_time_before(right, left);

413

return dl_time_before(right, left);

414

}

414

}

415

416

/*

416

/*

417

* When a -deadline entity is queued back on the runqueue, its runtime and

417

* When a -deadline entity is queued back on the runqueue, its runtime and

418

* deadline might need updating.

418

* deadline might need updating.

419

*

419

*

420

* The policy here is that we update the deadline of the entity only if:

420

* The policy here is that we update the deadline of the entity only if:

421

* - the current deadline is in the past,

421

* - the current deadline is in the past,

422

* - using the remaining runtime with the current deadline would make

422

* - using the remaining runtime with the current deadline would make

423

* the entity exceed its bandwidth.

423

* the entity exceed its bandwidth.

424

*/

424

*/

425

static void update_dl_entity(struct sched_dl_entity *dl_se,

425

static void update_dl_entity(struct sched_dl_entity *dl_se,

426

struct sched_dl_entity *pi_se)

426

struct sched_dl_entity *pi_se)

427

{

427

{

428

struct dl_rq *dl_rq = dl_rq_of_se(dl_se);

428

struct dl_rq *dl_rq = dl_rq_of_se(dl_se);

429

struct rq *rq = rq_of_dl_rq(dl_rq);

429

struct rq *rq = rq_of_dl_rq(dl_rq);

430

431

/*

431

/*

432

* The arrival of a new instance needs special treatment, i.e.,

432

* The arrival of a new instance needs special treatment, i.e.,

433

* the actual scheduling parameters have to be "renewed".

433

* the actual scheduling parameters have to be "renewed".

434

*/

434

*/

435

if (dl_se->dl_new) {

435

if (dl_se->dl_new) {

436

setup_new_dl_entity(dl_se, pi_se);

436

setup_new_dl_entity(dl_se, pi_se);

437

return;

437

return;

438

}

438

}

439

440

if (dl_time_before(dl_se->deadline, rq_clock(rq)) ||

440

if (dl_time_before(dl_se->deadline, rq_clock(rq)) ||

441

dl_entity_overflow(dl_se, pi_se, rq_clock(rq))) {

441

dl_entity_overflow(dl_se, pi_se, rq_clock(rq))) {

442

dl_se->deadline = rq_clock(rq) + pi_se->dl_deadline;

442

dl_se->deadline = rq_clock(rq) + pi_se->dl_deadline;

443

dl_se->runtime = pi_se->dl_runtime;

443

dl_se->runtime = pi_se->dl_runtime;

444

}

444

}

445

}

445

}

446

447

/*

447

/*

448

* If the entity depleted all its runtime, and if we want it to sleep

448

* If the entity depleted all its runtime, and if we want it to sleep

449

* while waiting for some new execution time to become available, we

449

* while waiting for some new execution time to become available, we

450

* set the bandwidth enforcement timer to the replenishment instant

450

* set the bandwidth enforcement timer to the replenishment instant

451

* and try to activate it.

451

* and try to activate it.

452

*

452

*

453

* Notice that it is important for the caller to know if the timer

453

* Notice that it is important for the caller to know if the timer

454

* actually started or not (i.e., the replenishment instant is in

454

* actually started or not (i.e., the replenishment instant is in

455

* the future or in the past).

455

* the future or in the past).

456

*/

456

*/

457

static int start_dl_timer(struct sched_dl_entity *dl_se, bool boosted)

457

static int start_dl_timer(struct sched_dl_entity *dl_se, bool boosted)

458

{

458

{

459

struct dl_rq *dl_rq = dl_rq_of_se(dl_se);

459

struct dl_rq *dl_rq = dl_rq_of_se(dl_se);

460

struct rq *rq = rq_of_dl_rq(dl_rq);

460

struct rq *rq = rq_of_dl_rq(dl_rq);

461

ktime_t now, act;

461

ktime_t now, act;

462

ktime_t soft, hard;

462

ktime_t soft, hard;

463

unsigned long range;

463

unsigned long range;

464

s64 delta;

464

s64 delta;

465

466

if (boosted)

466

if (boosted)

467

return 0;

467

return 0;

468

/*

468

/*

469

* We want the timer to fire at the deadline, but considering

469

* We want the timer to fire at the deadline, but considering

470

* that it is actually coming from rq->clock and not from

470

* that it is actually coming from rq->clock and not from

471

* hrtimer's time base reading.

471

* hrtimer's time base reading.

472

*/

472

*/

473

act = ns_to_ktime(dl_se->deadline);

473

act = ns_to_ktime(dl_se->deadline);

474

now = hrtimer_cb_get_time(&dl_se->dl_timer);

474

now = hrtimer_cb_get_time(&dl_se->dl_timer);

475

delta = ktime_to_ns(now) - rq_clock(rq);

475

delta = ktime_to_ns(now) - rq_clock(rq);

476

act = ktime_add_ns(act, delta);

476

act = ktime_add_ns(act, delta);

477

478

/*

478

/*

479

* If the expiry time already passed, e.g., because the value

479

* If the expiry time already passed, e.g., because the value

480

* chosen as the deadline is too small, don't even try to

480

* chosen as the deadline is too small, don't even try to

481

* start the timer in the past!

481

* start the timer in the past!

482

*/

482

*/

483

if (ktime_us_delta(act, now) < 0)

483

if (ktime_us_delta(act, now) < 0)

484

return 0;

484

return 0;

485

486

hrtimer_set_expires(&dl_se->dl_timer, act);

486

hrtimer_set_expires(&dl_se->dl_timer, act);

487

488

soft = hrtimer_get_softexpires(&dl_se->dl_timer);

488

soft = hrtimer_get_softexpires(&dl_se->dl_timer);

489

hard = hrtimer_get_expires(&dl_se->dl_timer);

489

hard = hrtimer_get_expires(&dl_se->dl_timer);

490

range = ktime_to_ns(ktime_sub(hard, soft));

490

range = ktime_to_ns(ktime_sub(hard, soft));

491

__hrtimer_start_range_ns(&dl_se->dl_timer, soft,

491

__hrtimer_start_range_ns(&dl_se->dl_timer, soft,

492

range, HRTIMER_MODE_ABS, 0);

492

range, HRTIMER_MODE_ABS, 0);

493

494

return hrtimer_active(&dl_se->dl_timer);

494

return hrtimer_active(&dl_se->dl_timer);

495

}

495

}

496

497

/*

497

/*

498

* This is the bandwidth enforcement timer callback. If here, we know

498

* This is the bandwidth enforcement timer callback. If here, we know

499

* a task is not on its dl_rq, since the fact that the timer was running

499

* a task is not on its dl_rq, since the fact that the timer was running

500

* means the task is throttled and needs a runtime replenishment.

500

* means the task is throttled and needs a runtime replenishment.

501

*

501

*

502

* However, what we actually do depends on the fact the task is active,

502

* However, what we actually do depends on the fact the task is active,

503

* (it is on its rq) or has been removed from there by a call to

503

* (it is on its rq) or has been removed from there by a call to

504

* dequeue_task_dl(). In the former case we must issue the runtime

504

* dequeue_task_dl(). In the former case we must issue the runtime

505

* replenishment and add the task back to the dl_rq; in the latter, we just

505

* replenishment and add the task back to the dl_rq; in the latter, we just

506

* do nothing but clearing dl_throttled, so that runtime and deadline

506

* do nothing but clearing dl_throttled, so that runtime and deadline

507

* updating (and the queueing back to dl_rq) will be done by the

507

* updating (and the queueing back to dl_rq) will be done by the

508

* next call to enqueue_task_dl().

508

* next call to enqueue_task_dl().

509

*/

509

*/

510

static enum hrtimer_restart dl_task_timer(struct hrtimer *timer)

510

static enum hrtimer_restart dl_task_timer(struct hrtimer *timer)

511

{

511

{

512

struct sched_dl_entity *dl_se = container_of(timer,

512

struct sched_dl_entity *dl_se = container_of(timer,

513

struct sched_dl_entity,

513

struct sched_dl_entity,

514

dl_timer);

514

dl_timer);

515

struct task_struct *p = dl_task_of(dl_se);

515

struct task_struct *p = dl_task_of(dl_se);

516

struct rq *rq = task_rq(p);

516

struct rq *rq;

517

again:

518

rq = task_rq(p);

517

raw_spin_lock(&rq->lock);

519

raw_spin_lock(&rq->lock);

520

521

if (rq != task_rq(p)) {

522

/* Task was moved, retrying. */

523

raw_spin_unlock(&rq->lock);

524

goto again;

525

}

518

526

519

/*

527

/*

520

* We need to take care of a possible races here. In fact, the

528

* We need to take care of a possible races here. In fact, the

521

* task might have changed its scheduling policy to something

529

* task might have changed its scheduling policy to something

522

* different from SCHED_DEADLINE or changed its reservation

530

* different from SCHED_DEADLINE or changed its reservation

523

* parameters (through sched_setscheduler()).

531

* parameters (through sched_setscheduler()).

524

*/

532

*/

525

if (!dl_task(p) || dl_se->dl_new)

533

if (!dl_task(p) || dl_se->dl_new)

526

goto unlock;

534

goto unlock;

527

535

528

sched_clock_tick();

536

sched_clock_tick();

529

update_rq_clock(rq);

537

update_rq_clock(rq);

530

dl_se->dl_throttled = 0;

538

dl_se->dl_throttled = 0;

531

dl_se->dl_yielded = 0;

539

dl_se->dl_yielded = 0;

532

if (p->on_rq) {

540

if (p->on_rq) {

533

enqueue_task_dl(rq, p, ENQUEUE_REPLENISH);

541

enqueue_task_dl(rq, p, ENQUEUE_REPLENISH);

534

if (task_has_dl_policy(rq->curr))

542

if (task_has_dl_policy(rq->curr))

535

check_preempt_curr_dl(rq, p, 0);

543

check_preempt_curr_dl(rq, p, 0);

536

else

544

else

537

resched_task(rq->curr);

545

resched_task(rq->curr);

538

#ifdef CONFIG_SMP

546

#ifdef CONFIG_SMP

539

/*

547

/*

540

* Queueing this task back might have overloaded rq,

548

* Queueing this task back might have overloaded rq,

541

* check if we need to kick someone away.

549

* check if we need to kick someone away.

542

*/

550

*/

543

if (has_pushable_dl_tasks(rq))

551

if (has_pushable_dl_tasks(rq))

544

push_dl_task(rq);

552

push_dl_task(rq);

545

#endif

553

#endif

546

}

554

}

547

unlock:

555

unlock:

548

raw_spin_unlock(&rq->lock);

556

raw_spin_unlock(&rq->lock);

549

557

550

return HRTIMER_NORESTART;

558

return HRTIMER_NORESTART;

551

}

559

}

552

560

553

void init_dl_task_timer(struct sched_dl_entity *dl_se)

561

void init_dl_task_timer(struct sched_dl_entity *dl_se)

554

{

562

{

555

struct hrtimer *timer = &dl_se->dl_timer;

563

struct hrtimer *timer = &dl_se->dl_timer;

556

564

557

if (hrtimer_active(timer)) {

565

if (hrtimer_active(timer)) {

558

hrtimer_try_to_cancel(timer);

566

hrtimer_try_to_cancel(timer);

559

return;

567

return;

560

}

568

}

561

569

562

hrtimer_init(timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);

570

hrtimer_init(timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);

563

timer->function = dl_task_timer;

571

timer->function = dl_task_timer;

564

}

572

}

565

573

566

static

574

static

567

int dl_runtime_exceeded(struct rq *rq, struct sched_dl_entity *dl_se)

575

int dl_runtime_exceeded(struct rq *rq, struct sched_dl_entity *dl_se)

568

{

576

{

569

int dmiss = dl_time_before(dl_se->deadline, rq_clock(rq));

577

int dmiss = dl_time_before(dl_se->deadline, rq_clock(rq));

570

int rorun = dl_se->runtime <= 0;

578

int rorun = dl_se->runtime <= 0;

571

579

572

if (!rorun && !dmiss)

580

if (!rorun && !dmiss)

573

return 0;

581

return 0;

574

582

575

/*

583

/*

576

* If we are beyond our current deadline and we are still

584

* If we are beyond our current deadline and we are still

577

* executing, then we have already used some of the runtime of

585

* executing, then we have already used some of the runtime of

578

* the next instance. Thus, if we do not account that, we are

586

* the next instance. Thus, if we do not account that, we are

579

* stealing bandwidth from the system at each deadline miss!

587

* stealing bandwidth from the system at each deadline miss!

580

*/

588

*/

581

if (dmiss) {

589

if (dmiss) {

582

dl_se->runtime = rorun ? dl_se->runtime : 0;

590

dl_se->runtime = rorun ? dl_se->runtime : 0;

583

dl_se->runtime -= rq_clock(rq) - dl_se->deadline;

591

dl_se->runtime -= rq_clock(rq) - dl_se->deadline;

584

}

592

}

585

593

586

return 1;

594

return 1;

587

}

595

}

588

596

589

extern bool sched_rt_bandwidth_account(struct rt_rq *rt_rq);

597

extern bool sched_rt_bandwidth_account(struct rt_rq *rt_rq);

590

598

591

/*

599

/*

592

* Update the current task's runtime statistics (provided it is still

600

* Update the current task's runtime statistics (provided it is still

593

* a -deadline task and has not been removed from the dl_rq).

601

* a -deadline task and has not been removed from the dl_rq).

594

*/

602

*/

595

static void update_curr_dl(struct rq *rq)

603

static void update_curr_dl(struct rq *rq)

596

{

604

{

597

struct task_struct *curr = rq->curr;

605

struct task_struct *curr = rq->curr;

598

struct sched_dl_entity *dl_se = &curr->dl;

606

struct sched_dl_entity *dl_se = &curr->dl;

599

u64 delta_exec;

607

u64 delta_exec;

600

608

601

if (!dl_task(curr) || !on_dl_rq(dl_se))

609

if (!dl_task(curr) || !on_dl_rq(dl_se))

602

return;

610

return;

603

611

604

/*

612

/*

605

* Consumed budget is computed considering the time as

613

* Consumed budget is computed considering the time as

606

* observed by schedulable tasks (excluding time spent

614

* observed by schedulable tasks (excluding time spent

607

* in hardirq context, etc.). Deadlines are instead

615

* in hardirq context, etc.). Deadlines are instead

608

* computed using hard walltime. This seems to be the more

616

* computed using hard walltime. This seems to be the more

609

* natural solution, but the full ramifications of this

617

* natural solution, but the full ramifications of this

610

* approach need further study.

618

* approach need further study.

611

*/

619

*/

612

delta_exec = rq_clock_task(rq) - curr->se.exec_start;

620

delta_exec = rq_clock_task(rq) - curr->se.exec_start;

613

if (unlikely((s64)delta_exec <= 0))

621

if (unlikely((s64)delta_exec <= 0))

614

return;

622

return;

615

623

616

schedstat_set(curr->se.statistics.exec_max,

624

schedstat_set(curr->se.statistics.exec_max,

617

max(curr->se.statistics.exec_max, delta_exec));

625

max(curr->se.statistics.exec_max, delta_exec));

618

626

619

curr->se.sum_exec_runtime += delta_exec;

627

curr->se.sum_exec_runtime += delta_exec;

620

account_group_exec_runtime(curr, delta_exec);

628

account_group_exec_runtime(curr, delta_exec);

621

629

622

curr->se.exec_start = rq_clock_task(rq);

630

curr->se.exec_start = rq_clock_task(rq);

623

cpuacct_charge(curr, delta_exec);

631

cpuacct_charge(curr, delta_exec);

624

632

625

sched_rt_avg_update(rq, delta_exec);

633

sched_rt_avg_update(rq, delta_exec);

626

634

627

dl_se->runtime -= delta_exec;

635

dl_se->runtime -= delta_exec;

628

if (dl_runtime_exceeded(rq, dl_se)) {

636

if (dl_runtime_exceeded(rq, dl_se)) {

629

__dequeue_task_dl(rq, curr, 0);

637

__dequeue_task_dl(rq, curr, 0);

630

if (likely(start_dl_timer(dl_se, curr->dl.dl_boosted)))

638

if (likely(start_dl_timer(dl_se, curr->dl.dl_boosted)))

631

dl_se->dl_throttled = 1;

639

dl_se->dl_throttled = 1;

632

else

640

else

633

enqueue_task_dl(rq, curr, ENQUEUE_REPLENISH);

641

enqueue_task_dl(rq, curr, ENQUEUE_REPLENISH);

634

642

635

if (!is_leftmost(curr, &rq->dl))

643

if (!is_leftmost(curr, &rq->dl))

636

resched_task(curr);

644

resched_task(curr);

637

}

645

}

638

646

639

/*

647

/*

640

* Because -- for now -- we share the rt bandwidth, we need to

648

* Because -- for now -- we share the rt bandwidth, we need to

641

* account our runtime there too, otherwise actual rt tasks

649

* account our runtime there too, otherwise actual rt tasks

642

* would be able to exceed the shared quota.

650

* would be able to exceed the shared quota.

643

*

651

*

644

* Account to the root rt group for now.

652

* Account to the root rt group for now.

645

*

653

*

646

* The solution we're working towards is having the RT groups scheduled

654

* The solution we're working towards is having the RT groups scheduled

647

* using deadline servers -- however there's a few nasties to figure

655

* using deadline servers -- however there's a few nasties to figure

648

* out before that can happen.

656

* out before that can happen.

649

*/

657

*/

650

if (rt_bandwidth_enabled()) {

658

if (rt_bandwidth_enabled()) {

651

struct rt_rq *rt_rq = &rq->rt;

659

struct rt_rq *rt_rq = &rq->rt;

652

660

653

raw_spin_lock(&rt_rq->rt_runtime_lock);

661

raw_spin_lock(&rt_rq->rt_runtime_lock);

654

/*

662

/*

655

* We'll let actual RT tasks worry about the overflow here, we

663

* We'll let actual RT tasks worry about the overflow here, we

656

* have our own CBS to keep us inline; only account when RT

664

* have our own CBS to keep us inline; only account when RT

657

* bandwidth is relevant.

665

* bandwidth is relevant.

658

*/

666

*/

659

if (sched_rt_bandwidth_account(rt_rq))

667

if (sched_rt_bandwidth_account(rt_rq))

660

rt_rq->rt_time += delta_exec;

668

rt_rq->rt_time += delta_exec;

661

raw_spin_unlock(&rt_rq->rt_runtime_lock);

669

raw_spin_unlock(&rt_rq->rt_runtime_lock);

662

}

670

}

663

}

671

}

664

672

665

#ifdef CONFIG_SMP

673

#ifdef CONFIG_SMP

666

674

667

static struct task_struct *pick_next_earliest_dl_task(struct rq *rq, int cpu);

675

static struct task_struct *pick_next_earliest_dl_task(struct rq *rq, int cpu);

668

676

669

static inline u64 next_deadline(struct rq *rq)

677

static inline u64 next_deadline(struct rq *rq)

670

{

678

{

671

struct task_struct *next = pick_next_earliest_dl_task(rq, rq->cpu);

679

struct task_struct *next = pick_next_earliest_dl_task(rq, rq->cpu);

672

680

673

if (next && dl_prio(next->prio))

681

if (next && dl_prio(next->prio))

674

return next->dl.deadline;

682

return next->dl.deadline;

675

else

683

else

676

return 0;

684

return 0;

677

}

685

}

678

686

679

/*
 * Update the cached earliest / next-earliest deadlines of @dl_rq after
 * an entity with absolute deadline @deadline has been added.  A cached
 * value of 0 is treated as "not set".
 */
static void inc_dl_deadline(struct dl_rq *dl_rq, u64 deadline)
{
	struct rq *rq = rq_of_dl_rq(dl_rq);

	if (!dl_rq->earliest_dl.curr ||
	    dl_time_before(deadline, dl_rq->earliest_dl.curr)) {
		/*
		 * The new entity becomes the earliest one: the previous
		 * earliest is demoted to next-earliest and the cpudl
		 * structure is told about this CPU's new deadline.
		 */
		dl_rq->earliest_dl.next = dl_rq->earliest_dl.curr;
		dl_rq->earliest_dl.curr = deadline;
		cpudl_set(&rq->rd->cpudl, rq->cpu, deadline, 1);
		return;
	}

	/*
	 * Not earlier than the earliest, but earlier than the cached
	 * next-earliest (or none is cached): recompute the latter.
	 */
	if (!dl_rq->earliest_dl.next ||
	    dl_time_before(deadline, dl_rq->earliest_dl.next))
		dl_rq->earliest_dl.next = next_deadline(rq);
}

705

713

706

/*
 * Recompute the cached earliest / next-earliest deadlines of @dl_rq
 * after an entity has been removed.  The @deadline argument is not
 * used: both values are always rebuilt from the current queue state.
 */
static void dec_dl_deadline(struct dl_rq *dl_rq, u64 deadline)
{
	struct rq *rq = rq_of_dl_rq(dl_rq);
	struct sched_dl_entity *first;

	if (!dl_rq->dl_nr_running) {
		/* Queue is empty: clear the cache and the cpudl entry. */
		dl_rq->earliest_dl.curr = 0;
		dl_rq->earliest_dl.next = 0;
		cpudl_set(&rq->rd->cpudl, rq->cpu, 0, 0);
		return;
	}

	/* The leftmost node of the tree holds the earliest deadline. */
	first = rb_entry(dl_rq->rb_leftmost, struct sched_dl_entity, rb_node);
	dl_rq->earliest_dl.curr = first->deadline;
	dl_rq->earliest_dl.next = next_deadline(rq);
	cpudl_set(&rq->rd->cpudl, rq->cpu, first->deadline, 1);
}

728

736

729

#else

737

#else

730

738

731

/* !CONFIG_SMP: no earliest-deadline cache to maintain, so these do nothing. */
static inline void inc_dl_deadline(struct dl_rq *dl_rq, u64 deadline)
{
}

static inline void dec_dl_deadline(struct dl_rq *dl_rq, u64 deadline)
{
}

733

741

734

#endif /* CONFIG_SMP */

742

#endif /* CONFIG_SMP */

735

743

736

static inline

744

static inline

737

void inc_dl_tasks(struct sched_dl_entity *dl_se, struct dl_rq *dl_rq)

745

void inc_dl_tasks(struct sched_dl_entity *dl_se, struct dl_rq *dl_rq)

738

{

746

{

739

int prio = dl_task_of(dl_se)->prio;

747

int prio = dl_task_of(dl_se)->prio;

740

u64 deadline = dl_se->deadline;

748

u64 deadline = dl_se->deadline;

741

749

742

WARN_ON(!dl_prio(prio));

750

WARN_ON(!dl_prio(prio));

743

dl_rq->dl_nr_running++;

751

dl_rq->dl_nr_running++;

744

inc_nr_running(rq_of_dl_rq(dl_rq));

752

inc_nr_running(rq_of_dl_rq(dl_rq));

745

753

746

inc_dl_deadline(dl_rq, deadline);

754

inc_dl_deadline(dl_rq, deadline);

747

inc_dl_migration(dl_se, dl_rq);

755

inc_dl_migration(dl_se, dl_rq);

748

}

756

}

749

757

750

static inline

758

static inline

751

void dec_dl_tasks(struct sched_dl_entity *dl_se, struct dl_rq *dl_rq)

759

void dec_dl_tasks(struct sched_dl_entity *dl_se, struct dl_rq *dl_rq)

752

{

760

{

753

int prio = dl_task_of(dl_se)->prio;

761

int prio = dl_task_of(dl_se)->prio;

754

762

755

WARN_ON(!dl_prio(prio));

763

WARN_ON(!dl_prio(prio));

756

WARN_ON(!dl_rq->dl_nr_running);

764

WARN_ON(!dl_rq->dl_nr_running);

757

dl_rq->dl_nr_running--;

765

dl_rq->dl_nr_running--;

758

dec_nr_running(rq_of_dl_rq(dl_rq));

766

dec_nr_running(rq_of_dl_rq(dl_rq));

759

767

760

dec_dl_deadline(dl_rq, dl_se->deadline);

768

dec_dl_deadline(dl_rq, dl_se->deadline);

761

dec_dl_migration(dl_se, dl_rq);

769

dec_dl_migration(dl_se, dl_rq);

762

}

770

}

763

771

764

static void __enqueue_dl_entity(struct sched_dl_entity *dl_se)

772

static void __enqueue_dl_entity(struct sched_dl_entity *dl_se)

765

{

773

{

766

struct dl_rq *dl_rq = dl_rq_of_se(dl_se);

774

struct dl_rq *dl_rq = dl_rq_of_se(dl_se);

767

struct rb_node **link = &dl_rq->rb_root.rb_node;

775

struct rb_node **link = &dl_rq->rb_root.rb_node;

768

struct rb_node *parent = NULL;

776

struct rb_node *parent = NULL;

769

struct sched_dl_entity *entry;

777

struct sched_dl_entity *entry;

770

int leftmost = 1;

778

int leftmost = 1;

771

779

772

BUG_ON(!RB_EMPTY_NODE(&dl_se->rb_node));

780

BUG_ON(!RB_EMPTY_NODE(&dl_se->rb_node));

773

781

774

while (*link) {

782

while (*link) {

775

parent = *link;

783

parent = *link;

776

entry = rb_entry(parent, struct sched_dl_entity, rb_node);

784

entry = rb_entry(parent, struct sched_dl_entity, rb_node);

777

if (dl_time_before(dl_se->deadline, entry->deadline))

785

if (dl_time_before(dl_se->deadline, entry->deadline))

778

link = &parent->rb_left;

786

link = &parent->rb_left;

779

else {

787

else {

780

link = &parent->rb_right;

788

link = &parent->rb_right;

781

leftmost = 0;

789

leftmost = 0;

782

}

790

}

783

}

791

}

784

792

785

if (leftmost)

793

if (leftmost)

786

dl_rq->rb_leftmost = &dl_se->rb_node;

794

dl_rq->rb_leftmost = &dl_se->rb_node;

787

795

788

rb_link_node(&dl_se->rb_node, parent, link);

796

rb_link_node(&dl_se->rb_node, parent, link);

789

rb_insert_color(&dl_se->rb_node, &dl_rq->rb_root);

797

rb_insert_color(&dl_se->rb_node, &dl_rq->rb_root);

790

798

791

inc_dl_tasks(dl_se, dl_rq);

799

inc_dl_tasks(dl_se, dl_rq);

792

}

800

}

793

801

794

static void __dequeue_dl_entity(struct sched_dl_entity *dl_se)

802

static void __dequeue_dl_entity(struct sched_dl_entity *dl_se)

795

{

803

{

796

struct dl_rq *dl_rq = dl_rq_of_se(dl_se);

804

struct dl_rq *dl_rq = dl_rq_of_se(dl_se);

797

805

798

if (RB_EMPTY_NODE(&dl_se->rb_node))

806

if (RB_EMPTY_NODE(&dl_se->rb_node))

799

return;

807

return;

800

808

801

if (dl_rq->rb_leftmost == &dl_se->rb_node) {

809

if (dl_rq->rb_leftmost == &dl_se->rb_node) {

802

struct rb_node *next_node;

810

struct rb_node *next_node;

803

811

804

next_node = rb_next(&dl_se->rb_node);

812

next_node = rb_next(&dl_se->rb_node);

805

dl_rq->rb_leftmost = next_node;

813

dl_rq->rb_leftmost = next_node;

806

}

814

}

807

815

808

rb_erase(&dl_se->rb_node, &dl_rq->rb_root);

816

rb_erase(&dl_se->rb_node, &dl_rq->rb_root);

809

RB_CLEAR_NODE(&dl_se->rb_node);

817

RB_CLEAR_NODE(&dl_se->rb_node);

810

818

811

dec_dl_tasks(dl_se, dl_rq);

819

dec_dl_tasks(dl_se, dl_rq);

812

}

820

}

813

821

814

static void

822

static void

815

enqueue_dl_entity(struct sched_dl_entity *dl_se,

823

enqueue_dl_entity(struct sched_dl_entity *dl_se,

816

struct sched_dl_entity *pi_se, int flags)

824

struct sched_dl_entity *pi_se, int flags)

817

{

825

{

818

BUG_ON(on_dl_rq(dl_se));

826

BUG_ON(on_dl_rq(dl_se));

819

827

820

/*

828

/*

821

* If this is a wakeup or a new instance, the scheduling

829

* If this is a wakeup or a new instance, the scheduling

822

* parameters of the task might need updating. Otherwise,

830

* parameters of the task might need updating. Otherwise,

823

* we want a replenishment of its runtime.

831

* we want a replenishment of its runtime.

824

*/

832

*/

825

if (!dl_se->dl_new && flags & ENQUEUE_REPLENISH)

833

if (!dl_se->dl_new && flags & ENQUEUE_REPLENISH)

826

replenish_dl_entity(dl_se, pi_se);

834

replenish_dl_entity(dl_se, pi_se);

827

else

835

else

828

update_dl_entity(dl_se, pi_se);

836

update_dl_entity(dl_se, pi_se);

829

837

830

__enqueue_dl_entity(dl_se);

838

__enqueue_dl_entity(dl_se);

831

}

839

}

832

840

833

/* Take @dl_se off its dl_rq; thin wrapper around __dequeue_dl_entity(). */
static void dequeue_dl_entity(struct sched_dl_entity *dl_se)
{
	__dequeue_dl_entity(dl_se);
}

837

845

838

static void enqueue_task_dl(struct rq *rq, struct task_struct *p, int flags)

846

static void enqueue_task_dl(struct rq *rq, struct task_struct *p, int flags)

839

{

847

{

840

struct task_struct *pi_task = rt_mutex_get_top_task(p);

848

struct task_struct *pi_task = rt_mutex_get_top_task(p);

841

struct sched_dl_entity *pi_se = &p->dl;

849

struct sched_dl_entity *pi_se = &p->dl;

842

850

843

/*

851

/*

844

* Use the scheduling parameters of the top pi-waiter

852

* Use the scheduling parameters of the top pi-waiter

845

* task if we have one and its (relative) deadline is

853

* task if we have one and its (relative) deadline is

846

* smaller than our one... OTW we keep our runtime and

854

* smaller than our one... OTW we keep our runtime and

847

* deadline.

855

* deadline.

848

*/

856

*/

849

if (pi_task && p->dl.dl_boosted && dl_prio(pi_task->normal_prio))

857

if (pi_task && p->dl.dl_boosted && dl_prio(pi_task->normal_prio))

850

pi_se = &pi_task->dl;

858

pi_se = &pi_task->dl;

851

859

852

/*

860

/*

853

* If p is throttled, we do nothing. In fact, if it exhausted

861

* If p is throttled, we do nothing. In fact, if it exhausted

854

* its budget it needs a replenishment and, since it now is on

862

* its budget it needs a replenishment and, since it now is on

855

* its rq, the bandwidth timer callback (which clearly has not

863

* its rq, the bandwidth timer callback (which clearly has not

856

* run yet) will take care of this.

864

* run yet) will take care of this.

857

*/

865

*/

858

if (p->dl.dl_throttled)

866

if (p->dl.dl_throttled)

859

return;

867

return;

860

868

861

enqueue_dl_entity(&p->dl, pi_se, flags);

869

enqueue_dl_entity(&p->dl, pi_se, flags);

862

870

863

if (!task_current(rq, p) && p->nr_cpus_allowed > 1)

871

if (!task_current(rq, p) && p->nr_cpus_allowed > 1)

864

enqueue_pushable_dl_task(rq, p);

872

enqueue_pushable_dl_task(rq, p);

865

}

873

}

866

874

867

static void __dequeue_task_dl(struct rq *rq, struct task_struct *p, int flags)

875

static void __dequeue_task_dl(struct rq *rq, struct task_struct *p, int flags)

868

{

876

{

869

dequeue_dl_entity(&p->dl);

877

dequeue_dl_entity(&p->dl);

870

dequeue_pushable_dl_task(rq, p);

878

dequeue_pushable_dl_task(rq, p);

871

}

879

}

872

880

873

/*
 * Dequeue @p from @rq: first bring the current task's runtime
 * accounting up to date, then do the actual removal.
 */
static void dequeue_task_dl(struct rq *rq, struct task_struct *p, int flags)
{
	update_curr_dl(rq);
	__dequeue_task_dl(rq, p, flags);
}

878

886

879

/*

887

/*

880

* Yield task semantic for -deadline tasks is:

888

* Yield task semantic for -deadline tasks is:

881

*

889

*

882

* get off from the CPU until our next instance, with

890

* get off from the CPU until our next instance, with

883

* a new runtime. This is of little use now, since we

891

* a new runtime. This is of little use now, since we

884

* don't have a bandwidth reclaiming mechanism. Anyway,

892

* don't have a bandwidth reclaiming mechanism. Anyway,

885

* bandwidth reclaiming is planned for the future, and

893

* bandwidth reclaiming is planned for the future, and

886

* yield_task_dl will indicate that some spare budget

894

* yield_task_dl will indicate that some spare budget

887

* is available for other task instances to use it.

895

* is available for other task instances to use it.

888

*/

896

*/

889

static void yield_task_dl(struct rq *rq)

897

static void yield_task_dl(struct rq *rq)

890

{

898

{

891

struct task_struct *p = rq->curr;

899

struct task_struct *p = rq->curr;

892

900

893

/*

901

/*

894

* We make the task go to sleep until its current deadline by

902

* We make the task go to sleep until its current deadline by

895

* forcing its runtime to zero. This way, update_curr_dl() stops

903

* forcing its runtime to zero. This way, update_curr_dl() stops

896

* it and the bandwidth timer will wake it up and will give it

904

* it and the bandwidth timer will wake it up and will give it

897

* new scheduling parameters (thanks to dl_yielded=1).

905

* new scheduling parameters (thanks to dl_yielded=1).

898

*/

906

*/

899

if (p->dl.runtime > 0) {

907

if (p->dl.runtime > 0) {

900

rq->curr->dl.dl_yielded = 1;

908

rq->curr->dl.dl_yielded = 1;

901

p->dl.runtime = 0;

909

p->dl.runtime = 0;

902

}

910

}

903

update_curr_dl(rq);

911

update_curr_dl(rq);

904

}

912

}

905

913

906

#ifdef CONFIG_SMP

914

#ifdef CONFIG_SMP

907

915

908

static int find_later_rq(struct task_struct *task);

916

static int find_later_rq(struct task_struct *task);

909

917

910

static int

918

static int

911

select_task_rq_dl(struct task_struct *p, int cpu, int sd_flag, int flags)

919

select_task_rq_dl(struct task_struct *p, int cpu, int sd_flag, int flags)

912

{

920

{

913

struct task_struct *curr;

921

struct task_struct *curr;

914

struct rq *rq;

922

struct rq *rq;

915

923

916

if (sd_flag != SD_BALANCE_WAKE && sd_flag != SD_BALANCE_FORK)

924

if (sd_flag != SD_BALANCE_WAKE && sd_flag != SD_BALANCE_FORK)

917

goto out;

925

goto out;

918

926

919

rq = cpu_rq(cpu);

927

rq = cpu_rq(cpu);

920

928

921

rcu_read_lock();

929

rcu_read_lock();

922

curr = ACCESS_ONCE(rq->curr); /* unlocked access */

930

curr = ACCESS_ONCE(rq->curr); /* unlocked access */

923

931

924

/*

932

/*

925

* If we are dealing with a -deadline task, we must

933

* If we are dealing with a -deadline task, we must

926

* decide where to wake it up.

934

* decide where to wake it up.

927

* If it has a later deadline and the current task

935

* If it has a later deadline and the current task

928

* on this rq can't move (provided the waking task

936

* on this rq can't move (provided the waking task

929

* can!) we prefer to send it somewhere else. On the

937

* can!) we prefer to send it somewhere else. On the

930

* other hand, if it has a shorter deadline, we

938

* other hand, if it has a shorter deadline, we

931

* try to make it stay here, it might be important.

939

* try to make it stay here, it might be important.

932

*/

940

*/

933

if (unlikely(dl_task(curr)) &&

941

if (unlikely(dl_task(curr)) &&

934

(curr->nr_cpus_allowed < 2 ||

942

(curr->nr_cpus_allowed < 2 ||

935

!dl_entity_preempt(&p->dl, &curr->dl)) &&

943

!dl_entity_preempt(&p->dl, &curr->dl)) &&

936

(p->nr_cpus_allowed > 1)) {

944

(p->nr_cpus_allowed > 1)) {

937

int target = find_later_rq(p);

945

int target = find_later_rq(p);

938

946

939

if (target != -1)

947

if (target != -1)

940

cpu = target;

948

cpu = target;

941

}

949

}

942

rcu_read_unlock();

950

rcu_read_unlock();

943

951

944

out:

952

out:

945

return cpu;

953

return cpu;

946

}

954

}

947

955

948

static void check_preempt_equal_dl(struct rq *rq, struct task_struct *p)

956

static void check_preempt_equal_dl(struct rq *rq, struct task_struct *p)

949

{

957

{

950

/*

958

/*

951

* Current can't be migrated, useless to reschedule,

959

* Current can't be migrated, useless to reschedule,

952

* let's hope p can move out.

960

* let's hope p can move out.

953

*/

961

*/

954

if (rq->curr->nr_cpus_allowed == 1 ||

962

if (rq->curr->nr_cpus_allowed == 1 ||

955

cpudl_find(&rq->rd->cpudl, rq->curr, NULL) == -1)

963

cpudl_find(&rq->rd->cpudl, rq->curr, NULL) == -1)

956

return;

964

return;

957

965

958

/*

966

/*

959

* p is migratable, so let's not schedule it and

967

* p is migratable, so let's not schedule it and

960

* see if it is pushed or pulled somewhere else.

968

* see if it is pushed or pulled somewhere else.

961

*/

969

*/

962

if (p->nr_cpus_allowed != 1 &&

970

if (p->nr_cpus_allowed != 1 &&

963

cpudl_find(&rq->rd->cpudl, p, NULL) != -1)

971

cpudl_find(&rq->rd->cpudl, p, NULL) != -1)

964

return;

972

return;

965

973

966

resched_task(rq->curr);

974

resched_task(rq->curr);

967

}

975

}

968

976

969

static int pull_dl_task(struct rq *this_rq);

977

static int pull_dl_task(struct rq *this_rq);

970

978

971

#endif /* CONFIG_SMP */

979

#endif /* CONFIG_SMP */

972

980

973

/*

981

/*

974

* Only called when both the current and waking task are -deadline

982

* Only called when both the current and waking task are -deadline

975

* tasks.

983

* tasks.

976

*/

984

*/

977

static void check_preempt_curr_dl(struct rq *rq, struct task_struct *p,

985

static void check_preempt_curr_dl(struct rq *rq, struct task_struct *p,

978

int flags)

986

int flags)

979

{

987

{

980

if (dl_entity_preempt(&p->dl, &rq->curr->dl)) {

988

if (dl_entity_preempt(&p->dl, &rq->curr->dl)) {

981

resched_task(rq->curr);

989

resched_task(rq->curr);

982

return;

990

return;

983

}

991

}

984

992

985

#ifdef CONFIG_SMP

993

#ifdef CONFIG_SMP

986

/*

994

/*

987

* In the unlikely case current and p have the same deadline

995

* In the unlikely case current and p have the same deadline

988

* let us try to decide what's the best thing to do...

996

* let us try to decide what's the best thing to do...

989

*/

997

*/

990

if ((p->dl.deadline == rq->curr->dl.deadline) &&

998

if ((p->dl.deadline == rq->curr->dl.deadline) &&

991

!test_tsk_need_resched(rq->curr))

999

!test_tsk_need_resched(rq->curr))

992

check_preempt_equal_dl(rq, p);

1000

check_preempt_equal_dl(rq, p);

993

#endif /* CONFIG_SMP */

1001

#endif /* CONFIG_SMP */

994

}

1002

}

995

1003

996

#ifdef CONFIG_SCHED_HRTICK

1004

#ifdef CONFIG_SCHED_HRTICK

997

static void start_hrtick_dl(struct rq *rq, struct task_struct *p)

1005

static void start_hrtick_dl(struct rq *rq, struct task_struct *p)

998

{

1006

{

999

s64 delta = p->dl.dl_runtime - p->dl.runtime;

1007

s64 delta = p->dl.dl_runtime - p->dl.runtime;

1000

1008

1001

if (delta > 10000)

1009

if (delta > 10000)

1002

hrtick_start(rq, p->dl.runtime);

1010

hrtick_start(rq, p->dl.runtime);

1003

}

1011

}

1004

#endif

1012

#endif

1005

1013

1006

static struct sched_dl_entity *pick_next_dl_entity(struct rq *rq,

1014

static struct sched_dl_entity *pick_next_dl_entity(struct rq *rq,

1007

struct dl_rq *dl_rq)

1015

struct dl_rq *dl_rq)

1008

{

1016

{

1009

struct rb_node *left = dl_rq->rb_leftmost;

1017

struct rb_node *left = dl_rq->rb_leftmost;

1010

1018

1011

if (!left)

1019

if (!left)

1012

return NULL;

1020

return NULL;

1013

1021

1014

return rb_entry(left, struct sched_dl_entity, rb_node);

1022

return rb_entry(left, struct sched_dl_entity, rb_node);

1015

}

1023

}

1016

1024

1017

struct task_struct *pick_next_task_dl(struct rq *rq, struct task_struct *prev)

1025

struct task_struct *pick_next_task_dl(struct rq *rq, struct task_struct *prev)

1018

{

1026

{

1019

struct sched_dl_entity *dl_se;

1027

struct sched_dl_entity *dl_se;

1020

struct task_struct *p;

1028

struct task_struct *p;

1021

struct dl_rq *dl_rq;

1029

struct dl_rq *dl_rq;

1022

1030

1023

dl_rq = &rq->dl;

1031

dl_rq = &rq->dl;

1024

1032

1025

if (need_pull_dl_task(rq, prev)) {

1033

if (need_pull_dl_task(rq, prev)) {

1026

pull_dl_task(rq);

1034

pull_dl_task(rq);

1027

/*

1035

/*

1028

* pull_rt_task() can drop (and re-acquire) rq->lock; this

1036

* pull_rt_task() can drop (and re-acquire) rq->lock; this

1029

* means a stop task can slip in, in which case we need to

1037

* means a stop task can slip in, in which case we need to

1030

* re-start task selection.

1038

* re-start task selection.

1031

*/

1039

*/

1032

if (rq->stop && rq->stop->on_rq)

1040

if (rq->stop && rq->stop->on_rq)

1033

return RETRY_TASK;

1041

return RETRY_TASK;

1034

}

1042

}

1035

1043

1036

/*

1044

/*

1037

* When prev is DL, we may throttle it in put_prev_task().

1045

* When prev is DL, we may throttle it in put_prev_task().

1038

* So, we update time before we check for dl_nr_running.

1046

* So, we update time before we check for dl_nr_running.

1039

*/

1047

*/

1040

if (prev->sched_class == &dl_sched_class)

1048

if (prev->sched_class == &dl_sched_class)

1041

update_curr_dl(rq);

1049

update_curr_dl(rq);

1042

1050

1043

if (unlikely(!dl_rq->dl_nr_running))

1051

if (unlikely(!dl_rq->dl_nr_running))

1044

return NULL;

1052

return NULL;

1045

1053

1046

put_prev_task(rq, prev);

1054

put_prev_task(rq, prev);

1047

1055

1048

dl_se = pick_next_dl_entity(rq, dl_rq);

1056

dl_se = pick_next_dl_entity(rq, dl_rq);

1049

BUG_ON(!dl_se);

1057

BUG_ON(!dl_se);

1050

1058

1051

p = dl_task_of(dl_se);

1059

p = dl_task_of(dl_se);

1052

p->se.exec_start = rq_clock_task(rq);

1060

p->se.exec_start = rq_clock_task(rq);

1053

1061

1054

/* Running task will never be pushed. */

1062

/* Running task will never be pushed. */

1055

dequeue_pushable_dl_task(rq, p);

1063

dequeue_pushable_dl_task(rq, p);

1056

1064

1057

#ifdef CONFIG_SCHED_HRTICK

1065

#ifdef CONFIG_SCHED_HRTICK

1058

if (hrtick_enabled(rq))

1066

if (hrtick_enabled(rq))

1059

start_hrtick_dl(rq, p);

1067

start_hrtick_dl(rq, p);

1060

#endif

1068

#endif

1061

1069

1062

set_post_schedule(rq);

1070

set_post_schedule(rq);

1063

1071

1064

return p;

1072

return p;

1065

}

1073

}

1066

1074

1067

static void put_prev_task_dl(struct rq *rq, struct task_struct *p)

1075

static void put_prev_task_dl(struct rq *rq, struct task_struct *p)

1068

{

1076

{

1069

update_curr_dl(rq);

1077

update_curr_dl(rq);

1070

1078

1071

if (on_dl_rq(&p->dl) && p->nr_cpus_allowed > 1)

1079

if (on_dl_rq(&p->dl) && p->nr_cpus_allowed > 1)

1072

enqueue_pushable_dl_task(rq, p);

1080

enqueue_pushable_dl_task(rq, p);

1073

}

1081

}

1074

1082

1075

/*
 * Tick handler for a running -deadline task: update its runtime
 * accounting and, with CONFIG_SCHED_HRTICK, re-arm the high-resolution
 * tick when @queued is set and the task still has runtime left.
 */
static void task_tick_dl(struct rq *rq, struct task_struct *p, int queued)
{
	update_curr_dl(rq);

#ifdef CONFIG_SCHED_HRTICK
	if (!hrtick_enabled(rq))
		return;

	if (queued && p->dl.runtime > 0)
		start_hrtick_dl(rq, p);
#endif
}

1084

1092

1085

/*
 * Fork hook of the -deadline class; intentionally a no-op.
 */
static void task_fork_dl(struct task_struct *p)
{
	/*
	 * Nothing to do: SCHED_DEADLINE tasks cannot fork, and that is
	 * enforced through sched_fork().
	 */
}

1092

1100

1093

static void task_dead_dl(struct task_struct *p)

1101

static void task_dead_dl(struct task_struct *p)

1094

{

1102

{

1095

struct hrtimer *timer = &p->dl.dl_timer;

1103

struct hrtimer *timer = &p->dl.dl_timer;

1096

struct dl_bw *dl_b = dl_bw_of(task_cpu(p));

1104

struct dl_bw *dl_b = dl_bw_of(task_cpu(p));

1097

1105

1098

/*

1106

/*

1099

* Since we are TASK_DEAD we won't slip out of the domain!

1107

* Since we are TASK_DEAD we won't slip out of the domain!

1100

*/

1108

*/

1101

raw_spin_lock_irq(&dl_b->lock);

1109

raw_spin_lock_irq(&dl_b->lock);

1102

dl_b->total_bw -= p->dl.dl_bw;

1110

dl_b->total_bw -= p->dl.dl_bw;

1103

raw_spin_unlock_irq(&dl_b->lock);

1111

raw_spin_unlock_irq(&dl_b->lock);

1104

1112

1105

hrtimer_cancel(timer);

1113

hrtimer_cancel(timer);

1106

}

1114

}

1107

1115

1108

static void set_curr_task_dl(struct rq *rq)

1116

static void set_curr_task_dl(struct rq *rq)

1109

{

1117

{

1110

struct task_struct *p = rq->curr;

1118

struct task_struct *p = rq->curr;

1111

1119

1112

p->se.exec_start = rq_clock_task(rq);

1120

p->se.exec_start = rq_clock_task(rq);

1113

1121

1114

/* You can't push away the running task */

1122

/* You can't push away the running task */

1115

dequeue_pushable_dl_task(rq, p);

1123

dequeue_pushable_dl_task(rq, p);

1116

}

1124

}

1117

1125

1118

#ifdef CONFIG_SMP

1126

#ifdef CONFIG_SMP

1119

1127

1120

/* Only try algorithms three times */

1128

/* Only try algorithms three times */

1121

#define DL_MAX_TRIES 3

1129

#define DL_MAX_TRIES 3

1122

1130

1123

static int pick_dl_task(struct rq *rq, struct task_struct *p, int cpu)

1131

static int pick_dl_task(struct rq *rq, struct task_struct *p, int cpu)

1124

{

1132

{

1125

if (!task_running(rq, p) &&

1133

if (!task_running(rq, p) &&

1126

(cpu < 0 || cpumask_test_cpu(cpu, &p->cpus_allowed)) &&

1134

(cpu < 0 || cpumask_test_cpu(cpu, &p->cpus_allowed)) &&

1127

(p->nr_cpus_allowed > 1))

1135

(p->nr_cpus_allowed > 1))

1128

return 1;

1136

return 1;

1129

1137

1130

return 0;

1138

return 0;

1131

}

1139

}

1132

1140

1133

/* Returns the second earliest -deadline task, NULL otherwise */

1141

/* Returns the second earliest -deadline task, NULL otherwise */

1134

static struct task_struct *pick_next_earliest_dl_task(struct rq *rq, int cpu)

1142

static struct task_struct *pick_next_earliest_dl_task(struct rq *rq, int cpu)

1135

{

1143

{

1136

struct rb_node *next_node = rq->dl.rb_leftmost;

1144

struct rb_node *next_node = rq->dl.rb_leftmost;

1137

struct sched_dl_entity *dl_se;

1145

struct sched_dl_entity *dl_se;

1138

struct task_struct *p = NULL;

1146

struct task_struct *p = NULL;

1139

1147

1140

next_node:

1148

next_node:

1141

next_node = rb_next(next_node);

1149

next_node = rb_next(next_node);

1142

if (next_node) {

1150

if (next_node) {

1143

dl_se = rb_entry(next_node, struct sched_dl_entity, rb_node);

1151

dl_se = rb_entry(next_node, struct sched_dl_entity, rb_node);

1144

p = dl_task_of(dl_se);

1152

p = dl_task_of(dl_se);

1145

1153

1146

if (pick_dl_task(rq, p, cpu))

1154

if (pick_dl_task(rq, p, cpu))

1147

return p;

1155

return p;

1148

1156

1149

goto next_node;

1157

goto next_node;

1150

}

1158

}

1151

1159

1152

return NULL;

1160

return NULL;

1153

}

1161

}

1154

1162

1155

static DEFINE_PER_CPU(cpumask_var_t, local_cpu_mask_dl);

1163

static DEFINE_PER_CPU(cpumask_var_t, local_cpu_mask_dl);

1156

1164

1157

static int find_later_rq(struct task_struct *task)

1165

static int find_later_rq(struct task_struct *task)

1158

{

1166

{

1159

struct sched_domain *sd;

1167

struct sched_domain *sd;

1160

struct cpumask *later_mask = __get_cpu_var(local_cpu_mask_dl);

1168

struct cpumask *later_mask = __get_cpu_var(local_cpu_mask_dl);

1161

int this_cpu = smp_processor_id();

1169

int this_cpu = smp_processor_id();

1162

int best_cpu, cpu = task_cpu(task);

1170

int best_cpu, cpu = task_cpu(task);

1163

1171

1164

/* Make sure the mask is initialized first */

1172

/* Make sure the mask is initialized first */

1165

if (unlikely(!later_mask))

1173

if (unlikely(!later_mask))

1166

return -1;

1174

return -1;

1167

1175

1168

if (task->nr_cpus_allowed == 1)

1176

if (task->nr_cpus_allowed == 1)

1169

return -1;

1177

return -1;

1170

1178

1171

best_cpu = cpudl_find(&task_rq(task)->rd->cpudl,

1179

best_cpu = cpudl_find(&task_rq(task)->rd->cpudl,

1172

task, later_mask);

1180

task, later_mask);

1173

if (best_cpu == -1)

1181

if (best_cpu == -1)

1174

return -1;

1182

return -1;

1175

1183

1176

/*

1184

/*

1177

* If we are here, some target has been found,

1185

* If we are here, some target has been found,

1178

* the most suitable of which is cached in best_cpu.

1186

* the most suitable of which is cached in best_cpu.

1179

* This is, among the runqueues where the current tasks

1187

* This is, among the runqueues where the current tasks

1180

* have later deadlines than the task's one, the rq

1188

* have later deadlines than the task's one, the rq

1181

* with the latest possible one.

1189

* with the latest possible one.

1182

*

1190

*

1183

* Now we check how well this matches with task's

1191

* Now we check how well this matches with task's

1184

* affinity and system topology.

1192

* affinity and system topology.

1185

*

1193

*

1186

* The last cpu where the task run is our first

1194

* The last cpu where the task run is our first

1187

* guess, since it is most likely cache-hot there.

1195

* guess, since it is most likely cache-hot there.

1188

*/

1196

*/

1189

if (cpumask_test_cpu(cpu, later_mask))

1197

if (cpumask_test_cpu(cpu, later_mask))

1190

return cpu;

1198

return cpu;

1191

/*

1199

/*

1192

* Check if this_cpu is to be skipped (i.e., it is

1200

* Check if this_cpu is to be skipped (i.e., it is

1193

* not in the mask) or not.

1201

* not in the mask) or not.

1194

*/

1202

*/

1195

if (!cpumask_test_cpu(this_cpu, later_mask))

1203

if (!cpumask_test_cpu(this_cpu, later_mask))

1196

this_cpu = -1;

1204

this_cpu = -1;

1197

1205

1198

rcu_read_lock();

1206

rcu_read_lock();

1199

for_each_domain(cpu, sd) {

1207

for_each_domain(cpu, sd) {

1200

if (sd->flags & SD_WAKE_AFFINE) {

1208

if (sd->flags & SD_WAKE_AFFINE) {

1201

1209

1202

/*

1210

/*

1203

* If possible, preempting this_cpu is

1211

* If possible, preempting this_cpu is

1204

* cheaper than migrating.

1212

* cheaper than migrating.

1205

*/

1213

*/

1206

if (this_cpu != -1 &&

1214

if (this_cpu != -1 &&

1207

cpumask_test_cpu(this_cpu, sched_domain_span(sd))) {

1215

cpumask_test_cpu(this_cpu, sched_domain_span(sd))) {

1208

rcu_read_unlock();

1216

rcu_read_unlock();

1209

return this_cpu;

1217

return this_cpu;

1210

}

1218

}

1211

1219

1212

/*

1220

/*

1213

* Last chance: if best_cpu is valid and is

1221

* Last chance: if best_cpu is valid and is

1214

* in the mask, that becomes our choice.

1222

* in the mask, that becomes our choice.

1215

*/

1223

*/

1216

if (best_cpu < nr_cpu_ids &&

1224

if (best_cpu < nr_cpu_ids &&

1217

cpumask_test_cpu(best_cpu, sched_domain_span(sd))) {

1225

cpumask_test_cpu(best_cpu, sched_domain_span(sd))) {

1218

rcu_read_unlock();

1226

rcu_read_unlock();

1219

return best_cpu;

1227

return best_cpu;

1220

}

1228

}

1221

}

1229

}

1222

}

1230

}

1223

rcu_read_unlock();

1231

rcu_read_unlock();

1224

1232

1225

/*

1233

/*

1226

* At this point, all our guesses failed, we just return

1234

* At this point, all our guesses failed, we just return

1227

* 'something', and let the caller sort the things out.

1235

* 'something', and let the caller sort the things out.

1228

*/

1236

*/

1229

if (this_cpu != -1)

1237

if (this_cpu != -1)

1230

return this_cpu;

1238

return this_cpu;

1231

1239

1232

cpu = cpumask_any(later_mask);

1240

cpu = cpumask_any(later_mask);

1233

if (cpu < nr_cpu_ids)

1241

if (cpu < nr_cpu_ids)

1234

return cpu;

1242

return cpu;

1235

1243

1236

return -1;

1244

return -1;

1237

}

1245

}

1238

1246

1239

/* Locks the rq it finds */

1247

/* Locks the rq it finds */

1240

static struct rq *find_lock_later_rq(struct task_struct *task, struct rq *rq)

1248

static struct rq *find_lock_later_rq(struct task_struct *task, struct rq *rq)

1241

{

1249

{

1242

struct rq *later_rq = NULL;

1250

struct rq *later_rq = NULL;

1243

int tries;

1251

int tries;

1244

int cpu;

1252

int cpu;

1245

1253

1246

for (tries = 0; tries < DL_MAX_TRIES; tries++) {

1254

for (tries = 0; tries < DL_MAX_TRIES; tries++) {

1247

cpu = find_later_rq(task);

1255

cpu = find_later_rq(task);

1248

1256

1249

if ((cpu == -1) || (cpu == rq->cpu))

1257

if ((cpu == -1) || (cpu == rq->cpu))

1250

break;

1258

break;

1251

1259

1252

later_rq = cpu_rq(cpu);

1260

later_rq = cpu_rq(cpu);

1253

1261

1254

/* Retry if something changed. */

1262

/* Retry if something changed. */

1255

if (double_lock_balance(rq, later_rq)) {

1263

if (double_lock_balance(rq, later_rq)) {

1256

if (unlikely(task_rq(task) != rq ||

1264

if (unlikely(task_rq(task) != rq ||

1257

!cpumask_test_cpu(later_rq->cpu,

1265

!cpumask_test_cpu(later_rq->cpu,

1258

&task->cpus_allowed) ||

1266

&task->cpus_allowed) ||

1259

task_running(rq, task) || !task->on_rq)) {

1267

task_running(rq, task) || !task->on_rq)) {

1260

double_unlock_balance(rq, later_rq);

1268

double_unlock_balance(rq, later_rq);

1261

later_rq = NULL;

1269

later_rq = NULL;

1262

break;

1270

break;

1263

}

1271

}

1264

}

1272

}

1265

1273

1266

/*

1274

/*

1267

* If the rq we found has no -deadline task, or

1275

* If the rq we found has no -deadline task, or

1268

* its earliest one has a later deadline than our

1276

* its earliest one has a later deadline than our

1269

* task, the rq is a good one.

1277

* task, the rq is a good one.

1270

*/

1278

*/

1271

if (!later_rq->dl.dl_nr_running ||

1279

if (!later_rq->dl.dl_nr_running ||

1272

dl_time_before(task->dl.deadline,

1280

dl_time_before(task->dl.deadline,

1273

later_rq->dl.earliest_dl.curr))

1281

later_rq->dl.earliest_dl.curr))

1274

break;

1282

break;

1275

1283

1276

/* Otherwise we try again. */

1284

/* Otherwise we try again. */

1277

double_unlock_balance(rq, later_rq);

1285

double_unlock_balance(rq, later_rq);

1278

later_rq = NULL;

1286

later_rq = NULL;

1279

}

1287

}

1280

1288

1281

return later_rq;

1289

return later_rq;

1282

}

1290

}

1283

1291

1284

static struct task_struct *pick_next_pushable_dl_task(struct rq *rq)

1292

static struct task_struct *pick_next_pushable_dl_task(struct rq *rq)

1285

{

1293

{

1286

struct task_struct *p;

1294

struct task_struct *p;

1287

1295

1288

if (!has_pushable_dl_tasks(rq))

1296

if (!has_pushable_dl_tasks(rq))

1289

return NULL;

1297

return NULL;

1290

1298

1291

p = rb_entry(rq->dl.pushable_dl_tasks_leftmost,

1299

p = rb_entry(rq->dl.pushable_dl_tasks_leftmost,

1292

struct task_struct, pushable_dl_tasks);

1300

struct task_struct, pushable_dl_tasks);

1293

1301

1294

BUG_ON(rq->cpu != task_cpu(p));

1302

BUG_ON(rq->cpu != task_cpu(p));

1295

BUG_ON(task_current(rq, p));

1303

BUG_ON(task_current(rq, p));

1296

BUG_ON(p->nr_cpus_allowed <= 1);

1304

BUG_ON(p->nr_cpus_allowed <= 1);

1297

1305

1298

BUG_ON(!p->on_rq);

1306

BUG_ON(!p->on_rq);

1299

BUG_ON(!dl_task(p));

1307

BUG_ON(!dl_task(p));

1300

1308

1301

return p;

1309

return p;

1302

}

1310

}

1303

1311

1304

/*

1312

/*

1305

* See if the non running -deadline tasks on this rq

1313

* See if the non running -deadline tasks on this rq

1306

* can be sent to some other CPU where they can preempt

1314

* can be sent to some other CPU where they can preempt

1307

* and start executing.

1315

* and start executing.

1308

*/

1316

*/

1309

static int push_dl_task(struct rq *rq)

1317

static int push_dl_task(struct rq *rq)

1310

{

1318

{

1311

struct task_struct *next_task;

1319

struct task_struct *next_task;

1312

struct rq *later_rq;

1320

struct rq *later_rq;

1313

1321

1314

if (!rq->dl.overloaded)

1322

if (!rq->dl.overloaded)

1315

return 0;

1323

return 0;

1316

1324

1317

next_task = pick_next_pushable_dl_task(rq);

1325

next_task = pick_next_pushable_dl_task(rq);

1318

if (!next_task)

1326

if (!next_task)

1319

return 0;

1327

return 0;

1320

1328

1321

retry:

1329

retry:

1322

if (unlikely(next_task == rq->curr)) {

1330

if (unlikely(next_task == rq->curr)) {

1323

WARN_ON(1);

1331

WARN_ON(1);

1324

return 0;

1332

return 0;

1325

}

1333

}

1326

1334

1327

/*

1335

/*

1328

* If next_task preempts rq->curr, and rq->curr

1336

* If next_task preempts rq->curr, and rq->curr

1329

* can move away, it makes sense to just reschedule

1337

* can move away, it makes sense to just reschedule

1330

* without going further in pushing next_task.

1338

* without going further in pushing next_task.

1331

*/

1339

*/

1332

if (dl_task(rq->curr) &&

1340

if (dl_task(rq->curr) &&

1333

dl_time_before(next_task->dl.deadline, rq->curr->dl.deadline) &&

1341

dl_time_before(next_task->dl.deadline, rq->curr->dl.deadline) &&

1334

rq->curr->nr_cpus_allowed > 1) {

1342

rq->curr->nr_cpus_allowed > 1) {

1335

resched_task(rq->curr);

1343

resched_task(rq->curr);

1336

return 0;

1344

return 0;

1337

}

1345

}

1338

1346

1339

/* We might release rq lock */

1347

/* We might release rq lock */

1340

get_task_struct(next_task);

1348

get_task_struct(next_task);

1341

1349

1342

/* Will lock the rq it'll find */

1350

/* Will lock the rq it'll find */

1343

later_rq = find_lock_later_rq(next_task, rq);

1351

later_rq = find_lock_later_rq(next_task, rq);

1344

if (!later_rq) {

1352

if (!later_rq) {

1345

struct task_struct *task;

1353

struct task_struct *task;

1346

1354

1347

/*

1355

/*

1348

* We must check all this again, since

1356

* We must check all this again, since

1349

* find_lock_later_rq releases rq->lock and it is

1357

* find_lock_later_rq releases rq->lock and it is

1350

* then possible that next_task has migrated.

1358

* then possible that next_task has migrated.

1351

*/

1359

*/

1352

task = pick_next_pushable_dl_task(rq);

1360

task = pick_next_pushable_dl_task(rq);

1353

if (task_cpu(next_task) == rq->cpu && task == next_task) {

1361

if (task_cpu(next_task) == rq->cpu && task == next_task) {

1354

/*

1362

/*

1355

* The task is still there. We don't try

1363

* The task is still there. We don't try

1356

* again, some other cpu will pull it when ready.

1364

* again, some other cpu will pull it when ready.

1357

*/

1365

*/

1358

dequeue_pushable_dl_task(rq, next_task);

1366

dequeue_pushable_dl_task(rq, next_task);

1359

goto out;

1367

goto out;

1360

}

1368

}

1361

1369

1362

if (!task)

1370

if (!task)

1363

/* No more tasks */

1371

/* No more tasks */

1364

goto out;

1372

goto out;

1365

1373

1366

put_task_struct(next_task);

1374

put_task_struct(next_task);

1367

next_task = task;

1375

next_task = task;

1368

goto retry;

1376

goto retry;

1369

}

1377

}

1370

1378

1371

deactivate_task(rq, next_task, 0);

1379

deactivate_task(rq, next_task, 0);

1372

set_task_cpu(next_task, later_rq->cpu);

1380

set_task_cpu(next_task, later_rq->cpu);

1373

activate_task(later_rq, next_task, 0);

1381

activate_task(later_rq, next_task, 0);

1374

1382

1375

resched_task(later_rq->curr);

1383

resched_task(later_rq->curr);

1376

1384

1377

double_unlock_balance(rq, later_rq);

1385

double_unlock_balance(rq, later_rq);

1378

1386

1379

out:

1387

out:

1380

put_task_struct(next_task);

1388

put_task_struct(next_task);

1381

1389

1382

return 1;

1390

return 1;

1383

}

1391

}

1384

1392

1385

/*
 * Keep calling push_dl_task() on @rq until it returns 0.
 */
static void push_dl_tasks(struct rq *rq)
{
	/* Terminates as each successful round moves a -deadline task */
	while (push_dl_task(rq))
		continue;
}

1391

1399

1392

static int pull_dl_task(struct rq *this_rq)

1400

static int pull_dl_task(struct rq *this_rq)

1393

{

1401

{

1394

int this_cpu = this_rq->cpu, ret = 0, cpu;

1402

int this_cpu = this_rq->cpu, ret = 0, cpu;

1395

struct task_struct *p;

1403

struct task_struct *p;

1396

struct rq *src_rq;

1404

struct rq *src_rq;

1397

u64 dmin = LONG_MAX;

1405

u64 dmin = LONG_MAX;

1398

1406

1399

if (likely(!dl_overloaded(this_rq)))

1407

if (likely(!dl_overloaded(this_rq)))

1400

return 0;

1408

return 0;

1401

1409

1402

/*

1410

/*

1403

* Match the barrier from dl_set_overloaded; this guarantees that if we

1411

* Match the barrier from dl_set_overloaded; this guarantees that if we

1404

* see overloaded we must also see the dlo_mask bit.

1412

* see overloaded we must also see the dlo_mask bit.

1405

*/

1413

*/

1406

smp_rmb();

1414

smp_rmb();

1407

1415

1408

for_each_cpu(cpu, this_rq->rd->dlo_mask) {

1416

for_each_cpu(cpu, this_rq->rd->dlo_mask) {

1409

if (this_cpu == cpu)

1417

if (this_cpu == cpu)

1410

continue;

1418

continue;

1411

1419

1412

src_rq = cpu_rq(cpu);

1420

src_rq = cpu_rq(cpu);

1413

1421

1414

/*

1422

/*

1415

* It looks racy, abd it is! However, as in sched_rt.c,

1423

* It looks racy, abd it is! However, as in sched_rt.c,

1416

* we are fine with this.

1424

* we are fine with this.

1417

*/

1425

*/

1418

if (this_rq->dl.dl_nr_running &&

1426

if (this_rq->dl.dl_nr_running &&

1419

dl_time_before(this_rq->dl.earliest_dl.curr,

1427

dl_time_before(this_rq->dl.earliest_dl.curr,

1420

src_rq->dl.earliest_dl.next))

1428

src_rq->dl.earliest_dl.next))

1421

continue;

1429

continue;

1422

1430

1423

/* Might drop this_rq->lock */

1431

/* Might drop this_rq->lock */

1424

double_lock_balance(this_rq, src_rq);

1432

double_lock_balance(this_rq, src_rq);

1425

1433

1426

/*

1434

/*

1427

* If there are no more pullable tasks on the

1435

* If there are no more pullable tasks on the

1428

* rq, we're done with it.

1436

* rq, we're done with it.

1429

*/

1437

*/

1430

if (src_rq->dl.dl_nr_running <= 1)

1438

if (src_rq->dl.dl_nr_running <= 1)

1431

goto skip;

1439

goto skip;

1432

1440

1433

p = pick_next_earliest_dl_task(src_rq, this_cpu);

1441

p = pick_next_earliest_dl_task(src_rq, this_cpu);

1434

1442

1435

/*

1443

/*

1436

* We found a task to be pulled if:

1444

* We found a task to be pulled if:

1437

* - it preempts our current (if there's one),

1445

* - it preempts our current (if there's one),

1438

* - it will preempt the last one we pulled (if any).

1446

* - it will preempt the last one we pulled (if any).

1439

*/

1447

*/

1440

if (p && dl_time_before(p->dl.deadline, dmin) &&

1448

if (p && dl_time_before(p->dl.deadline, dmin) &&

1441

(!this_rq->dl.dl_nr_running ||

1449

(!this_rq->dl.dl_nr_running ||

1442

dl_time_before(p->dl.deadline,

1450

dl_time_before(p->dl.deadline,

1443

this_rq->dl.earliest_dl.curr))) {

1451

this_rq->dl.earliest_dl.curr))) {

1444

WARN_ON(p == src_rq->curr);

1452

WARN_ON(p == src_rq->curr);

1445

WARN_ON(!p->on_rq);

1453

WARN_ON(!p->on_rq);

1446

1454

1447

/*

1455

/*

1448

* Then we pull iff p has actually an earlier

1456

* Then we pull iff p has actually an earlier

1449

* deadline than the current task of its runqueue.

1457

* deadline than the current task of its runqueue.

1450

*/

1458

*/

1451

if (dl_time_before(p->dl.deadline,

1459

if (dl_time_before(p->dl.deadline,

1452

src_rq->curr->dl.deadline))

1460

src_rq->curr->dl.deadline))

1453

goto skip;

1461

goto skip;

1454

1462

1455

ret = 1;

1463

ret = 1;

1456

1464

1457

deactivate_task(src_rq, p, 0);

1465

deactivate_task(src_rq, p, 0);

1458

set_task_cpu(p, this_cpu);

1466

set_task_cpu(p, this_cpu);

1459

activate_task(this_rq, p, 0);

1467

activate_task(this_rq, p, 0);

1460

dmin = p->dl.deadline;

1468

dmin = p->dl.deadline;

1461

1469

1462

/* Is there any other task even earlier? */

1470

/* Is there any other task even earlier? */

1463

}

1471

}

1464

skip:

1472

skip:

1465

double_unlock_balance(this_rq, src_rq);

1473

double_unlock_balance(this_rq, src_rq);

1466

}

1474

}

1467

1475

1468

return ret;

1476

return ret;

1469

}

1477

}

1470

1478

1471

/*
 * Post-schedule hook of the -deadline class: try to push queued
 * -deadline tasks of @rq to other CPUs.
 */
static void post_schedule_dl(struct rq *rq)
{
	push_dl_tasks(rq);
}

1475

1483

1476

/*

1484

/*

1477

* Since the task is not running and a reschedule is not going to happen

1485

* Since the task is not running and a reschedule is not going to happen

1478

* anytime soon on its runqueue, we try pushing it away now.

1486

* anytime soon on its runqueue, we try pushing it away now.

1479

*/

1487

*/

1480

static void task_woken_dl(struct rq *rq, struct task_struct *p)

1488

static void task_woken_dl(struct rq *rq, struct task_struct *p)

1481

{

1489

{

1482

if (!task_running(rq, p) &&

1490

if (!task_running(rq, p) &&

1483

!test_tsk_need_resched(rq->curr) &&

1491

!test_tsk_need_resched(rq->curr) &&

1484

has_pushable_dl_tasks(rq) &&

1492

has_pushable_dl_tasks(rq) &&

1485

p->nr_cpus_allowed > 1 &&

1493

p->nr_cpus_allowed > 1 &&

1486

dl_task(rq->curr) &&

1494

dl_task(rq->curr) &&

1487

(rq->curr->nr_cpus_allowed < 2 ||

1495

(rq->curr->nr_cpus_allowed < 2 ||

1488

dl_entity_preempt(&rq->curr->dl, &p->dl))) {

1496

dl_entity_preempt(&rq->curr->dl, &p->dl))) {

1489

push_dl_tasks(rq);

1497

push_dl_tasks(rq);

1490

}

1498

}

1491

}

1499

}

1492

1500

1493

static void set_cpus_allowed_dl(struct task_struct *p,

1501

static void set_cpus_allowed_dl(struct task_struct *p,

1494

const struct cpumask *new_mask)

1502

const struct cpumask *new_mask)

1495

{

1503

{

1496

struct rq *rq;

1504

struct rq *rq;

1497

int weight;

1505

int weight;

1498

1506

1499

BUG_ON(!dl_task(p));

1507

BUG_ON(!dl_task(p));

1500

1508

1501

/*

1509

/*

1502

* Update only if the task is actually running (i.e.,

1510

* Update only if the task is actually running (i.e.,

1503

* it is on the rq AND it is not throttled).

1511

* it is on the rq AND it is not throttled).

1504

*/

1512

*/

1505

if (!on_dl_rq(&p->dl))

1513

if (!on_dl_rq(&p->dl))

1506

return;

1514

return;

1507

1515

1508

weight = cpumask_weight(new_mask);

1516

weight = cpumask_weight(new_mask);

1509

1517

1510

/*

1518

/*

1511

* Only update if the process changes its state from whether it

1519

* Only update if the process changes its state from whether it

1512

* can migrate or not.

1520

* can migrate or not.

1513

*/

1521

*/

1514

if ((p->nr_cpus_allowed > 1) == (weight > 1))

1522

if ((p->nr_cpus_allowed > 1) == (weight > 1))

1515

return;

1523

return;

1516

1524

1517

rq = task_rq(p);

1525

rq = task_rq(p);

1518

1526

1519

/*

1527

/*

1520

* The process used to be able to migrate OR it can now migrate

1528

* The process used to be able to migrate OR it can now migrate

1521

*/

1529

*/

1522

if (weight <= 1) {

1530

if (weight <= 1) {

1523

if (!task_current(rq, p))

1531

if (!task_current(rq, p))

1524

dequeue_pushable_dl_task(rq, p);

1532

dequeue_pushable_dl_task(rq, p);

1525

BUG_ON(!rq->dl.dl_nr_migratory);

1533

BUG_ON(!rq->dl.dl_nr_migratory);

1526

rq->dl.dl_nr_migratory--;

1534

rq->dl.dl_nr_migratory--;

1527

} else {

1535

} else {

1528

if (!task_current(rq, p))

1536

if (!task_current(rq, p))

1529

enqueue_pushable_dl_task(rq, p);

1537

enqueue_pushable_dl_task(rq, p);

1530

rq->dl.dl_nr_migratory++;

1538

rq->dl.dl_nr_migratory++;

1531

}

1539

}

1532

1540

1533

update_dl_migration(&rq->dl);

1541

update_dl_migration(&rq->dl);

1534

}

1542

}

1535

1543

1536

/* Assumes rq->lock is held */

1544

/* Assumes rq->lock is held */

1537

static void rq_online_dl(struct rq *rq)

1545

static void rq_online_dl(struct rq *rq)

1538

{

1546

{

1539

if (rq->dl.overloaded)

1547

if (rq->dl.overloaded)

1540

dl_set_overload(rq);

1548

dl_set_overload(rq);

1541

1549

1542

if (rq->dl.dl_nr_running > 0)

1550

if (rq->dl.dl_nr_running > 0)

1543

cpudl_set(&rq->rd->cpudl, rq->cpu, rq->dl.earliest_dl.curr, 1);

1551

cpudl_set(&rq->rd->cpudl, rq->cpu, rq->dl.earliest_dl.curr, 1);

1544

}

1552

}

1545

1553

1546

/* Assumes rq->lock is held */

1554

/* Assumes rq->lock is held */

1547

static void rq_offline_dl(struct rq *rq)

1555

static void rq_offline_dl(struct rq *rq)

1548

{

1556

{

1549

if (rq->dl.overloaded)

1557

if (rq->dl.overloaded)

1550

dl_clear_overload(rq);

1558

dl_clear_overload(rq);

1551

1559

1552

cpudl_set(&rq->rd->cpudl, rq->cpu, 0, 0);

1560

cpudl_set(&rq->rd->cpudl, rq->cpu, 0, 0);

1553

}

1561

}

1554

1562

1555

void init_sched_dl_class(void)

1563

void init_sched_dl_class(void)

1556

{

1564

{

1557

unsigned int i;

1565

unsigned int i;

1558

1566

1559

for_each_possible_cpu(i)

1567

for_each_possible_cpu(i)

1560

zalloc_cpumask_var_node(&per_cpu(local_cpu_mask_dl, i),

1568

zalloc_cpumask_var_node(&per_cpu(local_cpu_mask_dl, i),

1561

GFP_KERNEL, cpu_to_node(i));

1569

GFP_KERNEL, cpu_to_node(i));

1562

}

1570

}

1563

1571

1564

#endif /* CONFIG_SMP */

1572

#endif /* CONFIG_SMP */

1565

1573

1566

static void switched_from_dl(struct rq *rq, struct task_struct *p)

1574

static void switched_from_dl(struct rq *rq, struct task_struct *p)

1567

{

1575

{

1568

if (hrtimer_active(&p->dl.dl_timer) && !dl_policy(p->policy))

1576

if (hrtimer_active(&p->dl.dl_timer) && !dl_policy(p->policy))

1569

hrtimer_try_to_cancel(&p->dl.dl_timer);

1577

hrtimer_try_to_cancel(&p->dl.dl_timer);

1570

1578

1571

#ifdef CONFIG_SMP

1579

#ifdef CONFIG_SMP

1572

/*

1580

/*

1573

* Since this might be the only -deadline task on the rq,

1581

* Since this might be the only -deadline task on the rq,

1574

* this is the right place to try to pull some other one

1582

* this is the right place to try to pull some other one

1575

* from an overloaded cpu, if any.

1583

* from an overloaded cpu, if any.

1576

*/

1584

*/

1577

if (!rq->dl.dl_nr_running)

1585

if (!rq->dl.dl_nr_running)

1578

pull_dl_task(rq);

1586

pull_dl_task(rq);

1579

#endif

1587

#endif

1580

}

1588

}

1581

1589

1582

/*

1590

/*

1583

* When switching to -deadline, we may overload the rq, then

1591

* When switching to -deadline, we may overload the rq, then

1584

* we try to push someone off, if possible.

1592

* we try to push someone off, if possible.

1585

*/

1593

*/

1586

static void switched_to_dl(struct rq *rq, struct task_struct *p)

1594

static void switched_to_dl(struct rq *rq, struct task_struct *p)

1587

{

1595

{

1588

int check_resched = 1;

1596

int check_resched = 1;

1589

1597

1590

/*

1598

/*

1591

* If p is throttled, don't consider the possibility

1599

* If p is throttled, don't consider the possibility

1592

* of preempting rq->curr, the check will be done right

1600

* of preempting rq->curr, the check will be done right

1593

* after its runtime will get replenished.

1601

* after its runtime will get replenished.

1594

*/

1602

*/

1595

if (unlikely(p->dl.dl_throttled))

1603

if (unlikely(p->dl.dl_throttled))

1596

return;

1604

return;

1597

1605

1598

if (p->on_rq && rq->curr != p) {

1606

if (p->on_rq && rq->curr != p) {

1599

#ifdef CONFIG_SMP

1607

#ifdef CONFIG_SMP

1600

if (rq->dl.overloaded && push_dl_task(rq) && rq != task_rq(p))

1608

if (rq->dl.overloaded && push_dl_task(rq) && rq != task_rq(p))

1601

/* Only reschedule if pushing failed */

1609

/* Only reschedule if pushing failed */

1602

check_resched = 0;

1610

check_resched = 0;

1603

#endif /* CONFIG_SMP */

1611

#endif /* CONFIG_SMP */

1604

if (check_resched && task_has_dl_policy(rq->curr))

1612

if (check_resched && task_has_dl_policy(rq->curr))

1605

check_preempt_curr_dl(rq, p, 0);

1613

check_preempt_curr_dl(rq, p, 0);

1606

}

1614

}

1607

}

1615

}

1608

1616

1609

/*

1617

/*

1610

* If the scheduling parameters of a -deadline task changed,

1618

* If the scheduling parameters of a -deadline task changed,

1611

* a push or pull operation might be needed.

1619

* a push or pull operation might be needed.

1612

*/

1620

*/

1613

static void prio_changed_dl(struct rq *rq, struct task_struct *p,

1621

static void prio_changed_dl(struct rq *rq, struct task_struct *p,

1614

int oldprio)

1622

int oldprio)

1615

{

1623

{

1616

if (p->on_rq || rq->curr == p) {

1624

if (p->on_rq || rq->curr == p) {

1617

#ifdef CONFIG_SMP

1625

#ifdef CONFIG_SMP

1618

/*

1626

/*

1619

* This might be too much, but unfortunately

1627

* This might be too much, but unfortunately

1620

* we don't have the old deadline value, and

1628

* we don't have the old deadline value, and

1621

* we can't argue if the task is increasing

1629

* we can't argue if the task is increasing

1622

* or lowering its prio, so...

1630

* or lowering its prio, so...

1623

*/

1631

*/

1624

if (!rq->dl.overloaded)

1632

if (!rq->dl.overloaded)

1625

pull_dl_task(rq);

1633

pull_dl_task(rq);

1626

1634

1627

/*

1635

/*

1628

* If we now have a earlier deadline task than p,

1636

* If we now have a earlier deadline task than p,

1629

* then reschedule, provided p is still on this

1637

* then reschedule, provided p is still on this

1630

* runqueue.

1638

* runqueue.

1631

*/

1639

*/

1632

if (dl_time_before(rq->dl.earliest_dl.curr, p->dl.deadline) &&

1640

if (dl_time_before(rq->dl.earliest_dl.curr, p->dl.deadline) &&

1633

rq->curr == p)

1641

rq->curr == p)

1634

resched_task(p);

1642

resched_task(p);

1635

#else

1643

#else

1636

/*

1644

/*

1637

* Again, we don't know if p has a earlier

1645

* Again, we don't know if p has a earlier

1638

* or later deadline, so let's blindly set a

1646

* or later deadline, so let's blindly set a

1639

* (maybe not needed) rescheduling point.

1647

* (maybe not needed) rescheduling point.

1640

*/

1648

*/

1641

resched_task(p);

1649

resched_task(p);

1642

#endif /* CONFIG_SMP */

1650

#endif /* CONFIG_SMP */

1643

} else

1651

} else

1644

switched_to_dl(rq, p);

1652

switched_to_dl(rq, p);

1645

}

1653

}

1646

1654

1647

const struct sched_class dl_sched_class = {

1655

const struct sched_class dl_sched_class = {

1648

.next = &rt_sched_class,

1656

.next = &rt_sched_class,

1649

.enqueue_task = enqueue_task_dl,

1657

.enqueue_task = enqueue_task_dl,

1650

.dequeue_task = dequeue_task_dl,

1658

.dequeue_task = dequeue_task_dl,

1651

.yield_task = yield_task_dl,

1659

.yield_task = yield_task_dl,

1652

1660

1653

.check_preempt_curr = check_preempt_curr_dl,

1661

.check_preempt_curr = check_preempt_curr_dl,

1654

1662

1655

.pick_next_task = pick_next_task_dl,

1663

.pick_next_task = pick_next_task_dl,

1656

.put_prev_task = put_prev_task_dl,

1664

.put_prev_task = put_prev_task_dl,

1657

1665

1658

#ifdef CONFIG_SMP

1666

#ifdef CONFIG_SMP

1659

.select_task_rq = select_task_rq_dl,

1667

.select_task_rq = select_task_rq_dl,

1660

.set_cpus_allowed = set_cpus_allowed_dl,

1668

.set_cpus_allowed = set_cpus_allowed_dl,

1661

.rq_online = rq_online_dl,

1669

.rq_online = rq_online_dl,

1662

.rq_offline = rq_offline_dl,

1670

.rq_offline = rq_offline_dl,

1663

.post_schedule = post_schedule_dl,

1671

.post_schedule = post_schedule_dl,

1664

.task_woken = task_woken_dl,

1672

.task_woken = task_woken_dl,

1665

#endif

1673

#endif

1666

1674

1667

.set_curr_task = set_curr_task_dl,

1675

.set_curr_task = set_curr_task_dl,

1668

.task_tick = task_tick_dl,

1676

.task_tick = task_tick_dl,

1669

.task_fork = task_fork_dl,

1677

.task_fork = task_fork_dl,

1670

.task_dead = task_dead_dl,

1678

.task_dead = task_dead_dl,

1671

1679

1672

.prio_changed = prio_changed_dl,

1680

.prio_changed = prio_changed_dl,

1673

.switched_from = switched_from_dl,

1681

.switched_from = switched_from_dl,

1674

.switched_to = switched_to_dl,

1682

.switched_to = switched_to_dl,

1675

};

1683

};

1676

1684

GITLAB

sched/dl: Fix race in dl_task_timer()

 /*
  * Deadline Scheduling Class (SCHED_DEADLINE)
  *
  * Earliest Deadline First (EDF) + Constant Bandwidth Server (CBS).
  *
  * Tasks that periodically execute their instances for less than their
  * runtime won't miss any of their deadlines.
  * Tasks that are not periodic or sporadic, or that try to execute more
  * than their reserved bandwidth, will be slowed down (and may potentially
  * miss some of their deadlines), and won't affect any other task.
  *
  * Copyright (C) 2012 Dario Faggioli <raistlin@linux.it>,
  *                    Juri Lelli <juri.lelli@gmail.com>,
  *                    Michael Trimarchi <michael@amarulasolutions.com>,
  *                    Fabio Checconi <fchecconi@gmail.com>
  */
 #include "sched.h"
 #include <linux/slab.h>
 struct dl_bandwidth def_dl_bandwidth;
 /* Map an embedded sched_dl_entity back to the task_struct containing it. */
 static inline struct task_struct *dl_task_of(struct sched_dl_entity *dl_se)
 {
 	struct task_struct *owner = container_of(dl_se, struct task_struct, dl);
 	return owner;
 }
 /* Map a dl_rq back to the struct rq that embeds it as its 'dl' member. */
 static inline struct rq *rq_of_dl_rq(struct dl_rq *dl_rq)
 {
 	struct rq *owner = container_of(dl_rq, struct rq, dl);
 	return owner;
 }
 /* Return the dl_rq of the runqueue that dl_se's task currently belongs to. */
 static inline struct dl_rq *dl_rq_of_se(struct sched_dl_entity *dl_se)
 {
 	struct rq *task_runqueue = task_rq(dl_task_of(dl_se));
 	return &task_runqueue->dl;
 }
 /* Return 1 if dl_se's rb_node is linked into a tree, 0 if it is cleared. */
 static inline int on_dl_rq(struct sched_dl_entity *dl_se)
 {
 	if (RB_EMPTY_NODE(&dl_se->rb_node))
 		return 0;
 	return 1;
 }
 /* Return non-zero if p's entity is the cached leftmost node of dl_rq. */
 static inline int is_leftmost(struct task_struct *p, struct dl_rq *dl_rq)
 {
 	return &p->dl.rb_node == dl_rq->rb_leftmost;
 }
 /*
  * Initialise a dl_bandwidth descriptor: set up its lock and record the
  * given period and runtime.
  */
 void init_dl_bandwidth(struct dl_bandwidth *dl_b, u64 period, u64 runtime)
 {
 	raw_spin_lock_init(&dl_b->dl_runtime_lock);
 	dl_b->dl_runtime = runtime;
 	dl_b->dl_period = period;
 }
 extern unsigned long to_ratio(u64 period, u64 runtime);
 /*
  * Initialise a dl_bw accounting structure. The admitted bandwidth cap is
  * derived from the global rt period/runtime (read under
  * def_dl_bandwidth.dl_runtime_lock); an infinite rt runtime is stored as -1.
  * The currently allocated total starts at zero.
  */
 void init_dl_bw(struct dl_bw *dl_b)
 {
 	raw_spin_lock_init(&dl_b->lock);
 	dl_b->total_bw = 0;
 	raw_spin_lock(&def_dl_bandwidth.dl_runtime_lock);
 	if (global_rt_runtime() != RUNTIME_INF)
 		dl_b->bw = to_ratio(global_rt_period(), global_rt_runtime());
 	else
 		dl_b->bw = -1;
 	raw_spin_unlock(&def_dl_bandwidth.dl_runtime_lock);
 }
 /*
  * Reset a dl_rq to its empty state. With CONFIG_SMP the migration and
  * push bookkeeping is cleared; otherwise the per-rq dl_bw is initialised.
  */
 void init_dl_rq(struct dl_rq *dl_rq, struct rq *rq)
 {
 	dl_rq->rb_root = RB_ROOT;
 #ifdef CONFIG_SMP
 	dl_rq->pushable_dl_tasks_root = RB_ROOT;
 	dl_rq->overloaded = 0;
 	dl_rq->dl_nr_migratory = 0;
 	/* a deadline of zero stands for "no -deadline tasks" */
 	dl_rq->earliest_dl.next = 0;
 	dl_rq->earliest_dl.curr = 0;
 #else
 	init_dl_bw(&dl_rq->dl_bw);
 #endif
 }
 #ifdef CONFIG_SMP
 /* Read the overload counter (dlo_count) of rq's root domain. */
 static inline int dl_overloaded(struct rq *rq)
 {
 	int nr_overloaded = atomic_read(&rq->rd->dlo_count);
 	return nr_overloaded;
 }
 /*
  * Publish rq's CPU in the root domain's dlo_mask and bump dlo_count.
  * Nothing is done for a runqueue that is not online.
  */
 static inline void dl_set_overload(struct rq *rq)
 {
 	if (rq->online) {
 		cpumask_set_cpu(rq->cpu, rq->rd->dlo_mask);
 		/*
 		 * The mask update has to be visible before the overload
 		 * count is incremented (same scheme as sched_rt.c).
 		 *
 		 * Pairs with the barrier in pull_dl_task().
 		 */
 		smp_wmb();
 		atomic_inc(&rq->rd->dlo_count);
 	}
 }
 /*
  * Undo dl_set_overload(): drop dlo_count first, then clear rq's CPU from
  * dlo_mask. Nothing is done for a runqueue that is not online.
  */
 static inline void dl_clear_overload(struct rq *rq)
 {
 	if (rq->online) {
 		atomic_dec(&rq->rd->dlo_count);
 		cpumask_clear_cpu(rq->cpu, rq->rd->dlo_mask);
 	}
 }
 /*
  * Re-evaluate the overloaded state of dl_rq: it is overloaded when it has
  * at least one migratory task and more than one running task. The root
  * domain is only notified when the state actually flips.
  */
 static void update_dl_migration(struct dl_rq *dl_rq)
 {
 	int should_overload = dl_rq->dl_nr_migratory &&
 			      dl_rq->dl_nr_running > 1;
 	if (should_overload && !dl_rq->overloaded) {
 		dl_set_overload(rq_of_dl_rq(dl_rq));
 		dl_rq->overloaded = 1;
 	} else if (!should_overload && dl_rq->overloaded) {
 		dl_clear_overload(rq_of_dl_rq(dl_rq));
 		dl_rq->overloaded = 0;
 	}
 }
 static void inc_dl_migration(struct sched_dl_entity *dl_se, struct dl_rq *dl_rq)
 {
 	struct task_struct *p = dl_task_of(dl_se);
 	if (p->nr_cpus_allowed > 1)
 		dl_rq->dl_nr_migratory++;
 	update_dl_migration(dl_rq);
 }
 static void dec_dl_migration(struct sched_dl_entity *dl_se, struct dl_rq *dl_rq)
 {
 	struct task_struct *p = dl_task_of(dl_se);
 	if (p->nr_cpus_allowed > 1)
 		dl_rq->dl_nr_migratory--;
 	update_dl_migration(dl_rq);
 }
 /*
  * The list of pushable -deadline tasks is not a plist, like in
  * sched_rt.c, it is an rb-tree with tasks ordered by deadline.
  *
  * Insert p into rq's pushable tree and update the cached leftmost
  * pointer if p ends up as the leftmost node. p must not already be
  * linked in the tree (enforced by the BUG_ON below).
  */
 static void enqueue_pushable_dl_task(struct rq *rq, struct task_struct *p)
 {
 	struct dl_rq *dl_rq = &rq->dl;
 	struct rb_node **link = &dl_rq->pushable_dl_tasks_root.rb_node;
 	struct rb_node *parent = NULL;
 	struct task_struct *entry;
 	/* stays 1 only if the descent never goes right */
 	int leftmost = 1;
 	BUG_ON(!RB_EMPTY_NODE(&p->pushable_dl_tasks));
 	/* walk down to the empty slot where p has to be linked */
 	while (*link) {
 		parent = *link;
 		entry = rb_entry(parent, struct task_struct,
 				 pushable_dl_tasks);
 		/* go left when p preempts entry (presumably: earlier deadline) */
 		if (dl_entity_preempt(&p->dl, &entry->dl))
 			link = &parent->rb_left;
 		else {
 			link = &parent->rb_right;
 			leftmost = 0;
 		}
 	}
 	if (leftmost)
 		dl_rq->pushable_dl_tasks_leftmost = &p->pushable_dl_tasks;
 	rb_link_node(&p->pushable_dl_tasks, parent, link);
 	rb_insert_color(&p->pushable_dl_tasks, &dl_rq->pushable_dl_tasks_root);
 }
 /*
  * Unlink p from rq's pushable tree, if it is there. When p was the cached
  * leftmost node, its in-order successor takes that role.
  */
 static void dequeue_pushable_dl_task(struct rq *rq, struct task_struct *p)
 {
 	struct rb_node *node = &p->pushable_dl_tasks;
 	struct dl_rq *dl_rq = &rq->dl;
 	/* not queued as pushable: nothing to undo */
 	if (RB_EMPTY_NODE(node))
 		return;
 	if (dl_rq->pushable_dl_tasks_leftmost == node)
 		dl_rq->pushable_dl_tasks_leftmost = rb_next(node);
 	rb_erase(node, &dl_rq->pushable_dl_tasks_root);
 	RB_CLEAR_NODE(node);
 }
 /* Return 1 if rq's pushable -deadline tree holds at least one task. */
 static inline int has_pushable_dl_tasks(struct rq *rq)
 {
 	if (RB_EMPTY_ROOT(&rq->dl.pushable_dl_tasks_root))
 		return 0;
 	return 1;
 }
 static int push_dl_task(struct rq *rq);
 /* A pull is worth trying whenever the previous task was a -deadline one. */
 static inline bool need_pull_dl_task(struct rq *rq, struct task_struct *prev)
 {
 	bool prev_was_dl = dl_task(prev);
 	return prev_was_dl;
 }
 /* Request post-schedule work only when rq has pushable -deadline tasks. */
 static inline void set_post_schedule(struct rq *rq)
 {
 	int pending = has_pushable_dl_tasks(rq);
 	rq->post_schedule = pending;
 }
 #else
 /*
  * !CONFIG_SMP stubs: with a single runqueue there is nothing to push,
  * pull or migrate, so the helpers below are empty or return a constant.
  */
 /* No pushable tree without CONFIG_SMP: nothing to enqueue. */
 static inline
 void enqueue_pushable_dl_task(struct rq *rq, struct task_struct *p)
 {
 }
 /* No pushable tree without CONFIG_SMP: nothing to dequeue. */
 static inline
 void dequeue_pushable_dl_task(struct rq *rq, struct task_struct *p)
 {
 }
 /* No migration accounting without CONFIG_SMP. */
 static inline
 void inc_dl_migration(struct sched_dl_entity *dl_se, struct dl_rq *dl_rq)
 {
 }
 /* No migration accounting without CONFIG_SMP. */
 static inline
 void dec_dl_migration(struct sched_dl_entity *dl_se, struct dl_rq *dl_rq)
 {
 }
 /* Never ask for a pull without CONFIG_SMP. */
 static inline bool need_pull_dl_task(struct rq *rq, struct task_struct *prev)
 {
 	return false;
 }
 /* Nothing can be pulled without CONFIG_SMP; always returns 0. */
 static inline int pull_dl_task(struct rq *rq)
 {
 	return 0;
 }
 /* No post-schedule push work without CONFIG_SMP. */
 static inline void set_post_schedule(struct rq *rq)
 {
 }
 #endif /* CONFIG_SMP */
 static void enqueue_task_dl(struct rq *rq, struct task_struct *p, int flags);
 static void __dequeue_task_dl(struct rq *rq, struct task_struct *p, int flags);
 static void check_preempt_curr_dl(struct rq *rq, struct task_struct *p,
 				  int flags);
 /*
  * A new instance of the entity is explicitly starting, hence:
  *  - its absolute deadline is placed at current time + relative deadline;
  *  - its runtime is refilled to the maximum value.
  *
  * Being able to signal such an event is useful whenever a -deadline
  * entity wants to (try to!) synchronize its behaviour with the scheduler's
  * one, and to (try to!) reconcile itself with its own scheduling
  * parameters.
  *
  * The static parameters (dl_deadline, dl_runtime) are read from pi_se;
  * the dynamic ones are written to dl_se, whose dl_new flag is cleared.
  */
 static inline void setup_new_dl_entity(struct sched_dl_entity *dl_se,
 				       struct sched_dl_entity *pi_se)
 {
 	struct rq *rq = rq_of_dl_rq(dl_rq_of_se(dl_se));
 	/* only a new, non-throttled entity is expected here */
 	WARN_ON(!dl_se->dl_new || dl_se->dl_throttled);
 	dl_se->runtime = pi_se->dl_runtime;
 	/*
 	 * Deadlines are set in the future using the regular wall clock
 	 * time; in fact, execution overheads (time spent in hardirq
 	 * context, etc.) must be taken into account.
 	 */
 	dl_se->deadline = rq_clock(rq) + pi_se->dl_deadline;
 	dl_se->dl_new = 0;
 }
 /*
  * Pure Earliest Deadline First (EDF) scheduling does not deal with the
  * possibility of an entity lasting more than what it declared, and thus
  * exhausting its runtime.
  *
  * Here we are interested in making runtime overrun possible, but we do
  * not want an entity which is misbehaving to affect the scheduling of all
  * other entities.
  * Therefore, a budgeting strategy called Constant Bandwidth Server (CBS)
  * is used, in order to confine each entity within its own bandwidth.
  *
  * This function deals exactly with that, and ensures that when the runtime
  * of an entity is replenished, its deadline is also postponed. That ensures
  * the overrunning entity can't interfere with other entities in the system
  * and can't make them miss their deadlines. Reasons why this kind of overrun
  * could happen are, typically, an entity voluntarily trying to overcome its
  * runtime, or it just underestimated it during sched_setscheduler_ex().
  *
  * @dl_se: entity whose dynamic runtime and deadline are updated.
  * @pi_se: entity providing the static parameters (dl_runtime, dl_deadline,
  *         dl_period) used for the update; its dl_runtime must be positive.
  */
 static void replenish_dl_entity(struct sched_dl_entity *dl_se,
 				struct sched_dl_entity *pi_se)
 {
 	struct dl_rq *dl_rq = dl_rq_of_se(dl_se);
 	struct rq *rq = rq_of_dl_rq(dl_rq);
 	BUG_ON(pi_se->dl_runtime <= 0);
 	/*
 	 * This could be the case for a !-dl task that is boosted.
 	 * Just go with full inherited parameters.
 	 */
 	if (dl_se->dl_deadline == 0) {
 		dl_se->deadline = rq_clock(rq) + pi_se->dl_deadline;
 		dl_se->runtime = pi_se->dl_runtime;
 	}
 	/*
 	 * We keep moving the deadline away until we get some
 	 * available runtime for the entity. This ensures correct
 	 * handling of situations where the runtime overrun is
 	 * arbitrarily large.
 	 */
 	while (dl_se->runtime <= 0) {
 		dl_se->deadline += pi_se->dl_period;
 		dl_se->runtime += pi_se->dl_runtime;
 	}
 	/*
 	 * At this point, the deadline really should be "in
 	 * the future" with respect to rq->clock. If it's
 	 * not, we are, for some reason, lagging too much!
 	 * Anyway, after having warned userspace about that
 	 * (only once, see lag_once), we still try to keep
 	 * things running by resetting the deadline and the
 	 * budget of the entity.
 	 */
 	if (dl_time_before(dl_se->deadline, rq_clock(rq))) {
 		static bool lag_once = false;
 		if (!lag_once) {
 			lag_once = true;
 			printk_sched("sched: DL replenish lagged too much\n");
 		}
 		dl_se->deadline = rq_clock(rq) + pi_se->dl_deadline;
 		dl_se->runtime = pi_se->dl_runtime;
 	}
 }
 /*
  * Here we check if --at time t-- an entity (which is probably being
  * [re]activated or, in general, enqueued) can use its remaining runtime
  * and its current deadline _without_ exceeding the bandwidth it is
  * assigned (function returns true if it can't). We are in fact applying
  * one of the CBS rules: when a task wakes up, if the residual runtime
  * over residual deadline fits within the allocated bandwidth, then we
  * can keep the current (absolute) deadline and residual budget without
  * disrupting the schedulability of the system. Otherwise, we should
  * refill the runtime and set the deadline a period in the future,
  * because keeping the current (absolute) deadline of the task would
  * result in breaking guarantees promised to other tasks (refer to
  * Documentation/scheduler/sched-deadline.txt for more information).
  *
  * This function returns true if:
  *
  *   runtime / (deadline - t) > dl_runtime / dl_period ,
  *
  * IOW we can't recycle current parameters.
  *
  * Notice that the bandwidth check is done against the period. For
  * tasks with deadline equal to period this is the same as using
  * dl_deadline instead of dl_period in the equation above.
  *
  * @dl_se: entity providing the residual runtime and absolute deadline.
  * @pi_se: entity providing the static dl_period and dl_runtime.
  * @t:     current time, in the same clock as dl_se->deadline.
  */
 static bool dl_entity_overflow(struct sched_dl_entity *dl_se,
 			       struct sched_dl_entity *pi_se, u64 t)
 {
 	u64 left, right;
 	/*
 	 * left and right are the two sides of the equation above,
 	 * after a bit of shuffling to use multiplications instead
 	 * of divisions.
 	 *
 	 * Note that none of the time values involved in the two
 	 * multiplications are absolute: dl_deadline and dl_runtime
 	 * are the relative deadline and the maximum runtime of each
 	 * instance, runtime is the runtime left for the last instance
 	 * and (deadline - t), since t is rq->clock, is the time left
 	 * to the (absolute) deadline. Even if overflowing the u64 type
 	 * is very unlikely to occur in both cases, here we scale down
 	 * as we want to avoid that risk at all. Scaling down by 10
 	 * means that we reduce granularity to 1us. We are fine with it,
 	 * since this is only a true/false check and, anyway, thinking
 	 * of anything below microseconds resolution is actually fiction
 	 * (but still we want to give the user that illusion >;).
 	 */
 	left = (pi_se->dl_period >> DL_SCALE) * (dl_se->runtime >> DL_SCALE);
 	right = ((dl_se->deadline - t) >> DL_SCALE) *
 		(pi_se->dl_runtime >> DL_SCALE);
 	/* overflow when right < left, i.e. the inequality above holds */
 	return dl_time_before(right, left);
 }
 /*
  * A -deadline entity that is queued back on the runqueue may need its
  * runtime and deadline refreshed.
  *
  * The deadline (and the runtime with it) is only renewed when either:
  *  - the current deadline is already in the past, or
  *  - consuming the remaining runtime within the current deadline would
  *    make the entity exceed its bandwidth.
  *
  * A brand new instance (dl_new set) is handed to setup_new_dl_entity().
  */
 static void update_dl_entity(struct sched_dl_entity *dl_se,
 			     struct sched_dl_entity *pi_se)
 {
 	struct rq *rq = rq_of_dl_rq(dl_rq_of_se(dl_se));
 	/*
 	 * The arrival of a new instance is special: the actual
 	 * scheduling parameters have to be "renewed" from scratch.
 	 */
 	if (dl_se->dl_new) {
 		setup_new_dl_entity(dl_se, pi_se);
 		return;
 	}
 	/* current deadline and residual runtime can be recycled as they are */
 	if (!dl_time_before(dl_se->deadline, rq_clock(rq)) &&
 	    !dl_entity_overflow(dl_se, pi_se, rq_clock(rq)))
 		return;
 	dl_se->deadline = rq_clock(rq) + pi_se->dl_deadline;
 	dl_se->runtime = pi_se->dl_runtime;
 }
 /*
  * If the entity depleted all its runtime, and if we want it to sleep
  * while waiting for some new execution time to become available, we
  * set the bandwidth enforcement timer to the replenishment instant
  * and try to activate it.
  *
  * Notice that it is important for the caller to know if the timer
  * actually started or not (i.e., the replenishment instant is in
  * the future or in the past).
  *
  * @dl_se:   entity whose dl_timer is armed at dl_se->deadline.
  * @boosted: when true the timer is never started.
  *
  * Returns 0 if the timer was not started (boosted entity or expiry
  * already in the past), otherwise the result of hrtimer_active().
  */
 static int start_dl_timer(struct sched_dl_entity *dl_se, bool boosted)
 {
 	struct dl_rq *dl_rq = dl_rq_of_se(dl_se);
 	struct rq *rq = rq_of_dl_rq(dl_rq);
 	ktime_t now, act;
 	ktime_t soft, hard;
 	unsigned long range;
 	s64 delta;
 	if (boosted)
 		return 0;
 	/*
 	 * We want the timer to fire at the deadline, but considering
 	 * that it is actually coming from rq->clock and not from
 	 * hrtimer's time base reading.
 	 */
 	act = ns_to_ktime(dl_se->deadline);
 	now = hrtimer_cb_get_time(&dl_se->dl_timer);
 	/* offset between the hrtimer time base and rq_clock() */
 	delta = ktime_to_ns(now) - rq_clock(rq);
 	act = ktime_add_ns(act, delta);
 	/*
 	 * If the expiry time already passed, e.g., because the value
 	 * chosen as the deadline is too small, don't even try to
 	 * start the timer in the past!
 	 */
 	if (ktime_us_delta(act, now) < 0)
 		return 0;
 	hrtimer_set_expires(&dl_se->dl_timer, act);
 	/* re-read the soft/hard expiry to derive the range to arm with */
 	soft = hrtimer_get_softexpires(&dl_se->dl_timer);
 	hard = hrtimer_get_expires(&dl_se->dl_timer);
 	range = ktime_to_ns(ktime_sub(hard, soft));
 	__hrtimer_start_range_ns(&dl_se->dl_timer, soft,
 				 range, HRTIMER_MODE_ABS, 0);
 	return hrtimer_active(&dl_se->dl_timer);
 }
 /*
  * This is the bandwidth enforcement timer callback. If here, we know
  * a task is not on its dl_rq, since the fact that the timer was running
  * means the task is throttled and needs a runtime replenishment.
  *
  * However, what we actually do depends on whether the task is active
  * (it is on its rq) or has been removed from there by a call to
  * dequeue_task_dl(). In the former case we must issue the runtime
  * replenishment and add the task back to the dl_rq; in the latter, we just
  * do nothing but clearing dl_throttled, so that runtime and deadline
  * updating (and the queueing back to dl_rq) will be done by the
  * next call to enqueue_task_dl().
  *
  * Always returns HRTIMER_NORESTART.
  */
 static enum hrtimer_restart dl_task_timer(struct hrtimer *timer)
 {
 	struct sched_dl_entity *dl_se = container_of(timer,
 						     struct sched_dl_entity,
 						     dl_timer);
 	struct task_struct *p = dl_task_of(dl_se);
-	struct rq *rq = task_rq(p);
+	struct rq *rq;
+again:
+	rq = task_rq(p);
 	raw_spin_lock(&rq->lock);
+	if (rq != task_rq(p)) {
+		/* Task was moved, retrying. */
+		raw_spin_unlock(&rq->lock);
+		goto again;
+	}
 	/*
 	 * We need to take care of possible races here. In fact, the
 	 * task might have changed its scheduling policy to something
 	 * different from SCHED_DEADLINE or changed its reservation
 	 * parameters (through sched_setscheduler()).
 	 */
 	if (!dl_task(p) || dl_se->dl_new)
 		goto unlock;
 	sched_clock_tick();
 	update_rq_clock(rq);
 	/* the throttling period is over; a pending yield is consumed too */
 	dl_se->dl_throttled = 0;
 	dl_se->dl_yielded = 0;
 	if (p->on_rq) {
 		/* task is still queued on rq: replenish and re-insert it */
 		enqueue_task_dl(rq, p, ENQUEUE_REPLENISH);
 		if (task_has_dl_policy(rq->curr))
 			check_preempt_curr_dl(rq, p, 0);
 		else
 			resched_task(rq->curr);
 #ifdef CONFIG_SMP
 		/*
 		 * Queueing this task back might have overloaded rq,
 		 * check if we need to kick someone away.
 		 */
 		if (has_pushable_dl_tasks(rq))
 			push_dl_task(rq);
 #endif
 	}
 unlock:
 	raw_spin_unlock(&rq->lock);
 	return HRTIMER_NORESTART;
 }
 void init_dl_task_timer(struct sched_dl_entity *dl_se)
 {
 	struct hrtimer *timer = &dl_se->dl_timer;
 	if (hrtimer_active(timer)) {
 		hrtimer_try_to_cancel(timer);
 		return;
 	}
 	hrtimer_init(timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
 	timer->function = dl_task_timer;
 }
 /*
  * Tell whether dl_se overran its runtime and/or missed its deadline.
  * Returns 1 in either case, 0 otherwise. On a deadline miss the runtime is
  * additionally charged with the time elapsed past the deadline.
  */
 static
 int dl_runtime_exceeded(struct rq *rq, struct sched_dl_entity *dl_se)
 {
 	int overrun = dl_se->runtime <= 0;
 	int missed = dl_time_before(dl_se->deadline, rq_clock(rq));
 	/* deadline still ahead: only a depleted budget counts */
 	if (!missed)
 		return overrun;
 	/*
 	 * Running past the current deadline means some runtime of the
 	 * next instance has already been consumed. Unless that is
 	 * accounted for, bandwidth would be stolen from the system at
 	 * each deadline miss!
 	 */
 	if (!overrun)
 		dl_se->runtime = 0;
 	dl_se->runtime -= rq_clock(rq) - dl_se->deadline;
 	return 1;
 }
 extern bool sched_rt_bandwidth_account(struct rt_rq *rt_rq);
 /*
  * Update the current task's runtime statistics (provided it is still
  * a -deadline task and has not been removed from the dl_rq).
  *
  * The executed time since se.exec_start is charged to the task's
  * statistics and subtracted from its dl runtime; if the budget is
  * exceeded the task is dequeued and either throttled (timer armed) or
  * immediately replenished and queued back.
  */
 static void update_curr_dl(struct rq *rq)
 {
 	struct task_struct *curr = rq->curr;
 	struct sched_dl_entity *dl_se = &curr->dl;
 	u64 delta_exec;
 	if (!dl_task(curr) || !on_dl_rq(dl_se))
 		return;
 	/*
 	 * Consumed budget is computed considering the time as
 	 * observed by schedulable tasks (excluding time spent
 	 * in hardirq context, etc.). Deadlines are instead
 	 * computed using hard walltime. This seems to be the more
 	 * natural solution, but the full ramifications of this
 	 * approach need further study.
 	 */
 	delta_exec = rq_clock_task(rq) - curr->se.exec_start;
 	/* nothing to account if no (or negative) time has elapsed */
 	if (unlikely((s64)delta_exec <= 0))
 		return;
 	schedstat_set(curr->se.statistics.exec_max,
 		      max(curr->se.statistics.exec_max, delta_exec));
 	curr->se.sum_exec_runtime += delta_exec;
 	account_group_exec_runtime(curr, delta_exec);
 	/* restart the accounting window from now */
 	curr->se.exec_start = rq_clock_task(rq);
 	cpuacct_charge(curr, delta_exec);
 	sched_rt_avg_update(rq, delta_exec);
 	dl_se->runtime -= delta_exec;
 	if (dl_runtime_exceeded(rq, dl_se)) {
 		__dequeue_task_dl(rq, curr, 0);
 		/* throttle if the timer could be armed, else replenish right away */
 		if (likely(start_dl_timer(dl_se, curr->dl.dl_boosted)))
 			dl_se->dl_throttled = 1;
 		else
 			enqueue_task_dl(rq, curr, ENQUEUE_REPLENISH);
 		/* curr is no longer the leftmost entity: ask for a reschedule */
 		if (!is_leftmost(curr, &rq->dl))
 			resched_task(curr);
 	}
 	/*
 	 * Because -- for now -- we share the rt bandwidth, we need to
 	 * account our runtime there too, otherwise actual rt tasks
 	 * would be able to exceed the shared quota.
 	 *
 	 * Account to the root rt group for now.
 	 *
 	 * The solution we're working towards is having the RT groups scheduled
 	 * using deadline servers -- however there's a few nasties to figure
 	 * out before that can happen.
 	 */
 	if (rt_bandwidth_enabled()) {
 		struct rt_rq *rt_rq = &rq->rt;
 		raw_spin_lock(&rt_rq->rt_runtime_lock);
 		/*
 		 * We'll let actual RT tasks worry about the overflow here, we
 		 * have our own CBS to keep us inline; only account when RT
 		 * bandwidth is relevant.
 		 */
 		if (sched_rt_bandwidth_account(rt_rq))
 			rt_rq->rt_time += delta_exec;
 		raw_spin_unlock(&rt_rq->rt_runtime_lock);
 	}
 }
 #ifdef CONFIG_SMP
 static struct task_struct *pick_next_earliest_dl_task(struct rq *rq, int cpu);
 /*
  * Deadline of the task returned by pick_next_earliest_dl_task() for rq,
  * or 0 when there is no such task or it has no -deadline priority.
  */
 static inline u64 next_deadline(struct rq *rq)
 {
 	struct task_struct *candidate = pick_next_earliest_dl_task(rq, rq->cpu);
 	if (!candidate || !dl_prio(candidate->prio))
 		return 0;
 	return candidate->dl.deadline;
 }
 /*
  * Update the cached earliest/next-earliest deadlines of dl_rq after a
  * task with the given deadline has been added.
  */
 static void inc_dl_deadline(struct dl_rq *dl_rq, u64 deadline)
 {
 	struct rq *rq = rq_of_dl_rq(dl_rq);
 	u64 earliest = dl_rq->earliest_dl.curr;
 	if (earliest == 0 || dl_time_before(deadline, earliest)) {
 		/*
 		 * Either the dl_rq was empty of -deadline tasks, or the
 		 * new task has a shorter deadline than the current
 		 * earliest: the previous earliest becomes the next
 		 * earliest and the new task becomes the earliest itself.
 		 */
 		dl_rq->earliest_dl.next = earliest;
 		dl_rq->earliest_dl.curr = deadline;
 		cpudl_set(&rq->rd->cpudl, rq->cpu, deadline, 1);
 		return;
 	}
 	/*
 	 * The new -deadline task is later than the earliest one on
 	 * dl_rq; if it is earlier than the next one (if any), the
 	 * next-earliest has to be recomputed.
 	 */
 	if (dl_rq->earliest_dl.next == 0 ||
 	    dl_time_before(deadline, dl_rq->earliest_dl.next))
 		dl_rq->earliest_dl.next = next_deadline(rq);
 }
 /*
  * Recompute the cached earliest/next-earliest deadlines of dl_rq after a
  * task has been removed, since the removed one may have been either.
  */
 static void dec_dl_deadline(struct dl_rq *dl_rq, u64 deadline)
 {
 	struct rq *rq = rq_of_dl_rq(dl_rq);
 	struct sched_dl_entity *first;
 	/* last -deadline task gone: reset the cache to "none" */
 	if (!dl_rq->dl_nr_running) {
 		dl_rq->earliest_dl.curr = 0;
 		dl_rq->earliest_dl.next = 0;
 		cpudl_set(&rq->rd->cpudl, rq->cpu, 0, 0);
 		return;
 	}
 	/* otherwise the leftmost entity provides the new earliest deadline */
 	first = rb_entry(dl_rq->rb_leftmost, struct sched_dl_entity, rb_node);
 	dl_rq->earliest_dl.curr = first->deadline;
 	dl_rq->earliest_dl.next = next_deadline(rq);
 	cpudl_set(&rq->rd->cpudl, rq->cpu, first->deadline, 1);
 }
 #else
 /* !CONFIG_SMP: no earliest-deadline cache is maintained, so these are no-ops. */
 static inline void inc_dl_deadline(struct dl_rq *dl_rq, u64 deadline) {}
 static inline void dec_dl_deadline(struct dl_rq *dl_rq, u64 deadline) {}
 #endif /* CONFIG_SMP */
 /*
  * Bookkeeping for an entity that has just been linked into dl_rq: bump the
  * running counters, then update the deadline cache and migration state.
  */
 static inline
 void inc_dl_tasks(struct sched_dl_entity *dl_se, struct dl_rq *dl_rq)
 {
 	struct rq *rq = rq_of_dl_rq(dl_rq);
 	WARN_ON(!dl_prio(dl_task_of(dl_se)->prio));
 	dl_rq->dl_nr_running += 1;
 	inc_nr_running(rq);
 	inc_dl_deadline(dl_rq, dl_se->deadline);
 	inc_dl_migration(dl_se, dl_rq);
 }
 /*
  * Bookkeeping for an entity that has just been unlinked from dl_rq: drop
  * the running counters, then update the deadline cache and migration state.
  */
 static inline
 void dec_dl_tasks(struct sched_dl_entity *dl_se, struct dl_rq *dl_rq)
 {
 	struct rq *rq = rq_of_dl_rq(dl_rq);
 	WARN_ON(!dl_prio(dl_task_of(dl_se)->prio));
 	WARN_ON(!dl_rq->dl_nr_running);
 	dl_rq->dl_nr_running -= 1;
 	dec_nr_running(rq);
 	dec_dl_deadline(dl_rq, dl_se->deadline);
 	dec_dl_migration(dl_se, dl_rq);
 }
 /*
  * Link dl_se into the rb-tree of its dl_rq, keyed by absolute deadline,
  * update the cached leftmost node if needed and account the new task.
  * dl_se must not already be linked (enforced by the BUG_ON below).
  */
 static void __enqueue_dl_entity(struct sched_dl_entity *dl_se)
 {
 	struct dl_rq *dl_rq = dl_rq_of_se(dl_se);
 	struct rb_node **link = &dl_rq->rb_root.rb_node;
 	struct rb_node *parent = NULL;
 	struct sched_dl_entity *entry;
 	/* stays 1 only if the descent never goes right */
 	int leftmost = 1;
 	BUG_ON(!RB_EMPTY_NODE(&dl_se->rb_node));
 	/* walk down to the empty slot where dl_se has to be linked */
 	while (*link) {
 		parent = *link;
 		entry = rb_entry(parent, struct sched_dl_entity, rb_node);
 		/* strictly earlier deadlines go left; ties go right */
 		if (dl_time_before(dl_se->deadline, entry->deadline))
 			link = &parent->rb_left;
 		else {
 			link = &parent->rb_right;
 			leftmost = 0;
 		}
 	}
 	if (leftmost)
 		dl_rq->rb_leftmost = &dl_se->rb_node;
 	rb_link_node(&dl_se->rb_node, parent, link);
 	rb_insert_color(&dl_se->rb_node, &dl_rq->rb_root);
 	inc_dl_tasks(dl_se, dl_rq);
 }
 /*
  * Unlink dl_se from the rb-tree of its dl_rq, if it is queued there. When
  * it was the cached leftmost node, its in-order successor takes that role.
  * The removal is then accounted via dec_dl_tasks().
  */
 static void __dequeue_dl_entity(struct sched_dl_entity *dl_se)
 {
 	struct rb_node *node = &dl_se->rb_node;
 	struct dl_rq *dl_rq = dl_rq_of_se(dl_se);
 	/* not queued: nothing to undo */
 	if (RB_EMPTY_NODE(node))
 		return;
 	if (dl_rq->rb_leftmost == node)
 		dl_rq->rb_leftmost = rb_next(node);
 	rb_erase(node, &dl_rq->rb_root);
 	RB_CLEAR_NODE(node);
 	dec_dl_tasks(dl_se, dl_rq);
 }
 /*
  * Refresh the dynamic parameters of dl_se (taking the static ones from
  * pi_se) and link it into its dl_rq. dl_se must not be queued already.
  */
 static void
 enqueue_dl_entity(struct sched_dl_entity *dl_se,
 		  struct sched_dl_entity *pi_se, int flags)
 {
 	BUG_ON(on_dl_rq(dl_se));
 	/*
 	 * A wakeup or a new instance may need its scheduling parameters
 	 * updated; any other enqueue carrying ENQUEUE_REPLENISH wants its
 	 * runtime replenished instead.
 	 */
 	if (dl_se->dl_new || !(flags & ENQUEUE_REPLENISH))
 		update_dl_entity(dl_se, pi_se);
 	else
 		replenish_dl_entity(dl_se, pi_se);
 	__enqueue_dl_entity(dl_se);
 }
 /* Remove dl_se from its dl_rq; thin wrapper around __dequeue_dl_entity(). */
 static void dequeue_dl_entity(struct sched_dl_entity *dl_se)
 {
 	__dequeue_dl_entity(dl_se);
 }
 /*
  * Queue task p on rq's dl_rq (unless it is throttled) and, when it is not
  * the running task and may run on more than one CPU, on the pushable tree.
  */
 static void enqueue_task_dl(struct rq *rq, struct task_struct *p, int flags)
 {
 	struct task_struct *top_waiter = rt_mutex_get_top_task(p);
 	struct sched_dl_entity *params = &p->dl;
 	/*
 	 * When p is boosted by a top pi-waiter that has a -deadline
 	 * priority, use that task's scheduling parameters; otherwise p
 	 * keeps its own runtime and deadline.
 	 */
 	if (top_waiter && p->dl.dl_boosted && dl_prio(top_waiter->normal_prio))
 		params = &top_waiter->dl;
 	/*
 	 * A throttled p is left alone: it exhausted its budget and needs
 	 * a replenishment and, since it is now on its rq, the bandwidth
 	 * timer callback (which clearly has not run yet) will take care
 	 * of it.
 	 */
 	if (p->dl.dl_throttled)
 		return;
 	enqueue_dl_entity(&p->dl, params, flags);
 	if (p->nr_cpus_allowed > 1 && !task_current(rq, p))
 		enqueue_pushable_dl_task(rq, p);
 }
 /*
  * Remove p from the deadline runqueue and from the set of pushable
  * tasks of rq. The flags argument is accepted but not used here.
  */
 static void __dequeue_task_dl(struct rq *rq, struct task_struct *p, int flags)
 {
 	dequeue_dl_entity(&p->dl);
 	dequeue_pushable_dl_task(rq, p);
 }
 /*
  * dequeue_task hook of the deadline class: calls update_curr_dl() on rq
  * first and then removes p through __dequeue_task_dl().
  */
 static void dequeue_task_dl(struct rq *rq, struct task_struct *p, int flags)
 {
 	update_curr_dl(rq);
 	__dequeue_task_dl(rq, p, flags);
 }
 /*
  * Yield task semantic for -deadline tasks is:
  *
  *   get off from the CPU until our next instance, with
  *   a new runtime. This is of little use now, since we
  *   don't have a bandwidth reclaiming mechanism. Anyway,
  *   bandwidth reclaiming is planned for the future, and
  *   yield_task_dl will indicate that some spare budget
  *   is available for other task instances to use it.
  */
 static void yield_task_dl(struct rq *rq)
 {
 	struct task_struct *p = rq->curr;
 	/*
 	 * We make the task go to sleep until its current deadline by
 	 * forcing its runtime to zero. This way, update_curr_dl() stops
 	 * it and the bandwidth timer will wake it up and will give it
 	 * new scheduling parameters (thanks to dl_yielded=1).
 	 */
 	if (p->dl.runtime > 0) {
 		rq->curr->dl.dl_yielded = 1;
 		p->dl.runtime = 0;
 	}
 	update_curr_dl(rq);
 }
 #ifdef CONFIG_SMP
 static int find_later_rq(struct task_struct *task);
 /*
  * Choose the CPU on which p should be woken up or forked.
  *
  * Returns the cpu passed in unless this is a wakeup/fork balance
  * request and find_later_rq() suggests a better target.
  */
 static int
 select_task_rq_dl(struct task_struct *p, int cpu, int sd_flag, int flags)
 {
 	struct task_struct *running;
 	struct rq *rq;

 	if (sd_flag != SD_BALANCE_WAKE && sd_flag != SD_BALANCE_FORK)
 		return cpu;

 	rq = cpu_rq(cpu);

 	rcu_read_lock();
 	running = ACCESS_ONCE(rq->curr); /* unlocked access */

 	/*
 	 * For a -deadline task we must decide where to wake it up.
 	 * If the task running on this rq is a -deadline one that either
 	 * cannot move or is not preempted by p, and p itself can move,
 	 * prefer sending p somewhere else. If instead p has the shorter
 	 * deadline, try to keep it here, it might be important.
 	 */
 	if (unlikely(dl_task(running))) {
 		int curr_stays = running->nr_cpus_allowed < 2 ||
 				 !dl_entity_preempt(&p->dl, &running->dl);

 		if (curr_stays && p->nr_cpus_allowed > 1) {
 			int later_cpu = find_later_rq(p);

 			if (later_cpu != -1)
 				cpu = later_cpu;
 		}
 	}
 	rcu_read_unlock();

 	return cpu;
 }
 /*
  * Decide whether rq->curr should be rescheduled in favour of p; called
  * by check_preempt_curr_dl() when both have the same deadline.
  */
 static void check_preempt_equal_dl(struct rq *rq, struct task_struct *p)
 {
 	struct task_struct *curr = rq->curr;

 	/*
 	 * Current can't be migrated, so rescheduling it is useless;
 	 * let's hope p can move out.
 	 */
 	if (curr->nr_cpus_allowed == 1)
 		return;
 	if (cpudl_find(&rq->rd->cpudl, curr, NULL) == -1)
 		return;

 	/*
 	 * p is migratable: don't schedule it here and see whether it
 	 * gets pushed or pulled somewhere else.
 	 */
 	if (p->nr_cpus_allowed != 1) {
 		if (cpudl_find(&rq->rd->cpudl, p, NULL) != -1)
 			return;
 	}

 	resched_task(curr);
 }
 static int pull_dl_task(struct rq *this_rq);
 #endif /* CONFIG_SMP */
 /*
  * Preemption check for the deadline class. Only called when both the
  * current and the waking task are -deadline tasks. The flags argument
  * is not used.
  */
 static void check_preempt_curr_dl(struct rq *rq, struct task_struct *p,
 				  int flags)
 {
 	struct task_struct *curr = rq->curr;

 	if (dl_entity_preempt(&p->dl, &curr->dl)) {
 		resched_task(curr);
 		return;
 	}

 #ifdef CONFIG_SMP
 	/*
 	 * In the unlikely case that current and p have the very same
 	 * deadline, try to work out the best thing to do, unless a
 	 * reschedule of current is already pending.
 	 */
 	if (p->dl.deadline != curr->dl.deadline)
 		return;
 	if (test_tsk_need_resched(curr))
 		return;

 	check_preempt_equal_dl(rq, p);
 #endif /* CONFIG_SMP */
 }
 #ifdef CONFIG_SCHED_HRTICK
 /*
  * Arm the hrtick of rq for p's remaining runtime, but only when
  * dl_runtime exceeds the remaining runtime by more than 10000
  * (presumably nanoseconds - TODO confirm the unit).
  */
 static void start_hrtick_dl(struct rq *rq, struct task_struct *p)
 {
 	s64 gap = p->dl.dl_runtime - p->dl.runtime;

 	if (gap <= 10000)
 		return;

 	hrtick_start(rq, p->dl.runtime);
 }
 #endif
 /*
  * Return the entity stored in the cached leftmost node of dl_rq, or
  * NULL when there is no such node. The rq argument is not used.
  */
 static struct sched_dl_entity *pick_next_dl_entity(struct rq *rq,
 						   struct dl_rq *dl_rq)
 {
 	struct rb_node *first = dl_rq->rb_leftmost;

 	return first ? rb_entry(first, struct sched_dl_entity, rb_node) : NULL;
 }
 /*
  * pick_next_task hook of the deadline class.
  *
  * Returns the task owning the leftmost entity of rq's deadline rbtree,
  * NULL when no -deadline task is queued on rq, or RETRY_TASK when a
  * stop task turned up on the rq after the pull attempt, so that task
  * selection must be restarted.
  */
 struct task_struct *pick_next_task_dl(struct rq *rq, struct task_struct *prev)
 {
 	struct sched_dl_entity *dl_se;
 	struct task_struct *p;
 	struct dl_rq *dl_rq;
 	dl_rq = &rq->dl;
 	if (need_pull_dl_task(rq, prev)) {
 		pull_dl_task(rq);
 		/*
 		 * pull_dl_task() can drop (and re-acquire) rq->lock; this
 		 * means a stop task can slip in, in which case we need to
 		 * re-start task selection.
 		 */
 		if (rq->stop && rq->stop->on_rq)
 			return RETRY_TASK;
 	}
 	/*
 	 * When prev is DL, we may throttle it in put_prev_task().
 	 * So, we update time before we check for dl_nr_running.
 	 */
 	if (prev->sched_class == &dl_sched_class)
 		update_curr_dl(rq);
 	if (unlikely(!dl_rq->dl_nr_running))
 		return NULL;
 	put_prev_task(rq, prev);
 	dl_se = pick_next_dl_entity(rq, dl_rq);
 	/* dl_nr_running was non-zero, so the tree must have a leftmost node. */
 	BUG_ON(!dl_se);
 	p = dl_task_of(dl_se);
 	/* Start the execution-time accounting of the chosen task from now. */
 	p->se.exec_start = rq_clock_task(rq);
 	/* Running task will never be pushed. */
        dequeue_pushable_dl_task(rq, p);
 #ifdef CONFIG_SCHED_HRTICK
 	if (hrtick_enabled(rq))
 		start_hrtick_dl(rq, p);
 #endif
 	set_post_schedule(rq);
 	return p;
 }
 /*
  * put_prev_task hook: calls update_curr_dl() and, when p is still
  * queued on the deadline runqueue and may run on more than one CPU,
  * makes it pushable again.
  */
 static void put_prev_task_dl(struct rq *rq, struct task_struct *p)
 {
 	update_curr_dl(rq);

 	if (!on_dl_rq(&p->dl))
 		return;

 	if (p->nr_cpus_allowed > 1)
 		enqueue_pushable_dl_task(rq, p);
 }
 /*
  * task_tick hook: calls update_curr_dl() and, with CONFIG_SCHED_HRTICK,
  * re-arms the hrtick for a queued tick when hrtick is enabled on rq and
  * p still has runtime left.
  */
 static void task_tick_dl(struct rq *rq, struct task_struct *p, int queued)
 {
 	update_curr_dl(rq);

 #ifdef CONFIG_SCHED_HRTICK
 	if (hrtick_enabled(rq) && queued) {
 		if (p->dl.runtime > 0)
 			start_hrtick_dl(rq, p);
 	}
 #endif
 }
 /* task_fork hook of the deadline class; intentionally empty. */
 static void task_fork_dl(struct task_struct *p)
 {
 	/*
 	 * SCHED_DEADLINE tasks cannot fork and this is achieved through
 	 * sched_fork()
 	 */
 }
 /*
  * task_dead hook: subtract the bandwidth of p from the total of the
  * dl_bw it is accounted in, then cancel its deadline timer.
  */
 static void task_dead_dl(struct task_struct *p)
 {
 	struct dl_bw *bw = dl_bw_of(task_cpu(p));

 	/*
 	 * Since we are TASK_DEAD we won't slip out of the domain!
 	 */
 	raw_spin_lock_irq(&bw->lock);
 	bw->total_bw -= p->dl.dl_bw;
 	raw_spin_unlock_irq(&bw->lock);

 	hrtimer_cancel(&p->dl.dl_timer);
 }
 /*
  * set_curr_task hook: restart the execution-time accounting of the
  * task currently running on rq and make sure it is not pushable.
  */
 static void set_curr_task_dl(struct rq *rq)
 {
 	struct task_struct *curr = rq->curr;

 	curr->se.exec_start = rq_clock_task(rq);

 	/* The running task can't be pushed away */
 	dequeue_pushable_dl_task(rq, curr);
 }
 #ifdef CONFIG_SMP
 /* Only try algorithms three times */
 #define DL_MAX_TRIES 3
 /*
  * Tell whether p, queued on rq, is a candidate for migration to cpu.
  *
  * Returns 1 when p is not running, is allowed on cpu (a negative cpu
  * skips this check) and may run on more than one CPU; 0 otherwise.
  */
 static int pick_dl_task(struct rq *rq, struct task_struct *p, int cpu)
 {
 	if (task_running(rq, p))
 		return 0;

 	if (cpu >= 0 && !cpumask_test_cpu(cpu, &p->cpus_allowed))
 		return 0;

 	return p->nr_cpus_allowed > 1;
 }
 /*
  * Walk rq's deadline rbtree starting right after the leftmost node and
  * return the first task accepted by pick_dl_task() for cpu, or NULL if
  * there is none.
  */
 static struct task_struct *pick_next_earliest_dl_task(struct rq *rq, int cpu)
 {
 	struct rb_node *node;

 	for (node = rb_next(rq->dl.rb_leftmost); node; node = rb_next(node)) {
 		struct sched_dl_entity *dl_se;
 		struct task_struct *p;

 		dl_se = rb_entry(node, struct sched_dl_entity, rb_node);
 		p = dl_task_of(dl_se);

 		if (pick_dl_task(rq, p, cpu))
 			return p;
 	}

 	return NULL;
 }
 static DEFINE_PER_CPU(cpumask_var_t, local_cpu_mask_dl);
 /*
  * Look for a CPU that task could be moved to.
  *
  * Candidates are collected in this CPU's local_cpu_mask_dl scratch mask
  * (presumably filled in by cpudl_find() - confirm against cpudeadline.c).
  * Among them the preference order is:
  *   1. the CPU task is currently assigned to;
  *   2. within the SD_WAKE_AFFINE domains of that CPU, the CPU we are
  *      running on, then the best_cpu reported by cpudl_find();
  *   3. the CPU we are running on, if it is a candidate at all;
  *   4. any CPU of the mask.
  *
  * Returns the chosen CPU, or -1 when the scratch mask is not allocated,
  * task can run on a single CPU only, or no candidate was found.
  */
 static int find_later_rq(struct task_struct *task)
 {
 	struct sched_domain *sd;
 	struct cpumask *later_mask = __get_cpu_var(local_cpu_mask_dl);
 	int this_cpu = smp_processor_id();
 	int best_cpu, cpu = task_cpu(task);
 	/* Make sure the mask is initialized first */
 	if (unlikely(!later_mask))
 		return -1;
 	if (task->nr_cpus_allowed == 1)
 		return -1;
 	best_cpu = cpudl_find(&task_rq(task)->rd->cpudl,
 			task, later_mask);
 	if (best_cpu == -1)
 		return -1;
 	/*
 	 * If we are here, some target has been found,
 	 * the most suitable of which is cached in best_cpu.
 	 * This is, among the runqueues where the current tasks
 	 * have later deadlines than the task's one, the rq
 	 * with the latest possible one.
 	 *
 	 * Now we check how well this matches with task's
 	 * affinity and system topology.
 	 *
 	 * The last cpu where the task run is our first
 	 * guess, since it is most likely cache-hot there.
 	 */
 	if (cpumask_test_cpu(cpu, later_mask))
 		return cpu;
 	/*
 	 * Check if this_cpu is to be skipped (i.e., it is
 	 * not in the mask) or not.
 	 */
 	if (!cpumask_test_cpu(this_cpu, later_mask))
 		this_cpu = -1;
 	/* The sched domain hierarchy is walked under RCU protection. */
 	rcu_read_lock();
 	for_each_domain(cpu, sd) {
 		if (sd->flags & SD_WAKE_AFFINE) {
 			/*
 			 * If possible, preempting this_cpu is
 			 * cheaper than migrating.
 			 */
 			if (this_cpu != -1 &&
 			    cpumask_test_cpu(this_cpu, sched_domain_span(sd))) {
 				rcu_read_unlock();
 				return this_cpu;
 			}
 			/*
 			 * Last chance: if best_cpu is valid and is
 			 * in the mask, that becomes our choice.
 			 */
 			if (best_cpu < nr_cpu_ids &&
 			    cpumask_test_cpu(best_cpu, sched_domain_span(sd))) {
 				rcu_read_unlock();
 				return best_cpu;
 			}
 		}
 	}
 	rcu_read_unlock();
 	/*
 	 * At this point, all our guesses failed, we just return
 	 * 'something', and let the caller sort the things out.
 	 */
 	if (this_cpu != -1)
 		return this_cpu;
 	cpu = cpumask_any(later_mask);
 	if (cpu < nr_cpu_ids)
 		return cpu;
 	return -1;
 }
 /*
  * Find a runqueue that task, currently on rq, can be pushed to, and
  * lock it.
  *
  * Makes up to DL_MAX_TRIES attempts. On success the returned runqueue
  * has been locked through double_lock_balance() (the caller releases it
  * with double_unlock_balance()). Returns NULL when no suitable runqueue
  * was found or when task changed state while the locks were taken.
  */
 static struct rq *find_lock_later_rq(struct task_struct *task, struct rq *rq)
 {
 	struct rq *later_rq = NULL;
 	int tries;
 	int cpu;
 	for (tries = 0; tries < DL_MAX_TRIES; tries++) {
 		cpu = find_later_rq(task);
 		/* No candidate, or the candidate is the rq we are already on. */
 		if ((cpu == -1) || (cpu == rq->cpu))
 			break;
 		later_rq = cpu_rq(cpu);
 		/* Retry if something changed. */
 		if (double_lock_balance(rq, later_rq)) {
 			/*
 			 * Re-validate task: it must still be queued on rq,
 			 * allowed on later_rq's CPU and not running.
 			 */
 			if (unlikely(task_rq(task) != rq ||
 				     !cpumask_test_cpu(later_rq->cpu,
 				                       &task->cpus_allowed) ||
 				     task_running(rq, task) || !task->on_rq)) {
 				double_unlock_balance(rq, later_rq);
 				later_rq = NULL;
 				break;
 			}
 		}
 		/*
 		 * If the rq we found has no -deadline task, or
 		 * its earliest one has a later deadline than our
 		 * task, the rq is a good one.
 		 */
 		if (!later_rq->dl.dl_nr_running ||
 		    dl_time_before(task->dl.deadline,
 				   later_rq->dl.earliest_dl.curr))
 			break;
 		/* Otherwise we try again. */
 		double_unlock_balance(rq, later_rq);
 		later_rq = NULL;
 	}
 	return later_rq;
 }
 /*
  * Return the leftmost task of rq's pushable-tasks tree, or NULL when
  * rq has no pushable -deadline task. The BUG_ON()s check that the task
  * really qualifies as pushable.
  */
 static struct task_struct *pick_next_pushable_dl_task(struct rq *rq)
 {
 	struct task_struct *candidate;

 	if (!has_pushable_dl_tasks(rq))
 		return NULL;

 	candidate = rb_entry(rq->dl.pushable_dl_tasks_leftmost,
 			     struct task_struct, pushable_dl_tasks);

 	BUG_ON(rq->cpu != task_cpu(candidate));
 	BUG_ON(task_current(rq, candidate));
 	BUG_ON(candidate->nr_cpus_allowed <= 1);
 	BUG_ON(!candidate->on_rq);
 	BUG_ON(!dl_task(candidate));

 	return candidate;
 }
 /*
  * See if the non running -deadline tasks on this rq
  * can be sent to some other CPU where they can preempt
  * and start executing.
  *
  * Returns 0 when there is nothing to push: rq is not overloaded, has no
  * pushable task, the candidate turned out to be rq->curr, or a
  * reschedule of rq->curr was requested instead. Returns 1 in every
  * other case. Note that 1 is returned even when no task was moved; in
  * that case either the candidate has been removed from the pushable
  * set or no pushable task is left, so repeated calls still terminate.
  */
 static int push_dl_task(struct rq *rq)
 {
 	struct task_struct *next_task;
 	struct rq *later_rq;
 	if (!rq->dl.overloaded)
 		return 0;
 	next_task = pick_next_pushable_dl_task(rq);
 	if (!next_task)
 		return 0;
 retry:
 	if (unlikely(next_task == rq->curr)) {
 		WARN_ON(1);
 		return 0;
 	}
 	/*
 	 * If next_task preempts rq->curr, and rq->curr
 	 * can move away, it makes sense to just reschedule
 	 * without going further in pushing next_task.
 	 */
 	if (dl_task(rq->curr) &&
 	    dl_time_before(next_task->dl.deadline, rq->curr->dl.deadline) &&
 	    rq->curr->nr_cpus_allowed > 1) {
 		resched_task(rq->curr);
 		return 0;
 	}
 	/* We might release rq lock */
 	get_task_struct(next_task);
 	/* Will lock the rq it'll find */
 	later_rq = find_lock_later_rq(next_task, rq);
 	if (!later_rq) {
 		struct task_struct *task;
 		/*
 		 * We must check all this again, since
 		 * find_lock_later_rq releases rq->lock and it is
 		 * then possible that next_task has migrated.
 		 */
 		task = pick_next_pushable_dl_task(rq);
 		if (task_cpu(next_task) == rq->cpu && task == next_task) {
 			/*
 			 * The task is still there. We don't try
 			 * again, some other cpu will pull it when ready.
 			 */
 			dequeue_pushable_dl_task(rq, next_task);
 			goto out;
 		}
 		if (!task)
 			/* No more tasks */
 			goto out;
 		/* A different task is now the best candidate: switch to it. */
 		put_task_struct(next_task);
 		next_task = task;
 		goto retry;
 	}
 	/* Move next_task from rq to later_rq and ask later_rq to reschedule. */
 	deactivate_task(rq, next_task, 0);
 	set_task_cpu(next_task, later_rq->cpu);
 	activate_task(later_rq, next_task, 0);
 	resched_task(later_rq->curr);
 	double_unlock_balance(rq, later_rq);
 out:
 	/* Drop the reference taken by get_task_struct() above. */
 	put_task_struct(next_task);
 	return 1;
 }
 /*
  * Keep calling push_dl_task() on rq until it reports that there is
  * nothing left to push.
  */
 static void push_dl_tasks(struct rq *rq)
 {
 	int pushed;

 	/* Terminates as it moves a -deadline task */
 	do {
 		pushed = push_dl_task(rq);
 	} while (pushed);
 }
 /*
  * Try to pull -deadline tasks onto this_rq from the runqueues whose CPU
  * is set in the root domain's dlo_mask.
  *
  * For every such runqueue the first migratable task after its leftmost
  * one is considered. It is pulled when its deadline is earlier than
  * both the earliest deadline on this_rq (if any) and the deadline of
  * the last task pulled, and it would not preempt the task currently
  * running on the source runqueue.
  *
  * Returns 1 if at least one task was pulled, 0 otherwise.
  */
 static int pull_dl_task(struct rq *this_rq)
 {
 	int this_cpu = this_rq->cpu, ret = 0, cpu;
 	struct task_struct *p;
 	struct rq *src_rq;
 	/*
 	 * dmin is a u64: LONG_MAX would initialise it to only 2^31 - 1 on
 	 * 32-bit builds, a value that realistic deadlines exceed, so no
 	 * task would ever qualify there. LLONG_MAX gives the same starting
 	 * value on every architecture and is unchanged on 64-bit.
 	 */
 	u64 dmin = LLONG_MAX;
 	if (likely(!dl_overloaded(this_rq)))
 		return 0;
 	/*
 	 * Match the barrier from dl_set_overloaded; this guarantees that if we
 	 * see overloaded we must also see the dlo_mask bit.
 	 */
 	smp_rmb();
 	for_each_cpu(cpu, this_rq->rd->dlo_mask) {
 		if (this_cpu == cpu)
 			continue;
 		src_rq = cpu_rq(cpu);
 		/*
 		 * It looks racy, and it is! However, as in sched_rt.c,
 		 * we are fine with this.
 		 */
 		if (this_rq->dl.dl_nr_running &&
 		    dl_time_before(this_rq->dl.earliest_dl.curr,
 				   src_rq->dl.earliest_dl.next))
 			continue;
 		/* Might drop this_rq->lock */
 		double_lock_balance(this_rq, src_rq);
 		/*
 		 * If there are no more pullable tasks on the
 		 * rq, we're done with it.
 		 */
 		if (src_rq->dl.dl_nr_running <= 1)
 			goto skip;
 		p = pick_next_earliest_dl_task(src_rq, this_cpu);
 		/*
 		 * We found a task to be pulled if:
 		 *  - it preempts our current (if there's one),
 		 *  - it will preempt the last one we pulled (if any).
 		 */
 		if (p && dl_time_before(p->dl.deadline, dmin) &&
 		    (!this_rq->dl.dl_nr_running ||
 		     dl_time_before(p->dl.deadline,
 				    this_rq->dl.earliest_dl.curr))) {
 			WARN_ON(p == src_rq->curr);
 			WARN_ON(!p->on_rq);
 			/*
 			 * Leave p alone if its deadline is earlier than
 			 * the one of the task currently running on its
 			 * runqueue: we only pull tasks that would not
 			 * preempt the current task there.
 			 */
 			if (dl_time_before(p->dl.deadline,
 					   src_rq->curr->dl.deadline))
 				goto skip;
 			ret = 1;
 			deactivate_task(src_rq, p, 0);
 			set_task_cpu(p, this_cpu);
 			activate_task(this_rq, p, 0);
 			dmin = p->dl.deadline;
 			/* Is there any other task even earlier? */
 		}
 skip:
 		double_unlock_balance(this_rq, src_rq);
 	}
 	return ret;
 }
 /* post_schedule hook of the deadline class: try to push tasks away from rq. */
 static void post_schedule_dl(struct rq *rq)
 {
 	push_dl_tasks(rq);
 }
 /*
  * task_woken hook. When the woken task is not running and no reschedule
  * is about to happen on its runqueue, try pushing tasks away right now.
  */
 static void task_woken_dl(struct rq *rq, struct task_struct *p)
 {
 	struct task_struct *curr = rq->curr;

 	if (task_running(rq, p))
 		return;
 	if (test_tsk_need_resched(curr))
 		return;
 	if (!has_pushable_dl_tasks(rq))
 		return;
 	if (p->nr_cpus_allowed <= 1)
 		return;
 	if (!dl_task(curr))
 		return;

 	/* Push only if curr is pinned or wins against p. */
 	if (curr->nr_cpus_allowed >= 2 &&
 	    !dl_entity_preempt(&curr->dl, &p->dl))
 		return;

 	push_dl_tasks(rq);
 }
 /*
  * set_cpus_allowed hook: keep the migration accounting of p's runqueue
  * consistent when the affinity mask of p is about to become new_mask.
  *
  * Nothing is done unless p is queued on the deadline runqueue and the
  * change flips it between "can run on one CPU only" and "can migrate".
  * p->nr_cpus_allowed still holds the old weight here; this function
  * does not update it.
  */
 static void set_cpus_allowed_dl(struct task_struct *p,
 				const struct cpumask *new_mask)
 {
 	struct rq *rq;
 	int weight;
 	BUG_ON(!dl_task(p));
 	/*
 	 * Update only if the task is actually running (i.e.,
 	 * it is on the rq AND it is not throttled).
 	 */
 	if (!on_dl_rq(&p->dl))
 		return;
 	weight = cpumask_weight(new_mask);
 	/*
 	 * Only update if the process changes its state from whether it
 	 * can migrate or not.
 	 */
 	if ((p->nr_cpus_allowed > 1) == (weight > 1))
 		return;
 	rq = task_rq(p);
 	/*
 	 * The process used to be able to migrate OR it can now migrate
 	 */
 	if (weight <= 1) {
 		/* No longer migratable: drop it from the pushable set. */
 		if (!task_current(rq, p))
 			dequeue_pushable_dl_task(rq, p);
 		BUG_ON(!rq->dl.dl_nr_migratory);
 		rq->dl.dl_nr_migratory--;
 	} else {
 		/* Newly migratable: it becomes pushable unless it is running. */
 		if (!task_current(rq, p))
 			enqueue_pushable_dl_task(rq, p);
 		rq->dl.dl_nr_migratory++;
 	}
 	update_dl_migration(&rq->dl);
 }
 /* Assumes rq->lock is held */
 static void rq_online_dl(struct rq *rq)
 {
 	if (rq->dl.overloaded)
 		dl_set_overload(rq);
 	if (rq->dl.dl_nr_running > 0)
 		cpudl_set(&rq->rd->cpudl, rq->cpu, rq->dl.earliest_dl.curr, 1);
 }
 /* Assumes rq->lock is held */
 static void rq_offline_dl(struct rq *rq)
 {
 	if (rq->dl.overloaded)
 		dl_clear_overload(rq);
 	cpudl_set(&rq->rd->cpudl, rq->cpu, 0, 0);
 }
 void init_sched_dl_class(void)
 {
 	unsigned int i;
 	for_each_possible_cpu(i)
 		zalloc_cpumask_var_node(&per_cpu(local_cpu_mask_dl, i),
 					GFP_KERNEL, cpu_to_node(i));
 }
 #endif /* CONFIG_SMP */
 /*
  * switched_from hook: p is leaving the deadline class. Try to cancel
  * its deadline timer if it is active and p's policy is no longer a
  * -deadline one.
  */
 static void switched_from_dl(struct rq *rq, struct task_struct *p)
 {
 	struct hrtimer *timer = &p->dl.dl_timer;

 	if (hrtimer_active(timer)) {
 		if (!dl_policy(p->policy))
 			hrtimer_try_to_cancel(timer);
 	}

 #ifdef CONFIG_SMP
 	/*
 	 * p might have been the only -deadline task on the rq, so this
 	 * is the right place to try to pull another one from an
 	 * overloaded cpu, if any.
 	 */
 	if (!rq->dl.dl_nr_running)
 		pull_dl_task(rq);
 #endif
 }
 /*
  * When switching to -deadline, we may overload the rq, then
  * we try to push someone off, if possible.
  *
  * After that, unless p itself ended up on another runqueue, check
  * whether p should preempt the -deadline task currently running on rq.
  */
 static void switched_to_dl(struct rq *rq, struct task_struct *p)
 {
 	int check_resched = 1;
 	/*
 	 * If p is throttled, don't consider the possibility
 	 * of preempting rq->curr, the check will be done right
 	 * after its runtime will get replenished.
 	 */
 	if (unlikely(p->dl.dl_throttled))
 		return;
 	if (p->on_rq && rq->curr != p) {
 #ifdef CONFIG_SMP
 		if (rq->dl.overloaded && push_dl_task(rq) && rq != task_rq(p))
 			/*
 			 * The push moved p to another runqueue: skip the
 			 * preemption check below. We only reschedule if
 			 * pushing p away failed.
 			 */
 			check_resched = 0;
 #endif /* CONFIG_SMP */
 		if (check_resched && task_has_dl_policy(rq->curr))
 			check_preempt_curr_dl(rq, p, 0);
 	}
 }
 /*
  * If the scheduling parameters of a -deadline task changed,
  * a push or pull operation might be needed.
  *
  * When p is queued or is the task running on rq, a pull is attempted
  * (SMP only) and a reschedule of p is requested if needed; otherwise
  * the work is delegated to switched_to_dl(). The oldprio argument is
  * not used.
  */
 static void prio_changed_dl(struct rq *rq, struct task_struct *p,
 			    int oldprio)
 {
 	if (p->on_rq || rq->curr == p) {
 #ifdef CONFIG_SMP
 		/*
 		 * This might be too much, but unfortunately
 		 * we don't have the old deadline value, and
 		 * we can't argue if the task is increasing
 		 * or lowering its prio, so...
 		 */
 		if (!rq->dl.overloaded)
 			pull_dl_task(rq);
 		/*
 		 * If we now have a earlier deadline task than p,
 		 * then reschedule, provided p is still on this
 		 * runqueue.
 		 */
 		if (dl_time_before(rq->dl.earliest_dl.curr, p->dl.deadline) &&
 		    rq->curr == p)
 			resched_task(p);
 #else
 		/*
 		 * Again, we don't know if p has a earlier
 		 * or later deadline, so let's blindly set a
 		 * (maybe not needed) rescheduling point.
 		 */
 		resched_task(p);
 #endif /* CONFIG_SMP */
 	} else
 		switched_to_dl(rq, p);
 }
 /*
  * Method table of the deadline scheduling class. The class that
  * follows it in the chain is rt_sched_class. The load-balancing hooks
  * are only provided on CONFIG_SMP builds.
  */
 const struct sched_class dl_sched_class = {
 	.next			= &rt_sched_class,
 	.enqueue_task		= enqueue_task_dl,
 	.dequeue_task		= dequeue_task_dl,
 	.yield_task		= yield_task_dl,
 	.check_preempt_curr	= check_preempt_curr_dl,
 	.pick_next_task		= pick_next_task_dl,
 	.put_prev_task		= put_prev_task_dl,
 #ifdef CONFIG_SMP
 	.select_task_rq		= select_task_rq_dl,
 	.set_cpus_allowed       = set_cpus_allowed_dl,
 	.rq_online              = rq_online_dl,
 	.rq_offline             = rq_offline_dl,
 	.post_schedule		= post_schedule_dl,
 	.task_woken		= task_woken_dl,
 #endif
 	.set_curr_task		= set_curr_task_dl,
 	.task_tick		= task_tick_dl,
 	.task_fork              = task_fork_dl,
 	.task_dead		= task_dead_dl,
 	.prio_changed           = prio_changed_dl,
 	.switched_from		= switched_from_dl,
 	.switched_to		= switched_to_dl,
 };