Commit 2439b696cb5303f1eeb6aeebcee19e0056c3dd6e

Authored by Paul E. McKenney
1 parent 7807acdb6b

rcu: Shrink TINY_RCU by moving exit_rcu()

Now that TINY_PREEMPT_RCU is no more, exit_rcu() is always an empty
function.  But if TINY_RCU is going to have an empty function, it should
be in include/linux/rcutiny.h, where it does not bloat the kernel.
This commit therefore moves exit_rcu() out of kernel/rcupdate.c to
kernel/rcutree_plugin.h, and places a static inline empty function in
include/linux/rcutiny.h in order to shrink TINY_RCU a bit.

Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Reviewed-by: Josh Triplett <josh@joshtriplett.org>

Showing 5 changed files with 33 additions and 27 deletions (inline diff)
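For orientation before the per-file hunks: the only change to include/linux/rcupdate.h itself is the removal of the common extern declaration of exit_rcu() (the two lines marked "-" in the hunk below). The sketch that follows is reconstructed from the commit message, not copied from the patch; the rcutiny.h stub is stated there explicitly, while the placement of the remaining declaration and of the non-empty body is only summarized here.

/*
 * Sketch of the intended split (illustration only; see the commit's
 * rcutiny.h and rcutree_plugin.h hunks for the authoritative code).
 */
#ifdef CONFIG_TINY_RCU
/* TINY_RCU: an empty static inline in include/linux/rcutiny.h adds no code. */
static inline void exit_rcu(void)
{
}
#else
/*
 * Preemptible tree RCU: the non-empty implementation moves from
 * kernel/rcupdate.c into kernel/rcutree_plugin.h, so the declaration
 * no longer needs to live in the common include/linux/rcupdate.h.
 */
extern void exit_rcu(void);
#endif

Because the TINY_RCU variant is a static inline in the header, calls to it compile away entirely, which is where the size saving comes from.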

include/linux/rcupdate.h
1 /* 1 /*
2 * Read-Copy Update mechanism for mutual exclusion 2 * Read-Copy Update mechanism for mutual exclusion
3 * 3 *
4 * This program is free software; you can redistribute it and/or modify 4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by 5 * it under the terms of the GNU General Public License as published by
6 * the Free Software Foundation; either version 2 of the License, or 6 * the Free Software Foundation; either version 2 of the License, or
7 * (at your option) any later version. 7 * (at your option) any later version.
8 * 8 *
9 * This program is distributed in the hope that it will be useful, 9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of 10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details. 12 * GNU General Public License for more details.
13 * 13 *
14 * You should have received a copy of the GNU General Public License 14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write to the Free Software 15 * along with this program; if not, write to the Free Software
16 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 16 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
17 * 17 *
18 * Copyright IBM Corporation, 2001 18 * Copyright IBM Corporation, 2001
19 * 19 *
20 * Author: Dipankar Sarma <dipankar@in.ibm.com> 20 * Author: Dipankar Sarma <dipankar@in.ibm.com>
21 * 21 *
22 * Based on the original work by Paul McKenney <paulmck@us.ibm.com> 22 * Based on the original work by Paul McKenney <paulmck@us.ibm.com>
23 * and inputs from Rusty Russell, Andrea Arcangeli and Andi Kleen. 23 * and inputs from Rusty Russell, Andrea Arcangeli and Andi Kleen.
24 * Papers: 24 * Papers:
25 * http://www.rdrop.com/users/paulmck/paper/rclockpdcsproof.pdf 25 * http://www.rdrop.com/users/paulmck/paper/rclockpdcsproof.pdf
26 * http://lse.sourceforge.net/locking/rclock_OLS.2001.05.01c.sc.pdf (OLS2001) 26 * http://lse.sourceforge.net/locking/rclock_OLS.2001.05.01c.sc.pdf (OLS2001)
27 * 27 *
28 * For detailed explanation of Read-Copy Update mechanism see - 28 * For detailed explanation of Read-Copy Update mechanism see -
29 * http://lse.sourceforge.net/locking/rcupdate.html 29 * http://lse.sourceforge.net/locking/rcupdate.html
30 * 30 *
31 */ 31 */
32 32
33 #ifndef __LINUX_RCUPDATE_H 33 #ifndef __LINUX_RCUPDATE_H
34 #define __LINUX_RCUPDATE_H 34 #define __LINUX_RCUPDATE_H
35 35
36 #include <linux/types.h> 36 #include <linux/types.h>
37 #include <linux/cache.h> 37 #include <linux/cache.h>
38 #include <linux/spinlock.h> 38 #include <linux/spinlock.h>
39 #include <linux/threads.h> 39 #include <linux/threads.h>
40 #include <linux/cpumask.h> 40 #include <linux/cpumask.h>
41 #include <linux/seqlock.h> 41 #include <linux/seqlock.h>
42 #include <linux/lockdep.h> 42 #include <linux/lockdep.h>
43 #include <linux/completion.h> 43 #include <linux/completion.h>
44 #include <linux/debugobjects.h> 44 #include <linux/debugobjects.h>
45 #include <linux/bug.h> 45 #include <linux/bug.h>
46 #include <linux/compiler.h> 46 #include <linux/compiler.h>
47 47
48 #ifdef CONFIG_RCU_TORTURE_TEST 48 #ifdef CONFIG_RCU_TORTURE_TEST
49 extern int rcutorture_runnable; /* for sysctl */ 49 extern int rcutorture_runnable; /* for sysctl */
50 #endif /* #ifdef CONFIG_RCU_TORTURE_TEST */ 50 #endif /* #ifdef CONFIG_RCU_TORTURE_TEST */
51 51
52 #if defined(CONFIG_TREE_RCU) || defined(CONFIG_TREE_PREEMPT_RCU) 52 #if defined(CONFIG_TREE_RCU) || defined(CONFIG_TREE_PREEMPT_RCU)
53 extern void rcutorture_record_test_transition(void); 53 extern void rcutorture_record_test_transition(void);
54 extern void rcutorture_record_progress(unsigned long vernum); 54 extern void rcutorture_record_progress(unsigned long vernum);
55 extern void do_trace_rcu_torture_read(char *rcutorturename, 55 extern void do_trace_rcu_torture_read(char *rcutorturename,
56 struct rcu_head *rhp, 56 struct rcu_head *rhp,
57 unsigned long secs, 57 unsigned long secs,
58 unsigned long c_old, 58 unsigned long c_old,
59 unsigned long c); 59 unsigned long c);
60 #else 60 #else
61 static inline void rcutorture_record_test_transition(void) 61 static inline void rcutorture_record_test_transition(void)
62 { 62 {
63 } 63 }
64 static inline void rcutorture_record_progress(unsigned long vernum) 64 static inline void rcutorture_record_progress(unsigned long vernum)
65 { 65 {
66 } 66 }
67 #ifdef CONFIG_RCU_TRACE 67 #ifdef CONFIG_RCU_TRACE
68 extern void do_trace_rcu_torture_read(char *rcutorturename, 68 extern void do_trace_rcu_torture_read(char *rcutorturename,
69 struct rcu_head *rhp, 69 struct rcu_head *rhp,
70 unsigned long secs, 70 unsigned long secs,
71 unsigned long c_old, 71 unsigned long c_old,
72 unsigned long c); 72 unsigned long c);
73 #else 73 #else
74 #define do_trace_rcu_torture_read(rcutorturename, rhp, secs, c_old, c) \ 74 #define do_trace_rcu_torture_read(rcutorturename, rhp, secs, c_old, c) \
75 do { } while (0) 75 do { } while (0)
76 #endif 76 #endif
77 #endif 77 #endif
78 78
79 #define UINT_CMP_GE(a, b) (UINT_MAX / 2 >= (a) - (b)) 79 #define UINT_CMP_GE(a, b) (UINT_MAX / 2 >= (a) - (b))
80 #define UINT_CMP_LT(a, b) (UINT_MAX / 2 < (a) - (b)) 80 #define UINT_CMP_LT(a, b) (UINT_MAX / 2 < (a) - (b))
81 #define ULONG_CMP_GE(a, b) (ULONG_MAX / 2 >= (a) - (b)) 81 #define ULONG_CMP_GE(a, b) (ULONG_MAX / 2 >= (a) - (b))
82 #define ULONG_CMP_LT(a, b) (ULONG_MAX / 2 < (a) - (b)) 82 #define ULONG_CMP_LT(a, b) (ULONG_MAX / 2 < (a) - (b))
83 #define ulong2long(a) (*(long *)(&(a))) 83 #define ulong2long(a) (*(long *)(&(a)))
84 84
85 /* Exported common interfaces */ 85 /* Exported common interfaces */
86 86
87 #ifdef CONFIG_PREEMPT_RCU 87 #ifdef CONFIG_PREEMPT_RCU
88 88
89 /** 89 /**
90 * call_rcu() - Queue an RCU callback for invocation after a grace period. 90 * call_rcu() - Queue an RCU callback for invocation after a grace period.
91 * @head: structure to be used for queueing the RCU updates. 91 * @head: structure to be used for queueing the RCU updates.
92 * @func: actual callback function to be invoked after the grace period 92 * @func: actual callback function to be invoked after the grace period
93 * 93 *
94 * The callback function will be invoked some time after a full grace 94 * The callback function will be invoked some time after a full grace
95 * period elapses, in other words after all pre-existing RCU read-side 95 * period elapses, in other words after all pre-existing RCU read-side
96 * critical sections have completed. However, the callback function 96 * critical sections have completed. However, the callback function
97 * might well execute concurrently with RCU read-side critical sections 97 * might well execute concurrently with RCU read-side critical sections
98 * that started after call_rcu() was invoked. RCU read-side critical 98 * that started after call_rcu() was invoked. RCU read-side critical
99 * sections are delimited by rcu_read_lock() and rcu_read_unlock(), 99 * sections are delimited by rcu_read_lock() and rcu_read_unlock(),
100 * and may be nested. 100 * and may be nested.
101 * 101 *
102 * Note that all CPUs must agree that the grace period extended beyond 102 * Note that all CPUs must agree that the grace period extended beyond
103 * all pre-existing RCU read-side critical section. On systems with more 103 * all pre-existing RCU read-side critical section. On systems with more
104 * than one CPU, this means that when "func()" is invoked, each CPU is 104 * than one CPU, this means that when "func()" is invoked, each CPU is
105 * guaranteed to have executed a full memory barrier since the end of its 105 * guaranteed to have executed a full memory barrier since the end of its
106 * last RCU read-side critical section whose beginning preceded the call 106 * last RCU read-side critical section whose beginning preceded the call
107 * to call_rcu(). It also means that each CPU executing an RCU read-side 107 * to call_rcu(). It also means that each CPU executing an RCU read-side
108 * critical section that continues beyond the start of "func()" must have 108 * critical section that continues beyond the start of "func()" must have
109 * executed a memory barrier after the call_rcu() but before the beginning 109 * executed a memory barrier after the call_rcu() but before the beginning
110 * of that RCU read-side critical section. Note that these guarantees 110 * of that RCU read-side critical section. Note that these guarantees
111 * include CPUs that are offline, idle, or executing in user mode, as 111 * include CPUs that are offline, idle, or executing in user mode, as
112 * well as CPUs that are executing in the kernel. 112 * well as CPUs that are executing in the kernel.
113 * 113 *
114 * Furthermore, if CPU A invoked call_rcu() and CPU B invoked the 114 * Furthermore, if CPU A invoked call_rcu() and CPU B invoked the
115 * resulting RCU callback function "func()", then both CPU A and CPU B are 115 * resulting RCU callback function "func()", then both CPU A and CPU B are
116 * guaranteed to execute a full memory barrier during the time interval 116 * guaranteed to execute a full memory barrier during the time interval
117 * between the call to call_rcu() and the invocation of "func()" -- even 117 * between the call to call_rcu() and the invocation of "func()" -- even
118 * if CPU A and CPU B are the same CPU (but again only if the system has 118 * if CPU A and CPU B are the same CPU (but again only if the system has
119 * more than one CPU). 119 * more than one CPU).
120 */ 120 */
121 extern void call_rcu(struct rcu_head *head, 121 extern void call_rcu(struct rcu_head *head,
122 void (*func)(struct rcu_head *head)); 122 void (*func)(struct rcu_head *head));
123 123
124 #else /* #ifdef CONFIG_PREEMPT_RCU */ 124 #else /* #ifdef CONFIG_PREEMPT_RCU */
125 125
126 /* In classic RCU, call_rcu() is just call_rcu_sched(). */ 126 /* In classic RCU, call_rcu() is just call_rcu_sched(). */
127 #define call_rcu call_rcu_sched 127 #define call_rcu call_rcu_sched
128 128
129 #endif /* #else #ifdef CONFIG_PREEMPT_RCU */ 129 #endif /* #else #ifdef CONFIG_PREEMPT_RCU */
130 130
131 /** 131 /**
132 * call_rcu_bh() - Queue an RCU for invocation after a quicker grace period. 132 * call_rcu_bh() - Queue an RCU for invocation after a quicker grace period.
133 * @head: structure to be used for queueing the RCU updates. 133 * @head: structure to be used for queueing the RCU updates.
134 * @func: actual callback function to be invoked after the grace period 134 * @func: actual callback function to be invoked after the grace period
135 * 135 *
136 * The callback function will be invoked some time after a full grace 136 * The callback function will be invoked some time after a full grace
137 * period elapses, in other words after all currently executing RCU 137 * period elapses, in other words after all currently executing RCU
138 * read-side critical sections have completed. call_rcu_bh() assumes 138 * read-side critical sections have completed. call_rcu_bh() assumes
139 * that the read-side critical sections end on completion of a softirq 139 * that the read-side critical sections end on completion of a softirq
140 * handler. This means that read-side critical sections in process 140 * handler. This means that read-side critical sections in process
141 * context must not be interrupted by softirqs. This interface is to be 141 * context must not be interrupted by softirqs. This interface is to be
142 * used when most of the read-side critical sections are in softirq context. 142 * used when most of the read-side critical sections are in softirq context.
143 * RCU read-side critical sections are delimited by : 143 * RCU read-side critical sections are delimited by :
144 * - rcu_read_lock() and rcu_read_unlock(), if in interrupt context. 144 * - rcu_read_lock() and rcu_read_unlock(), if in interrupt context.
145 * OR 145 * OR
146 * - rcu_read_lock_bh() and rcu_read_unlock_bh(), if in process context. 146 * - rcu_read_lock_bh() and rcu_read_unlock_bh(), if in process context.
147 * These may be nested. 147 * These may be nested.
148 * 148 *
149 * See the description of call_rcu() for more detailed information on 149 * See the description of call_rcu() for more detailed information on
150 * memory ordering guarantees. 150 * memory ordering guarantees.
151 */ 151 */
152 extern void call_rcu_bh(struct rcu_head *head, 152 extern void call_rcu_bh(struct rcu_head *head,
153 void (*func)(struct rcu_head *head)); 153 void (*func)(struct rcu_head *head));
154 154
155 /** 155 /**
156 * call_rcu_sched() - Queue an RCU for invocation after sched grace period. 156 * call_rcu_sched() - Queue an RCU for invocation after sched grace period.
157 * @head: structure to be used for queueing the RCU updates. 157 * @head: structure to be used for queueing the RCU updates.
158 * @func: actual callback function to be invoked after the grace period 158 * @func: actual callback function to be invoked after the grace period
159 * 159 *
160 * The callback function will be invoked some time after a full grace 160 * The callback function will be invoked some time after a full grace
161 * period elapses, in other words after all currently executing RCU 161 * period elapses, in other words after all currently executing RCU
162 * read-side critical sections have completed. call_rcu_sched() assumes 162 * read-side critical sections have completed. call_rcu_sched() assumes
163 * that the read-side critical sections end on enabling of preemption 163 * that the read-side critical sections end on enabling of preemption
164 * or on voluntary preemption. 164 * or on voluntary preemption.
165 * RCU read-side critical sections are delimited by : 165 * RCU read-side critical sections are delimited by :
166 * - rcu_read_lock_sched() and rcu_read_unlock_sched(), 166 * - rcu_read_lock_sched() and rcu_read_unlock_sched(),
167 * OR 167 * OR
168 * anything that disables preemption. 168 * anything that disables preemption.
169 * These may be nested. 169 * These may be nested.
170 * 170 *
171 * See the description of call_rcu() for more detailed information on 171 * See the description of call_rcu() for more detailed information on
172 * memory ordering guarantees. 172 * memory ordering guarantees.
173 */ 173 */
174 extern void call_rcu_sched(struct rcu_head *head, 174 extern void call_rcu_sched(struct rcu_head *head,
175 void (*func)(struct rcu_head *rcu)); 175 void (*func)(struct rcu_head *rcu));
176 176
177 extern void synchronize_sched(void); 177 extern void synchronize_sched(void);
178 178
179 #ifdef CONFIG_PREEMPT_RCU 179 #ifdef CONFIG_PREEMPT_RCU
180 180
181 extern void __rcu_read_lock(void); 181 extern void __rcu_read_lock(void);
182 extern void __rcu_read_unlock(void); 182 extern void __rcu_read_unlock(void);
183 extern void rcu_read_unlock_special(struct task_struct *t); 183 extern void rcu_read_unlock_special(struct task_struct *t);
184 void synchronize_rcu(void); 184 void synchronize_rcu(void);
185 185
186 /* 186 /*
187 * Defined as a macro as it is a very low level header included from 187 * Defined as a macro as it is a very low level header included from
188 * areas that don't even know about current. This gives the rcu_read_lock() 188 * areas that don't even know about current. This gives the rcu_read_lock()
189 * nesting depth, but makes sense only if CONFIG_PREEMPT_RCU -- in other 189 * nesting depth, but makes sense only if CONFIG_PREEMPT_RCU -- in other
190 * types of kernel builds, the rcu_read_lock() nesting depth is unknowable. 190 * types of kernel builds, the rcu_read_lock() nesting depth is unknowable.
191 */ 191 */
192 #define rcu_preempt_depth() (current->rcu_read_lock_nesting) 192 #define rcu_preempt_depth() (current->rcu_read_lock_nesting)
193 193
194 #else /* #ifdef CONFIG_PREEMPT_RCU */ 194 #else /* #ifdef CONFIG_PREEMPT_RCU */
195 195
196 static inline void __rcu_read_lock(void) 196 static inline void __rcu_read_lock(void)
197 { 197 {
198 preempt_disable(); 198 preempt_disable();
199 } 199 }
200 200
201 static inline void __rcu_read_unlock(void) 201 static inline void __rcu_read_unlock(void)
202 { 202 {
203 preempt_enable(); 203 preempt_enable();
204 } 204 }
205 205
206 static inline void synchronize_rcu(void) 206 static inline void synchronize_rcu(void)
207 { 207 {
208 synchronize_sched(); 208 synchronize_sched();
209 } 209 }
210 210
211 static inline int rcu_preempt_depth(void) 211 static inline int rcu_preempt_depth(void)
212 { 212 {
213 return 0; 213 return 0;
214 } 214 }
215 215
216 #endif /* #else #ifdef CONFIG_PREEMPT_RCU */ 216 #endif /* #else #ifdef CONFIG_PREEMPT_RCU */
217 217
218 /* Internal to kernel */ 218 /* Internal to kernel */
219 extern void rcu_init(void); 219 extern void rcu_init(void);
220 extern void rcu_sched_qs(int cpu); 220 extern void rcu_sched_qs(int cpu);
221 extern void rcu_bh_qs(int cpu); 221 extern void rcu_bh_qs(int cpu);
222 extern void rcu_check_callbacks(int cpu, int user); 222 extern void rcu_check_callbacks(int cpu, int user);
223 struct notifier_block; 223 struct notifier_block;
224 extern void rcu_idle_enter(void); 224 extern void rcu_idle_enter(void);
225 extern void rcu_idle_exit(void); 225 extern void rcu_idle_exit(void);
226 extern void rcu_irq_enter(void); 226 extern void rcu_irq_enter(void);
227 extern void rcu_irq_exit(void); 227 extern void rcu_irq_exit(void);
228 228
229 #ifdef CONFIG_RCU_USER_QS 229 #ifdef CONFIG_RCU_USER_QS
230 extern void rcu_user_enter(void); 230 extern void rcu_user_enter(void);
231 extern void rcu_user_exit(void); 231 extern void rcu_user_exit(void);
232 extern void rcu_user_enter_after_irq(void); 232 extern void rcu_user_enter_after_irq(void);
233 extern void rcu_user_exit_after_irq(void); 233 extern void rcu_user_exit_after_irq(void);
234 #else 234 #else
235 static inline void rcu_user_enter(void) { } 235 static inline void rcu_user_enter(void) { }
236 static inline void rcu_user_exit(void) { } 236 static inline void rcu_user_exit(void) { }
237 static inline void rcu_user_enter_after_irq(void) { } 237 static inline void rcu_user_enter_after_irq(void) { }
238 static inline void rcu_user_exit_after_irq(void) { } 238 static inline void rcu_user_exit_after_irq(void) { }
239 static inline void rcu_user_hooks_switch(struct task_struct *prev, 239 static inline void rcu_user_hooks_switch(struct task_struct *prev,
240 struct task_struct *next) { } 240 struct task_struct *next) { }
241 #endif /* CONFIG_RCU_USER_QS */ 241 #endif /* CONFIG_RCU_USER_QS */
242 242
243 extern void exit_rcu(void);
244
245 /** 243 /**
246 * RCU_NONIDLE - Indicate idle-loop code that needs RCU readers 244 * RCU_NONIDLE - Indicate idle-loop code that needs RCU readers
247 * @a: Code that RCU needs to pay attention to. 245 * @a: Code that RCU needs to pay attention to.
248 * 246 *
249 * RCU, RCU-bh, and RCU-sched read-side critical sections are forbidden 247 * RCU, RCU-bh, and RCU-sched read-side critical sections are forbidden
250 * in the inner idle loop, that is, between the rcu_idle_enter() and 248 * in the inner idle loop, that is, between the rcu_idle_enter() and
251 * the rcu_idle_exit() -- RCU will happily ignore any such read-side 249 * the rcu_idle_exit() -- RCU will happily ignore any such read-side
252 * critical sections. However, things like powertop need tracepoints 250 * critical sections. However, things like powertop need tracepoints
253 * in the inner idle loop. 251 * in the inner idle loop.
254 * 252 *
255 * This macro provides the way out: RCU_NONIDLE(do_something_with_RCU()) 253 * This macro provides the way out: RCU_NONIDLE(do_something_with_RCU())
256 * will tell RCU that it needs to pay attending, invoke its argument 254 * will tell RCU that it needs to pay attending, invoke its argument
257 * (in this example, a call to the do_something_with_RCU() function), 255 * (in this example, a call to the do_something_with_RCU() function),
258 * and then tell RCU to go back to ignoring this CPU. It is permissible 256 * and then tell RCU to go back to ignoring this CPU. It is permissible
259 * to nest RCU_NONIDLE() wrappers, but the nesting level is currently 257 * to nest RCU_NONIDLE() wrappers, but the nesting level is currently
260 * quite limited. If deeper nesting is required, it will be necessary 258 * quite limited. If deeper nesting is required, it will be necessary
261 * to adjust DYNTICK_TASK_NESTING_VALUE accordingly. 259 * to adjust DYNTICK_TASK_NESTING_VALUE accordingly.
262 */ 260 */
263 #define RCU_NONIDLE(a) \ 261 #define RCU_NONIDLE(a) \
264 do { \ 262 do { \
265 rcu_irq_enter(); \ 263 rcu_irq_enter(); \
266 do { a; } while (0); \ 264 do { a; } while (0); \
267 rcu_irq_exit(); \ 265 rcu_irq_exit(); \
268 } while (0) 266 } while (0)
269 267
270 /* 268 /*
271 * Infrastructure to implement the synchronize_() primitives in 269 * Infrastructure to implement the synchronize_() primitives in
272 * TREE_RCU and rcu_barrier_() primitives in TINY_RCU. 270 * TREE_RCU and rcu_barrier_() primitives in TINY_RCU.
273 */ 271 */
274 272
275 typedef void call_rcu_func_t(struct rcu_head *head, 273 typedef void call_rcu_func_t(struct rcu_head *head,
276 void (*func)(struct rcu_head *head)); 274 void (*func)(struct rcu_head *head));
277 void wait_rcu_gp(call_rcu_func_t crf); 275 void wait_rcu_gp(call_rcu_func_t crf);
278 276
279 #if defined(CONFIG_TREE_RCU) || defined(CONFIG_TREE_PREEMPT_RCU) 277 #if defined(CONFIG_TREE_RCU) || defined(CONFIG_TREE_PREEMPT_RCU)
280 #include <linux/rcutree.h> 278 #include <linux/rcutree.h>
281 #elif defined(CONFIG_TINY_RCU) 279 #elif defined(CONFIG_TINY_RCU)
282 #include <linux/rcutiny.h> 280 #include <linux/rcutiny.h>
283 #else 281 #else
284 #error "Unknown RCU implementation specified to kernel configuration" 282 #error "Unknown RCU implementation specified to kernel configuration"
285 #endif 283 #endif
286 284
287 /* 285 /*
288 * init_rcu_head_on_stack()/destroy_rcu_head_on_stack() are needed for dynamic 286 * init_rcu_head_on_stack()/destroy_rcu_head_on_stack() are needed for dynamic
289 * initialization and destruction of rcu_head on the stack. rcu_head structures 287 * initialization and destruction of rcu_head on the stack. rcu_head structures
290 * allocated dynamically in the heap or defined statically don't need any 288 * allocated dynamically in the heap or defined statically don't need any
291 * initialization. 289 * initialization.
292 */ 290 */
293 #ifdef CONFIG_DEBUG_OBJECTS_RCU_HEAD 291 #ifdef CONFIG_DEBUG_OBJECTS_RCU_HEAD
294 extern void init_rcu_head_on_stack(struct rcu_head *head); 292 extern void init_rcu_head_on_stack(struct rcu_head *head);
295 extern void destroy_rcu_head_on_stack(struct rcu_head *head); 293 extern void destroy_rcu_head_on_stack(struct rcu_head *head);
296 #else /* !CONFIG_DEBUG_OBJECTS_RCU_HEAD */ 294 #else /* !CONFIG_DEBUG_OBJECTS_RCU_HEAD */
297 static inline void init_rcu_head_on_stack(struct rcu_head *head) 295 static inline void init_rcu_head_on_stack(struct rcu_head *head)
298 { 296 {
299 } 297 }
300 298
301 static inline void destroy_rcu_head_on_stack(struct rcu_head *head) 299 static inline void destroy_rcu_head_on_stack(struct rcu_head *head)
302 { 300 {
303 } 301 }
304 #endif /* #else !CONFIG_DEBUG_OBJECTS_RCU_HEAD */ 302 #endif /* #else !CONFIG_DEBUG_OBJECTS_RCU_HEAD */
305 303
306 #if defined(CONFIG_DEBUG_LOCK_ALLOC) || defined(CONFIG_SMP) 304 #if defined(CONFIG_DEBUG_LOCK_ALLOC) || defined(CONFIG_SMP)
307 extern int rcu_is_cpu_idle(void); 305 extern int rcu_is_cpu_idle(void);
308 #endif /* #if defined(CONFIG_DEBUG_LOCK_ALLOC) || defined(CONFIG_SMP) */ 306 #endif /* #if defined(CONFIG_DEBUG_LOCK_ALLOC) || defined(CONFIG_SMP) */
309 307
310 #if defined(CONFIG_HOTPLUG_CPU) && defined(CONFIG_PROVE_RCU) 308 #if defined(CONFIG_HOTPLUG_CPU) && defined(CONFIG_PROVE_RCU)
311 bool rcu_lockdep_current_cpu_online(void); 309 bool rcu_lockdep_current_cpu_online(void);
312 #else /* #if defined(CONFIG_HOTPLUG_CPU) && defined(CONFIG_PROVE_RCU) */ 310 #else /* #if defined(CONFIG_HOTPLUG_CPU) && defined(CONFIG_PROVE_RCU) */
313 static inline bool rcu_lockdep_current_cpu_online(void) 311 static inline bool rcu_lockdep_current_cpu_online(void)
314 { 312 {
315 return 1; 313 return 1;
316 } 314 }
317 #endif /* #else #if defined(CONFIG_HOTPLUG_CPU) && defined(CONFIG_PROVE_RCU) */ 315 #endif /* #else #if defined(CONFIG_HOTPLUG_CPU) && defined(CONFIG_PROVE_RCU) */
318 316
319 #ifdef CONFIG_DEBUG_LOCK_ALLOC 317 #ifdef CONFIG_DEBUG_LOCK_ALLOC
320 318
321 static inline void rcu_lock_acquire(struct lockdep_map *map) 319 static inline void rcu_lock_acquire(struct lockdep_map *map)
322 { 320 {
323 lock_acquire(map, 0, 0, 2, 1, NULL, _THIS_IP_); 321 lock_acquire(map, 0, 0, 2, 1, NULL, _THIS_IP_);
324 } 322 }
325 323
326 static inline void rcu_lock_release(struct lockdep_map *map) 324 static inline void rcu_lock_release(struct lockdep_map *map)
327 { 325 {
328 lock_release(map, 1, _THIS_IP_); 326 lock_release(map, 1, _THIS_IP_);
329 } 327 }
330 328
331 extern struct lockdep_map rcu_lock_map; 329 extern struct lockdep_map rcu_lock_map;
332 extern struct lockdep_map rcu_bh_lock_map; 330 extern struct lockdep_map rcu_bh_lock_map;
333 extern struct lockdep_map rcu_sched_lock_map; 331 extern struct lockdep_map rcu_sched_lock_map;
334 extern int debug_lockdep_rcu_enabled(void); 332 extern int debug_lockdep_rcu_enabled(void);
335 333
336 /** 334 /**
337 * rcu_read_lock_held() - might we be in RCU read-side critical section? 335 * rcu_read_lock_held() - might we be in RCU read-side critical section?
338 * 336 *
339 * If CONFIG_DEBUG_LOCK_ALLOC is selected, returns nonzero iff in an RCU 337 * If CONFIG_DEBUG_LOCK_ALLOC is selected, returns nonzero iff in an RCU
340 * read-side critical section. In absence of CONFIG_DEBUG_LOCK_ALLOC, 338 * read-side critical section. In absence of CONFIG_DEBUG_LOCK_ALLOC,
341 * this assumes we are in an RCU read-side critical section unless it can 339 * this assumes we are in an RCU read-side critical section unless it can
342 * prove otherwise. This is useful for debug checks in functions that 340 * prove otherwise. This is useful for debug checks in functions that
343 * require that they be called within an RCU read-side critical section. 341 * require that they be called within an RCU read-side critical section.
344 * 342 *
345 * Checks debug_lockdep_rcu_enabled() to prevent false positives during boot 343 * Checks debug_lockdep_rcu_enabled() to prevent false positives during boot
346 * and while lockdep is disabled. 344 * and while lockdep is disabled.
347 * 345 *
348 * Note that rcu_read_lock() and the matching rcu_read_unlock() must 346 * Note that rcu_read_lock() and the matching rcu_read_unlock() must
349 * occur in the same context, for example, it is illegal to invoke 347 * occur in the same context, for example, it is illegal to invoke
350 * rcu_read_unlock() in process context if the matching rcu_read_lock() 348 * rcu_read_unlock() in process context if the matching rcu_read_lock()
351 * was invoked from within an irq handler. 349 * was invoked from within an irq handler.
352 * 350 *
353 * Note that rcu_read_lock() is disallowed if the CPU is either idle or 351 * Note that rcu_read_lock() is disallowed if the CPU is either idle or
354 * offline from an RCU perspective, so check for those as well. 352 * offline from an RCU perspective, so check for those as well.
355 */ 353 */
356 static inline int rcu_read_lock_held(void) 354 static inline int rcu_read_lock_held(void)
357 { 355 {
358 if (!debug_lockdep_rcu_enabled()) 356 if (!debug_lockdep_rcu_enabled())
359 return 1; 357 return 1;
360 if (rcu_is_cpu_idle()) 358 if (rcu_is_cpu_idle())
361 return 0; 359 return 0;
362 if (!rcu_lockdep_current_cpu_online()) 360 if (!rcu_lockdep_current_cpu_online())
363 return 0; 361 return 0;
364 return lock_is_held(&rcu_lock_map); 362 return lock_is_held(&rcu_lock_map);
365 } 363 }
366 364
367 /* 365 /*
368 * rcu_read_lock_bh_held() is defined out of line to avoid #include-file 366 * rcu_read_lock_bh_held() is defined out of line to avoid #include-file
369 * hell. 367 * hell.
370 */ 368 */
371 extern int rcu_read_lock_bh_held(void); 369 extern int rcu_read_lock_bh_held(void);
372 370
373 /** 371 /**
374 * rcu_read_lock_sched_held() - might we be in RCU-sched read-side critical section? 372 * rcu_read_lock_sched_held() - might we be in RCU-sched read-side critical section?
375 * 373 *
376 * If CONFIG_DEBUG_LOCK_ALLOC is selected, returns nonzero iff in an 374 * If CONFIG_DEBUG_LOCK_ALLOC is selected, returns nonzero iff in an
377 * RCU-sched read-side critical section. In absence of 375 * RCU-sched read-side critical section. In absence of
378 * CONFIG_DEBUG_LOCK_ALLOC, this assumes we are in an RCU-sched read-side 376 * CONFIG_DEBUG_LOCK_ALLOC, this assumes we are in an RCU-sched read-side
379 * critical section unless it can prove otherwise. Note that disabling 377 * critical section unless it can prove otherwise. Note that disabling
380 * of preemption (including disabling irqs) counts as an RCU-sched 378 * of preemption (including disabling irqs) counts as an RCU-sched
381 * read-side critical section. This is useful for debug checks in functions 379 * read-side critical section. This is useful for debug checks in functions
382 * that required that they be called within an RCU-sched read-side 380 * that required that they be called within an RCU-sched read-side
383 * critical section. 381 * critical section.
384 * 382 *
385 * Check debug_lockdep_rcu_enabled() to prevent false positives during boot 383 * Check debug_lockdep_rcu_enabled() to prevent false positives during boot
386 * and while lockdep is disabled. 384 * and while lockdep is disabled.
387 * 385 *
388 * Note that if the CPU is in the idle loop from an RCU point of 386 * Note that if the CPU is in the idle loop from an RCU point of
389 * view (ie: that we are in the section between rcu_idle_enter() and 387 * view (ie: that we are in the section between rcu_idle_enter() and
390 * rcu_idle_exit()) then rcu_read_lock_held() returns false even if the CPU 388 * rcu_idle_exit()) then rcu_read_lock_held() returns false even if the CPU
391 * did an rcu_read_lock(). The reason for this is that RCU ignores CPUs 389 * did an rcu_read_lock(). The reason for this is that RCU ignores CPUs
392 * that are in such a section, considering these as in extended quiescent 390 * that are in such a section, considering these as in extended quiescent
393 * state, so such a CPU is effectively never in an RCU read-side critical 391 * state, so such a CPU is effectively never in an RCU read-side critical
394 * section regardless of what RCU primitives it invokes. This state of 392 * section regardless of what RCU primitives it invokes. This state of
395 * affairs is required --- we need to keep an RCU-free window in idle 393 * affairs is required --- we need to keep an RCU-free window in idle
396 * where the CPU may possibly enter into low power mode. This way we can 394 * where the CPU may possibly enter into low power mode. This way we can
397 * notice an extended quiescent state to other CPUs that started a grace 395 * notice an extended quiescent state to other CPUs that started a grace
398 * period. Otherwise we would delay any grace period as long as we run in 396 * period. Otherwise we would delay any grace period as long as we run in
399 * the idle task. 397 * the idle task.
400 * 398 *
401 * Similarly, we avoid claiming an SRCU read lock held if the current 399 * Similarly, we avoid claiming an SRCU read lock held if the current
402 * CPU is offline. 400 * CPU is offline.
403 */ 401 */
404 #ifdef CONFIG_PREEMPT_COUNT 402 #ifdef CONFIG_PREEMPT_COUNT
405 static inline int rcu_read_lock_sched_held(void) 403 static inline int rcu_read_lock_sched_held(void)
406 { 404 {
407 int lockdep_opinion = 0; 405 int lockdep_opinion = 0;
408 406
409 if (!debug_lockdep_rcu_enabled()) 407 if (!debug_lockdep_rcu_enabled())
410 return 1; 408 return 1;
411 if (rcu_is_cpu_idle()) 409 if (rcu_is_cpu_idle())
412 return 0; 410 return 0;
413 if (!rcu_lockdep_current_cpu_online()) 411 if (!rcu_lockdep_current_cpu_online())
414 return 0; 412 return 0;
415 if (debug_locks) 413 if (debug_locks)
416 lockdep_opinion = lock_is_held(&rcu_sched_lock_map); 414 lockdep_opinion = lock_is_held(&rcu_sched_lock_map);
417 return lockdep_opinion || preempt_count() != 0 || irqs_disabled(); 415 return lockdep_opinion || preempt_count() != 0 || irqs_disabled();
418 } 416 }
419 #else /* #ifdef CONFIG_PREEMPT_COUNT */ 417 #else /* #ifdef CONFIG_PREEMPT_COUNT */
420 static inline int rcu_read_lock_sched_held(void) 418 static inline int rcu_read_lock_sched_held(void)
421 { 419 {
422 return 1; 420 return 1;
423 } 421 }
424 #endif /* #else #ifdef CONFIG_PREEMPT_COUNT */ 422 #endif /* #else #ifdef CONFIG_PREEMPT_COUNT */
425 423
426 #else /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */ 424 #else /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */
427 425
428 # define rcu_lock_acquire(a) do { } while (0) 426 # define rcu_lock_acquire(a) do { } while (0)
429 # define rcu_lock_release(a) do { } while (0) 427 # define rcu_lock_release(a) do { } while (0)
430 428
431 static inline int rcu_read_lock_held(void) 429 static inline int rcu_read_lock_held(void)
432 { 430 {
433 return 1; 431 return 1;
434 } 432 }
435 433
436 static inline int rcu_read_lock_bh_held(void) 434 static inline int rcu_read_lock_bh_held(void)
437 { 435 {
438 return 1; 436 return 1;
439 } 437 }
440 438
441 #ifdef CONFIG_PREEMPT_COUNT 439 #ifdef CONFIG_PREEMPT_COUNT
442 static inline int rcu_read_lock_sched_held(void) 440 static inline int rcu_read_lock_sched_held(void)
443 { 441 {
444 return preempt_count() != 0 || irqs_disabled(); 442 return preempt_count() != 0 || irqs_disabled();
445 } 443 }
446 #else /* #ifdef CONFIG_PREEMPT_COUNT */ 444 #else /* #ifdef CONFIG_PREEMPT_COUNT */
447 static inline int rcu_read_lock_sched_held(void) 445 static inline int rcu_read_lock_sched_held(void)
448 { 446 {
449 return 1; 447 return 1;
450 } 448 }
451 #endif /* #else #ifdef CONFIG_PREEMPT_COUNT */ 449 #endif /* #else #ifdef CONFIG_PREEMPT_COUNT */
452 450
453 #endif /* #else #ifdef CONFIG_DEBUG_LOCK_ALLOC */ 451 #endif /* #else #ifdef CONFIG_DEBUG_LOCK_ALLOC */
454 452
455 #ifdef CONFIG_PROVE_RCU 453 #ifdef CONFIG_PROVE_RCU
456 454
457 extern int rcu_my_thread_group_empty(void); 455 extern int rcu_my_thread_group_empty(void);
458 456
459 /** 457 /**
460 * rcu_lockdep_assert - emit lockdep splat if specified condition not met 458 * rcu_lockdep_assert - emit lockdep splat if specified condition not met
461 * @c: condition to check 459 * @c: condition to check
462 * @s: informative message 460 * @s: informative message
463 */ 461 */
464 #define rcu_lockdep_assert(c, s) \ 462 #define rcu_lockdep_assert(c, s) \
465 do { \ 463 do { \
466 static bool __section(.data.unlikely) __warned; \ 464 static bool __section(.data.unlikely) __warned; \
467 if (debug_lockdep_rcu_enabled() && !__warned && !(c)) { \ 465 if (debug_lockdep_rcu_enabled() && !__warned && !(c)) { \
468 __warned = true; \ 466 __warned = true; \
469 lockdep_rcu_suspicious(__FILE__, __LINE__, s); \ 467 lockdep_rcu_suspicious(__FILE__, __LINE__, s); \
470 } \ 468 } \
471 } while (0) 469 } while (0)
472 470
473 #if defined(CONFIG_PROVE_RCU) && !defined(CONFIG_PREEMPT_RCU) 471 #if defined(CONFIG_PROVE_RCU) && !defined(CONFIG_PREEMPT_RCU)
474 static inline void rcu_preempt_sleep_check(void) 472 static inline void rcu_preempt_sleep_check(void)
475 { 473 {
476 rcu_lockdep_assert(!lock_is_held(&rcu_lock_map), 474 rcu_lockdep_assert(!lock_is_held(&rcu_lock_map),
477 "Illegal context switch in RCU read-side critical section"); 475 "Illegal context switch in RCU read-side critical section");
478 } 476 }
479 #else /* #ifdef CONFIG_PROVE_RCU */ 477 #else /* #ifdef CONFIG_PROVE_RCU */
480 static inline void rcu_preempt_sleep_check(void) 478 static inline void rcu_preempt_sleep_check(void)
481 { 479 {
482 } 480 }
483 #endif /* #else #ifdef CONFIG_PROVE_RCU */ 481 #endif /* #else #ifdef CONFIG_PROVE_RCU */
484 482
485 #define rcu_sleep_check() \ 483 #define rcu_sleep_check() \
486 do { \ 484 do { \
487 rcu_preempt_sleep_check(); \ 485 rcu_preempt_sleep_check(); \
488 rcu_lockdep_assert(!lock_is_held(&rcu_bh_lock_map), \ 486 rcu_lockdep_assert(!lock_is_held(&rcu_bh_lock_map), \
489 "Illegal context switch in RCU-bh" \ 487 "Illegal context switch in RCU-bh" \
490 " read-side critical section"); \ 488 " read-side critical section"); \
491 rcu_lockdep_assert(!lock_is_held(&rcu_sched_lock_map), \ 489 rcu_lockdep_assert(!lock_is_held(&rcu_sched_lock_map), \
492 "Illegal context switch in RCU-sched"\ 490 "Illegal context switch in RCU-sched"\
493 " read-side critical section"); \ 491 " read-side critical section"); \
494 } while (0) 492 } while (0)
495 493
496 #else /* #ifdef CONFIG_PROVE_RCU */ 494 #else /* #ifdef CONFIG_PROVE_RCU */
497 495
498 #define rcu_lockdep_assert(c, s) do { } while (0) 496 #define rcu_lockdep_assert(c, s) do { } while (0)
499 #define rcu_sleep_check() do { } while (0) 497 #define rcu_sleep_check() do { } while (0)
500 498
501 #endif /* #else #ifdef CONFIG_PROVE_RCU */ 499 #endif /* #else #ifdef CONFIG_PROVE_RCU */
502 500
503 /* 501 /*
504 * Helper functions for rcu_dereference_check(), rcu_dereference_protected() 502 * Helper functions for rcu_dereference_check(), rcu_dereference_protected()
505 * and rcu_assign_pointer(). Some of these could be folded into their 503 * and rcu_assign_pointer(). Some of these could be folded into their
506 * callers, but they are left separate in order to ease introduction of 504 * callers, but they are left separate in order to ease introduction of
507 * multiple flavors of pointers to match the multiple flavors of RCU 505 * multiple flavors of pointers to match the multiple flavors of RCU
508 * (e.g., __rcu_bh, * __rcu_sched, and __srcu), should this make sense in 506 * (e.g., __rcu_bh, * __rcu_sched, and __srcu), should this make sense in
509 * the future. 507 * the future.
510 */ 508 */
511 509
512 #ifdef __CHECKER__ 510 #ifdef __CHECKER__
513 #define rcu_dereference_sparse(p, space) \ 511 #define rcu_dereference_sparse(p, space) \
514 ((void)(((typeof(*p) space *)p) == p)) 512 ((void)(((typeof(*p) space *)p) == p))
515 #else /* #ifdef __CHECKER__ */ 513 #else /* #ifdef __CHECKER__ */
516 #define rcu_dereference_sparse(p, space) 514 #define rcu_dereference_sparse(p, space)
517 #endif /* #else #ifdef __CHECKER__ */ 515 #endif /* #else #ifdef __CHECKER__ */
518 516
519 #define __rcu_access_pointer(p, space) \ 517 #define __rcu_access_pointer(p, space) \
520 ({ \ 518 ({ \
521 typeof(*p) *_________p1 = (typeof(*p)*__force )ACCESS_ONCE(p); \ 519 typeof(*p) *_________p1 = (typeof(*p)*__force )ACCESS_ONCE(p); \
522 rcu_dereference_sparse(p, space); \ 520 rcu_dereference_sparse(p, space); \
523 ((typeof(*p) __force __kernel *)(_________p1)); \ 521 ((typeof(*p) __force __kernel *)(_________p1)); \
524 }) 522 })
525 #define __rcu_dereference_check(p, c, space) \ 523 #define __rcu_dereference_check(p, c, space) \
526 ({ \ 524 ({ \
527 typeof(*p) *_________p1 = (typeof(*p)*__force )ACCESS_ONCE(p); \ 525 typeof(*p) *_________p1 = (typeof(*p)*__force )ACCESS_ONCE(p); \
528 rcu_lockdep_assert(c, "suspicious rcu_dereference_check()" \ 526 rcu_lockdep_assert(c, "suspicious rcu_dereference_check()" \
529 " usage"); \ 527 " usage"); \
530 rcu_dereference_sparse(p, space); \ 528 rcu_dereference_sparse(p, space); \
531 smp_read_barrier_depends(); \ 529 smp_read_barrier_depends(); \
532 ((typeof(*p) __force __kernel *)(_________p1)); \ 530 ((typeof(*p) __force __kernel *)(_________p1)); \
533 }) 531 })
534 #define __rcu_dereference_protected(p, c, space) \ 532 #define __rcu_dereference_protected(p, c, space) \
535 ({ \ 533 ({ \
536 rcu_lockdep_assert(c, "suspicious rcu_dereference_protected()" \ 534 rcu_lockdep_assert(c, "suspicious rcu_dereference_protected()" \
537 " usage"); \ 535 " usage"); \
538 rcu_dereference_sparse(p, space); \ 536 rcu_dereference_sparse(p, space); \
539 ((typeof(*p) __force __kernel *)(p)); \ 537 ((typeof(*p) __force __kernel *)(p)); \
540 }) 538 })
541 539
542 #define __rcu_access_index(p, space) \ 540 #define __rcu_access_index(p, space) \
543 ({ \ 541 ({ \
544 typeof(p) _________p1 = ACCESS_ONCE(p); \ 542 typeof(p) _________p1 = ACCESS_ONCE(p); \
545 rcu_dereference_sparse(p, space); \ 543 rcu_dereference_sparse(p, space); \
546 (_________p1); \ 544 (_________p1); \
547 }) 545 })
548 #define __rcu_dereference_index_check(p, c) \ 546 #define __rcu_dereference_index_check(p, c) \
549 ({ \ 547 ({ \
550 typeof(p) _________p1 = ACCESS_ONCE(p); \ 548 typeof(p) _________p1 = ACCESS_ONCE(p); \
551 rcu_lockdep_assert(c, \ 549 rcu_lockdep_assert(c, \
552 "suspicious rcu_dereference_index_check()" \ 550 "suspicious rcu_dereference_index_check()" \
553 " usage"); \ 551 " usage"); \
554 smp_read_barrier_depends(); \ 552 smp_read_barrier_depends(); \
555 (_________p1); \ 553 (_________p1); \
556 }) 554 })
557 #define __rcu_assign_pointer(p, v, space) \ 555 #define __rcu_assign_pointer(p, v, space) \
558 do { \ 556 do { \
559 smp_wmb(); \ 557 smp_wmb(); \
560 (p) = (typeof(*v) __force space *)(v); \ 558 (p) = (typeof(*v) __force space *)(v); \
561 } while (0) 559 } while (0)
562 560
563 561
564 /** 562 /**
565 * rcu_access_pointer() - fetch RCU pointer with no dereferencing 563 * rcu_access_pointer() - fetch RCU pointer with no dereferencing
566 * @p: The pointer to read 564 * @p: The pointer to read
567 * 565 *
568 * Return the value of the specified RCU-protected pointer, but omit the 566 * Return the value of the specified RCU-protected pointer, but omit the
569 * smp_read_barrier_depends() and keep the ACCESS_ONCE(). This is useful 567 * smp_read_barrier_depends() and keep the ACCESS_ONCE(). This is useful
570 * when the value of this pointer is accessed, but the pointer is not 568 * when the value of this pointer is accessed, but the pointer is not
571 * dereferenced, for example, when testing an RCU-protected pointer against 569 * dereferenced, for example, when testing an RCU-protected pointer against
572 * NULL. Although rcu_access_pointer() may also be used in cases where 570 * NULL. Although rcu_access_pointer() may also be used in cases where
573 * update-side locks prevent the value of the pointer from changing, you 571 * update-side locks prevent the value of the pointer from changing, you
574 * should instead use rcu_dereference_protected() for this use case. 572 * should instead use rcu_dereference_protected() for this use case.
575 * 573 *
576 * It is also permissible to use rcu_access_pointer() when read-side 574 * It is also permissible to use rcu_access_pointer() when read-side
577 * access to the pointer was removed at least one grace period ago, as 575 * access to the pointer was removed at least one grace period ago, as
578 * is the case in the context of the RCU callback that is freeing up 576 * is the case in the context of the RCU callback that is freeing up
579 * the data, or after a synchronize_rcu() returns. This can be useful 577 * the data, or after a synchronize_rcu() returns. This can be useful
580 * when tearing down multi-linked structures after a grace period 578 * when tearing down multi-linked structures after a grace period
581 * has elapsed. 579 * has elapsed.
582 */ 580 */
583 #define rcu_access_pointer(p) __rcu_access_pointer((p), __rcu) 581 #define rcu_access_pointer(p) __rcu_access_pointer((p), __rcu)
584 582
585 /** 583 /**
586 * rcu_dereference_check() - rcu_dereference with debug checking 584 * rcu_dereference_check() - rcu_dereference with debug checking
587 * @p: The pointer to read, prior to dereferencing 585 * @p: The pointer to read, prior to dereferencing
588 * @c: The conditions under which the dereference will take place 586 * @c: The conditions under which the dereference will take place
589 * 587 *
590 * Do an rcu_dereference(), but check that the conditions under which the 588 * Do an rcu_dereference(), but check that the conditions under which the
591 * dereference will take place are correct. Typically the conditions 589 * dereference will take place are correct. Typically the conditions
592 * indicate the various locking conditions that should be held at that 590 * indicate the various locking conditions that should be held at that
593 * point. The check should return true if the conditions are satisfied. 591 * point. The check should return true if the conditions are satisfied.
594 * An implicit check for being in an RCU read-side critical section 592 * An implicit check for being in an RCU read-side critical section
595 * (rcu_read_lock()) is included. 593 * (rcu_read_lock()) is included.
596 * 594 *
597 * For example: 595 * For example:
598 * 596 *
599 * bar = rcu_dereference_check(foo->bar, lockdep_is_held(&foo->lock)); 597 * bar = rcu_dereference_check(foo->bar, lockdep_is_held(&foo->lock));
600 * 598 *
601 * could be used to indicate to lockdep that foo->bar may only be dereferenced 599 * could be used to indicate to lockdep that foo->bar may only be dereferenced
602 * if either rcu_read_lock() is held, or that the lock required to replace 600 * if either rcu_read_lock() is held, or that the lock required to replace
603 * the bar struct at foo->bar is held. 601 * the bar struct at foo->bar is held.
604 * 602 *
605 * Note that the list of conditions may also include indications of when a lock 603 * Note that the list of conditions may also include indications of when a lock
606 * need not be held, for example during initialisation or destruction of the 604 * need not be held, for example during initialisation or destruction of the
607 * target struct: 605 * target struct:
608 * 606 *
609 * bar = rcu_dereference_check(foo->bar, lockdep_is_held(&foo->lock) || 607 * bar = rcu_dereference_check(foo->bar, lockdep_is_held(&foo->lock) ||
610 * atomic_read(&foo->usage) == 0); 608 * atomic_read(&foo->usage) == 0);
611 * 609 *
612 * Inserts memory barriers on architectures that require them 610 * Inserts memory barriers on architectures that require them
613 * (currently only the Alpha), prevents the compiler from refetching 611 * (currently only the Alpha), prevents the compiler from refetching
614 * (and from merging fetches), and, more importantly, documents exactly 612 * (and from merging fetches), and, more importantly, documents exactly
615 * which pointers are protected by RCU and checks that the pointer is 613 * which pointers are protected by RCU and checks that the pointer is
616 * annotated as __rcu. 614 * annotated as __rcu.
617 */ 615 */
618 #define rcu_dereference_check(p, c) \ 616 #define rcu_dereference_check(p, c) \
619 __rcu_dereference_check((p), rcu_read_lock_held() || (c), __rcu) 617 __rcu_dereference_check((p), rcu_read_lock_held() || (c), __rcu)
620 618
621 /** 619 /**
622 * rcu_dereference_bh_check() - rcu_dereference_bh with debug checking 620 * rcu_dereference_bh_check() - rcu_dereference_bh with debug checking
623 * @p: The pointer to read, prior to dereferencing 621 * @p: The pointer to read, prior to dereferencing
624 * @c: The conditions under which the dereference will take place 622 * @c: The conditions under which the dereference will take place
625 * 623 *
626 * This is the RCU-bh counterpart to rcu_dereference_check(). 624 * This is the RCU-bh counterpart to rcu_dereference_check().
627 */ 625 */
628 #define rcu_dereference_bh_check(p, c) \ 626 #define rcu_dereference_bh_check(p, c) \
629 __rcu_dereference_check((p), rcu_read_lock_bh_held() || (c), __rcu) 627 __rcu_dereference_check((p), rcu_read_lock_bh_held() || (c), __rcu)
630 628
631 /** 629 /**
632 * rcu_dereference_sched_check() - rcu_dereference_sched with debug checking 630 * rcu_dereference_sched_check() - rcu_dereference_sched with debug checking
633 * @p: The pointer to read, prior to dereferencing 631 * @p: The pointer to read, prior to dereferencing
634 * @c: The conditions under which the dereference will take place 632 * @c: The conditions under which the dereference will take place
635 * 633 *
636 * This is the RCU-sched counterpart to rcu_dereference_check(). 634 * This is the RCU-sched counterpart to rcu_dereference_check().
637 */ 635 */
638 #define rcu_dereference_sched_check(p, c) \ 636 #define rcu_dereference_sched_check(p, c) \
639 __rcu_dereference_check((p), rcu_read_lock_sched_held() || (c), \ 637 __rcu_dereference_check((p), rcu_read_lock_sched_held() || (c), \
640 __rcu) 638 __rcu)
641 639
642 #define rcu_dereference_raw(p) rcu_dereference_check(p, 1) /*@@@ needed? @@@*/ 640 #define rcu_dereference_raw(p) rcu_dereference_check(p, 1) /*@@@ needed? @@@*/
643 641
644 /* 642 /*
645 * The tracing infrastructure traces RCU (we want that), but unfortunately 643 * The tracing infrastructure traces RCU (we want that), but unfortunately
646 * some of the RCU checks causes tracing to lock up the system. 644 * some of the RCU checks causes tracing to lock up the system.
647 * 645 *
648 * The tracing version of rcu_dereference_raw() must not call 646 * The tracing version of rcu_dereference_raw() must not call
649 * rcu_read_lock_held(). 647 * rcu_read_lock_held().
650 */ 648 */
651 #define rcu_dereference_raw_notrace(p) __rcu_dereference_check((p), 1, __rcu) 649 #define rcu_dereference_raw_notrace(p) __rcu_dereference_check((p), 1, __rcu)
652 650
653 /** 651 /**
654 * rcu_access_index() - fetch RCU index with no dereferencing 652 * rcu_access_index() - fetch RCU index with no dereferencing
655 * @p: The index to read 653 * @p: The index to read
656 * 654 *
657 * Return the value of the specified RCU-protected index, but omit the 655 * Return the value of the specified RCU-protected index, but omit the
658 * smp_read_barrier_depends() and keep the ACCESS_ONCE(). This is useful 656 * smp_read_barrier_depends() and keep the ACCESS_ONCE(). This is useful
659 * when the value of this index is accessed, but the index is not 657 * when the value of this index is accessed, but the index is not
660 * dereferenced, for example, when testing an RCU-protected index against 658 * dereferenced, for example, when testing an RCU-protected index against
661 * -1. Although rcu_access_index() may also be used in cases where 659 * -1. Although rcu_access_index() may also be used in cases where
662 * update-side locks prevent the value of the index from changing, you 660 * update-side locks prevent the value of the index from changing, you
663 * should instead use rcu_dereference_index_protected() for this use case. 661 * should instead use rcu_dereference_index_protected() for this use case.
664 */ 662 */
665 #define rcu_access_index(p) __rcu_access_index((p), __rcu) 663 #define rcu_access_index(p) __rcu_access_index((p), __rcu)
666 664
667 /** 665 /**
668 * rcu_dereference_index_check() - rcu_dereference for indices with debug checking 666 * rcu_dereference_index_check() - rcu_dereference for indices with debug checking
669 * @p: The pointer to read, prior to dereferencing 667 * @p: The pointer to read, prior to dereferencing
670 * @c: The conditions under which the dereference will take place 668 * @c: The conditions under which the dereference will take place
671 * 669 *
672 * Similar to rcu_dereference_check(), but omits the sparse checking. 670 * Similar to rcu_dereference_check(), but omits the sparse checking.
673 * This allows rcu_dereference_index_check() to be used on integers, 671 * This allows rcu_dereference_index_check() to be used on integers,
674 * which can then be used as array indices. Attempting to use 672 * which can then be used as array indices. Attempting to use
675 * rcu_dereference_check() on an integer will give compiler warnings 673 * rcu_dereference_check() on an integer will give compiler warnings
676 * because the sparse address-space mechanism relies on dereferencing 674 * because the sparse address-space mechanism relies on dereferencing
677 * the RCU-protected pointer. Dereferencing integers is not something 675 * the RCU-protected pointer. Dereferencing integers is not something
678 * that even gcc will put up with. 676 * that even gcc will put up with.
679 * 677 *
680 * Note that this function does not implicitly check for RCU read-side 678 * Note that this function does not implicitly check for RCU read-side
681 * critical sections. If this function gains lots of uses, it might 679 * critical sections. If this function gains lots of uses, it might
682 * make sense to provide versions for each flavor of RCU, but it does 680 * make sense to provide versions for each flavor of RCU, but it does
683 * not make sense as of early 2010. 681 * not make sense as of early 2010.
684 */ 682 */
685 #define rcu_dereference_index_check(p, c) \ 683 #define rcu_dereference_index_check(p, c) \
686 __rcu_dereference_index_check((p), (c)) 684 __rcu_dereference_index_check((p), (c))
687 685
688 /** 686 /**
689 * rcu_dereference_protected() - fetch RCU pointer when updates prevented 687 * rcu_dereference_protected() - fetch RCU pointer when updates prevented
690 * @p: The pointer to read, prior to dereferencing 688 * @p: The pointer to read, prior to dereferencing
691 * @c: The conditions under which the dereference will take place 689 * @c: The conditions under which the dereference will take place
692 * 690 *
693 * Return the value of the specified RCU-protected pointer, but omit 691 * Return the value of the specified RCU-protected pointer, but omit
694 * both the smp_read_barrier_depends() and the ACCESS_ONCE(). This 692 * both the smp_read_barrier_depends() and the ACCESS_ONCE(). This
695 * is useful in cases where update-side locks prevent the value of the 693 * is useful in cases where update-side locks prevent the value of the
696 * pointer from changing. Please note that this primitive does -not- 694 * pointer from changing. Please note that this primitive does -not-
697 * prevent the compiler from repeating this reference or combining it 695 * prevent the compiler from repeating this reference or combining it
698 * with other references, so it should not be used without protection 696 * with other references, so it should not be used without protection
699 * of appropriate locks. 697 * of appropriate locks.
700 * 698 *
701 * This function is only for update-side use. Using this function 699 * This function is only for update-side use. Using this function
702 * when protected only by rcu_read_lock() will result in infrequent 700 * when protected only by rcu_read_lock() will result in infrequent
703 * but very ugly failures. 701 * but very ugly failures.
704 */ 702 */
705 #define rcu_dereference_protected(p, c) \ 703 #define rcu_dereference_protected(p, c) \
706 __rcu_dereference_protected((p), (c), __rcu) 704 __rcu_dereference_protected((p), (c), __rcu)
707 705
708 706
709 /** 707 /**
710 * rcu_dereference() - fetch RCU-protected pointer for dereferencing 708 * rcu_dereference() - fetch RCU-protected pointer for dereferencing
711 * @p: The pointer to read, prior to dereferencing 709 * @p: The pointer to read, prior to dereferencing
712 * 710 *
713 * This is a simple wrapper around rcu_dereference_check(). 711 * This is a simple wrapper around rcu_dereference_check().
714 */ 712 */
715 #define rcu_dereference(p) rcu_dereference_check(p, 0) 713 #define rcu_dereference(p) rcu_dereference_check(p, 0)
716 714
717 /** 715 /**
718 * rcu_dereference_bh() - fetch an RCU-bh-protected pointer for dereferencing 716 * rcu_dereference_bh() - fetch an RCU-bh-protected pointer for dereferencing
719 * @p: The pointer to read, prior to dereferencing 717 * @p: The pointer to read, prior to dereferencing
720 * 718 *
721 * Makes rcu_dereference_check() do the dirty work. 719 * Makes rcu_dereference_check() do the dirty work.
722 */ 720 */
723 #define rcu_dereference_bh(p) rcu_dereference_bh_check(p, 0) 721 #define rcu_dereference_bh(p) rcu_dereference_bh_check(p, 0)
724 722
725 /** 723 /**
726 * rcu_dereference_sched() - fetch RCU-sched-protected pointer for dereferencing 724 * rcu_dereference_sched() - fetch RCU-sched-protected pointer for dereferencing
727 * @p: The pointer to read, prior to dereferencing 725 * @p: The pointer to read, prior to dereferencing
728 * 726 *
729 * Makes rcu_dereference_check() do the dirty work. 727 * Makes rcu_dereference_check() do the dirty work.
730 */ 728 */
731 #define rcu_dereference_sched(p) rcu_dereference_sched_check(p, 0) 729 #define rcu_dereference_sched(p) rcu_dereference_sched_check(p, 0)
732 730
733 /** 731 /**
734 * rcu_read_lock() - mark the beginning of an RCU read-side critical section 732 * rcu_read_lock() - mark the beginning of an RCU read-side critical section
735 * 733 *
736 * When synchronize_rcu() is invoked on one CPU while other CPUs 734 * When synchronize_rcu() is invoked on one CPU while other CPUs
737 * are within RCU read-side critical sections, then the 735 * are within RCU read-side critical sections, then the
738 * synchronize_rcu() is guaranteed to block until after all the other 736 * synchronize_rcu() is guaranteed to block until after all the other
739 * CPUs exit their critical sections. Similarly, if call_rcu() is invoked 737 * CPUs exit their critical sections. Similarly, if call_rcu() is invoked
740 * on one CPU while other CPUs are within RCU read-side critical 738 * on one CPU while other CPUs are within RCU read-side critical
741 * sections, invocation of the corresponding RCU callback is deferred 739 * sections, invocation of the corresponding RCU callback is deferred
742 * until after all the other CPUs exit their critical sections. 740 * until after all the other CPUs exit their critical sections.
743 * 741 *
744 * Note, however, that RCU callbacks are permitted to run concurrently 742 * Note, however, that RCU callbacks are permitted to run concurrently
745 * with new RCU read-side critical sections. One way that this can happen 743 * with new RCU read-side critical sections. One way that this can happen
746 * is via the following sequence of events: (1) CPU 0 enters an RCU 744 * is via the following sequence of events: (1) CPU 0 enters an RCU
747 * read-side critical section, (2) CPU 1 invokes call_rcu() to register 745 * read-side critical section, (2) CPU 1 invokes call_rcu() to register
748 * an RCU callback, (3) CPU 0 exits the RCU read-side critical section, 746 * an RCU callback, (3) CPU 0 exits the RCU read-side critical section,
749 * (4) CPU 2 enters an RCU read-side critical section, (5) the RCU 747 * (4) CPU 2 enters an RCU read-side critical section, (5) the RCU
750 * callback is invoked. This is legal, because the RCU read-side critical 748 * callback is invoked. This is legal, because the RCU read-side critical
751 * section that was running concurrently with the call_rcu() (and which 749 * section that was running concurrently with the call_rcu() (and which
752 * therefore might be referencing something that the corresponding RCU 750 * therefore might be referencing something that the corresponding RCU
753 * callback would free up) has completed before the corresponding 751 * callback would free up) has completed before the corresponding
754 * RCU callback is invoked. 752 * RCU callback is invoked.
755 * 753 *
756 * RCU read-side critical sections may be nested. Any deferred actions 754 * RCU read-side critical sections may be nested. Any deferred actions
757 * will be deferred until the outermost RCU read-side critical section 755 * will be deferred until the outermost RCU read-side critical section
758 * completes. 756 * completes.
759 * 757 *
760 * You can avoid reading and understanding the next paragraph by 758 * You can avoid reading and understanding the next paragraph by
761 * following this rule: don't put anything in an rcu_read_lock() RCU 759 * following this rule: don't put anything in an rcu_read_lock() RCU
762 * read-side critical section that would block in a !PREEMPT kernel. 760 * read-side critical section that would block in a !PREEMPT kernel.
763 * But if you want the full story, read on! 761 * But if you want the full story, read on!
764 * 762 *
765 * In non-preemptible RCU implementations (TREE_RCU and TINY_RCU), it 763 * In non-preemptible RCU implementations (TREE_RCU and TINY_RCU), it
766 * is illegal to block while in an RCU read-side critical section. In 764 * is illegal to block while in an RCU read-side critical section. In
767 * preemptible RCU implementations (TREE_PREEMPT_RCU and TINY_PREEMPT_RCU) 765 * preemptible RCU implementations (TREE_PREEMPT_RCU and TINY_PREEMPT_RCU)
768 * in CONFIG_PREEMPT kernel builds, RCU read-side critical sections may 766 * in CONFIG_PREEMPT kernel builds, RCU read-side critical sections may
769 * be preempted, but explicit blocking is illegal. Finally, in preemptible 767 * be preempted, but explicit blocking is illegal. Finally, in preemptible
770 * RCU implementations in real-time (with -rt patchset) kernel builds, 768 * RCU implementations in real-time (with -rt patchset) kernel builds,
771 * RCU read-side critical sections may be preempted and they may also 769 * RCU read-side critical sections may be preempted and they may also
772 * block, but only when acquiring spinlocks that are subject to priority 770 * block, but only when acquiring spinlocks that are subject to priority
773 * inheritance. 771 * inheritance.
774 */ 772 */
775 static inline void rcu_read_lock(void) 773 static inline void rcu_read_lock(void)
776 { 774 {
777 __rcu_read_lock(); 775 __rcu_read_lock();
778 __acquire(RCU); 776 __acquire(RCU);
779 rcu_lock_acquire(&rcu_lock_map); 777 rcu_lock_acquire(&rcu_lock_map);
780 rcu_lockdep_assert(!rcu_is_cpu_idle(), 778 rcu_lockdep_assert(!rcu_is_cpu_idle(),
781 "rcu_read_lock() used illegally while idle"); 779 "rcu_read_lock() used illegally while idle");
782 } 780 }
783 781
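A tiny sketch of the nesting rule above (illustrative only); any deferred free of the hypothetical "gp" target cannot run until the outermost rcu_read_unlock().

static void foo_nested_reader(void)
{
	rcu_read_lock();
	rcu_read_lock();		/* Nesting is legal. */
	/* ... access rcu_dereference(gp) here ... */
	rcu_read_unlock();
	/* Still inside the outermost critical section. */
	rcu_read_unlock();		/* Deferred callbacks may now run. */
}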
784 /* 782 /*
785 * So where is rcu_write_lock()? It does not exist, as there is no 783 * So where is rcu_write_lock()? It does not exist, as there is no
786 * way for writers to lock out RCU readers. This is a feature, not 784 * way for writers to lock out RCU readers. This is a feature, not
787 * a bug -- this property is what provides RCU's performance benefits. 785 * a bug -- this property is what provides RCU's performance benefits.
788 * Of course, writers must coordinate with each other. The normal 786 * Of course, writers must coordinate with each other. The normal
789 * spinlock primitives work well for this, but any other technique may be 787 * spinlock primitives work well for this, but any other technique may be
790 * used as well. RCU does not care how the writers keep out of each 788 * used as well. RCU does not care how the writers keep out of each
791 * others' way, as long as they do so. 789 * others' way, as long as they do so.
792 */ 790 */
793 791
794 /** 792 /**
795 * rcu_read_unlock() - marks the end of an RCU read-side critical section. 793 * rcu_read_unlock() - marks the end of an RCU read-side critical section.
796 * 794 *
797 * See rcu_read_lock() for more information. 795 * See rcu_read_lock() for more information.
798 */ 796 */
799 static inline void rcu_read_unlock(void) 797 static inline void rcu_read_unlock(void)
800 { 798 {
801 rcu_lockdep_assert(!rcu_is_cpu_idle(), 799 rcu_lockdep_assert(!rcu_is_cpu_idle(),
802 "rcu_read_unlock() used illegally while idle"); 800 "rcu_read_unlock() used illegally while idle");
803 rcu_lock_release(&rcu_lock_map); 801 rcu_lock_release(&rcu_lock_map);
804 __release(RCU); 802 __release(RCU);
805 __rcu_read_unlock(); 803 __rcu_read_unlock();
806 } 804 }
807 805
808 /** 806 /**
809 * rcu_read_lock_bh() - mark the beginning of an RCU-bh critical section 807 * rcu_read_lock_bh() - mark the beginning of an RCU-bh critical section
810 * 808 *
811 * This is the equivalent of rcu_read_lock(), but to be used when updates 809 * This is the equivalent of rcu_read_lock(), but to be used when updates
812 * are being done using call_rcu_bh() or synchronize_rcu_bh(). Since 810 * are being done using call_rcu_bh() or synchronize_rcu_bh(). Since
813 * both call_rcu_bh() and synchronize_rcu_bh() consider completion of a 811 * both call_rcu_bh() and synchronize_rcu_bh() consider completion of a
814 * softirq handler to be a quiescent state, a process in an RCU read-side 812 * softirq handler to be a quiescent state, a process in an RCU read-side
815 * critical section must be protected by disabling softirqs. Read-side 813 * critical section must be protected by disabling softirqs. Read-side
816 * critical sections in interrupt context can use just rcu_read_lock(), 814 * critical sections in interrupt context can use just rcu_read_lock(),
817 * though this should at least be commented to avoid confusing people 815 * though this should at least be commented to avoid confusing people
818 * reading the code. 816 * reading the code.
819 * 817 *
820 * Note that rcu_read_lock_bh() and the matching rcu_read_unlock_bh() 818 * Note that rcu_read_lock_bh() and the matching rcu_read_unlock_bh()
821 * must occur in the same context, for example, it is illegal to invoke 819 * must occur in the same context, for example, it is illegal to invoke
822 * rcu_read_unlock_bh() from one task if the matching rcu_read_lock_bh() 820 * rcu_read_unlock_bh() from one task if the matching rcu_read_lock_bh()
823 * was invoked from some other task. 821 * was invoked from some other task.
824 */ 822 */
825 static inline void rcu_read_lock_bh(void) 823 static inline void rcu_read_lock_bh(void)
826 { 824 {
827 local_bh_disable(); 825 local_bh_disable();
828 __acquire(RCU_BH); 826 __acquire(RCU_BH);
829 rcu_lock_acquire(&rcu_bh_lock_map); 827 rcu_lock_acquire(&rcu_bh_lock_map);
830 rcu_lockdep_assert(!rcu_is_cpu_idle(), 828 rcu_lockdep_assert(!rcu_is_cpu_idle(),
831 "rcu_read_lock_bh() used illegally while idle"); 829 "rcu_read_lock_bh() used illegally while idle");
832 } 830 }
833 831
834 /* 832 /*
835 * rcu_read_unlock_bh - marks the end of a softirq-only RCU critical section 833 * rcu_read_unlock_bh - marks the end of a softirq-only RCU critical section
836 * 834 *
837 * See rcu_read_lock_bh() for more information. 835 * See rcu_read_lock_bh() for more information.
838 */ 836 */
839 static inline void rcu_read_unlock_bh(void) 837 static inline void rcu_read_unlock_bh(void)
840 { 838 {
841 rcu_lockdep_assert(!rcu_is_cpu_idle(), 839 rcu_lockdep_assert(!rcu_is_cpu_idle(),
842 "rcu_read_unlock_bh() used illegally while idle"); 840 "rcu_read_unlock_bh() used illegally while idle");
843 rcu_lock_release(&rcu_bh_lock_map); 841 rcu_lock_release(&rcu_bh_lock_map);
844 __release(RCU_BH); 842 __release(RCU_BH);
845 local_bh_enable(); 843 local_bh_enable();
846 } 844 }
847 845
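Illustrative only, assuming a hypothetical "gp_bh" pointer whose updater uses call_rcu_bh() or synchronize_rcu_bh(); "struct foo" is reused from the earlier sketch.

static struct foo __rcu *gp_bh;

static int foo_read_bh(void)
{
	struct foo *p;
	int a = -1;

	rcu_read_lock_bh();		/* Disables softirqs for the duration. */
	p = rcu_dereference_bh(gp_bh);
	if (p)
		a = p->a;
	rcu_read_unlock_bh();
	return a;
}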
848 /** 846 /**
849 * rcu_read_lock_sched() - mark the beginning of an RCU-sched critical section 847 * rcu_read_lock_sched() - mark the beginning of an RCU-sched critical section
850 * 848 *
851 * This is the equivalent of rcu_read_lock(), but to be used when updates 849 * This is the equivalent of rcu_read_lock(), but to be used when updates
852 * are being done using call_rcu_sched() or synchronize_rcu_sched(). 850 * are being done using call_rcu_sched() or synchronize_rcu_sched().
853 * Read-side critical sections can also be introduced by anything that 851 * Read-side critical sections can also be introduced by anything that
854 * disables preemption, including local_irq_disable() and friends. 852 * disables preemption, including local_irq_disable() and friends.
855 * 853 *
856 * Note that rcu_read_lock_sched() and the matching rcu_read_unlock_sched() 854 * Note that rcu_read_lock_sched() and the matching rcu_read_unlock_sched()
857 * must occur in the same context, for example, it is illegal to invoke 855 * must occur in the same context, for example, it is illegal to invoke
858 * rcu_read_unlock_sched() from process context if the matching 856 * rcu_read_unlock_sched() from process context if the matching
859 * rcu_read_lock_sched() was invoked from an NMI handler. 857 * rcu_read_lock_sched() was invoked from an NMI handler.
860 */ 858 */
861 static inline void rcu_read_lock_sched(void) 859 static inline void rcu_read_lock_sched(void)
862 { 860 {
863 preempt_disable(); 861 preempt_disable();
864 __acquire(RCU_SCHED); 862 __acquire(RCU_SCHED);
865 rcu_lock_acquire(&rcu_sched_lock_map); 863 rcu_lock_acquire(&rcu_sched_lock_map);
866 rcu_lockdep_assert(!rcu_is_cpu_idle(), 864 rcu_lockdep_assert(!rcu_is_cpu_idle(),
867 "rcu_read_lock_sched() used illegally while idle"); 865 "rcu_read_lock_sched() used illegally while idle");
868 } 866 }
869 867
870 /* Used by lockdep and tracing: cannot be traced, cannot call lockdep. */ 868 /* Used by lockdep and tracing: cannot be traced, cannot call lockdep. */
871 static inline notrace void rcu_read_lock_sched_notrace(void) 869 static inline notrace void rcu_read_lock_sched_notrace(void)
872 { 870 {
873 preempt_disable_notrace(); 871 preempt_disable_notrace();
874 __acquire(RCU_SCHED); 872 __acquire(RCU_SCHED);
875 } 873 }
876 874
877 /* 875 /*
878 * rcu_read_unlock_sched - marks the end of an RCU-sched critical section 876 * rcu_read_unlock_sched - marks the end of an RCU-sched critical section
879 * 877 *
880 * See rcu_read_lock_sched for more information. 878 * See rcu_read_lock_sched for more information.
881 */ 879 */
882 static inline void rcu_read_unlock_sched(void) 880 static inline void rcu_read_unlock_sched(void)
883 { 881 {
884 rcu_lockdep_assert(!rcu_is_cpu_idle(), 882 rcu_lockdep_assert(!rcu_is_cpu_idle(),
885 "rcu_read_unlock_sched() used illegally while idle"); 883 "rcu_read_unlock_sched() used illegally while idle");
886 rcu_lock_release(&rcu_sched_lock_map); 884 rcu_lock_release(&rcu_sched_lock_map);
887 __release(RCU_SCHED); 885 __release(RCU_SCHED);
888 preempt_enable(); 886 preempt_enable();
889 } 887 }
890 888
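Illustrative only, assuming a hypothetical "gp_sched" pointer whose updater uses call_rcu_sched() or synchronize_sched(); "struct foo" is again reused from the earlier sketch.

static struct foo __rcu *gp_sched;

static int foo_read_sched(void)
{
	struct foo *p;
	int a = -1;

	rcu_read_lock_sched();		/* Disables preemption for the duration. */
	p = rcu_dereference_sched(gp_sched);
	if (p)
		a = p->a;
	rcu_read_unlock_sched();
	return a;
}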
891 /* Used by lockdep and tracing: cannot be traced, cannot call lockdep. */ 889 /* Used by lockdep and tracing: cannot be traced, cannot call lockdep. */
892 static inline notrace void rcu_read_unlock_sched_notrace(void) 890 static inline notrace void rcu_read_unlock_sched_notrace(void)
893 { 891 {
894 __release(RCU_SCHED); 892 __release(RCU_SCHED);
895 preempt_enable_notrace(); 893 preempt_enable_notrace();
896 } 894 }
897 895
898 /** 896 /**
899 * rcu_assign_pointer() - assign to RCU-protected pointer 897 * rcu_assign_pointer() - assign to RCU-protected pointer
900 * @p: pointer to assign to 898 * @p: pointer to assign to
901 * @v: value to assign (publish) 899 * @v: value to assign (publish)
902 * 900 *
903 * Assigns the specified value to the specified RCU-protected 901 * Assigns the specified value to the specified RCU-protected
904 * pointer, ensuring that any concurrent RCU readers will see 902 * pointer, ensuring that any concurrent RCU readers will see
905 * any prior initialization. 903 * any prior initialization.
906 * 904 *
907 * Inserts memory barriers on architectures that require them 905 * Inserts memory barriers on architectures that require them
908 * (which is most of them), and also prevents the compiler from 906 * (which is most of them), and also prevents the compiler from
909 * reordering the code that initializes the structure after the pointer 907 * reordering the code that initializes the structure after the pointer
910 * assignment. More importantly, this call documents which pointers 908 * assignment. More importantly, this call documents which pointers
911 * will be dereferenced by RCU read-side code. 909 * will be dereferenced by RCU read-side code.
912 * 910 *
913 * In some special cases, you may use RCU_INIT_POINTER() instead 911 * In some special cases, you may use RCU_INIT_POINTER() instead
914 * of rcu_assign_pointer(). RCU_INIT_POINTER() is a bit faster due 912 * of rcu_assign_pointer(). RCU_INIT_POINTER() is a bit faster due
915 * to the fact that it does not constrain either the CPU or the compiler. 913 * to the fact that it does not constrain either the CPU or the compiler.
916 * That said, using RCU_INIT_POINTER() when you should have used 914 * That said, using RCU_INIT_POINTER() when you should have used
917 * rcu_assign_pointer() is a very bad thing that results in 915 * rcu_assign_pointer() is a very bad thing that results in
918 * impossible-to-diagnose memory corruption. So please be careful. 916 * impossible-to-diagnose memory corruption. So please be careful.
919 * See the RCU_INIT_POINTER() comment header for details. 917 * See the RCU_INIT_POINTER() comment header for details.
920 */ 918 */
921 #define rcu_assign_pointer(p, v) \ 919 #define rcu_assign_pointer(p, v) \
922 __rcu_assign_pointer((p), (v), __rcu) 920 __rcu_assign_pointer((p), (v), __rcu)
923 921
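A minimal sketch of the initialize-then-publish ordering described above (illustrative only; assumes the hypothetical "gp" was NULL and that updater exclusion is handled elsewhere).

static int foo_publish(int a)
{
	struct foo *p = kmalloc(sizeof(*p), GFP_KERNEL);

	if (!p)
		return -ENOMEM;
	p->a = a;			/* Fully initialize first... */
	rcu_assign_pointer(gp, p);	/* ...then publish; readers see an initialized foo. */
	return 0;
}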
924 /** 922 /**
925 * RCU_INIT_POINTER() - initialize an RCU protected pointer 923 * RCU_INIT_POINTER() - initialize an RCU protected pointer
926 * 924 *
927 * Initialize an RCU-protected pointer in special cases where readers 925 * Initialize an RCU-protected pointer in special cases where readers
928 * do not need ordering constraints on the CPU or the compiler. These 926 * do not need ordering constraints on the CPU or the compiler. These
929 * special cases are: 927 * special cases are:
930 * 928 *
931 * 1. This use of RCU_INIT_POINTER() is NULLing out the pointer -or- 929 * 1. This use of RCU_INIT_POINTER() is NULLing out the pointer -or-
932 * 2. The caller has taken whatever steps are required to prevent 930 * 2. The caller has taken whatever steps are required to prevent
933 * RCU readers from concurrently accessing this pointer -or- 931 * RCU readers from concurrently accessing this pointer -or-
934 * 3. The referenced data structure has already been exposed to 932 * 3. The referenced data structure has already been exposed to
935 * readers either at compile time or via rcu_assign_pointer() -and- 933 * readers either at compile time or via rcu_assign_pointer() -and-
936 * a. You have not made -any- reader-visible changes to 934 * a. You have not made -any- reader-visible changes to
937 * this structure since then -or- 935 * this structure since then -or-
938 * b. It is OK for readers accessing this structure from its 936 * b. It is OK for readers accessing this structure from its
939 * new location to see the old state of the structure. (For 937 * new location to see the old state of the structure. (For
940 * example, the changes were to statistical counters or to 938 * example, the changes were to statistical counters or to
941 * other state where exact synchronization is not required.) 939 * other state where exact synchronization is not required.)
942 * 940 *
943 * Failure to follow these rules governing use of RCU_INIT_POINTER() will 941 * Failure to follow these rules governing use of RCU_INIT_POINTER() will
944 * result in impossible-to-diagnose memory corruption. That is, the structures 942 * result in impossible-to-diagnose memory corruption. That is, the structures
945 * will look OK in crash dumps, but any concurrent RCU readers might 943 * will look OK in crash dumps, but any concurrent RCU readers might
946 * see pre-initialized values of the referenced data structure. So 944 * see pre-initialized values of the referenced data structure. So
947 * please be very careful how you use RCU_INIT_POINTER()!!! 945 * please be very careful how you use RCU_INIT_POINTER()!!!
948 * 946 *
949 * If you are creating an RCU-protected linked structure that is accessed 947 * If you are creating an RCU-protected linked structure that is accessed
950 * by a single external-to-structure RCU-protected pointer, then you may 948 * by a single external-to-structure RCU-protected pointer, then you may
951 * use RCU_INIT_POINTER() to initialize the internal RCU-protected 949 * use RCU_INIT_POINTER() to initialize the internal RCU-protected
952 * pointers, but you must use rcu_assign_pointer() to initialize the 950 * pointers, but you must use rcu_assign_pointer() to initialize the
953 * external-to-structure pointer -after- you have completely initialized 951 * external-to-structure pointer -after- you have completely initialized
954 * the reader-accessible portions of the linked structure. 952 * the reader-accessible portions of the linked structure.
955 */ 953 */
956 #define RCU_INIT_POINTER(p, v) \ 954 #define RCU_INIT_POINTER(p, v) \
957 do { \ 955 do { \
958 p = (typeof(*v) __force __rcu *)(v); \ 956 p = (typeof(*v) __force __rcu *)(v); \
959 } while (0) 957 } while (0)
960 958
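A sketch of special case 1 above (NULLing the pointer), again using the hypothetical "gp", "foo_lock" and "struct foo" from the earlier sketches.

static void foo_remove(void)
{
	struct foo *old;

	spin_lock(&foo_lock);
	old = rcu_dereference_protected(gp, lockdep_is_held(&foo_lock));
	RCU_INIT_POINTER(gp, NULL);	/* NULLing needs no ordering, so case 1 applies. */
	spin_unlock(&foo_lock);
	if (old)
		kfree_rcu(old, rcu);
}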
961 /** 959 /**
962 * RCU_POINTER_INITIALIZER() - statically initialize an RCU protected pointer 960 * RCU_POINTER_INITIALIZER() - statically initialize an RCU protected pointer
963 * 961 *
964 * GCC-style initialization for an RCU-protected pointer in a structure field. 962 * GCC-style initialization for an RCU-protected pointer in a structure field.
965 */ 963 */
966 #define RCU_POINTER_INITIALIZER(p, v) \ 964 #define RCU_POINTER_INITIALIZER(p, v) \
967 .p = (typeof(*v) __force __rcu *)(v) 965 .p = (typeof(*v) __force __rcu *)(v)
968 966
969 /* 967 /*
970 * Does the specified offset indicate that the corresponding rcu_head 968 * Does the specified offset indicate that the corresponding rcu_head
971 * structure can be handled by kfree_rcu()? 969 * structure can be handled by kfree_rcu()?
972 */ 970 */
973 #define __is_kfree_rcu_offset(offset) ((offset) < 4096) 971 #define __is_kfree_rcu_offset(offset) ((offset) < 4096)
974 972
975 /* 973 /*
976 * Helper macro for kfree_rcu() to prevent argument-expansion eyestrain. 974 * Helper macro for kfree_rcu() to prevent argument-expansion eyestrain.
977 */ 975 */
978 #define __kfree_rcu(head, offset) \ 976 #define __kfree_rcu(head, offset) \
979 do { \ 977 do { \
980 BUILD_BUG_ON(!__is_kfree_rcu_offset(offset)); \ 978 BUILD_BUG_ON(!__is_kfree_rcu_offset(offset)); \
981 kfree_call_rcu(head, (void (*)(struct rcu_head *))(unsigned long)(offset)); \ 979 kfree_call_rcu(head, (void (*)(struct rcu_head *))(unsigned long)(offset)); \
982 } while (0) 980 } while (0)
983 981
984 /** 982 /**
985 * kfree_rcu() - kfree an object after a grace period. 983 * kfree_rcu() - kfree an object after a grace period.
986 * @ptr: pointer to kfree 984 * @ptr: pointer to kfree
987 * @rcu_head: the name of the struct rcu_head within the type of @ptr. 985 * @rcu_head: the name of the struct rcu_head within the type of @ptr.
988 * 986 *
989 * Many rcu callback functions just call kfree() on the base structure. 987 * Many rcu callback functions just call kfree() on the base structure.
990 * These functions are trivial, but their size adds up, and furthermore 988 * These functions are trivial, but their size adds up, and furthermore
991 * when they are used in a kernel module, that module must invoke the 989 * when they are used in a kernel module, that module must invoke the
992 * high-latency rcu_barrier() function at module-unload time. 990 * high-latency rcu_barrier() function at module-unload time.
993 * 991 *
994 * The kfree_rcu() function handles this issue. Rather than encoding a 992 * The kfree_rcu() function handles this issue. Rather than encoding a
995 * function address in the embedded rcu_head structure, kfree_rcu() instead 993 * function address in the embedded rcu_head structure, kfree_rcu() instead
996 * encodes the offset of the rcu_head structure within the base structure. 994 * encodes the offset of the rcu_head structure within the base structure.
997 * Because the functions are not allowed in the low-order 4096 bytes of 995 * Because the functions are not allowed in the low-order 4096 bytes of
998 * kernel virtual memory, offsets up to 4095 bytes can be accommodated. 996 * kernel virtual memory, offsets up to 4095 bytes can be accommodated.
999 * If the offset is larger than 4095 bytes, a compile-time error will 997 * If the offset is larger than 4095 bytes, a compile-time error will
1000 * be generated in __kfree_rcu(). If this error is triggered, you can 998 * be generated in __kfree_rcu(). If this error is triggered, you can
1001 * either fall back to use of call_rcu() or rearrange the structure to 999 * either fall back to use of call_rcu() or rearrange the structure to
1002 * position the rcu_head structure into the first 4096 bytes. 1000 * position the rcu_head structure into the first 4096 bytes.
1003 * 1001 *
1004 * Note that the allowable offset might decrease in the future, for example, 1002 * Note that the allowable offset might decrease in the future, for example,
1005 * to allow something like kmem_cache_free_rcu(). 1003 * to allow something like kmem_cache_free_rcu().
1006 * 1004 *
1007 * The BUILD_BUG_ON check must not involve any function calls, hence the 1005 * The BUILD_BUG_ON check must not involve any function calls, hence the
1008 * checks are done in macros here. 1006 * checks are done in macros here.
1009 */ 1007 */
1010 #define kfree_rcu(ptr, rcu_head) \ 1008 #define kfree_rcu(ptr, rcu_head) \
1011 __kfree_rcu(&((ptr)->rcu_head), offsetof(typeof(*(ptr)), rcu_head)) 1009 __kfree_rcu(&((ptr)->rcu_head), offsetof(typeof(*(ptr)), rcu_head))
1012 1010
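Illustrative only: a hypothetical "struct bar" freed via kfree_rcu(); the second argument names the rcu_head field, whose offset must stay below 4096 bytes as explained above.

struct bar {
	int data;
	struct rcu_head rcu;		/* Offset well below the 4096-byte limit. */
};

static void bar_free(struct bar *p)
{
	kfree_rcu(p, rcu);		/* No separate callback function needed. */
}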
1013 #ifdef CONFIG_RCU_NOCB_CPU 1011 #ifdef CONFIG_RCU_NOCB_CPU
1014 extern bool rcu_is_nocb_cpu(int cpu); 1012 extern bool rcu_is_nocb_cpu(int cpu);
1015 #else 1013 #else
1016 static inline bool rcu_is_nocb_cpu(int cpu) { return false; } 1014 static inline bool rcu_is_nocb_cpu(int cpu) { return false; }
1017 #endif /* #else #ifdef CONFIG_RCU_NOCB_CPU */ 1015 #endif /* #else #ifdef CONFIG_RCU_NOCB_CPU */
1018 1016
1019 1017
1020 #endif /* __LINUX_RCUPDATE_H */ 1018 #endif /* __LINUX_RCUPDATE_H */
1021 1019
include/linux/rcutiny.h
1 /* 1 /*
2 * Read-Copy Update mechanism for mutual exclusion, the Bloatwatch edition. 2 * Read-Copy Update mechanism for mutual exclusion, the Bloatwatch edition.
3 * 3 *
4 * This program is free software; you can redistribute it and/or modify 4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by 5 * it under the terms of the GNU General Public License as published by
6 * the Free Software Foundation; either version 2 of the License, or 6 * the Free Software Foundation; either version 2 of the License, or
7 * (at your option) any later version. 7 * (at your option) any later version.
8 * 8 *
9 * This program is distributed in the hope that it will be useful, 9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of 10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details. 12 * GNU General Public License for more details.
13 * 13 *
14 * You should have received a copy of the GNU General Public License 14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write to the Free Software 15 * along with this program; if not, write to the Free Software
16 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 16 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
17 * 17 *
18 * Copyright IBM Corporation, 2008 18 * Copyright IBM Corporation, 2008
19 * 19 *
20 * Author: Paul E. McKenney <paulmck@linux.vnet.ibm.com> 20 * Author: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
21 * 21 *
22 * For detailed explanation of Read-Copy Update mechanism see - 22 * For detailed explanation of Read-Copy Update mechanism see -
23 * Documentation/RCU 23 * Documentation/RCU
24 */ 24 */
25 #ifndef __LINUX_TINY_H 25 #ifndef __LINUX_TINY_H
26 #define __LINUX_TINY_H 26 #define __LINUX_TINY_H
27 27
28 #include <linux/cache.h> 28 #include <linux/cache.h>
29 29
30 static inline void rcu_barrier_bh(void) 30 static inline void rcu_barrier_bh(void)
31 { 31 {
32 wait_rcu_gp(call_rcu_bh); 32 wait_rcu_gp(call_rcu_bh);
33 } 33 }
34 34
35 static inline void rcu_barrier_sched(void) 35 static inline void rcu_barrier_sched(void)
36 { 36 {
37 wait_rcu_gp(call_rcu_sched); 37 wait_rcu_gp(call_rcu_sched);
38 } 38 }
39 39
40 static inline void synchronize_rcu_expedited(void) 40 static inline void synchronize_rcu_expedited(void)
41 { 41 {
42 synchronize_sched(); /* Only one CPU, so pretty fast anyway!!! */ 42 synchronize_sched(); /* Only one CPU, so pretty fast anyway!!! */
43 } 43 }
44 44
45 static inline void rcu_barrier(void) 45 static inline void rcu_barrier(void)
46 { 46 {
47 rcu_barrier_sched(); /* Only one CPU, so only one list of callbacks! */ 47 rcu_barrier_sched(); /* Only one CPU, so only one list of callbacks! */
48 } 48 }
49 49
50 static inline void synchronize_rcu_bh(void) 50 static inline void synchronize_rcu_bh(void)
51 { 51 {
52 synchronize_sched(); 52 synchronize_sched();
53 } 53 }
54 54
55 static inline void synchronize_rcu_bh_expedited(void) 55 static inline void synchronize_rcu_bh_expedited(void)
56 { 56 {
57 synchronize_sched(); 57 synchronize_sched();
58 } 58 }
59 59
60 static inline void synchronize_sched_expedited(void) 60 static inline void synchronize_sched_expedited(void)
61 { 61 {
62 synchronize_sched(); 62 synchronize_sched();
63 } 63 }
64 64
65 static inline void kfree_call_rcu(struct rcu_head *head, 65 static inline void kfree_call_rcu(struct rcu_head *head,
66 void (*func)(struct rcu_head *rcu)) 66 void (*func)(struct rcu_head *rcu))
67 { 67 {
68 call_rcu(head, func); 68 call_rcu(head, func);
69 } 69 }
70 70
71 static inline int rcu_needs_cpu(int cpu, unsigned long *delta_jiffies) 71 static inline int rcu_needs_cpu(int cpu, unsigned long *delta_jiffies)
72 { 72 {
73 *delta_jiffies = ULONG_MAX; 73 *delta_jiffies = ULONG_MAX;
74 return 0; 74 return 0;
75 } 75 }
76 76
77 static inline void rcu_note_context_switch(int cpu) 77 static inline void rcu_note_context_switch(int cpu)
78 { 78 {
79 rcu_sched_qs(cpu); 79 rcu_sched_qs(cpu);
80 } 80 }
81 81
82 /* 82 /*
83 * Take advantage of the fact that there is only one CPU, which 83 * Take advantage of the fact that there is only one CPU, which
84 * allows us to ignore virtualization-based context switches. 84 * allows us to ignore virtualization-based context switches.
85 */ 85 */
86 static inline void rcu_virt_note_context_switch(int cpu) 86 static inline void rcu_virt_note_context_switch(int cpu)
87 { 87 {
88 } 88 }
89 89
90 /* 90 /*
91 * Return the number of grace periods. 91 * Return the number of grace periods.
92 */ 92 */
93 static inline long rcu_batches_completed(void) 93 static inline long rcu_batches_completed(void)
94 { 94 {
95 return 0; 95 return 0;
96 } 96 }
97 97
98 /* 98 /*
99 * Return the number of bottom-half grace periods. 99 * Return the number of bottom-half grace periods.
100 */ 100 */
101 static inline long rcu_batches_completed_bh(void) 101 static inline long rcu_batches_completed_bh(void)
102 { 102 {
103 return 0; 103 return 0;
104 } 104 }
105 105
106 static inline void rcu_force_quiescent_state(void) 106 static inline void rcu_force_quiescent_state(void)
107 { 107 {
108 } 108 }
109 109
110 static inline void rcu_bh_force_quiescent_state(void) 110 static inline void rcu_bh_force_quiescent_state(void)
111 { 111 {
112 } 112 }
113 113
114 static inline void rcu_sched_force_quiescent_state(void) 114 static inline void rcu_sched_force_quiescent_state(void)
115 { 115 {
116 } 116 }
117 117
118 static inline void rcu_cpu_stall_reset(void) 118 static inline void rcu_cpu_stall_reset(void)
119 { 119 {
120 } 120 }
121 121
122 static inline void exit_rcu(void)
123 {
124 }
125
122 #ifdef CONFIG_DEBUG_LOCK_ALLOC 126 #ifdef CONFIG_DEBUG_LOCK_ALLOC
123 extern int rcu_scheduler_active __read_mostly; 127 extern int rcu_scheduler_active __read_mostly;
124 extern void rcu_scheduler_starting(void); 128 extern void rcu_scheduler_starting(void);
125 #else /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */ 129 #else /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */
126 static inline void rcu_scheduler_starting(void) 130 static inline void rcu_scheduler_starting(void)
127 { 131 {
128 } 132 }
129 #endif /* #else #ifdef CONFIG_DEBUG_LOCK_ALLOC */ 133 #endif /* #else #ifdef CONFIG_DEBUG_LOCK_ALLOC */
130 134
131 #endif /* __LINUX_RCUTINY_H */ 135 #endif /* __LINUX_RCUTINY_H */
132 136
include/linux/rcutree.h
1 /* 1 /*
2 * Read-Copy Update mechanism for mutual exclusion (tree-based version) 2 * Read-Copy Update mechanism for mutual exclusion (tree-based version)
3 * 3 *
4 * This program is free software; you can redistribute it and/or modify 4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by 5 * it under the terms of the GNU General Public License as published by
6 * the Free Software Foundation; either version 2 of the License, or 6 * the Free Software Foundation; either version 2 of the License, or
7 * (at your option) any later version. 7 * (at your option) any later version.
8 * 8 *
9 * This program is distributed in the hope that it will be useful, 9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of 10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details. 12 * GNU General Public License for more details.
13 * 13 *
14 * You should have received a copy of the GNU General Public License 14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write to the Free Software 15 * along with this program; if not, write to the Free Software
16 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 16 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
17 * 17 *
18 * Copyright IBM Corporation, 2008 18 * Copyright IBM Corporation, 2008
19 * 19 *
20 * Author: Dipankar Sarma <dipankar@in.ibm.com> 20 * Author: Dipankar Sarma <dipankar@in.ibm.com>
21 * Paul E. McKenney <paulmck@linux.vnet.ibm.com> Hierarchical algorithm 21 * Paul E. McKenney <paulmck@linux.vnet.ibm.com> Hierarchical algorithm
22 * 22 *
23 * Based on the original work by Paul McKenney <paulmck@us.ibm.com> 23 * Based on the original work by Paul McKenney <paulmck@us.ibm.com>
24 * and inputs from Rusty Russell, Andrea Arcangeli and Andi Kleen. 24 * and inputs from Rusty Russell, Andrea Arcangeli and Andi Kleen.
25 * 25 *
26 * For detailed explanation of Read-Copy Update mechanism see - 26 * For detailed explanation of Read-Copy Update mechanism see -
27 * Documentation/RCU 27 * Documentation/RCU
28 */ 28 */
29 29
30 #ifndef __LINUX_RCUTREE_H 30 #ifndef __LINUX_RCUTREE_H
31 #define __LINUX_RCUTREE_H 31 #define __LINUX_RCUTREE_H
32 32
33 extern void rcu_note_context_switch(int cpu); 33 extern void rcu_note_context_switch(int cpu);
34 extern int rcu_needs_cpu(int cpu, unsigned long *delta_jiffies); 34 extern int rcu_needs_cpu(int cpu, unsigned long *delta_jiffies);
35 extern void rcu_cpu_stall_reset(void); 35 extern void rcu_cpu_stall_reset(void);
36 36
37 /* 37 /*
38 * Note a virtualization-based context switch. This is simply a 38 * Note a virtualization-based context switch. This is simply a
39 * wrapper around rcu_note_context_switch(), which allows TINY_RCU 39 * wrapper around rcu_note_context_switch(), which allows TINY_RCU
40 * to save a few bytes. 40 * to save a few bytes.
41 */ 41 */
42 static inline void rcu_virt_note_context_switch(int cpu) 42 static inline void rcu_virt_note_context_switch(int cpu)
43 { 43 {
44 rcu_note_context_switch(cpu); 44 rcu_note_context_switch(cpu);
45 } 45 }
46 46
47 extern void synchronize_rcu_bh(void); 47 extern void synchronize_rcu_bh(void);
48 extern void synchronize_sched_expedited(void); 48 extern void synchronize_sched_expedited(void);
49 extern void synchronize_rcu_expedited(void); 49 extern void synchronize_rcu_expedited(void);
50 50
51 void kfree_call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu)); 51 void kfree_call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu));
52 52
53 /** 53 /**
54 * synchronize_rcu_bh_expedited - Brute-force RCU-bh grace period 54 * synchronize_rcu_bh_expedited - Brute-force RCU-bh grace period
55 * 55 *
56 * Wait for an RCU-bh grace period to elapse, but use a "big hammer" 56 * Wait for an RCU-bh grace period to elapse, but use a "big hammer"
57 * approach to force the grace period to end quickly. This consumes 57 * approach to force the grace period to end quickly. This consumes
58 * significant time on all CPUs and is unfriendly to real-time workloads, 58 * significant time on all CPUs and is unfriendly to real-time workloads,
59 * and is thus not recommended for any sort of common-case code. In fact, 59 * and is thus not recommended for any sort of common-case code. In fact,
60 * if you are using synchronize_rcu_bh_expedited() in a loop, please 60 * if you are using synchronize_rcu_bh_expedited() in a loop, please
61 * restructure your code to batch your updates, and then use a single 61 * restructure your code to batch your updates, and then use a single
62 * synchronize_rcu_bh() instead. 62 * synchronize_rcu_bh() instead.
63 * 63 *
64 * Note that it is illegal to call this function while holding any lock 64 * Note that it is illegal to call this function while holding any lock
65 * that is acquired by a CPU-hotplug notifier. And yes, it is also illegal 65 * that is acquired by a CPU-hotplug notifier. And yes, it is also illegal
66 * to call this function from a CPU-hotplug notifier. Failing to observe 66 * to call this function from a CPU-hotplug notifier. Failing to observe
67 * these restrictions will result in deadlock. 67 * these restrictions will result in deadlock.
68 */ 68 */
69 static inline void synchronize_rcu_bh_expedited(void) 69 static inline void synchronize_rcu_bh_expedited(void)
70 { 70 {
71 synchronize_sched_expedited(); 71 synchronize_sched_expedited();
72 } 72 }
73 73
74 extern void rcu_barrier(void); 74 extern void rcu_barrier(void);
75 extern void rcu_barrier_bh(void); 75 extern void rcu_barrier_bh(void);
76 extern void rcu_barrier_sched(void); 76 extern void rcu_barrier_sched(void);
77 77
78 extern unsigned long rcutorture_testseq; 78 extern unsigned long rcutorture_testseq;
79 extern unsigned long rcutorture_vernum; 79 extern unsigned long rcutorture_vernum;
80 extern long rcu_batches_completed(void); 80 extern long rcu_batches_completed(void);
81 extern long rcu_batches_completed_bh(void); 81 extern long rcu_batches_completed_bh(void);
82 extern long rcu_batches_completed_sched(void); 82 extern long rcu_batches_completed_sched(void);
83 83
84 extern void rcu_force_quiescent_state(void); 84 extern void rcu_force_quiescent_state(void);
85 extern void rcu_bh_force_quiescent_state(void); 85 extern void rcu_bh_force_quiescent_state(void);
86 extern void rcu_sched_force_quiescent_state(void); 86 extern void rcu_sched_force_quiescent_state(void);
87 87
88 extern void exit_rcu(void);
89
88 extern void rcu_scheduler_starting(void); 90 extern void rcu_scheduler_starting(void);
89 extern int rcu_scheduler_active __read_mostly; 91 extern int rcu_scheduler_active __read_mostly;
90 92
91 #endif /* __LINUX_RCUTREE_H */ 93 #endif /* __LINUX_RCUTREE_H */
92 94
1 /* 1 /*
2 * Read-Copy Update mechanism for mutual exclusion 2 * Read-Copy Update mechanism for mutual exclusion
3 * 3 *
4 * This program is free software; you can redistribute it and/or modify 4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by 5 * it under the terms of the GNU General Public License as published by
6 * the Free Software Foundation; either version 2 of the License, or 6 * the Free Software Foundation; either version 2 of the License, or
7 * (at your option) any later version. 7 * (at your option) any later version.
8 * 8 *
9 * This program is distributed in the hope that it will be useful, 9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of 10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details. 12 * GNU General Public License for more details.
13 * 13 *
14 * You should have received a copy of the GNU General Public License 14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write to the Free Software 15 * along with this program; if not, write to the Free Software
16 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 16 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
17 * 17 *
18 * Copyright IBM Corporation, 2001 18 * Copyright IBM Corporation, 2001
19 * 19 *
20 * Authors: Dipankar Sarma <dipankar@in.ibm.com> 20 * Authors: Dipankar Sarma <dipankar@in.ibm.com>
21 * Manfred Spraul <manfred@colorfullife.com> 21 * Manfred Spraul <manfred@colorfullife.com>
22 * 22 *
23 * Based on the original work by Paul McKenney <paulmck@us.ibm.com> 23 * Based on the original work by Paul McKenney <paulmck@us.ibm.com>
24 * and inputs from Rusty Russell, Andrea Arcangeli and Andi Kleen. 24 * and inputs from Rusty Russell, Andrea Arcangeli and Andi Kleen.
25 * Papers: 25 * Papers:
26 * http://www.rdrop.com/users/paulmck/paper/rclockpdcsproof.pdf 26 * http://www.rdrop.com/users/paulmck/paper/rclockpdcsproof.pdf
27 * http://lse.sourceforge.net/locking/rclock_OLS.2001.05.01c.sc.pdf (OLS2001) 27 * http://lse.sourceforge.net/locking/rclock_OLS.2001.05.01c.sc.pdf (OLS2001)
28 * 28 *
29 * For detailed explanation of Read-Copy Update mechanism see - 29 * For detailed explanation of Read-Copy Update mechanism see -
30 * http://lse.sourceforge.net/locking/rcupdate.html 30 * http://lse.sourceforge.net/locking/rcupdate.html
31 * 31 *
32 */ 32 */
33 #include <linux/types.h> 33 #include <linux/types.h>
34 #include <linux/kernel.h> 34 #include <linux/kernel.h>
35 #include <linux/init.h> 35 #include <linux/init.h>
36 #include <linux/spinlock.h> 36 #include <linux/spinlock.h>
37 #include <linux/smp.h> 37 #include <linux/smp.h>
38 #include <linux/interrupt.h> 38 #include <linux/interrupt.h>
39 #include <linux/sched.h> 39 #include <linux/sched.h>
40 #include <linux/atomic.h> 40 #include <linux/atomic.h>
41 #include <linux/bitops.h> 41 #include <linux/bitops.h>
42 #include <linux/percpu.h> 42 #include <linux/percpu.h>
43 #include <linux/notifier.h> 43 #include <linux/notifier.h>
44 #include <linux/cpu.h> 44 #include <linux/cpu.h>
45 #include <linux/mutex.h> 45 #include <linux/mutex.h>
46 #include <linux/export.h> 46 #include <linux/export.h>
47 #include <linux/hardirq.h> 47 #include <linux/hardirq.h>
48 #include <linux/delay.h> 48 #include <linux/delay.h>
49 #include <linux/module.h> 49 #include <linux/module.h>
50 50
51 #define CREATE_TRACE_POINTS 51 #define CREATE_TRACE_POINTS
52 #include <trace/events/rcu.h> 52 #include <trace/events/rcu.h>
53 53
54 #include "rcu.h" 54 #include "rcu.h"
55 55
56 module_param(rcu_expedited, int, 0); 56 module_param(rcu_expedited, int, 0);
57 57
58 #ifdef CONFIG_PREEMPT_RCU 58 #ifdef CONFIG_PREEMPT_RCU
59 59
60 /* 60 /*
61 * Preemptible RCU implementation for rcu_read_lock(). 61 * Preemptible RCU implementation for rcu_read_lock().
62 * Just increment ->rcu_read_lock_nesting, shared state will be updated 62 * Just increment ->rcu_read_lock_nesting, shared state will be updated
63 * if we block. 63 * if we block.
64 */ 64 */
65 void __rcu_read_lock(void) 65 void __rcu_read_lock(void)
66 { 66 {
67 current->rcu_read_lock_nesting++; 67 current->rcu_read_lock_nesting++;
68 barrier(); /* critical section after entry code. */ 68 barrier(); /* critical section after entry code. */
69 } 69 }
70 EXPORT_SYMBOL_GPL(__rcu_read_lock); 70 EXPORT_SYMBOL_GPL(__rcu_read_lock);
71 71
72 /* 72 /*
73 * Preemptible RCU implementation for rcu_read_unlock(). 73 * Preemptible RCU implementation for rcu_read_unlock().
74 * Decrement ->rcu_read_lock_nesting. If the result is zero (outermost 74 * Decrement ->rcu_read_lock_nesting. If the result is zero (outermost
75 * rcu_read_unlock()) and ->rcu_read_unlock_special is non-zero, then 75 * rcu_read_unlock()) and ->rcu_read_unlock_special is non-zero, then
76 * invoke rcu_read_unlock_special() to clean up after a context switch 76 * invoke rcu_read_unlock_special() to clean up after a context switch
77 * in an RCU read-side critical section and other special cases. 77 * in an RCU read-side critical section and other special cases.
78 */ 78 */
79 void __rcu_read_unlock(void) 79 void __rcu_read_unlock(void)
80 { 80 {
81 struct task_struct *t = current; 81 struct task_struct *t = current;
82 82
83 if (t->rcu_read_lock_nesting != 1) { 83 if (t->rcu_read_lock_nesting != 1) {
84 --t->rcu_read_lock_nesting; 84 --t->rcu_read_lock_nesting;
85 } else { 85 } else {
86 barrier(); /* critical section before exit code. */ 86 barrier(); /* critical section before exit code. */
87 t->rcu_read_lock_nesting = INT_MIN; 87 t->rcu_read_lock_nesting = INT_MIN;
88 #ifdef CONFIG_PROVE_RCU_DELAY 88 #ifdef CONFIG_PROVE_RCU_DELAY
89 udelay(10); /* Make preemption more probable. */ 89 udelay(10); /* Make preemption more probable. */
90 #endif /* #ifdef CONFIG_PROVE_RCU_DELAY */ 90 #endif /* #ifdef CONFIG_PROVE_RCU_DELAY */
91 barrier(); /* assign before ->rcu_read_unlock_special load */ 91 barrier(); /* assign before ->rcu_read_unlock_special load */
92 if (unlikely(ACCESS_ONCE(t->rcu_read_unlock_special))) 92 if (unlikely(ACCESS_ONCE(t->rcu_read_unlock_special)))
93 rcu_read_unlock_special(t); 93 rcu_read_unlock_special(t);
94 barrier(); /* ->rcu_read_unlock_special load before assign */ 94 barrier(); /* ->rcu_read_unlock_special load before assign */
95 t->rcu_read_lock_nesting = 0; 95 t->rcu_read_lock_nesting = 0;
96 } 96 }
97 #ifdef CONFIG_PROVE_LOCKING 97 #ifdef CONFIG_PROVE_LOCKING
98 { 98 {
99 int rrln = ACCESS_ONCE(t->rcu_read_lock_nesting); 99 int rrln = ACCESS_ONCE(t->rcu_read_lock_nesting);
100 100
101 WARN_ON_ONCE(rrln < 0 && rrln > INT_MIN / 2); 101 WARN_ON_ONCE(rrln < 0 && rrln > INT_MIN / 2);
102 } 102 }
103 #endif /* #ifdef CONFIG_PROVE_LOCKING */ 103 #endif /* #ifdef CONFIG_PROVE_LOCKING */
104 } 104 }
105 EXPORT_SYMBOL_GPL(__rcu_read_unlock); 105 EXPORT_SYMBOL_GPL(__rcu_read_unlock);
106 106
107 /* 107 #endif /* #ifdef CONFIG_PREEMPT_RCU */
108 * Check for a task exiting while in a preemptible-RCU read-side
109 * critical section, clean up if so. No need to issue warnings,
110 * as debug_check_no_locks_held() already does this if lockdep
111 * is enabled.
112 */
113 void exit_rcu(void)
114 {
115 struct task_struct *t = current;
116
117 if (likely(list_empty(&current->rcu_node_entry)))
118 return;
119 t->rcu_read_lock_nesting = 1;
120 barrier();
121 t->rcu_read_unlock_special = RCU_READ_UNLOCK_BLOCKED;
122 __rcu_read_unlock();
123 }
124
125 #else /* #ifdef CONFIG_PREEMPT_RCU */
126
127 void exit_rcu(void)
128 {
129 }
130
131 #endif /* #else #ifdef CONFIG_PREEMPT_RCU */
132 108
133 #ifdef CONFIG_DEBUG_LOCK_ALLOC 109 #ifdef CONFIG_DEBUG_LOCK_ALLOC
134 static struct lock_class_key rcu_lock_key; 110 static struct lock_class_key rcu_lock_key;
135 struct lockdep_map rcu_lock_map = 111 struct lockdep_map rcu_lock_map =
136 STATIC_LOCKDEP_MAP_INIT("rcu_read_lock", &rcu_lock_key); 112 STATIC_LOCKDEP_MAP_INIT("rcu_read_lock", &rcu_lock_key);
137 EXPORT_SYMBOL_GPL(rcu_lock_map); 113 EXPORT_SYMBOL_GPL(rcu_lock_map);
138 114
139 static struct lock_class_key rcu_bh_lock_key; 115 static struct lock_class_key rcu_bh_lock_key;
140 struct lockdep_map rcu_bh_lock_map = 116 struct lockdep_map rcu_bh_lock_map =
141 STATIC_LOCKDEP_MAP_INIT("rcu_read_lock_bh", &rcu_bh_lock_key); 117 STATIC_LOCKDEP_MAP_INIT("rcu_read_lock_bh", &rcu_bh_lock_key);
142 EXPORT_SYMBOL_GPL(rcu_bh_lock_map); 118 EXPORT_SYMBOL_GPL(rcu_bh_lock_map);
143 119
144 static struct lock_class_key rcu_sched_lock_key; 120 static struct lock_class_key rcu_sched_lock_key;
145 struct lockdep_map rcu_sched_lock_map = 121 struct lockdep_map rcu_sched_lock_map =
146 STATIC_LOCKDEP_MAP_INIT("rcu_read_lock_sched", &rcu_sched_lock_key); 122 STATIC_LOCKDEP_MAP_INIT("rcu_read_lock_sched", &rcu_sched_lock_key);
147 EXPORT_SYMBOL_GPL(rcu_sched_lock_map); 123 EXPORT_SYMBOL_GPL(rcu_sched_lock_map);
148 #endif 124 #endif
149 125
150 #ifdef CONFIG_DEBUG_LOCK_ALLOC 126 #ifdef CONFIG_DEBUG_LOCK_ALLOC
151 127
152 int debug_lockdep_rcu_enabled(void) 128 int debug_lockdep_rcu_enabled(void)
153 { 129 {
154 return rcu_scheduler_active && debug_locks && 130 return rcu_scheduler_active && debug_locks &&
155 current->lockdep_recursion == 0; 131 current->lockdep_recursion == 0;
156 } 132 }
157 EXPORT_SYMBOL_GPL(debug_lockdep_rcu_enabled); 133 EXPORT_SYMBOL_GPL(debug_lockdep_rcu_enabled);
158 134
159 /** 135 /**
160 * rcu_read_lock_bh_held() - might we be in RCU-bh read-side critical section? 136 * rcu_read_lock_bh_held() - might we be in RCU-bh read-side critical section?
161 * 137 *
162 * Check for bottom half being disabled, which covers both the 138 * Check for bottom half being disabled, which covers both the
163 * CONFIG_PROVE_RCU and not cases. Note that if someone uses 139 * CONFIG_PROVE_RCU and not cases. Note that if someone uses
164 * rcu_read_lock_bh(), but then later enables BH, lockdep (if enabled) 140 * rcu_read_lock_bh(), but then later enables BH, lockdep (if enabled)
165 * will show the situation. This is useful for debug checks in functions 141 * will show the situation. This is useful for debug checks in functions
166 * that require that they be called within an RCU read-side critical 142 * that require that they be called within an RCU read-side critical
167 * section. 143 * section.
168 * 144 *
169 * Check debug_lockdep_rcu_enabled() to prevent false positives during boot. 145 * Check debug_lockdep_rcu_enabled() to prevent false positives during boot.
170 * 146 *
171 * Note that rcu_read_lock() is disallowed if the CPU is either idle or 147 * Note that rcu_read_lock() is disallowed if the CPU is either idle or
172 * offline from an RCU perspective, so check for those as well. 148 * offline from an RCU perspective, so check for those as well.
173 */ 149 */
174 int rcu_read_lock_bh_held(void) 150 int rcu_read_lock_bh_held(void)
175 { 151 {
176 if (!debug_lockdep_rcu_enabled()) 152 if (!debug_lockdep_rcu_enabled())
177 return 1; 153 return 1;
178 if (rcu_is_cpu_idle()) 154 if (rcu_is_cpu_idle())
179 return 0; 155 return 0;
180 if (!rcu_lockdep_current_cpu_online()) 156 if (!rcu_lockdep_current_cpu_online())
181 return 0; 157 return 0;
182 return in_softirq() || irqs_disabled(); 158 return in_softirq() || irqs_disabled();
183 } 159 }
184 EXPORT_SYMBOL_GPL(rcu_read_lock_bh_held); 160 EXPORT_SYMBOL_GPL(rcu_read_lock_bh_held);
185 161
186 #endif /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */ 162 #endif /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */
187 163
188 struct rcu_synchronize { 164 struct rcu_synchronize {
189 struct rcu_head head; 165 struct rcu_head head;
190 struct completion completion; 166 struct completion completion;
191 }; 167 };
192 168
193 /* 169 /*
194 * Awaken the corresponding synchronize_rcu() instance now that a 170 * Awaken the corresponding synchronize_rcu() instance now that a
195 * grace period has elapsed. 171 * grace period has elapsed.
196 */ 172 */
197 static void wakeme_after_rcu(struct rcu_head *head) 173 static void wakeme_after_rcu(struct rcu_head *head)
198 { 174 {
199 struct rcu_synchronize *rcu; 175 struct rcu_synchronize *rcu;
200 176
201 rcu = container_of(head, struct rcu_synchronize, head); 177 rcu = container_of(head, struct rcu_synchronize, head);
202 complete(&rcu->completion); 178 complete(&rcu->completion);
203 } 179 }
204 180
205 void wait_rcu_gp(call_rcu_func_t crf) 181 void wait_rcu_gp(call_rcu_func_t crf)
206 { 182 {
207 struct rcu_synchronize rcu; 183 struct rcu_synchronize rcu;
208 184
209 init_rcu_head_on_stack(&rcu.head); 185 init_rcu_head_on_stack(&rcu.head);
210 init_completion(&rcu.completion); 186 init_completion(&rcu.completion);
211 /* Will wake me after RCU finished. */ 187 /* Will wake me after RCU finished. */
212 crf(&rcu.head, wakeme_after_rcu); 188 crf(&rcu.head, wakeme_after_rcu);
213 /* Wait for it. */ 189 /* Wait for it. */
214 wait_for_completion(&rcu.completion); 190 wait_for_completion(&rcu.completion);
215 destroy_rcu_head_on_stack(&rcu.head); 191 destroy_rcu_head_on_stack(&rcu.head);
216 } 192 }
217 EXPORT_SYMBOL_GPL(wait_rcu_gp); 193 EXPORT_SYMBOL_GPL(wait_rcu_gp);
218 194
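For illustration, wait_rcu_gp() is the building block behind the synchronize_*() family; for example, the TINY_RCU rcu_barrier_bh() earlier in this diff is simply wait_rcu_gp(call_rcu_bh). A hypothetical wrapper might look like:

/* Illustrative only: a synchronize_rcu()-style wait built on wait_rcu_gp(). */
static void my_synchronize_rcu(void)
{
	wait_rcu_gp(call_rcu);	/* Queue a callback and block until it has run. */
}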
219 #ifdef CONFIG_PROVE_RCU 195 #ifdef CONFIG_PROVE_RCU
220 /* 196 /*
221 * wrapper function to avoid #include problems. 197 * wrapper function to avoid #include problems.
222 */ 198 */
223 int rcu_my_thread_group_empty(void) 199 int rcu_my_thread_group_empty(void)
224 { 200 {
225 return thread_group_empty(current); 201 return thread_group_empty(current);
226 } 202 }
227 EXPORT_SYMBOL_GPL(rcu_my_thread_group_empty); 203 EXPORT_SYMBOL_GPL(rcu_my_thread_group_empty);
228 #endif /* #ifdef CONFIG_PROVE_RCU */ 204 #endif /* #ifdef CONFIG_PROVE_RCU */
229 205
230 #ifdef CONFIG_DEBUG_OBJECTS_RCU_HEAD 206 #ifdef CONFIG_DEBUG_OBJECTS_RCU_HEAD
231 static inline void debug_init_rcu_head(struct rcu_head *head) 207 static inline void debug_init_rcu_head(struct rcu_head *head)
232 { 208 {
233 debug_object_init(head, &rcuhead_debug_descr); 209 debug_object_init(head, &rcuhead_debug_descr);
234 } 210 }
235 211
236 static inline void debug_rcu_head_free(struct rcu_head *head) 212 static inline void debug_rcu_head_free(struct rcu_head *head)
237 { 213 {
238 debug_object_free(head, &rcuhead_debug_descr); 214 debug_object_free(head, &rcuhead_debug_descr);
239 } 215 }
240 216
241 /* 217 /*
242 * fixup_init is called when: 218 * fixup_init is called when:
243 * - an active object is initialized 219 * - an active object is initialized
244 */ 220 */
245 static int rcuhead_fixup_init(void *addr, enum debug_obj_state state) 221 static int rcuhead_fixup_init(void *addr, enum debug_obj_state state)
246 { 222 {
247 struct rcu_head *head = addr; 223 struct rcu_head *head = addr;
248 224
249 switch (state) { 225 switch (state) {
250 case ODEBUG_STATE_ACTIVE: 226 case ODEBUG_STATE_ACTIVE:
251 /* 227 /*
252 * Ensure that queued callbacks are all executed. 228 * Ensure that queued callbacks are all executed.
253 * If we detect that we are nested in a RCU read-side critical 229 * If we detect that we are nested in a RCU read-side critical
254 * section, we should simply fail, otherwise we would deadlock. 230 * section, we should simply fail, otherwise we would deadlock.
255 * In !PREEMPT configurations, there is no way to tell if we are 231 * In !PREEMPT configurations, there is no way to tell if we are
256 * in a RCU read-side critical section or not, so we never 232 * in a RCU read-side critical section or not, so we never
257 * attempt any fixup and just print a warning. 233 * attempt any fixup and just print a warning.
258 */ 234 */
259 #ifndef CONFIG_PREEMPT 235 #ifndef CONFIG_PREEMPT
260 WARN_ON_ONCE(1); 236 WARN_ON_ONCE(1);
261 return 0; 237 return 0;
262 #endif 238 #endif
263 if (rcu_preempt_depth() != 0 || preempt_count() != 0 || 239 if (rcu_preempt_depth() != 0 || preempt_count() != 0 ||
264 irqs_disabled()) { 240 irqs_disabled()) {
265 WARN_ON_ONCE(1); 241 WARN_ON_ONCE(1);
266 return 0; 242 return 0;
267 } 243 }
268 rcu_barrier(); 244 rcu_barrier();
269 rcu_barrier_sched(); 245 rcu_barrier_sched();
270 rcu_barrier_bh(); 246 rcu_barrier_bh();
271 debug_object_init(head, &rcuhead_debug_descr); 247 debug_object_init(head, &rcuhead_debug_descr);
272 return 1; 248 return 1;
273 default: 249 default:
274 return 0; 250 return 0;
275 } 251 }
276 } 252 }
277 253
278 /* 254 /*
279 * fixup_activate is called when: 255 * fixup_activate is called when:
280 * - an active object is activated 256 * - an active object is activated
281 * - an unknown object is activated (might be a statically initialized object) 257 * - an unknown object is activated (might be a statically initialized object)
282 * Activation is performed internally by call_rcu(). 258 * Activation is performed internally by call_rcu().
283 */ 259 */
284 static int rcuhead_fixup_activate(void *addr, enum debug_obj_state state) 260 static int rcuhead_fixup_activate(void *addr, enum debug_obj_state state)
285 { 261 {
286 struct rcu_head *head = addr; 262 struct rcu_head *head = addr;
287 263
288 switch (state) { 264 switch (state) {
289 265
290 case ODEBUG_STATE_NOTAVAILABLE: 266 case ODEBUG_STATE_NOTAVAILABLE:
291 /* 267 /*
292 * This is not really a fixup. We just make sure that it is 268 * This is not really a fixup. We just make sure that it is
293 * tracked in the object tracker. 269 * tracked in the object tracker.
294 */ 270 */
295 debug_object_init(head, &rcuhead_debug_descr); 271 debug_object_init(head, &rcuhead_debug_descr);
296 debug_object_activate(head, &rcuhead_debug_descr); 272 debug_object_activate(head, &rcuhead_debug_descr);
297 return 0; 273 return 0;
298 274
299 case ODEBUG_STATE_ACTIVE: 275 case ODEBUG_STATE_ACTIVE:
300 /* 276 /*
301 * Ensure that queued callbacks are all executed. 277 * Ensure that queued callbacks are all executed.
302 * If we detect that we are nested in an RCU read-side critical 278 * If we detect that we are nested in an RCU read-side critical
303 * section, we should simply fail, otherwise we would deadlock. 279 * section, we should simply fail, otherwise we would deadlock.
304 * In !PREEMPT configurations, there is no way to tell if we are 280 * In !PREEMPT configurations, there is no way to tell if we are
305 * in an RCU read-side critical section or not, so we never 281 * in an RCU read-side critical section or not, so we never
306 * attempt any fixup and just print a warning. 282 * attempt any fixup and just print a warning.
307 */ 283 */
308 #ifndef CONFIG_PREEMPT 284 #ifndef CONFIG_PREEMPT
309 WARN_ON_ONCE(1); 285 WARN_ON_ONCE(1);
310 return 0; 286 return 0;
311 #endif 287 #endif
312 if (rcu_preempt_depth() != 0 || preempt_count() != 0 || 288 if (rcu_preempt_depth() != 0 || preempt_count() != 0 ||
313 irqs_disabled()) { 289 irqs_disabled()) {
314 WARN_ON_ONCE(1); 290 WARN_ON_ONCE(1);
315 return 0; 291 return 0;
316 } 292 }
317 rcu_barrier(); 293 rcu_barrier();
318 rcu_barrier_sched(); 294 rcu_barrier_sched();
319 rcu_barrier_bh(); 295 rcu_barrier_bh();
320 debug_object_activate(head, &rcuhead_debug_descr); 296 debug_object_activate(head, &rcuhead_debug_descr);
321 return 1; 297 return 1;
322 default: 298 default:
323 return 0; 299 return 0;
324 } 300 }
325 } 301 }
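An illustrative sketch (not part of this commit) of the kind of error these fixup hooks catch: queueing the same rcu_head twice before its callback has run leaves the debug object in ODEBUG_STATE_ACTIVE, so the second call_rcu() lands in rcuhead_fixup_activate() above. The qux structure and callback are hypothetical; the usual <linux/rcupdate.h> and <linux/slab.h> includes are assumed.

struct qux {
	struct rcu_head rcu;
	int data;
};

static void qux_cb(struct rcu_head *rhp)
{
	kfree(container_of(rhp, struct qux, rcu));
}

static void qux_buggy_double_queue(struct qux *p)
{
	call_rcu(&p->rcu, qux_cb);
	call_rcu(&p->rcu, qux_cb);	/* Bug: the rcu_head is still active. */
}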
326 302
327 /* 303 /*
328 * fixup_free is called when: 304 * fixup_free is called when:
329 * - an active object is freed 305 * - an active object is freed
330 */ 306 */
331 static int rcuhead_fixup_free(void *addr, enum debug_obj_state state) 307 static int rcuhead_fixup_free(void *addr, enum debug_obj_state state)
332 { 308 {
333 struct rcu_head *head = addr; 309 struct rcu_head *head = addr;
334 310
335 switch (state) { 311 switch (state) {
336 case ODEBUG_STATE_ACTIVE: 312 case ODEBUG_STATE_ACTIVE:
337 /* 313 /*
338 * Ensure that queued callbacks are all executed. 314 * Ensure that queued callbacks are all executed.
339 * If we detect that we are nested in an RCU read-side critical 315 * If we detect that we are nested in an RCU read-side critical
340 * section, we should simply fail, otherwise we would deadlock. 316 * section, we should simply fail, otherwise we would deadlock.
341 * In !PREEMPT configurations, there is no way to tell if we are 317 * In !PREEMPT configurations, there is no way to tell if we are
342 * in an RCU read-side critical section or not, so we never 318 * in an RCU read-side critical section or not, so we never
343 * attempt any fixup and just print a warning. 319 * attempt any fixup and just print a warning.
344 */ 320 */
345 #ifndef CONFIG_PREEMPT 321 #ifndef CONFIG_PREEMPT
346 WARN_ON_ONCE(1); 322 WARN_ON_ONCE(1);
347 return 0; 323 return 0;
348 #endif 324 #endif
349 if (rcu_preempt_depth() != 0 || preempt_count() != 0 || 325 if (rcu_preempt_depth() != 0 || preempt_count() != 0 ||
350 irqs_disabled()) { 326 irqs_disabled()) {
351 WARN_ON_ONCE(1); 327 WARN_ON_ONCE(1);
352 return 0; 328 return 0;
353 } 329 }
354 rcu_barrier(); 330 rcu_barrier();
355 rcu_barrier_sched(); 331 rcu_barrier_sched();
356 rcu_barrier_bh(); 332 rcu_barrier_bh();
357 debug_object_free(head, &rcuhead_debug_descr); 333 debug_object_free(head, &rcuhead_debug_descr);
358 return 1; 334 return 1;
359 default: 335 default:
360 return 0; 336 return 0;
361 } 337 }
362 } 338 }
363 339
364 /** 340 /**
365 * init_rcu_head_on_stack() - initialize on-stack rcu_head for debugobjects 341 * init_rcu_head_on_stack() - initialize on-stack rcu_head for debugobjects
366 * @head: pointer to rcu_head structure to be initialized 342 * @head: pointer to rcu_head structure to be initialized
367 * 343 *
368 * This function informs debugobjects of a new rcu_head structure that 344 * This function informs debugobjects of a new rcu_head structure that
369 * has been allocated as an auto variable on the stack. This function 345 * has been allocated as an auto variable on the stack. This function
370 * is not required for rcu_head structures that are statically defined or 346 * is not required for rcu_head structures that are statically defined or
371 * that are dynamically allocated on the heap. This function has no 347 * that are dynamically allocated on the heap. This function has no
372 * effect for !CONFIG_DEBUG_OBJECTS_RCU_HEAD kernel builds. 348 * effect for !CONFIG_DEBUG_OBJECTS_RCU_HEAD kernel builds.
373 */ 349 */
374 void init_rcu_head_on_stack(struct rcu_head *head) 350 void init_rcu_head_on_stack(struct rcu_head *head)
375 { 351 {
376 debug_object_init_on_stack(head, &rcuhead_debug_descr); 352 debug_object_init_on_stack(head, &rcuhead_debug_descr);
377 } 353 }
378 EXPORT_SYMBOL_GPL(init_rcu_head_on_stack); 354 EXPORT_SYMBOL_GPL(init_rcu_head_on_stack);
379 355
380 /** 356 /**
381 * destroy_rcu_head_on_stack() - destroy on-stack rcu_head for debugobjects 357 * destroy_rcu_head_on_stack() - destroy on-stack rcu_head for debugobjects
382 * @head: pointer to rcu_head structure that is about to go out of scope 358 * @head: pointer to rcu_head structure that is about to go out of scope
383 * 359 *
384 * This function informs debugobjects that an on-stack rcu_head structure 360 * This function informs debugobjects that an on-stack rcu_head structure
385 * is about to go out of scope. As with init_rcu_head_on_stack(), this 361 * is about to go out of scope. As with init_rcu_head_on_stack(), this
386 * function is not required for rcu_head structures that are statically 362 * function is not required for rcu_head structures that are statically
387 * defined or that are dynamically allocated on the heap. Also as with 363 * defined or that are dynamically allocated on the heap. Also as with
388 * init_rcu_head_on_stack(), this function has no effect for 364 * init_rcu_head_on_stack(), this function has no effect for
389 * !CONFIG_DEBUG_OBJECTS_RCU_HEAD kernel builds. 365 * !CONFIG_DEBUG_OBJECTS_RCU_HEAD kernel builds.
390 */ 366 */
391 void destroy_rcu_head_on_stack(struct rcu_head *head) 367 void destroy_rcu_head_on_stack(struct rcu_head *head)
392 { 368 {
393 debug_object_free(head, &rcuhead_debug_descr); 369 debug_object_free(head, &rcuhead_debug_descr);
394 } 370 }
395 EXPORT_SYMBOL_GPL(destroy_rcu_head_on_stack); 371 EXPORT_SYMBOL_GPL(destroy_rcu_head_on_stack);
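A minimal usage sketch (not part of this commit) for the two on-stack helpers above: an rcu_head that lives in a local variable is announced to debugobjects before being passed to call_rcu() and retired before its stack frame goes away. The stack_demo structure, callback, and completion are hypothetical.

struct stack_demo {
	struct rcu_head rh;
	struct completion done;
};

static void stack_demo_cb(struct rcu_head *rhp)
{
	complete(&container_of(rhp, struct stack_demo, rh)->done);
}

static void stack_demo_wait_for_gp(void)
{
	struct stack_demo d;

	init_completion(&d.done);
	init_rcu_head_on_stack(&d.rh);		/* Tell debugobjects about the on-stack rcu_head. */
	call_rcu(&d.rh, stack_demo_cb);
	wait_for_completion(&d.done);		/* Callback has run; the rcu_head is idle again. */
	destroy_rcu_head_on_stack(&d.rh);	/* Must happen before leaving this frame. */
}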
396 372
397 struct debug_obj_descr rcuhead_debug_descr = { 373 struct debug_obj_descr rcuhead_debug_descr = {
398 .name = "rcu_head", 374 .name = "rcu_head",
399 .fixup_init = rcuhead_fixup_init, 375 .fixup_init = rcuhead_fixup_init,
400 .fixup_activate = rcuhead_fixup_activate, 376 .fixup_activate = rcuhead_fixup_activate,
401 .fixup_free = rcuhead_fixup_free, 377 .fixup_free = rcuhead_fixup_free,
402 }; 378 };
403 EXPORT_SYMBOL_GPL(rcuhead_debug_descr); 379 EXPORT_SYMBOL_GPL(rcuhead_debug_descr);
404 #endif /* #ifdef CONFIG_DEBUG_OBJECTS_RCU_HEAD */ 380 #endif /* #ifdef CONFIG_DEBUG_OBJECTS_RCU_HEAD */
405 381
406 #if defined(CONFIG_TREE_RCU) || defined(CONFIG_TREE_PREEMPT_RCU) || defined(CONFIG_RCU_TRACE) 382 #if defined(CONFIG_TREE_RCU) || defined(CONFIG_TREE_PREEMPT_RCU) || defined(CONFIG_RCU_TRACE)
407 void do_trace_rcu_torture_read(char *rcutorturename, struct rcu_head *rhp, 383 void do_trace_rcu_torture_read(char *rcutorturename, struct rcu_head *rhp,
408 unsigned long secs, 384 unsigned long secs,
409 unsigned long c_old, unsigned long c) 385 unsigned long c_old, unsigned long c)
410 { 386 {
411 trace_rcu_torture_read(rcutorturename, rhp, secs, c_old, c); 387 trace_rcu_torture_read(rcutorturename, rhp, secs, c_old, c);
412 } 388 }
413 EXPORT_SYMBOL_GPL(do_trace_rcu_torture_read); 389 EXPORT_SYMBOL_GPL(do_trace_rcu_torture_read);
414 #else 390 #else
415 #define do_trace_rcu_torture_read(rcutorturename, rhp, secs, c_old, c) \ 391 #define do_trace_rcu_torture_read(rcutorturename, rhp, secs, c_old, c) \
416 do { } while (0) 392 do { } while (0)
417 #endif 393 #endif
418 394
419 #ifdef CONFIG_RCU_STALL_COMMON 395 #ifdef CONFIG_RCU_STALL_COMMON
420 396
421 #ifdef CONFIG_PROVE_RCU 397 #ifdef CONFIG_PROVE_RCU
422 #define RCU_STALL_DELAY_DELTA (5 * HZ) 398 #define RCU_STALL_DELAY_DELTA (5 * HZ)
423 #else 399 #else
424 #define RCU_STALL_DELAY_DELTA 0 400 #define RCU_STALL_DELAY_DELTA 0
425 #endif 401 #endif
426 402
427 int rcu_cpu_stall_suppress __read_mostly; /* 1 = suppress stall warnings. */ 403 int rcu_cpu_stall_suppress __read_mostly; /* 1 = suppress stall warnings. */
428 int rcu_cpu_stall_timeout __read_mostly = CONFIG_RCU_CPU_STALL_TIMEOUT; 404 int rcu_cpu_stall_timeout __read_mostly = CONFIG_RCU_CPU_STALL_TIMEOUT;
429 405
430 module_param(rcu_cpu_stall_suppress, int, 0644); 406 module_param(rcu_cpu_stall_suppress, int, 0644);
431 module_param(rcu_cpu_stall_timeout, int, 0644); 407 module_param(rcu_cpu_stall_timeout, int, 0644);
432 408
433 int rcu_jiffies_till_stall_check(void) 409 int rcu_jiffies_till_stall_check(void)
434 { 410 {
435 int till_stall_check = ACCESS_ONCE(rcu_cpu_stall_timeout); 411 int till_stall_check = ACCESS_ONCE(rcu_cpu_stall_timeout);
436 412
437 /* 413 /*
438 * Limit check must be consistent with the Kconfig limits 414 * Limit check must be consistent with the Kconfig limits
439 * for CONFIG_RCU_CPU_STALL_TIMEOUT. 415 * for CONFIG_RCU_CPU_STALL_TIMEOUT.
440 */ 416 */
441 if (till_stall_check < 3) { 417 if (till_stall_check < 3) {
442 ACCESS_ONCE(rcu_cpu_stall_timeout) = 3; 418 ACCESS_ONCE(rcu_cpu_stall_timeout) = 3;
443 till_stall_check = 3; 419 till_stall_check = 3;
444 } else if (till_stall_check > 300) { 420 } else if (till_stall_check > 300) {
445 ACCESS_ONCE(rcu_cpu_stall_timeout) = 300; 421 ACCESS_ONCE(rcu_cpu_stall_timeout) = 300;
446 till_stall_check = 300; 422 till_stall_check = 300;
447 } 423 }
448 return till_stall_check * HZ + RCU_STALL_DELAY_DELTA; 424 return till_stall_check * HZ + RCU_STALL_DELAY_DELTA;
449 } 425 }
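As a worked example (not part of this commit): with HZ=1000, CONFIG_PROVE_RCU=n, and CONFIG_RCU_CPU_STALL_TIMEOUT=21, the function above returns 21 * 1000 = 21000 jiffies, while a runtime write of 1 to the rcu_cpu_stall_timeout module parameter is clamped to 3, giving 3000 jiffies. A hypothetical caller might use the value like this:

/* Hypothetical helper: has the grace period that began at gp_start stalled? */
static bool demo_gp_looks_stalled(unsigned long gp_start)
{
	return time_after(jiffies, gp_start + rcu_jiffies_till_stall_check());
}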
450 426
451 static int rcu_panic(struct notifier_block *this, unsigned long ev, void *ptr) 427 static int rcu_panic(struct notifier_block *this, unsigned long ev, void *ptr)
452 { 428 {
453 rcu_cpu_stall_suppress = 1; 429 rcu_cpu_stall_suppress = 1;
454 return NOTIFY_DONE; 430 return NOTIFY_DONE;
455 } 431 }
456 432
457 static struct notifier_block rcu_panic_block = { 433 static struct notifier_block rcu_panic_block = {
458 .notifier_call = rcu_panic, 434 .notifier_call = rcu_panic,
459 }; 435 };
460 436
461 static int __init check_cpu_stall_init(void) 437 static int __init check_cpu_stall_init(void)
462 { 438 {
463 atomic_notifier_chain_register(&panic_notifier_list, &rcu_panic_block); 439 atomic_notifier_chain_register(&panic_notifier_list, &rcu_panic_block);
464 return 0; 440 return 0;
465 } 441 }
466 early_initcall(check_cpu_stall_init); 442 early_initcall(check_cpu_stall_init);
467 443
468 #endif /* #ifdef CONFIG_RCU_STALL_COMMON */ 444 #endif /* #ifdef CONFIG_RCU_STALL_COMMON */
469 445
kernel/rcutree_plugin.h
1 /* 1 /*
2 * Read-Copy Update mechanism for mutual exclusion (tree-based version) 2 * Read-Copy Update mechanism for mutual exclusion (tree-based version)
3 * Internal non-public definitions that provide either classic 3 * Internal non-public definitions that provide either classic
4 * or preemptible semantics. 4 * or preemptible semantics.
5 * 5 *
6 * This program is free software; you can redistribute it and/or modify 6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by 7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or 8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version. 9 * (at your option) any later version.
10 * 10 *
11 * This program is distributed in the hope that it will be useful, 11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details. 14 * GNU General Public License for more details.
15 * 15 *
16 * You should have received a copy of the GNU General Public License 16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software 17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 18 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
19 * 19 *
20 * Copyright Red Hat, 2009 20 * Copyright Red Hat, 2009
21 * Copyright IBM Corporation, 2009 21 * Copyright IBM Corporation, 2009
22 * 22 *
23 * Author: Ingo Molnar <mingo@elte.hu> 23 * Author: Ingo Molnar <mingo@elte.hu>
24 * Paul E. McKenney <paulmck@linux.vnet.ibm.com> 24 * Paul E. McKenney <paulmck@linux.vnet.ibm.com>
25 */ 25 */
26 26
27 #include <linux/delay.h> 27 #include <linux/delay.h>
28 #include <linux/gfp.h> 28 #include <linux/gfp.h>
29 #include <linux/oom.h> 29 #include <linux/oom.h>
30 #include <linux/smpboot.h> 30 #include <linux/smpboot.h>
31 #include <linux/tick.h> 31 #include <linux/tick.h>
32 32
33 #define RCU_KTHREAD_PRIO 1 33 #define RCU_KTHREAD_PRIO 1
34 34
35 #ifdef CONFIG_RCU_BOOST 35 #ifdef CONFIG_RCU_BOOST
36 #define RCU_BOOST_PRIO CONFIG_RCU_BOOST_PRIO 36 #define RCU_BOOST_PRIO CONFIG_RCU_BOOST_PRIO
37 #else 37 #else
38 #define RCU_BOOST_PRIO RCU_KTHREAD_PRIO 38 #define RCU_BOOST_PRIO RCU_KTHREAD_PRIO
39 #endif 39 #endif
40 40
41 #ifdef CONFIG_RCU_NOCB_CPU 41 #ifdef CONFIG_RCU_NOCB_CPU
42 static cpumask_var_t rcu_nocb_mask; /* CPUs to have callbacks offloaded. */ 42 static cpumask_var_t rcu_nocb_mask; /* CPUs to have callbacks offloaded. */
43 static bool have_rcu_nocb_mask; /* Was rcu_nocb_mask allocated? */ 43 static bool have_rcu_nocb_mask; /* Was rcu_nocb_mask allocated? */
44 static bool __read_mostly rcu_nocb_poll; /* Offload kthreads are to poll. */ 44 static bool __read_mostly rcu_nocb_poll; /* Offload kthreads are to poll. */
45 static char __initdata nocb_buf[NR_CPUS * 5]; 45 static char __initdata nocb_buf[NR_CPUS * 5];
46 #endif /* #ifdef CONFIG_RCU_NOCB_CPU */ 46 #endif /* #ifdef CONFIG_RCU_NOCB_CPU */
47 47
48 /* 48 /*
49 * Check the RCU kernel configuration parameters and print informative 49 * Check the RCU kernel configuration parameters and print informative
50 * messages about anything out of the ordinary. If you like #ifdef, you 50 * messages about anything out of the ordinary. If you like #ifdef, you
51 * will love this function. 51 * will love this function.
52 */ 52 */
53 static void __init rcu_bootup_announce_oddness(void) 53 static void __init rcu_bootup_announce_oddness(void)
54 { 54 {
55 #ifdef CONFIG_RCU_TRACE 55 #ifdef CONFIG_RCU_TRACE
56 pr_info("\tRCU debugfs-based tracing is enabled.\n"); 56 pr_info("\tRCU debugfs-based tracing is enabled.\n");
57 #endif 57 #endif
58 #if (defined(CONFIG_64BIT) && CONFIG_RCU_FANOUT != 64) || (!defined(CONFIG_64BIT) && CONFIG_RCU_FANOUT != 32) 58 #if (defined(CONFIG_64BIT) && CONFIG_RCU_FANOUT != 64) || (!defined(CONFIG_64BIT) && CONFIG_RCU_FANOUT != 32)
59 pr_info("\tCONFIG_RCU_FANOUT set to non-default value of %d\n", 59 pr_info("\tCONFIG_RCU_FANOUT set to non-default value of %d\n",
60 CONFIG_RCU_FANOUT); 60 CONFIG_RCU_FANOUT);
61 #endif 61 #endif
62 #ifdef CONFIG_RCU_FANOUT_EXACT 62 #ifdef CONFIG_RCU_FANOUT_EXACT
63 pr_info("\tHierarchical RCU autobalancing is disabled.\n"); 63 pr_info("\tHierarchical RCU autobalancing is disabled.\n");
64 #endif 64 #endif
65 #ifdef CONFIG_RCU_FAST_NO_HZ 65 #ifdef CONFIG_RCU_FAST_NO_HZ
66 pr_info("\tRCU dyntick-idle grace-period acceleration is enabled.\n"); 66 pr_info("\tRCU dyntick-idle grace-period acceleration is enabled.\n");
67 #endif 67 #endif
68 #ifdef CONFIG_PROVE_RCU 68 #ifdef CONFIG_PROVE_RCU
69 pr_info("\tRCU lockdep checking is enabled.\n"); 69 pr_info("\tRCU lockdep checking is enabled.\n");
70 #endif 70 #endif
71 #ifdef CONFIG_RCU_TORTURE_TEST_RUNNABLE 71 #ifdef CONFIG_RCU_TORTURE_TEST_RUNNABLE
72 pr_info("\tRCU torture testing starts during boot.\n"); 72 pr_info("\tRCU torture testing starts during boot.\n");
73 #endif 73 #endif
74 #if defined(CONFIG_TREE_PREEMPT_RCU) && !defined(CONFIG_RCU_CPU_STALL_VERBOSE) 74 #if defined(CONFIG_TREE_PREEMPT_RCU) && !defined(CONFIG_RCU_CPU_STALL_VERBOSE)
75 pr_info("\tDump stacks of tasks blocking RCU-preempt GP.\n"); 75 pr_info("\tDump stacks of tasks blocking RCU-preempt GP.\n");
76 #endif 76 #endif
77 #if defined(CONFIG_RCU_CPU_STALL_INFO) 77 #if defined(CONFIG_RCU_CPU_STALL_INFO)
78 pr_info("\tAdditional per-CPU info printed with stalls.\n"); 78 pr_info("\tAdditional per-CPU info printed with stalls.\n");
79 #endif 79 #endif
80 #if NUM_RCU_LVL_4 != 0 80 #if NUM_RCU_LVL_4 != 0
81 pr_info("\tFour-level hierarchy is enabled.\n"); 81 pr_info("\tFour-level hierarchy is enabled.\n");
82 #endif 82 #endif
83 if (rcu_fanout_leaf != CONFIG_RCU_FANOUT_LEAF) 83 if (rcu_fanout_leaf != CONFIG_RCU_FANOUT_LEAF)
84 pr_info("\tExperimental boot-time adjustment of leaf fanout to %d.\n", rcu_fanout_leaf); 84 pr_info("\tExperimental boot-time adjustment of leaf fanout to %d.\n", rcu_fanout_leaf);
85 if (nr_cpu_ids != NR_CPUS) 85 if (nr_cpu_ids != NR_CPUS)
86 pr_info("\tRCU restricting CPUs from NR_CPUS=%d to nr_cpu_ids=%d.\n", NR_CPUS, nr_cpu_ids); 86 pr_info("\tRCU restricting CPUs from NR_CPUS=%d to nr_cpu_ids=%d.\n", NR_CPUS, nr_cpu_ids);
87 #ifdef CONFIG_RCU_NOCB_CPU 87 #ifdef CONFIG_RCU_NOCB_CPU
88 #ifndef CONFIG_RCU_NOCB_CPU_NONE 88 #ifndef CONFIG_RCU_NOCB_CPU_NONE
89 if (!have_rcu_nocb_mask) { 89 if (!have_rcu_nocb_mask) {
90 zalloc_cpumask_var(&rcu_nocb_mask, GFP_KERNEL); 90 zalloc_cpumask_var(&rcu_nocb_mask, GFP_KERNEL);
91 have_rcu_nocb_mask = true; 91 have_rcu_nocb_mask = true;
92 } 92 }
93 #ifdef CONFIG_RCU_NOCB_CPU_ZERO 93 #ifdef CONFIG_RCU_NOCB_CPU_ZERO
94 pr_info("\tExperimental no-CBs CPU 0\n"); 94 pr_info("\tExperimental no-CBs CPU 0\n");
95 cpumask_set_cpu(0, rcu_nocb_mask); 95 cpumask_set_cpu(0, rcu_nocb_mask);
96 #endif /* #ifdef CONFIG_RCU_NOCB_CPU_ZERO */ 96 #endif /* #ifdef CONFIG_RCU_NOCB_CPU_ZERO */
97 #ifdef CONFIG_RCU_NOCB_CPU_ALL 97 #ifdef CONFIG_RCU_NOCB_CPU_ALL
98 pr_info("\tExperimental no-CBs for all CPUs\n"); 98 pr_info("\tExperimental no-CBs for all CPUs\n");
99 cpumask_setall(rcu_nocb_mask); 99 cpumask_setall(rcu_nocb_mask);
100 #endif /* #ifdef CONFIG_RCU_NOCB_CPU_ALL */ 100 #endif /* #ifdef CONFIG_RCU_NOCB_CPU_ALL */
101 #endif /* #ifndef CONFIG_RCU_NOCB_CPU_NONE */ 101 #endif /* #ifndef CONFIG_RCU_NOCB_CPU_NONE */
102 if (have_rcu_nocb_mask) { 102 if (have_rcu_nocb_mask) {
103 cpulist_scnprintf(nocb_buf, sizeof(nocb_buf), rcu_nocb_mask); 103 cpulist_scnprintf(nocb_buf, sizeof(nocb_buf), rcu_nocb_mask);
104 pr_info("\tExperimental no-CBs CPUs: %s.\n", nocb_buf); 104 pr_info("\tExperimental no-CBs CPUs: %s.\n", nocb_buf);
105 if (rcu_nocb_poll) 105 if (rcu_nocb_poll)
106 pr_info("\tExperimental polled no-CBs CPUs.\n"); 106 pr_info("\tExperimental polled no-CBs CPUs.\n");
107 } 107 }
108 #endif /* #ifdef CONFIG_RCU_NOCB_CPU */ 108 #endif /* #ifdef CONFIG_RCU_NOCB_CPU */
109 } 109 }
110 110
111 #ifdef CONFIG_TREE_PREEMPT_RCU 111 #ifdef CONFIG_TREE_PREEMPT_RCU
112 112
113 struct rcu_state rcu_preempt_state = 113 struct rcu_state rcu_preempt_state =
114 RCU_STATE_INITIALIZER(rcu_preempt, 'p', call_rcu); 114 RCU_STATE_INITIALIZER(rcu_preempt, 'p', call_rcu);
115 DEFINE_PER_CPU(struct rcu_data, rcu_preempt_data); 115 DEFINE_PER_CPU(struct rcu_data, rcu_preempt_data);
116 static struct rcu_state *rcu_state = &rcu_preempt_state; 116 static struct rcu_state *rcu_state = &rcu_preempt_state;
117 117
118 static int rcu_preempted_readers_exp(struct rcu_node *rnp); 118 static int rcu_preempted_readers_exp(struct rcu_node *rnp);
119 119
120 /* 120 /*
121 * Tell them what RCU they are running. 121 * Tell them what RCU they are running.
122 */ 122 */
123 static void __init rcu_bootup_announce(void) 123 static void __init rcu_bootup_announce(void)
124 { 124 {
125 pr_info("Preemptible hierarchical RCU implementation.\n"); 125 pr_info("Preemptible hierarchical RCU implementation.\n");
126 rcu_bootup_announce_oddness(); 126 rcu_bootup_announce_oddness();
127 } 127 }
128 128
129 /* 129 /*
130 * Return the number of RCU-preempt batches processed thus far 130 * Return the number of RCU-preempt batches processed thus far
131 * for debug and statistics. 131 * for debug and statistics.
132 */ 132 */
133 long rcu_batches_completed_preempt(void) 133 long rcu_batches_completed_preempt(void)
134 { 134 {
135 return rcu_preempt_state.completed; 135 return rcu_preempt_state.completed;
136 } 136 }
137 EXPORT_SYMBOL_GPL(rcu_batches_completed_preempt); 137 EXPORT_SYMBOL_GPL(rcu_batches_completed_preempt);
138 138
139 /* 139 /*
140 * Return the number of RCU batches processed thus far for debug & stats. 140 * Return the number of RCU batches processed thus far for debug & stats.
141 */ 141 */
142 long rcu_batches_completed(void) 142 long rcu_batches_completed(void)
143 { 143 {
144 return rcu_batches_completed_preempt(); 144 return rcu_batches_completed_preempt();
145 } 145 }
146 EXPORT_SYMBOL_GPL(rcu_batches_completed); 146 EXPORT_SYMBOL_GPL(rcu_batches_completed);
147 147
148 /* 148 /*
149 * Force a quiescent state for preemptible RCU. 149 * Force a quiescent state for preemptible RCU.
150 */ 150 */
151 void rcu_force_quiescent_state(void) 151 void rcu_force_quiescent_state(void)
152 { 152 {
153 force_quiescent_state(&rcu_preempt_state); 153 force_quiescent_state(&rcu_preempt_state);
154 } 154 }
155 EXPORT_SYMBOL_GPL(rcu_force_quiescent_state); 155 EXPORT_SYMBOL_GPL(rcu_force_quiescent_state);
156 156
157 /* 157 /*
158 * Record a preemptible-RCU quiescent state for the specified CPU. Note 158 * Record a preemptible-RCU quiescent state for the specified CPU. Note
159 * that this does not necessarily mean that the task currently running 159 * that this does not necessarily mean that the task currently running
160 * on the CPU is in a quiescent state: there might be any number of 160 * on the CPU is in a quiescent state: there might be any number of
161 * tasks blocked while in an RCU read-side critical section. 161 * tasks blocked while in an RCU read-side critical section.
162 * 162 *
163 * Unlike the other rcu_*_qs() functions, callers to this function 163 * Unlike the other rcu_*_qs() functions, callers to this function
164 * must disable irqs in order to protect the assignment to 164 * must disable irqs in order to protect the assignment to
165 * ->rcu_read_unlock_special. 165 * ->rcu_read_unlock_special.
166 */ 166 */
167 static void rcu_preempt_qs(int cpu) 167 static void rcu_preempt_qs(int cpu)
168 { 168 {
169 struct rcu_data *rdp = &per_cpu(rcu_preempt_data, cpu); 169 struct rcu_data *rdp = &per_cpu(rcu_preempt_data, cpu);
170 170
171 if (rdp->passed_quiesce == 0) 171 if (rdp->passed_quiesce == 0)
172 trace_rcu_grace_period("rcu_preempt", rdp->gpnum, "cpuqs"); 172 trace_rcu_grace_period("rcu_preempt", rdp->gpnum, "cpuqs");
173 rdp->passed_quiesce = 1; 173 rdp->passed_quiesce = 1;
174 current->rcu_read_unlock_special &= ~RCU_READ_UNLOCK_NEED_QS; 174 current->rcu_read_unlock_special &= ~RCU_READ_UNLOCK_NEED_QS;
175 } 175 }
176 176
177 /* 177 /*
178 * We have entered the scheduler, and the current task might soon be 178 * We have entered the scheduler, and the current task might soon be
179 * context-switched away from. If this task is in an RCU read-side 179 * context-switched away from. If this task is in an RCU read-side
180 * critical section, we will no longer be able to rely on the CPU to 180 * critical section, we will no longer be able to rely on the CPU to
181 * record that fact, so we enqueue the task on the blkd_tasks list. 181 * record that fact, so we enqueue the task on the blkd_tasks list.
182 * The task will dequeue itself when it exits the outermost enclosing 182 * The task will dequeue itself when it exits the outermost enclosing
183 * RCU read-side critical section. Therefore, the current grace period 183 * RCU read-side critical section. Therefore, the current grace period
184 * cannot be permitted to complete until the blkd_tasks list entries 184 * cannot be permitted to complete until the blkd_tasks list entries
185 * predating the current grace period drain, in other words, until 185 * predating the current grace period drain, in other words, until
186 * rnp->gp_tasks becomes NULL. 186 * rnp->gp_tasks becomes NULL.
187 * 187 *
188 * Caller must disable preemption. 188 * Caller must disable preemption.
189 */ 189 */
190 static void rcu_preempt_note_context_switch(int cpu) 190 static void rcu_preempt_note_context_switch(int cpu)
191 { 191 {
192 struct task_struct *t = current; 192 struct task_struct *t = current;
193 unsigned long flags; 193 unsigned long flags;
194 struct rcu_data *rdp; 194 struct rcu_data *rdp;
195 struct rcu_node *rnp; 195 struct rcu_node *rnp;
196 196
197 if (t->rcu_read_lock_nesting > 0 && 197 if (t->rcu_read_lock_nesting > 0 &&
198 (t->rcu_read_unlock_special & RCU_READ_UNLOCK_BLOCKED) == 0) { 198 (t->rcu_read_unlock_special & RCU_READ_UNLOCK_BLOCKED) == 0) {
199 199
200 /* Possibly blocking in an RCU read-side critical section. */ 200 /* Possibly blocking in an RCU read-side critical section. */
201 rdp = per_cpu_ptr(rcu_preempt_state.rda, cpu); 201 rdp = per_cpu_ptr(rcu_preempt_state.rda, cpu);
202 rnp = rdp->mynode; 202 rnp = rdp->mynode;
203 raw_spin_lock_irqsave(&rnp->lock, flags); 203 raw_spin_lock_irqsave(&rnp->lock, flags);
204 t->rcu_read_unlock_special |= RCU_READ_UNLOCK_BLOCKED; 204 t->rcu_read_unlock_special |= RCU_READ_UNLOCK_BLOCKED;
205 t->rcu_blocked_node = rnp; 205 t->rcu_blocked_node = rnp;
206 206
207 /* 207 /*
208 * If this CPU has already checked in, then this task 208 * If this CPU has already checked in, then this task
209 * will hold up the next grace period rather than the 209 * will hold up the next grace period rather than the
210 * current grace period. Queue the task accordingly. 210 * current grace period. Queue the task accordingly.
211 * If the task is queued for the current grace period 211 * If the task is queued for the current grace period
212 * (i.e., this CPU has not yet passed through a quiescent 212 * (i.e., this CPU has not yet passed through a quiescent
213 * state for the current grace period), then as long 213 * state for the current grace period), then as long
214 * as that task remains queued, the current grace period 214 * as that task remains queued, the current grace period
215 * cannot end. Note that there is some uncertainty as 215 * cannot end. Note that there is some uncertainty as
216 * to exactly when the current grace period started. 216 * to exactly when the current grace period started.
217 * We take a conservative approach, which can result 217 * We take a conservative approach, which can result
218 * in unnecessarily waiting on tasks that started very 218 * in unnecessarily waiting on tasks that started very
219 * slightly after the current grace period began. C'est 219 * slightly after the current grace period began. C'est
220 * la vie!!! 220 * la vie!!!
221 * 221 *
222 * But first, note that the current CPU must still be 222 * But first, note that the current CPU must still be
223 * on line! 223 * on line!
224 */ 224 */
225 WARN_ON_ONCE((rdp->grpmask & rnp->qsmaskinit) == 0); 225 WARN_ON_ONCE((rdp->grpmask & rnp->qsmaskinit) == 0);
226 WARN_ON_ONCE(!list_empty(&t->rcu_node_entry)); 226 WARN_ON_ONCE(!list_empty(&t->rcu_node_entry));
227 if ((rnp->qsmask & rdp->grpmask) && rnp->gp_tasks != NULL) { 227 if ((rnp->qsmask & rdp->grpmask) && rnp->gp_tasks != NULL) {
228 list_add(&t->rcu_node_entry, rnp->gp_tasks->prev); 228 list_add(&t->rcu_node_entry, rnp->gp_tasks->prev);
229 rnp->gp_tasks = &t->rcu_node_entry; 229 rnp->gp_tasks = &t->rcu_node_entry;
230 #ifdef CONFIG_RCU_BOOST 230 #ifdef CONFIG_RCU_BOOST
231 if (rnp->boost_tasks != NULL) 231 if (rnp->boost_tasks != NULL)
232 rnp->boost_tasks = rnp->gp_tasks; 232 rnp->boost_tasks = rnp->gp_tasks;
233 #endif /* #ifdef CONFIG_RCU_BOOST */ 233 #endif /* #ifdef CONFIG_RCU_BOOST */
234 } else { 234 } else {
235 list_add(&t->rcu_node_entry, &rnp->blkd_tasks); 235 list_add(&t->rcu_node_entry, &rnp->blkd_tasks);
236 if (rnp->qsmask & rdp->grpmask) 236 if (rnp->qsmask & rdp->grpmask)
237 rnp->gp_tasks = &t->rcu_node_entry; 237 rnp->gp_tasks = &t->rcu_node_entry;
238 } 238 }
239 trace_rcu_preempt_task(rdp->rsp->name, 239 trace_rcu_preempt_task(rdp->rsp->name,
240 t->pid, 240 t->pid,
241 (rnp->qsmask & rdp->grpmask) 241 (rnp->qsmask & rdp->grpmask)
242 ? rnp->gpnum 242 ? rnp->gpnum
243 : rnp->gpnum + 1); 243 : rnp->gpnum + 1);
244 raw_spin_unlock_irqrestore(&rnp->lock, flags); 244 raw_spin_unlock_irqrestore(&rnp->lock, flags);
245 } else if (t->rcu_read_lock_nesting < 0 && 245 } else if (t->rcu_read_lock_nesting < 0 &&
246 t->rcu_read_unlock_special) { 246 t->rcu_read_unlock_special) {
247 247
248 /* 248 /*
249 * Complete exit from RCU read-side critical section on 249 * Complete exit from RCU read-side critical section on
250 * behalf of preempted instance of __rcu_read_unlock(). 250 * behalf of preempted instance of __rcu_read_unlock().
251 */ 251 */
252 rcu_read_unlock_special(t); 252 rcu_read_unlock_special(t);
253 } 253 }
254 254
255 /* 255 /*
256 * Either we were not in an RCU read-side critical section to 256 * Either we were not in an RCU read-side critical section to
257 * begin with, or we have now recorded that critical section 257 * begin with, or we have now recorded that critical section
258 * globally. Either way, we can now note a quiescent state 258 * globally. Either way, we can now note a quiescent state
259 * for this CPU. Again, if we were in an RCU read-side critical 259 * for this CPU. Again, if we were in an RCU read-side critical
260 * section, and if that critical section was blocking the current 260 * section, and if that critical section was blocking the current
261 * grace period, then the fact that the task has been enqueued 261 * grace period, then the fact that the task has been enqueued
262 * means that we continue to block the current grace period. 262 * means that we continue to block the current grace period.
263 */ 263 */
264 local_irq_save(flags); 264 local_irq_save(flags);
265 rcu_preempt_qs(cpu); 265 rcu_preempt_qs(cpu);
266 local_irq_restore(flags); 266 local_irq_restore(flags);
267 } 267 }
268 268
269 /* 269 /*
270 * Check for preempted RCU readers blocking the current grace period 270 * Check for preempted RCU readers blocking the current grace period
271 * for the specified rcu_node structure. If the caller needs a reliable 271 * for the specified rcu_node structure. If the caller needs a reliable
272 * answer, it must hold the rcu_node's ->lock. 272 * answer, it must hold the rcu_node's ->lock.
273 */ 273 */
274 static int rcu_preempt_blocked_readers_cgp(struct rcu_node *rnp) 274 static int rcu_preempt_blocked_readers_cgp(struct rcu_node *rnp)
275 { 275 {
276 return rnp->gp_tasks != NULL; 276 return rnp->gp_tasks != NULL;
277 } 277 }
278 278
279 /* 279 /*
280 * Record a quiescent state for all tasks that were previously queued 280 * Record a quiescent state for all tasks that were previously queued
281 * on the specified rcu_node structure and that were blocking the current 281 * on the specified rcu_node structure and that were blocking the current
282 * RCU grace period. The caller must hold the specified rnp->lock with 282 * RCU grace period. The caller must hold the specified rnp->lock with
283 * irqs disabled, and this lock is released upon return, but irqs remain 283 * irqs disabled, and this lock is released upon return, but irqs remain
284 * disabled. 284 * disabled.
285 */ 285 */
286 static void rcu_report_unblock_qs_rnp(struct rcu_node *rnp, unsigned long flags) 286 static void rcu_report_unblock_qs_rnp(struct rcu_node *rnp, unsigned long flags)
287 __releases(rnp->lock) 287 __releases(rnp->lock)
288 { 288 {
289 unsigned long mask; 289 unsigned long mask;
290 struct rcu_node *rnp_p; 290 struct rcu_node *rnp_p;
291 291
292 if (rnp->qsmask != 0 || rcu_preempt_blocked_readers_cgp(rnp)) { 292 if (rnp->qsmask != 0 || rcu_preempt_blocked_readers_cgp(rnp)) {
293 raw_spin_unlock_irqrestore(&rnp->lock, flags); 293 raw_spin_unlock_irqrestore(&rnp->lock, flags);
294 return; /* Still need more quiescent states! */ 294 return; /* Still need more quiescent states! */
295 } 295 }
296 296
297 rnp_p = rnp->parent; 297 rnp_p = rnp->parent;
298 if (rnp_p == NULL) { 298 if (rnp_p == NULL) {
299 /* 299 /*
300 * Either there is only one rcu_node in the tree, 300 * Either there is only one rcu_node in the tree,
301 * or tasks were kicked up to root rcu_node due to 301 * or tasks were kicked up to root rcu_node due to
302 * CPUs going offline. 302 * CPUs going offline.
303 */ 303 */
304 rcu_report_qs_rsp(&rcu_preempt_state, flags); 304 rcu_report_qs_rsp(&rcu_preempt_state, flags);
305 return; 305 return;
306 } 306 }
307 307
308 /* Report up the rest of the hierarchy. */ 308 /* Report up the rest of the hierarchy. */
309 mask = rnp->grpmask; 309 mask = rnp->grpmask;
310 raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */ 310 raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */
311 raw_spin_lock(&rnp_p->lock); /* irqs already disabled. */ 311 raw_spin_lock(&rnp_p->lock); /* irqs already disabled. */
312 rcu_report_qs_rnp(mask, &rcu_preempt_state, rnp_p, flags); 312 rcu_report_qs_rnp(mask, &rcu_preempt_state, rnp_p, flags);
313 } 313 }
314 314
315 /* 315 /*
316 * Advance a ->blkd_tasks-list pointer to the next entry, instead 316 * Advance a ->blkd_tasks-list pointer to the next entry, instead
317 * returning NULL if at the end of the list. 317 * returning NULL if at the end of the list.
318 */ 318 */
319 static struct list_head *rcu_next_node_entry(struct task_struct *t, 319 static struct list_head *rcu_next_node_entry(struct task_struct *t,
320 struct rcu_node *rnp) 320 struct rcu_node *rnp)
321 { 321 {
322 struct list_head *np; 322 struct list_head *np;
323 323
324 np = t->rcu_node_entry.next; 324 np = t->rcu_node_entry.next;
325 if (np == &rnp->blkd_tasks) 325 if (np == &rnp->blkd_tasks)
326 np = NULL; 326 np = NULL;
327 return np; 327 return np;
328 } 328 }
329 329
330 /* 330 /*
331 * Handle special cases during rcu_read_unlock(), such as needing to 331 * Handle special cases during rcu_read_unlock(), such as needing to
332 * notify RCU core processing or the task having blocked during the RCU 332 * notify RCU core processing or the task having blocked during the RCU
333 * read-side critical section. 333 * read-side critical section.
334 */ 334 */
335 void rcu_read_unlock_special(struct task_struct *t) 335 void rcu_read_unlock_special(struct task_struct *t)
336 { 336 {
337 int empty; 337 int empty;
338 int empty_exp; 338 int empty_exp;
339 int empty_exp_now; 339 int empty_exp_now;
340 unsigned long flags; 340 unsigned long flags;
341 struct list_head *np; 341 struct list_head *np;
342 #ifdef CONFIG_RCU_BOOST 342 #ifdef CONFIG_RCU_BOOST
343 struct rt_mutex *rbmp = NULL; 343 struct rt_mutex *rbmp = NULL;
344 #endif /* #ifdef CONFIG_RCU_BOOST */ 344 #endif /* #ifdef CONFIG_RCU_BOOST */
345 struct rcu_node *rnp; 345 struct rcu_node *rnp;
346 int special; 346 int special;
347 347
348 /* NMI handlers cannot block and cannot safely manipulate state. */ 348 /* NMI handlers cannot block and cannot safely manipulate state. */
349 if (in_nmi()) 349 if (in_nmi())
350 return; 350 return;
351 351
352 local_irq_save(flags); 352 local_irq_save(flags);
353 353
354 /* 354 /*
355 * If RCU core is waiting for this CPU to exit critical section, 355 * If RCU core is waiting for this CPU to exit critical section,
356 * let it know that we have done so. 356 * let it know that we have done so.
357 */ 357 */
358 special = t->rcu_read_unlock_special; 358 special = t->rcu_read_unlock_special;
359 if (special & RCU_READ_UNLOCK_NEED_QS) { 359 if (special & RCU_READ_UNLOCK_NEED_QS) {
360 rcu_preempt_qs(smp_processor_id()); 360 rcu_preempt_qs(smp_processor_id());
361 } 361 }
362 362
363 /* Hardware IRQ handlers cannot block. */ 363 /* Hardware IRQ handlers cannot block. */
364 if (in_irq() || in_serving_softirq()) { 364 if (in_irq() || in_serving_softirq()) {
365 local_irq_restore(flags); 365 local_irq_restore(flags);
366 return; 366 return;
367 } 367 }
368 368
369 /* Clean up if blocked during RCU read-side critical section. */ 369 /* Clean up if blocked during RCU read-side critical section. */
370 if (special & RCU_READ_UNLOCK_BLOCKED) { 370 if (special & RCU_READ_UNLOCK_BLOCKED) {
371 t->rcu_read_unlock_special &= ~RCU_READ_UNLOCK_BLOCKED; 371 t->rcu_read_unlock_special &= ~RCU_READ_UNLOCK_BLOCKED;
372 372
373 /* 373 /*
374 * Remove this task from the list it blocked on. The 374 * Remove this task from the list it blocked on. The
375 * task can migrate while we acquire the lock, but at 375 * task can migrate while we acquire the lock, but at
376 * most one time. So at most two passes through loop. 376 * most one time. So at most two passes through loop.
377 */ 377 */
378 for (;;) { 378 for (;;) {
379 rnp = t->rcu_blocked_node; 379 rnp = t->rcu_blocked_node;
380 raw_spin_lock(&rnp->lock); /* irqs already disabled. */ 380 raw_spin_lock(&rnp->lock); /* irqs already disabled. */
381 if (rnp == t->rcu_blocked_node) 381 if (rnp == t->rcu_blocked_node)
382 break; 382 break;
383 raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */ 383 raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */
384 } 384 }
385 empty = !rcu_preempt_blocked_readers_cgp(rnp); 385 empty = !rcu_preempt_blocked_readers_cgp(rnp);
386 empty_exp = !rcu_preempted_readers_exp(rnp); 386 empty_exp = !rcu_preempted_readers_exp(rnp);
387 smp_mb(); /* ensure expedited fastpath sees end of RCU c-s. */ 387 smp_mb(); /* ensure expedited fastpath sees end of RCU c-s. */
388 np = rcu_next_node_entry(t, rnp); 388 np = rcu_next_node_entry(t, rnp);
389 list_del_init(&t->rcu_node_entry); 389 list_del_init(&t->rcu_node_entry);
390 t->rcu_blocked_node = NULL; 390 t->rcu_blocked_node = NULL;
391 trace_rcu_unlock_preempted_task("rcu_preempt", 391 trace_rcu_unlock_preempted_task("rcu_preempt",
392 rnp->gpnum, t->pid); 392 rnp->gpnum, t->pid);
393 if (&t->rcu_node_entry == rnp->gp_tasks) 393 if (&t->rcu_node_entry == rnp->gp_tasks)
394 rnp->gp_tasks = np; 394 rnp->gp_tasks = np;
395 if (&t->rcu_node_entry == rnp->exp_tasks) 395 if (&t->rcu_node_entry == rnp->exp_tasks)
396 rnp->exp_tasks = np; 396 rnp->exp_tasks = np;
397 #ifdef CONFIG_RCU_BOOST 397 #ifdef CONFIG_RCU_BOOST
398 if (&t->rcu_node_entry == rnp->boost_tasks) 398 if (&t->rcu_node_entry == rnp->boost_tasks)
399 rnp->boost_tasks = np; 399 rnp->boost_tasks = np;
400 /* Snapshot/clear ->rcu_boost_mutex with rcu_node lock held. */ 400 /* Snapshot/clear ->rcu_boost_mutex with rcu_node lock held. */
401 if (t->rcu_boost_mutex) { 401 if (t->rcu_boost_mutex) {
402 rbmp = t->rcu_boost_mutex; 402 rbmp = t->rcu_boost_mutex;
403 t->rcu_boost_mutex = NULL; 403 t->rcu_boost_mutex = NULL;
404 } 404 }
405 #endif /* #ifdef CONFIG_RCU_BOOST */ 405 #endif /* #ifdef CONFIG_RCU_BOOST */
406 406
407 /* 407 /*
408 * If this was the last task on the current list, and if 408 * If this was the last task on the current list, and if
409 * we aren't waiting on any CPUs, report the quiescent state. 409 * we aren't waiting on any CPUs, report the quiescent state.
410 * Note that rcu_report_unblock_qs_rnp() releases rnp->lock, 410 * Note that rcu_report_unblock_qs_rnp() releases rnp->lock,
411 * so we must take a snapshot of the expedited state. 411 * so we must take a snapshot of the expedited state.
412 */ 412 */
413 empty_exp_now = !rcu_preempted_readers_exp(rnp); 413 empty_exp_now = !rcu_preempted_readers_exp(rnp);
414 if (!empty && !rcu_preempt_blocked_readers_cgp(rnp)) { 414 if (!empty && !rcu_preempt_blocked_readers_cgp(rnp)) {
415 trace_rcu_quiescent_state_report("preempt_rcu", 415 trace_rcu_quiescent_state_report("preempt_rcu",
416 rnp->gpnum, 416 rnp->gpnum,
417 0, rnp->qsmask, 417 0, rnp->qsmask,
418 rnp->level, 418 rnp->level,
419 rnp->grplo, 419 rnp->grplo,
420 rnp->grphi, 420 rnp->grphi,
421 !!rnp->gp_tasks); 421 !!rnp->gp_tasks);
422 rcu_report_unblock_qs_rnp(rnp, flags); 422 rcu_report_unblock_qs_rnp(rnp, flags);
423 } else { 423 } else {
424 raw_spin_unlock_irqrestore(&rnp->lock, flags); 424 raw_spin_unlock_irqrestore(&rnp->lock, flags);
425 } 425 }
426 426
427 #ifdef CONFIG_RCU_BOOST 427 #ifdef CONFIG_RCU_BOOST
428 /* Unboost if we were boosted. */ 428 /* Unboost if we were boosted. */
429 if (rbmp) 429 if (rbmp)
430 rt_mutex_unlock(rbmp); 430 rt_mutex_unlock(rbmp);
431 #endif /* #ifdef CONFIG_RCU_BOOST */ 431 #endif /* #ifdef CONFIG_RCU_BOOST */
432 432
433 /* 433 /*
434 * If this was the last task on the expedited lists, 434 * If this was the last task on the expedited lists,
435 * then we need to report up the rcu_node hierarchy. 435 * then we need to report up the rcu_node hierarchy.
436 */ 436 */
437 if (!empty_exp && empty_exp_now) 437 if (!empty_exp && empty_exp_now)
438 rcu_report_exp_rnp(&rcu_preempt_state, rnp, true); 438 rcu_report_exp_rnp(&rcu_preempt_state, rnp, true);
439 } else { 439 } else {
440 local_irq_restore(flags); 440 local_irq_restore(flags);
441 } 441 }
442 } 442 }
443 443
444 #ifdef CONFIG_RCU_CPU_STALL_VERBOSE 444 #ifdef CONFIG_RCU_CPU_STALL_VERBOSE
445 445
446 /* 446 /*
447 * Dump detailed information for all tasks blocking the current RCU 447 * Dump detailed information for all tasks blocking the current RCU
448 * grace period on the specified rcu_node structure. 448 * grace period on the specified rcu_node structure.
449 */ 449 */
450 static void rcu_print_detail_task_stall_rnp(struct rcu_node *rnp) 450 static void rcu_print_detail_task_stall_rnp(struct rcu_node *rnp)
451 { 451 {
452 unsigned long flags; 452 unsigned long flags;
453 struct task_struct *t; 453 struct task_struct *t;
454 454
455 raw_spin_lock_irqsave(&rnp->lock, flags); 455 raw_spin_lock_irqsave(&rnp->lock, flags);
456 if (!rcu_preempt_blocked_readers_cgp(rnp)) { 456 if (!rcu_preempt_blocked_readers_cgp(rnp)) {
457 raw_spin_unlock_irqrestore(&rnp->lock, flags); 457 raw_spin_unlock_irqrestore(&rnp->lock, flags);
458 return; 458 return;
459 } 459 }
460 t = list_entry(rnp->gp_tasks, 460 t = list_entry(rnp->gp_tasks,
461 struct task_struct, rcu_node_entry); 461 struct task_struct, rcu_node_entry);
462 list_for_each_entry_continue(t, &rnp->blkd_tasks, rcu_node_entry) 462 list_for_each_entry_continue(t, &rnp->blkd_tasks, rcu_node_entry)
463 sched_show_task(t); 463 sched_show_task(t);
464 raw_spin_unlock_irqrestore(&rnp->lock, flags); 464 raw_spin_unlock_irqrestore(&rnp->lock, flags);
465 } 465 }
466 466
467 /* 467 /*
468 * Dump detailed information for all tasks blocking the current RCU 468 * Dump detailed information for all tasks blocking the current RCU
469 * grace period. 469 * grace period.
470 */ 470 */
471 static void rcu_print_detail_task_stall(struct rcu_state *rsp) 471 static void rcu_print_detail_task_stall(struct rcu_state *rsp)
472 { 472 {
473 struct rcu_node *rnp = rcu_get_root(rsp); 473 struct rcu_node *rnp = rcu_get_root(rsp);
474 474
475 rcu_print_detail_task_stall_rnp(rnp); 475 rcu_print_detail_task_stall_rnp(rnp);
476 rcu_for_each_leaf_node(rsp, rnp) 476 rcu_for_each_leaf_node(rsp, rnp)
477 rcu_print_detail_task_stall_rnp(rnp); 477 rcu_print_detail_task_stall_rnp(rnp);
478 } 478 }
479 479
480 #else /* #ifdef CONFIG_RCU_CPU_STALL_VERBOSE */ 480 #else /* #ifdef CONFIG_RCU_CPU_STALL_VERBOSE */
481 481
482 static void rcu_print_detail_task_stall(struct rcu_state *rsp) 482 static void rcu_print_detail_task_stall(struct rcu_state *rsp)
483 { 483 {
484 } 484 }
485 485
486 #endif /* #else #ifdef CONFIG_RCU_CPU_STALL_VERBOSE */ 486 #endif /* #else #ifdef CONFIG_RCU_CPU_STALL_VERBOSE */
487 487
488 #ifdef CONFIG_RCU_CPU_STALL_INFO 488 #ifdef CONFIG_RCU_CPU_STALL_INFO
489 489
490 static void rcu_print_task_stall_begin(struct rcu_node *rnp) 490 static void rcu_print_task_stall_begin(struct rcu_node *rnp)
491 { 491 {
492 pr_err("\tTasks blocked on level-%d rcu_node (CPUs %d-%d):", 492 pr_err("\tTasks blocked on level-%d rcu_node (CPUs %d-%d):",
493 rnp->level, rnp->grplo, rnp->grphi); 493 rnp->level, rnp->grplo, rnp->grphi);
494 } 494 }
495 495
496 static void rcu_print_task_stall_end(void) 496 static void rcu_print_task_stall_end(void)
497 { 497 {
498 pr_cont("\n"); 498 pr_cont("\n");
499 } 499 }
500 500
501 #else /* #ifdef CONFIG_RCU_CPU_STALL_INFO */ 501 #else /* #ifdef CONFIG_RCU_CPU_STALL_INFO */
502 502
503 static void rcu_print_task_stall_begin(struct rcu_node *rnp) 503 static void rcu_print_task_stall_begin(struct rcu_node *rnp)
504 { 504 {
505 } 505 }
506 506
507 static void rcu_print_task_stall_end(void) 507 static void rcu_print_task_stall_end(void)
508 { 508 {
509 } 509 }
510 510
511 #endif /* #else #ifdef CONFIG_RCU_CPU_STALL_INFO */ 511 #endif /* #else #ifdef CONFIG_RCU_CPU_STALL_INFO */
512 512
513 /* 513 /*
514 * Scan the current list of tasks blocked within RCU read-side critical 514 * Scan the current list of tasks blocked within RCU read-side critical
515 * sections, printing out the tid of each. 515 * sections, printing out the tid of each.
516 */ 516 */
517 static int rcu_print_task_stall(struct rcu_node *rnp) 517 static int rcu_print_task_stall(struct rcu_node *rnp)
518 { 518 {
519 struct task_struct *t; 519 struct task_struct *t;
520 int ndetected = 0; 520 int ndetected = 0;
521 521
522 if (!rcu_preempt_blocked_readers_cgp(rnp)) 522 if (!rcu_preempt_blocked_readers_cgp(rnp))
523 return 0; 523 return 0;
524 rcu_print_task_stall_begin(rnp); 524 rcu_print_task_stall_begin(rnp);
525 t = list_entry(rnp->gp_tasks, 525 t = list_entry(rnp->gp_tasks,
526 struct task_struct, rcu_node_entry); 526 struct task_struct, rcu_node_entry);
527 list_for_each_entry_continue(t, &rnp->blkd_tasks, rcu_node_entry) { 527 list_for_each_entry_continue(t, &rnp->blkd_tasks, rcu_node_entry) {
528 pr_cont(" P%d", t->pid); 528 pr_cont(" P%d", t->pid);
529 ndetected++; 529 ndetected++;
530 } 530 }
531 rcu_print_task_stall_end(); 531 rcu_print_task_stall_end();
532 return ndetected; 532 return ndetected;
533 } 533 }
534 534
535 /* 535 /*
536 * Check that the list of blocked tasks for the newly completed grace 536 * Check that the list of blocked tasks for the newly completed grace
537 * period is in fact empty. It is a serious bug to complete a grace 537 * period is in fact empty. It is a serious bug to complete a grace
538 * period that still has RCU readers blocked! This function must be 538 * period that still has RCU readers blocked! This function must be
539 * invoked -before- updating this rnp's ->gpnum, and the rnp's ->lock 539 * invoked -before- updating this rnp's ->gpnum, and the rnp's ->lock
540 * must be held by the caller. 540 * must be held by the caller.
541 * 541 *
542 * Also, if there are blocked tasks on the list, they automatically 542 * Also, if there are blocked tasks on the list, they automatically
543 * block the newly created grace period, so set up ->gp_tasks accordingly. 543 * block the newly created grace period, so set up ->gp_tasks accordingly.
544 */ 544 */
545 static void rcu_preempt_check_blocked_tasks(struct rcu_node *rnp) 545 static void rcu_preempt_check_blocked_tasks(struct rcu_node *rnp)
546 { 546 {
547 WARN_ON_ONCE(rcu_preempt_blocked_readers_cgp(rnp)); 547 WARN_ON_ONCE(rcu_preempt_blocked_readers_cgp(rnp));
548 if (!list_empty(&rnp->blkd_tasks)) 548 if (!list_empty(&rnp->blkd_tasks))
549 rnp->gp_tasks = rnp->blkd_tasks.next; 549 rnp->gp_tasks = rnp->blkd_tasks.next;
550 WARN_ON_ONCE(rnp->qsmask); 550 WARN_ON_ONCE(rnp->qsmask);
551 } 551 }
552 552
553 #ifdef CONFIG_HOTPLUG_CPU 553 #ifdef CONFIG_HOTPLUG_CPU
554 554
555 /* 555 /*
556 * Handle tasklist migration for case in which all CPUs covered by the 556 * Handle tasklist migration for case in which all CPUs covered by the
557 * specified rcu_node have gone offline. Move them up to the root 557 * specified rcu_node have gone offline. Move them up to the root
558 * rcu_node. The reason for not just moving them to the immediate 558 * rcu_node. The reason for not just moving them to the immediate
559 * parent is to remove the need for rcu_read_unlock_special() to 559 * parent is to remove the need for rcu_read_unlock_special() to
560 * make more than two attempts to acquire the target rcu_node's lock. 560 * make more than two attempts to acquire the target rcu_node's lock.
561 * Returns a value indicating whether tasks were blocking the current 561 * Returns a value indicating whether tasks were blocking the current
562 * RCU grace period. 562 * RCU grace period.
563 * 563 *
564 * The return value distinguishes tasks blocking the normal grace period 564 * The return value distinguishes tasks blocking the normal grace period
565 * from those blocking the expedited grace period on this rcu_node. 565 * from those blocking the expedited grace period on this rcu_node.
566 * 566 *
567 * The caller must hold rnp->lock with irqs disabled. 567 * The caller must hold rnp->lock with irqs disabled.
568 */ 568 */
569 static int rcu_preempt_offline_tasks(struct rcu_state *rsp, 569 static int rcu_preempt_offline_tasks(struct rcu_state *rsp,
570 struct rcu_node *rnp, 570 struct rcu_node *rnp,
571 struct rcu_data *rdp) 571 struct rcu_data *rdp)
572 { 572 {
573 struct list_head *lp; 573 struct list_head *lp;
574 struct list_head *lp_root; 574 struct list_head *lp_root;
575 int retval = 0; 575 int retval = 0;
576 struct rcu_node *rnp_root = rcu_get_root(rsp); 576 struct rcu_node *rnp_root = rcu_get_root(rsp);
577 struct task_struct *t; 577 struct task_struct *t;
578 578
579 if (rnp == rnp_root) { 579 if (rnp == rnp_root) {
580 WARN_ONCE(1, "Last CPU thought to be offlined?"); 580 WARN_ONCE(1, "Last CPU thought to be offlined?");
581 return 0; /* Shouldn't happen: at least one CPU online. */ 581 return 0; /* Shouldn't happen: at least one CPU online. */
582 } 582 }
583 583
584 /* If we are on an internal node, complain bitterly. */ 584 /* If we are on an internal node, complain bitterly. */
585 WARN_ON_ONCE(rnp != rdp->mynode); 585 WARN_ON_ONCE(rnp != rdp->mynode);
586 586
587 /* 587 /*
588 * Move tasks up to root rcu_node. Don't try to get fancy for 588 * Move tasks up to root rcu_node. Don't try to get fancy for
589 * this corner-case operation -- just put this node's tasks 589 * this corner-case operation -- just put this node's tasks
590 * at the head of the root node's list, and update the root node's 590 * at the head of the root node's list, and update the root node's
591 * ->gp_tasks and ->exp_tasks pointers to those of this node's, 591 * ->gp_tasks and ->exp_tasks pointers to those of this node's,
592 * if non-NULL. This might result in waiting for more tasks than 592 * if non-NULL. This might result in waiting for more tasks than
593 * absolutely necessary, but this is a good performance/complexity 593 * absolutely necessary, but this is a good performance/complexity
594 * tradeoff. 594 * tradeoff.
595 */ 595 */
596 if (rcu_preempt_blocked_readers_cgp(rnp) && rnp->qsmask == 0) 596 if (rcu_preempt_blocked_readers_cgp(rnp) && rnp->qsmask == 0)
597 retval |= RCU_OFL_TASKS_NORM_GP; 597 retval |= RCU_OFL_TASKS_NORM_GP;
598 if (rcu_preempted_readers_exp(rnp)) 598 if (rcu_preempted_readers_exp(rnp))
599 retval |= RCU_OFL_TASKS_EXP_GP; 599 retval |= RCU_OFL_TASKS_EXP_GP;
600 lp = &rnp->blkd_tasks; 600 lp = &rnp->blkd_tasks;
601 lp_root = &rnp_root->blkd_tasks; 601 lp_root = &rnp_root->blkd_tasks;
602 while (!list_empty(lp)) { 602 while (!list_empty(lp)) {
603 t = list_entry(lp->next, typeof(*t), rcu_node_entry); 603 t = list_entry(lp->next, typeof(*t), rcu_node_entry);
604 raw_spin_lock(&rnp_root->lock); /* irqs already disabled */ 604 raw_spin_lock(&rnp_root->lock); /* irqs already disabled */
605 list_del(&t->rcu_node_entry); 605 list_del(&t->rcu_node_entry);
606 t->rcu_blocked_node = rnp_root; 606 t->rcu_blocked_node = rnp_root;
607 list_add(&t->rcu_node_entry, lp_root); 607 list_add(&t->rcu_node_entry, lp_root);
608 if (&t->rcu_node_entry == rnp->gp_tasks) 608 if (&t->rcu_node_entry == rnp->gp_tasks)
609 rnp_root->gp_tasks = rnp->gp_tasks; 609 rnp_root->gp_tasks = rnp->gp_tasks;
610 if (&t->rcu_node_entry == rnp->exp_tasks) 610 if (&t->rcu_node_entry == rnp->exp_tasks)
611 rnp_root->exp_tasks = rnp->exp_tasks; 611 rnp_root->exp_tasks = rnp->exp_tasks;
612 #ifdef CONFIG_RCU_BOOST 612 #ifdef CONFIG_RCU_BOOST
613 if (&t->rcu_node_entry == rnp->boost_tasks) 613 if (&t->rcu_node_entry == rnp->boost_tasks)
614 rnp_root->boost_tasks = rnp->boost_tasks; 614 rnp_root->boost_tasks = rnp->boost_tasks;
615 #endif /* #ifdef CONFIG_RCU_BOOST */ 615 #endif /* #ifdef CONFIG_RCU_BOOST */
616 raw_spin_unlock(&rnp_root->lock); /* irqs still disabled */ 616 raw_spin_unlock(&rnp_root->lock); /* irqs still disabled */
617 } 617 }
618 618
619 rnp->gp_tasks = NULL; 619 rnp->gp_tasks = NULL;
620 rnp->exp_tasks = NULL; 620 rnp->exp_tasks = NULL;
621 #ifdef CONFIG_RCU_BOOST 621 #ifdef CONFIG_RCU_BOOST
622 rnp->boost_tasks = NULL; 622 rnp->boost_tasks = NULL;
623 /* 623 /*
624 * In case the root is being boosted and the leaf was not, make sure 624 * In case the root is being boosted and the leaf was not, make sure
625 * that we boost the tasks blocking the current grace period 625 * that we boost the tasks blocking the current grace period
626 * in this case. 626 * in this case.
627 */ 627 */
628 raw_spin_lock(&rnp_root->lock); /* irqs already disabled */ 628 raw_spin_lock(&rnp_root->lock); /* irqs already disabled */
629 if (rnp_root->boost_tasks != NULL && 629 if (rnp_root->boost_tasks != NULL &&
630 rnp_root->boost_tasks != rnp_root->gp_tasks && 630 rnp_root->boost_tasks != rnp_root->gp_tasks &&
631 rnp_root->boost_tasks != rnp_root->exp_tasks) 631 rnp_root->boost_tasks != rnp_root->exp_tasks)
632 rnp_root->boost_tasks = rnp_root->gp_tasks; 632 rnp_root->boost_tasks = rnp_root->gp_tasks;
633 raw_spin_unlock(&rnp_root->lock); /* irqs still disabled */ 633 raw_spin_unlock(&rnp_root->lock); /* irqs still disabled */
634 #endif /* #ifdef CONFIG_RCU_BOOST */ 634 #endif /* #ifdef CONFIG_RCU_BOOST */
635 635
636 return retval; 636 return retval;
637 } 637 }
638 638
639 #endif /* #ifdef CONFIG_HOTPLUG_CPU */ 639 #endif /* #ifdef CONFIG_HOTPLUG_CPU */
640 640
641 /* 641 /*
642 * Check for a quiescent state from the current CPU. When a task blocks, 642 * Check for a quiescent state from the current CPU. When a task blocks,
643 * the task is recorded in the corresponding CPU's rcu_node structure, 643 * the task is recorded in the corresponding CPU's rcu_node structure,
644 * which is checked elsewhere. 644 * which is checked elsewhere.
645 * 645 *
646 * Caller must disable hard irqs. 646 * Caller must disable hard irqs.
647 */ 647 */
648 static void rcu_preempt_check_callbacks(int cpu) 648 static void rcu_preempt_check_callbacks(int cpu)
649 { 649 {
650 struct task_struct *t = current; 650 struct task_struct *t = current;
651 651
652 if (t->rcu_read_lock_nesting == 0) { 652 if (t->rcu_read_lock_nesting == 0) {
653 rcu_preempt_qs(cpu); 653 rcu_preempt_qs(cpu);
654 return; 654 return;
655 } 655 }
656 if (t->rcu_read_lock_nesting > 0 && 656 if (t->rcu_read_lock_nesting > 0 &&
657 per_cpu(rcu_preempt_data, cpu).qs_pending) 657 per_cpu(rcu_preempt_data, cpu).qs_pending)
658 t->rcu_read_unlock_special |= RCU_READ_UNLOCK_NEED_QS; 658 t->rcu_read_unlock_special |= RCU_READ_UNLOCK_NEED_QS;
659 } 659 }
660 660
661 #ifdef CONFIG_RCU_BOOST 661 #ifdef CONFIG_RCU_BOOST
662 662
663 static void rcu_preempt_do_callbacks(void) 663 static void rcu_preempt_do_callbacks(void)
664 { 664 {
665 rcu_do_batch(&rcu_preempt_state, &__get_cpu_var(rcu_preempt_data)); 665 rcu_do_batch(&rcu_preempt_state, &__get_cpu_var(rcu_preempt_data));
666 } 666 }
667 667
668 #endif /* #ifdef CONFIG_RCU_BOOST */ 668 #endif /* #ifdef CONFIG_RCU_BOOST */
669 669
670 /* 670 /*
671 * Queue a preemptible-RCU callback for invocation after a grace period. 671 * Queue a preemptible-RCU callback for invocation after a grace period.
672 */ 672 */
673 void call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu)) 673 void call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu))
674 { 674 {
675 __call_rcu(head, func, &rcu_preempt_state, -1, 0); 675 __call_rcu(head, func, &rcu_preempt_state, -1, 0);
676 } 676 }
677 EXPORT_SYMBOL_GPL(call_rcu); 677 EXPORT_SYMBOL_GPL(call_rcu);
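
For reference, a minimal sketch of how an updater typically uses call_rcu(); struct foo, foo_reclaim() and foo_remove() are hypothetical names used only for illustration:

    #include <linux/rcupdate.h>
    #include <linux/slab.h>

    struct foo {
            int data;
            struct rcu_head rcu;    /* embedded so call_rcu() can queue this object */
    };

    static void foo_reclaim(struct rcu_head *rcu)
    {
            /* Runs after a grace period: no pre-existing reader can still see fp. */
            struct foo *fp = container_of(rcu, struct foo, rcu);

            kfree(fp);
    }

    /* Caller has already unpublished fp, e.g. via rcu_assign_pointer(). */
    static void foo_remove(struct foo *fp)
    {
            call_rcu(&fp->rcu, foo_reclaim);
    }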
678 678
679 /* 679 /*
680 * Queue an RCU callback for lazy invocation after a grace period. 680 * Queue an RCU callback for lazy invocation after a grace period.
681 * This will likely be later named something like "call_rcu_lazy()", 681 * This will likely be later named something like "call_rcu_lazy()",
682 * but this change will require some way of tagging the lazy RCU 682 * but this change will require some way of tagging the lazy RCU
683 * callbacks in the list of pending callbacks. Until then, this 683 * callbacks in the list of pending callbacks. Until then, this
684 * function may only be called from __kfree_rcu(). 684 * function may only be called from __kfree_rcu().
685 */ 685 */
686 void kfree_call_rcu(struct rcu_head *head, 686 void kfree_call_rcu(struct rcu_head *head,
687 void (*func)(struct rcu_head *rcu)) 687 void (*func)(struct rcu_head *rcu))
688 { 688 {
689 __call_rcu(head, func, &rcu_preempt_state, -1, 1); 689 __call_rcu(head, func, &rcu_preempt_state, -1, 1);
690 } 690 }
691 EXPORT_SYMBOL_GPL(kfree_call_rcu); 691 EXPORT_SYMBOL_GPL(kfree_call_rcu);
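
Callers are not expected to invoke kfree_call_rcu() directly; the kfree_rcu() wrapper (which expands to __kfree_rcu()) is the intended interface. A hedged sketch, reusing the hypothetical struct foo from the call_rcu() sketch above:

    static void foo_free(struct foo *fp)
    {
            /*
             * Equivalent to call_rcu() with a callback that simply kfree()s fp,
             * but allows the callback to be treated as lazy, as described above.
             */
            kfree_rcu(fp, rcu);     /* second argument names the rcu_head member */
    }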
692 692
693 /** 693 /**
694 * synchronize_rcu - wait until a grace period has elapsed. 694 * synchronize_rcu - wait until a grace period has elapsed.
695 * 695 *
696 * Control will return to the caller some time after a full grace 696 * Control will return to the caller some time after a full grace
697 * period has elapsed, in other words after all currently executing RCU 697 * period has elapsed, in other words after all currently executing RCU
698 * read-side critical sections have completed. Note, however, that 698 * read-side critical sections have completed. Note, however, that
699 * upon return from synchronize_rcu(), the caller might well be executing 699 * upon return from synchronize_rcu(), the caller might well be executing
700 * concurrently with new RCU read-side critical sections that began while 700 * concurrently with new RCU read-side critical sections that began while
701 * synchronize_rcu() was waiting. RCU read-side critical sections are 701 * synchronize_rcu() was waiting. RCU read-side critical sections are
702 * delimited by rcu_read_lock() and rcu_read_unlock(), and may be nested. 702 * delimited by rcu_read_lock() and rcu_read_unlock(), and may be nested.
703 * 703 *
704 * See the description of synchronize_sched() for more detailed information 704 * See the description of synchronize_sched() for more detailed information
705 * on memory ordering guarantees. 705 * on memory ordering guarantees.
706 */ 706 */
707 void synchronize_rcu(void) 707 void synchronize_rcu(void)
708 { 708 {
709 rcu_lockdep_assert(!lock_is_held(&rcu_bh_lock_map) && 709 rcu_lockdep_assert(!lock_is_held(&rcu_bh_lock_map) &&
710 !lock_is_held(&rcu_lock_map) && 710 !lock_is_held(&rcu_lock_map) &&
711 !lock_is_held(&rcu_sched_lock_map), 711 !lock_is_held(&rcu_sched_lock_map),
712 "Illegal synchronize_rcu() in RCU read-side critical section"); 712 "Illegal synchronize_rcu() in RCU read-side critical section");
713 if (!rcu_scheduler_active) 713 if (!rcu_scheduler_active)
714 return; 714 return;
715 if (rcu_expedited) 715 if (rcu_expedited)
716 synchronize_rcu_expedited(); 716 synchronize_rcu_expedited();
717 else 717 else
718 wait_rcu_gp(call_rcu); 718 wait_rcu_gp(call_rcu);
719 } 719 }
720 EXPORT_SYMBOL_GPL(synchronize_rcu); 720 EXPORT_SYMBOL_GPL(synchronize_rcu);
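
A minimal sketch of the classic publish-then-wait update pattern that relies on synchronize_rcu(); gp, foo_mutex, foo_get_data() and foo_replace() are hypothetical names, and struct foo is the illustrative type from the call_rcu() sketch above:

    #include <linux/mutex.h>
    #include <linux/rcupdate.h>
    #include <linux/slab.h>

    static struct foo __rcu *gp;        /* RCU-protected pointer (hypothetical) */
    static DEFINE_MUTEX(foo_mutex);     /* serializes updaters (hypothetical) */

    /* Reader: delimited by rcu_read_lock()/rcu_read_unlock(), may be nested. */
    static int foo_get_data(void)
    {
            struct foo *fp;
            int ret;

            rcu_read_lock();
            fp = rcu_dereference(gp);
            ret = fp ? fp->data : -1;
            rcu_read_unlock();
            return ret;
    }

    /* Updater, called with foo_mutex held. */
    static void foo_replace(struct foo *new_fp)
    {
            struct foo *old_fp;

            old_fp = rcu_dereference_protected(gp, lockdep_is_held(&foo_mutex));
            rcu_assign_pointer(gp, new_fp); /* publish the new version */
            synchronize_rcu();              /* wait for pre-existing readers */
            kfree(old_fp);                  /* no reader can still see old_fp */
    }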
721 721
722 static DECLARE_WAIT_QUEUE_HEAD(sync_rcu_preempt_exp_wq); 722 static DECLARE_WAIT_QUEUE_HEAD(sync_rcu_preempt_exp_wq);
723 static unsigned long sync_rcu_preempt_exp_count; 723 static unsigned long sync_rcu_preempt_exp_count;
724 static DEFINE_MUTEX(sync_rcu_preempt_exp_mutex); 724 static DEFINE_MUTEX(sync_rcu_preempt_exp_mutex);
725 725
726 /* 726 /*
727 * Return non-zero if there are any tasks in RCU read-side critical 727 * Return non-zero if there are any tasks in RCU read-side critical
728 * sections blocking the current preemptible-RCU expedited grace period. 728 * sections blocking the current preemptible-RCU expedited grace period.
729 * If there is no preemptible-RCU expedited grace period currently in 729 * If there is no preemptible-RCU expedited grace period currently in
730 * progress, returns zero unconditionally. 730 * progress, returns zero unconditionally.
731 */ 731 */
732 static int rcu_preempted_readers_exp(struct rcu_node *rnp) 732 static int rcu_preempted_readers_exp(struct rcu_node *rnp)
733 { 733 {
734 return rnp->exp_tasks != NULL; 734 return rnp->exp_tasks != NULL;
735 } 735 }
736 736
737 /* 737 /*
738 * Return non-zero if there is no RCU expedited grace period in progress 738 * Return non-zero if there is no RCU expedited grace period in progress
739 * for the specified rcu_node structure, in other words, if all CPUs and 739 * for the specified rcu_node structure, in other words, if all CPUs and
740 * tasks covered by the specified rcu_node structure have done their bit 740 * tasks covered by the specified rcu_node structure have done their bit
741 * for the current expedited grace period. Works only for preemptible 741 * for the current expedited grace period. Works only for preemptible
742 * RCU -- other RCU implementations use other means. 742 * RCU -- other RCU implementations use other means.
743 * 743 *
744 * Caller must hold sync_rcu_preempt_exp_mutex. 744 * Caller must hold sync_rcu_preempt_exp_mutex.
745 */ 745 */
746 static int sync_rcu_preempt_exp_done(struct rcu_node *rnp) 746 static int sync_rcu_preempt_exp_done(struct rcu_node *rnp)
747 { 747 {
748 return !rcu_preempted_readers_exp(rnp) && 748 return !rcu_preempted_readers_exp(rnp) &&
749 ACCESS_ONCE(rnp->expmask) == 0; 749 ACCESS_ONCE(rnp->expmask) == 0;
750 } 750 }
751 751
752 /* 752 /*
753 * Report the exit from RCU read-side critical section for the last task 753 * Report the exit from RCU read-side critical section for the last task
754 * that queued itself during or before the current expedited preemptible-RCU 754 * that queued itself during or before the current expedited preemptible-RCU
755 * grace period. This event is reported either to the rcu_node structure on 755 * grace period. This event is reported either to the rcu_node structure on
756 * which the task was queued or to one of that rcu_node structure's ancestors, 756 * which the task was queued or to one of that rcu_node structure's ancestors,
757 * recursively up the tree. (Calm down, calm down, we do the recursion 757 * recursively up the tree. (Calm down, calm down, we do the recursion
758 * iteratively!) 758 * iteratively!)
759 * 759 *
760 * Most callers will set the "wake" flag, but the task initiating the 760 * Most callers will set the "wake" flag, but the task initiating the
761 * expedited grace period need not wake itself. 761 * expedited grace period need not wake itself.
762 * 762 *
763 * Caller must hold sync_rcu_preempt_exp_mutex. 763 * Caller must hold sync_rcu_preempt_exp_mutex.
764 */ 764 */
765 static void rcu_report_exp_rnp(struct rcu_state *rsp, struct rcu_node *rnp, 765 static void rcu_report_exp_rnp(struct rcu_state *rsp, struct rcu_node *rnp,
766 bool wake) 766 bool wake)
767 { 767 {
768 unsigned long flags; 768 unsigned long flags;
769 unsigned long mask; 769 unsigned long mask;
770 770
771 raw_spin_lock_irqsave(&rnp->lock, flags); 771 raw_spin_lock_irqsave(&rnp->lock, flags);
772 for (;;) { 772 for (;;) {
773 if (!sync_rcu_preempt_exp_done(rnp)) { 773 if (!sync_rcu_preempt_exp_done(rnp)) {
774 raw_spin_unlock_irqrestore(&rnp->lock, flags); 774 raw_spin_unlock_irqrestore(&rnp->lock, flags);
775 break; 775 break;
776 } 776 }
777 if (rnp->parent == NULL) { 777 if (rnp->parent == NULL) {
778 raw_spin_unlock_irqrestore(&rnp->lock, flags); 778 raw_spin_unlock_irqrestore(&rnp->lock, flags);
779 if (wake) 779 if (wake)
780 wake_up(&sync_rcu_preempt_exp_wq); 780 wake_up(&sync_rcu_preempt_exp_wq);
781 break; 781 break;
782 } 782 }
783 mask = rnp->grpmask; 783 mask = rnp->grpmask;
784 raw_spin_unlock(&rnp->lock); /* irqs remain disabled */ 784 raw_spin_unlock(&rnp->lock); /* irqs remain disabled */
785 rnp = rnp->parent; 785 rnp = rnp->parent;
786 raw_spin_lock(&rnp->lock); /* irqs already disabled */ 786 raw_spin_lock(&rnp->lock); /* irqs already disabled */
787 rnp->expmask &= ~mask; 787 rnp->expmask &= ~mask;
788 } 788 }
789 } 789 }
790 790
791 /* 791 /*
792 * Snapshot the tasks blocking the newly started preemptible-RCU expedited 792 * Snapshot the tasks blocking the newly started preemptible-RCU expedited
793 * grace period for the specified rcu_node structure. If there are no such 793 * grace period for the specified rcu_node structure. If there are no such
794 * tasks, report it up the rcu_node hierarchy. 794 * tasks, report it up the rcu_node hierarchy.
795 * 795 *
796 * Caller must hold sync_rcu_preempt_exp_mutex and must exclude 796 * Caller must hold sync_rcu_preempt_exp_mutex and must exclude
797 * CPU hotplug operations. 797 * CPU hotplug operations.
798 */ 798 */
799 static void 799 static void
800 sync_rcu_preempt_exp_init(struct rcu_state *rsp, struct rcu_node *rnp) 800 sync_rcu_preempt_exp_init(struct rcu_state *rsp, struct rcu_node *rnp)
801 { 801 {
802 unsigned long flags; 802 unsigned long flags;
803 int must_wait = 0; 803 int must_wait = 0;
804 804
805 raw_spin_lock_irqsave(&rnp->lock, flags); 805 raw_spin_lock_irqsave(&rnp->lock, flags);
806 if (list_empty(&rnp->blkd_tasks)) { 806 if (list_empty(&rnp->blkd_tasks)) {
807 raw_spin_unlock_irqrestore(&rnp->lock, flags); 807 raw_spin_unlock_irqrestore(&rnp->lock, flags);
808 } else { 808 } else {
809 rnp->exp_tasks = rnp->blkd_tasks.next; 809 rnp->exp_tasks = rnp->blkd_tasks.next;
810 rcu_initiate_boost(rnp, flags); /* releases rnp->lock */ 810 rcu_initiate_boost(rnp, flags); /* releases rnp->lock */
811 must_wait = 1; 811 must_wait = 1;
812 } 812 }
813 if (!must_wait) 813 if (!must_wait)
814 rcu_report_exp_rnp(rsp, rnp, false); /* Don't wake self. */ 814 rcu_report_exp_rnp(rsp, rnp, false); /* Don't wake self. */
815 } 815 }
816 816
817 /** 817 /**
818 * synchronize_rcu_expedited - Brute-force RCU grace period 818 * synchronize_rcu_expedited - Brute-force RCU grace period
819 * 819 *
820 * Wait for an RCU-preempt grace period, but expedite it. The basic 820 * Wait for an RCU-preempt grace period, but expedite it. The basic
821 * idea is to invoke synchronize_sched_expedited() to push all the tasks to 821 * idea is to invoke synchronize_sched_expedited() to push all the tasks to
822 * the ->blkd_tasks lists and wait for this list to drain. This consumes 822 * the ->blkd_tasks lists and wait for this list to drain. This consumes
823 * significant time on all CPUs and is unfriendly to real-time workloads, 823 * significant time on all CPUs and is unfriendly to real-time workloads,
824 * so is thus not recommended for any sort of common-case code. 824 * so is thus not recommended for any sort of common-case code.
825 * In fact, if you are using synchronize_rcu_expedited() in a loop, 825 * In fact, if you are using synchronize_rcu_expedited() in a loop,
826 * please restructure your code to batch your updates, and then Use a 826 * please restructure your code to batch your updates, and then Use a
827 * single synchronize_rcu() instead. 827 * single synchronize_rcu() instead.
828 * 828 *
829 * Note that it is illegal to call this function while holding any lock 829 * Note that it is illegal to call this function while holding any lock
830 * that is acquired by a CPU-hotplug notifier. And yes, it is also illegal 830 * that is acquired by a CPU-hotplug notifier. And yes, it is also illegal
831 * to call this function from a CPU-hotplug notifier. Failing to observe 831 * to call this function from a CPU-hotplug notifier. Failing to observe
832 * these restrictions will result in deadlock. 832 * these restrictions will result in deadlock.
833 */ 833 */
834 void synchronize_rcu_expedited(void) 834 void synchronize_rcu_expedited(void)
835 { 835 {
836 unsigned long flags; 836 unsigned long flags;
837 struct rcu_node *rnp; 837 struct rcu_node *rnp;
838 struct rcu_state *rsp = &rcu_preempt_state; 838 struct rcu_state *rsp = &rcu_preempt_state;
839 unsigned long snap; 839 unsigned long snap;
840 int trycount = 0; 840 int trycount = 0;
841 841
842 smp_mb(); /* Caller's modifications seen first by other CPUs. */ 842 smp_mb(); /* Caller's modifications seen first by other CPUs. */
843 snap = ACCESS_ONCE(sync_rcu_preempt_exp_count) + 1; 843 snap = ACCESS_ONCE(sync_rcu_preempt_exp_count) + 1;
844 smp_mb(); /* Above access cannot bleed into critical section. */ 844 smp_mb(); /* Above access cannot bleed into critical section. */
845 845
846 /* 846 /*
847 * Block CPU-hotplug operations. This means that any CPU-hotplug 847 * Block CPU-hotplug operations. This means that any CPU-hotplug
848 * operation that finds an rcu_node structure with tasks in the 848 * operation that finds an rcu_node structure with tasks in the
849 * process of being boosted will know that all tasks blocking 849 * process of being boosted will know that all tasks blocking
850 * this expedited grace period will already be in the process of 850 * this expedited grace period will already be in the process of
851 * being boosted. This simplifies the process of moving tasks 851 * being boosted. This simplifies the process of moving tasks
852 * from leaf to root rcu_node structures. 852 * from leaf to root rcu_node structures.
853 */ 853 */
854 get_online_cpus(); 854 get_online_cpus();
855 855
856 /* 856 /*
857 * Acquire lock, falling back to synchronize_rcu() if too many 857 * Acquire lock, falling back to synchronize_rcu() if too many
858 * lock-acquisition failures. Of course, if someone does the 858 * lock-acquisition failures. Of course, if someone does the
859 * expedited grace period for us, just leave. 859 * expedited grace period for us, just leave.
860 */ 860 */
861 while (!mutex_trylock(&sync_rcu_preempt_exp_mutex)) { 861 while (!mutex_trylock(&sync_rcu_preempt_exp_mutex)) {
862 if (ULONG_CMP_LT(snap, 862 if (ULONG_CMP_LT(snap,
863 ACCESS_ONCE(sync_rcu_preempt_exp_count))) { 863 ACCESS_ONCE(sync_rcu_preempt_exp_count))) {
864 put_online_cpus(); 864 put_online_cpus();
865 goto mb_ret; /* Others did our work for us. */ 865 goto mb_ret; /* Others did our work for us. */
866 } 866 }
867 if (trycount++ < 10) { 867 if (trycount++ < 10) {
868 udelay(trycount * num_online_cpus()); 868 udelay(trycount * num_online_cpus());
869 } else { 869 } else {
870 put_online_cpus(); 870 put_online_cpus();
871 wait_rcu_gp(call_rcu); 871 wait_rcu_gp(call_rcu);
872 return; 872 return;
873 } 873 }
874 } 874 }
875 if (ULONG_CMP_LT(snap, ACCESS_ONCE(sync_rcu_preempt_exp_count))) { 875 if (ULONG_CMP_LT(snap, ACCESS_ONCE(sync_rcu_preempt_exp_count))) {
876 put_online_cpus(); 876 put_online_cpus();
877 goto unlock_mb_ret; /* Others did our work for us. */ 877 goto unlock_mb_ret; /* Others did our work for us. */
878 } 878 }
879 879
880 /* force all RCU readers onto ->blkd_tasks lists. */ 880 /* force all RCU readers onto ->blkd_tasks lists. */
881 synchronize_sched_expedited(); 881 synchronize_sched_expedited();
882 882
883 /* Initialize ->expmask for all non-leaf rcu_node structures. */ 883 /* Initialize ->expmask for all non-leaf rcu_node structures. */
884 rcu_for_each_nonleaf_node_breadth_first(rsp, rnp) { 884 rcu_for_each_nonleaf_node_breadth_first(rsp, rnp) {
885 raw_spin_lock_irqsave(&rnp->lock, flags); 885 raw_spin_lock_irqsave(&rnp->lock, flags);
886 rnp->expmask = rnp->qsmaskinit; 886 rnp->expmask = rnp->qsmaskinit;
887 raw_spin_unlock_irqrestore(&rnp->lock, flags); 887 raw_spin_unlock_irqrestore(&rnp->lock, flags);
888 } 888 }
889 889
890 /* Snapshot current state of ->blkd_tasks lists. */ 890 /* Snapshot current state of ->blkd_tasks lists. */
891 rcu_for_each_leaf_node(rsp, rnp) 891 rcu_for_each_leaf_node(rsp, rnp)
892 sync_rcu_preempt_exp_init(rsp, rnp); 892 sync_rcu_preempt_exp_init(rsp, rnp);
893 if (NUM_RCU_NODES > 1) 893 if (NUM_RCU_NODES > 1)
894 sync_rcu_preempt_exp_init(rsp, rcu_get_root(rsp)); 894 sync_rcu_preempt_exp_init(rsp, rcu_get_root(rsp));
895 895
896 put_online_cpus(); 896 put_online_cpus();
897 897
898 /* Wait for snapshotted ->blkd_tasks lists to drain. */ 898 /* Wait for snapshotted ->blkd_tasks lists to drain. */
899 rnp = rcu_get_root(rsp); 899 rnp = rcu_get_root(rsp);
900 wait_event(sync_rcu_preempt_exp_wq, 900 wait_event(sync_rcu_preempt_exp_wq,
901 sync_rcu_preempt_exp_done(rnp)); 901 sync_rcu_preempt_exp_done(rnp));
902 902
903 /* Clean up and exit. */ 903 /* Clean up and exit. */
904 smp_mb(); /* ensure expedited GP seen before counter increment. */ 904 smp_mb(); /* ensure expedited GP seen before counter increment. */
905 ACCESS_ONCE(sync_rcu_preempt_exp_count)++; 905 ACCESS_ONCE(sync_rcu_preempt_exp_count)++;
906 unlock_mb_ret: 906 unlock_mb_ret:
907 mutex_unlock(&sync_rcu_preempt_exp_mutex); 907 mutex_unlock(&sync_rcu_preempt_exp_mutex);
908 mb_ret: 908 mb_ret:
909 smp_mb(); /* ensure subsequent action seen after grace period. */ 909 smp_mb(); /* ensure subsequent action seen after grace period. */
910 } 910 }
911 EXPORT_SYMBOL_GPL(synchronize_rcu_expedited); 911 EXPORT_SYMBOL_GPL(synchronize_rcu_expedited);
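
To illustrate the batching advice in the comment above, a hedged sketch (struct foo_item and foo_free_batch() are hypothetical): rather than one expedited grace period per removed item, unlink the whole batch and then wait once.

    #include <linux/list.h>
    #include <linux/rcupdate.h>
    #include <linux/slab.h>

    struct foo_item {
            struct list_head list;
    };

    /* "unlinked" holds items already removed from the RCU-visible structure. */
    static void foo_free_batch(struct list_head *unlinked)
    {
            struct foo_item *p, *tmp;

            synchronize_rcu();      /* one grace period covers every unlinked item */
            list_for_each_entry_safe(p, tmp, unlinked, list)
                    kfree(p);
    }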
912 912
913 /** 913 /**
914 * rcu_barrier - Wait until all in-flight call_rcu() callbacks complete. 914 * rcu_barrier - Wait until all in-flight call_rcu() callbacks complete.
915 * 915 *
916 * Note that this primitive does not necessarily wait for an RCU grace period 916 * Note that this primitive does not necessarily wait for an RCU grace period
917 * to complete. For example, if there are no RCU callbacks queued anywhere 917 * to complete. For example, if there are no RCU callbacks queued anywhere
918 * in the system, then rcu_barrier() is within its rights to return 918 * in the system, then rcu_barrier() is within its rights to return
919 * immediately, without waiting for anything, much less an RCU grace period. 919 * immediately, without waiting for anything, much less an RCU grace period.
920 */ 920 */
921 void rcu_barrier(void) 921 void rcu_barrier(void)
922 { 922 {
923 _rcu_barrier(&rcu_preempt_state); 923 _rcu_barrier(&rcu_preempt_state);
924 } 924 }
925 EXPORT_SYMBOL_GPL(rcu_barrier); 925 EXPORT_SYMBOL_GPL(rcu_barrier);
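
A typical use is in module-unload paths, to guarantee that no callback queued by the module can run after its code has been freed; a minimal sketch (foo_exit() is a hypothetical module-exit function):

    #include <linux/module.h>

    static void __exit foo_exit(void)
    {
            /*
             * Module-specific teardown that stops new call_rcu() invocations
             * goes here; then wait for every already-queued callback to finish.
             */
            rcu_barrier();
    }
    module_exit(foo_exit);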
926 926
927 /* 927 /*
928 * Initialize preemptible RCU's state structures. 928 * Initialize preemptible RCU's state structures.
929 */ 929 */
930 static void __init __rcu_init_preempt(void) 930 static void __init __rcu_init_preempt(void)
931 { 931 {
932 rcu_init_one(&rcu_preempt_state, &rcu_preempt_data); 932 rcu_init_one(&rcu_preempt_state, &rcu_preempt_data);
933 } 933 }
934 934
935 /*
936 * Check for a task exiting while in a preemptible-RCU read-side
937 * critical section, clean up if so. No need to issue warnings,
938 * as debug_check_no_locks_held() already does this if lockdep
939 * is enabled.
940 */
941 void exit_rcu(void)
942 {
943 struct task_struct *t = current;
944
945 if (likely(list_empty(&current->rcu_node_entry)))
946 return;
947 t->rcu_read_lock_nesting = 1;
948 barrier();
949 t->rcu_read_unlock_special = RCU_READ_UNLOCK_BLOCKED;
950 __rcu_read_unlock();
951 }
952
935 #else /* #ifdef CONFIG_TREE_PREEMPT_RCU */ 953 #else /* #ifdef CONFIG_TREE_PREEMPT_RCU */
936 954
937 static struct rcu_state *rcu_state = &rcu_sched_state; 955 static struct rcu_state *rcu_state = &rcu_sched_state;
938 956
939 /* 957 /*
940 * Tell them what RCU they are running. 958 * Tell them what RCU they are running.
941 */ 959 */
942 static void __init rcu_bootup_announce(void) 960 static void __init rcu_bootup_announce(void)
943 { 961 {
944 pr_info("Hierarchical RCU implementation.\n"); 962 pr_info("Hierarchical RCU implementation.\n");
945 rcu_bootup_announce_oddness(); 963 rcu_bootup_announce_oddness();
946 } 964 }
947 965
948 /* 966 /*
949 * Return the number of RCU batches processed thus far for debug & stats. 967 * Return the number of RCU batches processed thus far for debug & stats.
950 */ 968 */
951 long rcu_batches_completed(void) 969 long rcu_batches_completed(void)
952 { 970 {
953 return rcu_batches_completed_sched(); 971 return rcu_batches_completed_sched();
954 } 972 }
955 EXPORT_SYMBOL_GPL(rcu_batches_completed); 973 EXPORT_SYMBOL_GPL(rcu_batches_completed);
956 974
957 /* 975 /*
958 * Force a quiescent state for RCU, which, because there is no preemptible 976 * Force a quiescent state for RCU, which, because there is no preemptible
959 * RCU, becomes the same as rcu-sched. 977 * RCU, becomes the same as rcu-sched.
960 */ 978 */
961 void rcu_force_quiescent_state(void) 979 void rcu_force_quiescent_state(void)
962 { 980 {
963 rcu_sched_force_quiescent_state(); 981 rcu_sched_force_quiescent_state();
964 } 982 }
965 EXPORT_SYMBOL_GPL(rcu_force_quiescent_state); 983 EXPORT_SYMBOL_GPL(rcu_force_quiescent_state);
966 984
967 /* 985 /*
968 * Because preemptible RCU does not exist, we never have to check for 986 * Because preemptible RCU does not exist, we never have to check for
969 * CPUs being in quiescent states. 987 * CPUs being in quiescent states.
970 */ 988 */
971 static void rcu_preempt_note_context_switch(int cpu) 989 static void rcu_preempt_note_context_switch(int cpu)
972 { 990 {
973 } 991 }
974 992
975 /* 993 /*
976 * Because preemptible RCU does not exist, there are never any preempted 994 * Because preemptible RCU does not exist, there are never any preempted
977 * RCU readers. 995 * RCU readers.
978 */ 996 */
979 static int rcu_preempt_blocked_readers_cgp(struct rcu_node *rnp) 997 static int rcu_preempt_blocked_readers_cgp(struct rcu_node *rnp)
980 { 998 {
981 return 0; 999 return 0;
982 } 1000 }
983 1001
984 #ifdef CONFIG_HOTPLUG_CPU 1002 #ifdef CONFIG_HOTPLUG_CPU
985 1003
986 /* Because preemptible RCU does not exist, no quieting of tasks. */ 1004 /* Because preemptible RCU does not exist, no quieting of tasks. */
987 static void rcu_report_unblock_qs_rnp(struct rcu_node *rnp, unsigned long flags) 1005 static void rcu_report_unblock_qs_rnp(struct rcu_node *rnp, unsigned long flags)
988 { 1006 {
989 raw_spin_unlock_irqrestore(&rnp->lock, flags); 1007 raw_spin_unlock_irqrestore(&rnp->lock, flags);
990 } 1008 }
991 1009
992 #endif /* #ifdef CONFIG_HOTPLUG_CPU */ 1010 #endif /* #ifdef CONFIG_HOTPLUG_CPU */
993 1011
994 /* 1012 /*
995 * Because preemptible RCU does not exist, we never have to check for 1013 * Because preemptible RCU does not exist, we never have to check for
996 * tasks blocked within RCU read-side critical sections. 1014 * tasks blocked within RCU read-side critical sections.
997 */ 1015 */
998 static void rcu_print_detail_task_stall(struct rcu_state *rsp) 1016 static void rcu_print_detail_task_stall(struct rcu_state *rsp)
999 { 1017 {
1000 } 1018 }
1001 1019
1002 /* 1020 /*
1003 * Because preemptible RCU does not exist, we never have to check for 1021 * Because preemptible RCU does not exist, we never have to check for
1004 * tasks blocked within RCU read-side critical sections. 1022 * tasks blocked within RCU read-side critical sections.
1005 */ 1023 */
1006 static int rcu_print_task_stall(struct rcu_node *rnp) 1024 static int rcu_print_task_stall(struct rcu_node *rnp)
1007 { 1025 {
1008 return 0; 1026 return 0;
1009 } 1027 }
1010 1028
1011 /* 1029 /*
1012 * Because there is no preemptible RCU, there can be no readers blocked, 1030 * Because there is no preemptible RCU, there can be no readers blocked,
1013 * so there is no need to check for blocked tasks. So check only for 1031 * so there is no need to check for blocked tasks. So check only for
1014 * bogus qsmask values. 1032 * bogus qsmask values.
1015 */ 1033 */
1016 static void rcu_preempt_check_blocked_tasks(struct rcu_node *rnp) 1034 static void rcu_preempt_check_blocked_tasks(struct rcu_node *rnp)
1017 { 1035 {
1018 WARN_ON_ONCE(rnp->qsmask); 1036 WARN_ON_ONCE(rnp->qsmask);
1019 } 1037 }
1020 1038
1021 #ifdef CONFIG_HOTPLUG_CPU 1039 #ifdef CONFIG_HOTPLUG_CPU
1022 1040
1023 /* 1041 /*
1024 * Because preemptible RCU does not exist, it never needs to migrate 1042 * Because preemptible RCU does not exist, it never needs to migrate
1025 * tasks that were blocked within RCU read-side critical sections, and 1043 * tasks that were blocked within RCU read-side critical sections, and
1026 * such non-existent tasks cannot possibly have been blocking the current 1044 * such non-existent tasks cannot possibly have been blocking the current
1027 * grace period. 1045 * grace period.
1028 */ 1046 */
1029 static int rcu_preempt_offline_tasks(struct rcu_state *rsp, 1047 static int rcu_preempt_offline_tasks(struct rcu_state *rsp,
1030 struct rcu_node *rnp, 1048 struct rcu_node *rnp,
1031 struct rcu_data *rdp) 1049 struct rcu_data *rdp)
1032 { 1050 {
1033 return 0; 1051 return 0;
1034 } 1052 }
1035 1053
1036 #endif /* #ifdef CONFIG_HOTPLUG_CPU */ 1054 #endif /* #ifdef CONFIG_HOTPLUG_CPU */
1037 1055
1038 /* 1056 /*
1039 * Because preemptible RCU does not exist, it never has any callbacks 1057 * Because preemptible RCU does not exist, it never has any callbacks
1040 * to check. 1058 * to check.
1041 */ 1059 */
1042 static void rcu_preempt_check_callbacks(int cpu) 1060 static void rcu_preempt_check_callbacks(int cpu)
1043 { 1061 {
1044 } 1062 }
1045 1063
1046 /* 1064 /*
1047 * Queue an RCU callback for lazy invocation after a grace period. 1065 * Queue an RCU callback for lazy invocation after a grace period.
1048 * This will likely be later named something like "call_rcu_lazy()", 1066 * This will likely be later named something like "call_rcu_lazy()",
1049 * but this change will require some way of tagging the lazy RCU 1067 * but this change will require some way of tagging the lazy RCU
1050 * callbacks in the list of pending callbacks. Until then, this 1068 * callbacks in the list of pending callbacks. Until then, this
1051 * function may only be called from __kfree_rcu(). 1069 * function may only be called from __kfree_rcu().
1052 * 1070 *
1053 * Because there is no preemptible RCU, we use RCU-sched instead. 1071 * Because there is no preemptible RCU, we use RCU-sched instead.
1054 */ 1072 */
1055 void kfree_call_rcu(struct rcu_head *head, 1073 void kfree_call_rcu(struct rcu_head *head,
1056 void (*func)(struct rcu_head *rcu)) 1074 void (*func)(struct rcu_head *rcu))
1057 { 1075 {
1058 __call_rcu(head, func, &rcu_sched_state, -1, 1); 1076 __call_rcu(head, func, &rcu_sched_state, -1, 1);
1059 } 1077 }
1060 EXPORT_SYMBOL_GPL(kfree_call_rcu); 1078 EXPORT_SYMBOL_GPL(kfree_call_rcu);
1061 1079
1062 /* 1080 /*
1063 * Wait for an rcu-preempt grace period, but make it happen quickly. 1081 * Wait for an rcu-preempt grace period, but make it happen quickly.
1064 * But because preemptible RCU does not exist, map to rcu-sched. 1082 * But because preemptible RCU does not exist, map to rcu-sched.
1065 */ 1083 */
1066 void synchronize_rcu_expedited(void) 1084 void synchronize_rcu_expedited(void)
1067 { 1085 {
1068 synchronize_sched_expedited(); 1086 synchronize_sched_expedited();
1069 } 1087 }
1070 EXPORT_SYMBOL_GPL(synchronize_rcu_expedited); 1088 EXPORT_SYMBOL_GPL(synchronize_rcu_expedited);
1071 1089
1072 #ifdef CONFIG_HOTPLUG_CPU 1090 #ifdef CONFIG_HOTPLUG_CPU
1073 1091
1074 /* 1092 /*
1075 * Because preemptible RCU does not exist, there is never any need to 1093 * Because preemptible RCU does not exist, there is never any need to
1076 * report on tasks preempted in RCU read-side critical sections during 1094 * report on tasks preempted in RCU read-side critical sections during
1077 * expedited RCU grace periods. 1095 * expedited RCU grace periods.
1078 */ 1096 */
1079 static void rcu_report_exp_rnp(struct rcu_state *rsp, struct rcu_node *rnp, 1097 static void rcu_report_exp_rnp(struct rcu_state *rsp, struct rcu_node *rnp,
1080 bool wake) 1098 bool wake)
1081 { 1099 {
1082 } 1100 }
1083 1101
1084 #endif /* #ifdef CONFIG_HOTPLUG_CPU */ 1102 #endif /* #ifdef CONFIG_HOTPLUG_CPU */
1085 1103
1086 /* 1104 /*
1087 * Because preemptible RCU does not exist, rcu_barrier() is just 1105 * Because preemptible RCU does not exist, rcu_barrier() is just
1088 * another name for rcu_barrier_sched(). 1106 * another name for rcu_barrier_sched().
1089 */ 1107 */
1090 void rcu_barrier(void) 1108 void rcu_barrier(void)
1091 { 1109 {
1092 rcu_barrier_sched(); 1110 rcu_barrier_sched();
1093 } 1111 }
1094 EXPORT_SYMBOL_GPL(rcu_barrier); 1112 EXPORT_SYMBOL_GPL(rcu_barrier);
1095 1113
1096 /* 1114 /*
1097 * Because preemptible RCU does not exist, it need not be initialized. 1115 * Because preemptible RCU does not exist, it need not be initialized.
1098 */ 1116 */
1099 static void __init __rcu_init_preempt(void) 1117 static void __init __rcu_init_preempt(void)
1118 {
1119 }
1120
1121 /*
1122 * Because preemptible RCU does not exist, tasks cannot possibly exit
1123 * while in preemptible RCU read-side critical sections.
1124 */
1125 void exit_rcu(void)
1100 { 1126 {
1101 } 1127 }
1102 1128
1103 #endif /* #else #ifdef CONFIG_TREE_PREEMPT_RCU */ 1129 #endif /* #else #ifdef CONFIG_TREE_PREEMPT_RCU */
1104 1130
1105 #ifdef CONFIG_RCU_BOOST 1131 #ifdef CONFIG_RCU_BOOST
1106 1132
1107 #include "rtmutex_common.h" 1133 #include "rtmutex_common.h"
1108 1134
1109 #ifdef CONFIG_RCU_TRACE 1135 #ifdef CONFIG_RCU_TRACE
1110 1136
1111 static void rcu_initiate_boost_trace(struct rcu_node *rnp) 1137 static void rcu_initiate_boost_trace(struct rcu_node *rnp)
1112 { 1138 {
1113 if (list_empty(&rnp->blkd_tasks)) 1139 if (list_empty(&rnp->blkd_tasks))
1114 rnp->n_balk_blkd_tasks++; 1140 rnp->n_balk_blkd_tasks++;
1115 else if (rnp->exp_tasks == NULL && rnp->gp_tasks == NULL) 1141 else if (rnp->exp_tasks == NULL && rnp->gp_tasks == NULL)
1116 rnp->n_balk_exp_gp_tasks++; 1142 rnp->n_balk_exp_gp_tasks++;
1117 else if (rnp->gp_tasks != NULL && rnp->boost_tasks != NULL) 1143 else if (rnp->gp_tasks != NULL && rnp->boost_tasks != NULL)
1118 rnp->n_balk_boost_tasks++; 1144 rnp->n_balk_boost_tasks++;
1119 else if (rnp->gp_tasks != NULL && rnp->qsmask != 0) 1145 else if (rnp->gp_tasks != NULL && rnp->qsmask != 0)
1120 rnp->n_balk_notblocked++; 1146 rnp->n_balk_notblocked++;
1121 else if (rnp->gp_tasks != NULL && 1147 else if (rnp->gp_tasks != NULL &&
1122 ULONG_CMP_LT(jiffies, rnp->boost_time)) 1148 ULONG_CMP_LT(jiffies, rnp->boost_time))
1123 rnp->n_balk_notyet++; 1149 rnp->n_balk_notyet++;
1124 else 1150 else
1125 rnp->n_balk_nos++; 1151 rnp->n_balk_nos++;
1126 } 1152 }
1127 1153
1128 #else /* #ifdef CONFIG_RCU_TRACE */ 1154 #else /* #ifdef CONFIG_RCU_TRACE */
1129 1155
1130 static void rcu_initiate_boost_trace(struct rcu_node *rnp) 1156 static void rcu_initiate_boost_trace(struct rcu_node *rnp)
1131 { 1157 {
1132 } 1158 }
1133 1159
1134 #endif /* #else #ifdef CONFIG_RCU_TRACE */ 1160 #endif /* #else #ifdef CONFIG_RCU_TRACE */
1135 1161
1136 static void rcu_wake_cond(struct task_struct *t, int status) 1162 static void rcu_wake_cond(struct task_struct *t, int status)
1137 { 1163 {
1138 /* 1164 /*
1139 * If the thread is yielding, only wake it when this 1165 * If the thread is yielding, only wake it when this
1140 * is invoked from idle 1166 * is invoked from idle
1141 */ 1167 */
1142 if (status != RCU_KTHREAD_YIELDING || is_idle_task(current)) 1168 if (status != RCU_KTHREAD_YIELDING || is_idle_task(current))
1143 wake_up_process(t); 1169 wake_up_process(t);
1144 } 1170 }
1145 1171
1146 /* 1172 /*
1147 * Carry out RCU priority boosting on the task indicated by ->exp_tasks 1173 * Carry out RCU priority boosting on the task indicated by ->exp_tasks
1148 * or ->boost_tasks, advancing the pointer to the next task in the 1174 * or ->boost_tasks, advancing the pointer to the next task in the
1149 * ->blkd_tasks list. 1175 * ->blkd_tasks list.
1150 * 1176 *
1151 * Note that irqs must be enabled: boosting the task can block. 1177 * Note that irqs must be enabled: boosting the task can block.
1152 * Returns 1 if there are more tasks needing to be boosted. 1178 * Returns 1 if there are more tasks needing to be boosted.
1153 */ 1179 */
1154 static int rcu_boost(struct rcu_node *rnp) 1180 static int rcu_boost(struct rcu_node *rnp)
1155 { 1181 {
1156 unsigned long flags; 1182 unsigned long flags;
1157 struct rt_mutex mtx; 1183 struct rt_mutex mtx;
1158 struct task_struct *t; 1184 struct task_struct *t;
1159 struct list_head *tb; 1185 struct list_head *tb;
1160 1186
1161 if (rnp->exp_tasks == NULL && rnp->boost_tasks == NULL) 1187 if (rnp->exp_tasks == NULL && rnp->boost_tasks == NULL)
1162 return 0; /* Nothing left to boost. */ 1188 return 0; /* Nothing left to boost. */
1163 1189
1164 raw_spin_lock_irqsave(&rnp->lock, flags); 1190 raw_spin_lock_irqsave(&rnp->lock, flags);
1165 1191
1166 /* 1192 /*
1167 * Recheck under the lock: all tasks in need of boosting 1193 * Recheck under the lock: all tasks in need of boosting
1168 * might exit their RCU read-side critical sections on their own. 1194 * might exit their RCU read-side critical sections on their own.
1169 */ 1195 */
1170 if (rnp->exp_tasks == NULL && rnp->boost_tasks == NULL) { 1196 if (rnp->exp_tasks == NULL && rnp->boost_tasks == NULL) {
1171 raw_spin_unlock_irqrestore(&rnp->lock, flags); 1197 raw_spin_unlock_irqrestore(&rnp->lock, flags);
1172 return 0; 1198 return 0;
1173 } 1199 }
1174 1200
1175 /* 1201 /*
1176 * Preferentially boost tasks blocking expedited grace periods. 1202 * Preferentially boost tasks blocking expedited grace periods.
1177 * This cannot starve the normal grace periods because a second 1203 * This cannot starve the normal grace periods because a second
1178 * expedited grace period must boost all blocked tasks, including 1204 * expedited grace period must boost all blocked tasks, including
1179 * those blocking the pre-existing normal grace period. 1205 * those blocking the pre-existing normal grace period.
1180 */ 1206 */
1181 if (rnp->exp_tasks != NULL) { 1207 if (rnp->exp_tasks != NULL) {
1182 tb = rnp->exp_tasks; 1208 tb = rnp->exp_tasks;
1183 rnp->n_exp_boosts++; 1209 rnp->n_exp_boosts++;
1184 } else { 1210 } else {
1185 tb = rnp->boost_tasks; 1211 tb = rnp->boost_tasks;
1186 rnp->n_normal_boosts++; 1212 rnp->n_normal_boosts++;
1187 } 1213 }
1188 rnp->n_tasks_boosted++; 1214 rnp->n_tasks_boosted++;
1189 1215
1190 /* 1216 /*
1191 * We boost task t by manufacturing an rt_mutex that appears to 1217 * We boost task t by manufacturing an rt_mutex that appears to
1192 * be held by task t. We leave a pointer to that rt_mutex where 1218 * be held by task t. We leave a pointer to that rt_mutex where
1193 * task t can find it, and task t will release the mutex when it 1219 * task t can find it, and task t will release the mutex when it
1194 * exits its outermost RCU read-side critical section. Then 1220 * exits its outermost RCU read-side critical section. Then
1195 * simply acquiring this artificial rt_mutex will boost task 1221 * simply acquiring this artificial rt_mutex will boost task
1196 * t's priority. (Thanks to tglx for suggesting this approach!) 1222 * t's priority. (Thanks to tglx for suggesting this approach!)
1197 * 1223 *
1198 * Note that task t must acquire rnp->lock to remove itself from 1224 * Note that task t must acquire rnp->lock to remove itself from
1199 * the ->blkd_tasks list, which it will do from exit() if from 1225 * the ->blkd_tasks list, which it will do from exit() if from
1200 * nowhere else. We therefore are guaranteed that task t will 1226 * nowhere else. We therefore are guaranteed that task t will
1201 * stay around at least until we drop rnp->lock. Note that 1227 * stay around at least until we drop rnp->lock. Note that
1202 * rnp->lock also resolves races between our priority boosting 1228 * rnp->lock also resolves races between our priority boosting
1203 * and task t's exiting its outermost RCU read-side critical 1229 * and task t's exiting its outermost RCU read-side critical
1204 * section. 1230 * section.
1205 */ 1231 */
1206 t = container_of(tb, struct task_struct, rcu_node_entry); 1232 t = container_of(tb, struct task_struct, rcu_node_entry);
1207 rt_mutex_init_proxy_locked(&mtx, t); 1233 rt_mutex_init_proxy_locked(&mtx, t);
1208 t->rcu_boost_mutex = &mtx; 1234 t->rcu_boost_mutex = &mtx;
1209 raw_spin_unlock_irqrestore(&rnp->lock, flags); 1235 raw_spin_unlock_irqrestore(&rnp->lock, flags);
1210 rt_mutex_lock(&mtx); /* Side effect: boosts task t's priority. */ 1236 rt_mutex_lock(&mtx); /* Side effect: boosts task t's priority. */
1211 rt_mutex_unlock(&mtx); /* Keep lockdep happy. */ 1237 rt_mutex_unlock(&mtx); /* Keep lockdep happy. */
1212 1238
1213 return ACCESS_ONCE(rnp->exp_tasks) != NULL || 1239 return ACCESS_ONCE(rnp->exp_tasks) != NULL ||
1214 ACCESS_ONCE(rnp->boost_tasks) != NULL; 1240 ACCESS_ONCE(rnp->boost_tasks) != NULL;
1215 } 1241 }
1216 1242
1217 /* 1243 /*
1218 * Priority-boosting kthread. One per leaf rcu_node and one for the 1244 * Priority-boosting kthread. One per leaf rcu_node and one for the
1219 * root rcu_node. 1245 * root rcu_node.
1220 */ 1246 */
1221 static int rcu_boost_kthread(void *arg) 1247 static int rcu_boost_kthread(void *arg)
1222 { 1248 {
1223 struct rcu_node *rnp = (struct rcu_node *)arg; 1249 struct rcu_node *rnp = (struct rcu_node *)arg;
1224 int spincnt = 0; 1250 int spincnt = 0;
1225 int more2boost; 1251 int more2boost;
1226 1252
1227 trace_rcu_utilization("Start boost kthread@init"); 1253 trace_rcu_utilization("Start boost kthread@init");
1228 for (;;) { 1254 for (;;) {
1229 rnp->boost_kthread_status = RCU_KTHREAD_WAITING; 1255 rnp->boost_kthread_status = RCU_KTHREAD_WAITING;
1230 trace_rcu_utilization("End boost kthread@rcu_wait"); 1256 trace_rcu_utilization("End boost kthread@rcu_wait");
1231 rcu_wait(rnp->boost_tasks || rnp->exp_tasks); 1257 rcu_wait(rnp->boost_tasks || rnp->exp_tasks);
1232 trace_rcu_utilization("Start boost kthread@rcu_wait"); 1258 trace_rcu_utilization("Start boost kthread@rcu_wait");
1233 rnp->boost_kthread_status = RCU_KTHREAD_RUNNING; 1259 rnp->boost_kthread_status = RCU_KTHREAD_RUNNING;
1234 more2boost = rcu_boost(rnp); 1260 more2boost = rcu_boost(rnp);
1235 if (more2boost) 1261 if (more2boost)
1236 spincnt++; 1262 spincnt++;
1237 else 1263 else
1238 spincnt = 0; 1264 spincnt = 0;
1239 if (spincnt > 10) { 1265 if (spincnt > 10) {
1240 rnp->boost_kthread_status = RCU_KTHREAD_YIELDING; 1266 rnp->boost_kthread_status = RCU_KTHREAD_YIELDING;
1241 trace_rcu_utilization("End boost kthread@rcu_yield"); 1267 trace_rcu_utilization("End boost kthread@rcu_yield");
1242 schedule_timeout_interruptible(2); 1268 schedule_timeout_interruptible(2);
1243 trace_rcu_utilization("Start boost kthread@rcu_yield"); 1269 trace_rcu_utilization("Start boost kthread@rcu_yield");
1244 spincnt = 0; 1270 spincnt = 0;
1245 } 1271 }
1246 } 1272 }
1247 /* NOTREACHED */ 1273 /* NOTREACHED */
1248 trace_rcu_utilization("End boost kthread@notreached"); 1274 trace_rcu_utilization("End boost kthread@notreached");
1249 return 0; 1275 return 0;
1250 } 1276 }
1251 1277
1252 /* 1278 /*
1253 * Check to see if it is time to start boosting RCU readers that are 1279 * Check to see if it is time to start boosting RCU readers that are
1254 * blocking the current grace period, and, if so, tell the per-rcu_node 1280 * blocking the current grace period, and, if so, tell the per-rcu_node
1255 * kthread to start boosting them. If there is an expedited grace 1281 * kthread to start boosting them. If there is an expedited grace
1256 * period in progress, it is always time to boost. 1282 * period in progress, it is always time to boost.
1257 * 1283 *
1258 * The caller must hold rnp->lock, which this function releases. 1284 * The caller must hold rnp->lock, which this function releases.
1259 * The ->boost_kthread_task is immortal, so we don't need to worry 1285 * The ->boost_kthread_task is immortal, so we don't need to worry
1260 * about it going away. 1286 * about it going away.
1261 */ 1287 */
1262 static void rcu_initiate_boost(struct rcu_node *rnp, unsigned long flags) 1288 static void rcu_initiate_boost(struct rcu_node *rnp, unsigned long flags)
1263 { 1289 {
1264 struct task_struct *t; 1290 struct task_struct *t;
1265 1291
1266 if (!rcu_preempt_blocked_readers_cgp(rnp) && rnp->exp_tasks == NULL) { 1292 if (!rcu_preempt_blocked_readers_cgp(rnp) && rnp->exp_tasks == NULL) {
1267 rnp->n_balk_exp_gp_tasks++; 1293 rnp->n_balk_exp_gp_tasks++;
1268 raw_spin_unlock_irqrestore(&rnp->lock, flags); 1294 raw_spin_unlock_irqrestore(&rnp->lock, flags);
1269 return; 1295 return;
1270 } 1296 }
1271 if (rnp->exp_tasks != NULL || 1297 if (rnp->exp_tasks != NULL ||
1272 (rnp->gp_tasks != NULL && 1298 (rnp->gp_tasks != NULL &&
1273 rnp->boost_tasks == NULL && 1299 rnp->boost_tasks == NULL &&
1274 rnp->qsmask == 0 && 1300 rnp->qsmask == 0 &&
1275 ULONG_CMP_GE(jiffies, rnp->boost_time))) { 1301 ULONG_CMP_GE(jiffies, rnp->boost_time))) {
1276 if (rnp->exp_tasks == NULL) 1302 if (rnp->exp_tasks == NULL)
1277 rnp->boost_tasks = rnp->gp_tasks; 1303 rnp->boost_tasks = rnp->gp_tasks;
1278 raw_spin_unlock_irqrestore(&rnp->lock, flags); 1304 raw_spin_unlock_irqrestore(&rnp->lock, flags);
1279 t = rnp->boost_kthread_task; 1305 t = rnp->boost_kthread_task;
1280 if (t) 1306 if (t)
1281 rcu_wake_cond(t, rnp->boost_kthread_status); 1307 rcu_wake_cond(t, rnp->boost_kthread_status);
1282 } else { 1308 } else {
1283 rcu_initiate_boost_trace(rnp); 1309 rcu_initiate_boost_trace(rnp);
1284 raw_spin_unlock_irqrestore(&rnp->lock, flags); 1310 raw_spin_unlock_irqrestore(&rnp->lock, flags);
1285 } 1311 }
1286 } 1312 }
1287 1313
1288 /* 1314 /*
1289 * Wake up the per-CPU kthread to invoke RCU callbacks. 1315 * Wake up the per-CPU kthread to invoke RCU callbacks.
1290 */ 1316 */
1291 static void invoke_rcu_callbacks_kthread(void) 1317 static void invoke_rcu_callbacks_kthread(void)
1292 { 1318 {
1293 unsigned long flags; 1319 unsigned long flags;
1294 1320
1295 local_irq_save(flags); 1321 local_irq_save(flags);
1296 __this_cpu_write(rcu_cpu_has_work, 1); 1322 __this_cpu_write(rcu_cpu_has_work, 1);
1297 if (__this_cpu_read(rcu_cpu_kthread_task) != NULL && 1323 if (__this_cpu_read(rcu_cpu_kthread_task) != NULL &&
1298 current != __this_cpu_read(rcu_cpu_kthread_task)) { 1324 current != __this_cpu_read(rcu_cpu_kthread_task)) {
1299 rcu_wake_cond(__this_cpu_read(rcu_cpu_kthread_task), 1325 rcu_wake_cond(__this_cpu_read(rcu_cpu_kthread_task),
1300 __this_cpu_read(rcu_cpu_kthread_status)); 1326 __this_cpu_read(rcu_cpu_kthread_status));
1301 } 1327 }
1302 local_irq_restore(flags); 1328 local_irq_restore(flags);
1303 } 1329 }
1304 1330
1305 /* 1331 /*
1306 * Is the current CPU running the RCU-callbacks kthread? 1332 * Is the current CPU running the RCU-callbacks kthread?
1307 * Caller must have preemption disabled. 1333 * Caller must have preemption disabled.
1308 */ 1334 */
1309 static bool rcu_is_callbacks_kthread(void) 1335 static bool rcu_is_callbacks_kthread(void)
1310 { 1336 {
1311 return __get_cpu_var(rcu_cpu_kthread_task) == current; 1337 return __get_cpu_var(rcu_cpu_kthread_task) == current;
1312 } 1338 }
1313 1339
1314 #define RCU_BOOST_DELAY_JIFFIES DIV_ROUND_UP(CONFIG_RCU_BOOST_DELAY * HZ, 1000) 1340 #define RCU_BOOST_DELAY_JIFFIES DIV_ROUND_UP(CONFIG_RCU_BOOST_DELAY * HZ, 1000)
1315 1341
1316 /* 1342 /*
1317 * Do priority-boost accounting for the start of a new grace period. 1343 * Do priority-boost accounting for the start of a new grace period.
1318 */ 1344 */
1319 static void rcu_preempt_boost_start_gp(struct rcu_node *rnp) 1345 static void rcu_preempt_boost_start_gp(struct rcu_node *rnp)
1320 { 1346 {
1321 rnp->boost_time = jiffies + RCU_BOOST_DELAY_JIFFIES; 1347 rnp->boost_time = jiffies + RCU_BOOST_DELAY_JIFFIES;
1322 } 1348 }
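
As a worked example of the RCU_BOOST_DELAY_JIFFIES computation above (values chosen only for illustration): with CONFIG_RCU_BOOST_DELAY=500 milliseconds and HZ=250, DIV_ROUND_UP(500 * 250, 1000) = 125, so ->boost_time lands 125 jiffies (500 ms) after the grace period starts, and rcu_initiate_boost() will not begin boosting tasks blocking a normal grace period until that time has passed.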
1323 1349
1324 /* 1350 /*
1325 * Create an RCU-boost kthread for the specified node if one does not 1351 * Create an RCU-boost kthread for the specified node if one does not
1326 * already exist. We only create this kthread for preemptible RCU. 1352 * already exist. We only create this kthread for preemptible RCU.
1327 * Returns zero if all is well, a negated errno otherwise. 1353 * Returns zero if all is well, a negated errno otherwise.
1328 */ 1354 */
1329 static int __cpuinit rcu_spawn_one_boost_kthread(struct rcu_state *rsp, 1355 static int __cpuinit rcu_spawn_one_boost_kthread(struct rcu_state *rsp,
1330 struct rcu_node *rnp) 1356 struct rcu_node *rnp)
1331 { 1357 {
1332 int rnp_index = rnp - &rsp->node[0]; 1358 int rnp_index = rnp - &rsp->node[0];
1333 unsigned long flags; 1359 unsigned long flags;
1334 struct sched_param sp; 1360 struct sched_param sp;
1335 struct task_struct *t; 1361 struct task_struct *t;
1336 1362
1337 if (&rcu_preempt_state != rsp) 1363 if (&rcu_preempt_state != rsp)
1338 return 0; 1364 return 0;
1339 1365
1340 if (!rcu_scheduler_fully_active || rnp->qsmaskinit == 0) 1366 if (!rcu_scheduler_fully_active || rnp->qsmaskinit == 0)
1341 return 0; 1367 return 0;
1342 1368
1343 rsp->boost = 1; 1369 rsp->boost = 1;
1344 if (rnp->boost_kthread_task != NULL) 1370 if (rnp->boost_kthread_task != NULL)
1345 return 0; 1371 return 0;
1346 t = kthread_create(rcu_boost_kthread, (void *)rnp, 1372 t = kthread_create(rcu_boost_kthread, (void *)rnp,
1347 "rcub/%d", rnp_index); 1373 "rcub/%d", rnp_index);
1348 if (IS_ERR(t)) 1374 if (IS_ERR(t))
1349 return PTR_ERR(t); 1375 return PTR_ERR(t);
1350 raw_spin_lock_irqsave(&rnp->lock, flags); 1376 raw_spin_lock_irqsave(&rnp->lock, flags);
1351 rnp->boost_kthread_task = t; 1377 rnp->boost_kthread_task = t;
1352 raw_spin_unlock_irqrestore(&rnp->lock, flags); 1378 raw_spin_unlock_irqrestore(&rnp->lock, flags);
1353 sp.sched_priority = RCU_BOOST_PRIO; 1379 sp.sched_priority = RCU_BOOST_PRIO;
1354 sched_setscheduler_nocheck(t, SCHED_FIFO, &sp); 1380 sched_setscheduler_nocheck(t, SCHED_FIFO, &sp);
1355 wake_up_process(t); /* get to TASK_INTERRUPTIBLE quickly. */ 1381 wake_up_process(t); /* get to TASK_INTERRUPTIBLE quickly. */
1356 return 0; 1382 return 0;
1357 } 1383 }
1358 1384
1359 static void rcu_kthread_do_work(void) 1385 static void rcu_kthread_do_work(void)
1360 { 1386 {
1361 rcu_do_batch(&rcu_sched_state, &__get_cpu_var(rcu_sched_data)); 1387 rcu_do_batch(&rcu_sched_state, &__get_cpu_var(rcu_sched_data));
1362 rcu_do_batch(&rcu_bh_state, &__get_cpu_var(rcu_bh_data)); 1388 rcu_do_batch(&rcu_bh_state, &__get_cpu_var(rcu_bh_data));
1363 rcu_preempt_do_callbacks(); 1389 rcu_preempt_do_callbacks();
1364 } 1390 }
1365 1391
1366 static void rcu_cpu_kthread_setup(unsigned int cpu) 1392 static void rcu_cpu_kthread_setup(unsigned int cpu)
1367 { 1393 {
1368 struct sched_param sp; 1394 struct sched_param sp;
1369 1395
1370 sp.sched_priority = RCU_KTHREAD_PRIO; 1396 sp.sched_priority = RCU_KTHREAD_PRIO;
1371 sched_setscheduler_nocheck(current, SCHED_FIFO, &sp); 1397 sched_setscheduler_nocheck(current, SCHED_FIFO, &sp);
1372 } 1398 }
1373 1399
1374 static void rcu_cpu_kthread_park(unsigned int cpu) 1400 static void rcu_cpu_kthread_park(unsigned int cpu)
1375 { 1401 {
1376 per_cpu(rcu_cpu_kthread_status, cpu) = RCU_KTHREAD_OFFCPU; 1402 per_cpu(rcu_cpu_kthread_status, cpu) = RCU_KTHREAD_OFFCPU;
1377 } 1403 }
1378 1404
1379 static int rcu_cpu_kthread_should_run(unsigned int cpu) 1405 static int rcu_cpu_kthread_should_run(unsigned int cpu)
1380 { 1406 {
1381 return __get_cpu_var(rcu_cpu_has_work); 1407 return __get_cpu_var(rcu_cpu_has_work);
1382 } 1408 }
1383 1409
1384 /* 1410 /*
1385 * Per-CPU kernel thread that invokes RCU callbacks. This replaces the 1411 * Per-CPU kernel thread that invokes RCU callbacks. This replaces the
1386 * RCU softirq used in flavors and configurations of RCU that do not 1412 * RCU softirq used in flavors and configurations of RCU that do not
1387 * support RCU priority boosting. 1413 * support RCU priority boosting.
1388 */ 1414 */
1389 static void rcu_cpu_kthread(unsigned int cpu) 1415 static void rcu_cpu_kthread(unsigned int cpu)
1390 { 1416 {
1391 unsigned int *statusp = &__get_cpu_var(rcu_cpu_kthread_status); 1417 unsigned int *statusp = &__get_cpu_var(rcu_cpu_kthread_status);
1392 char work, *workp = &__get_cpu_var(rcu_cpu_has_work); 1418 char work, *workp = &__get_cpu_var(rcu_cpu_has_work);
1393 int spincnt; 1419 int spincnt;
1394 1420
1395 for (spincnt = 0; spincnt < 10; spincnt++) { 1421 for (spincnt = 0; spincnt < 10; spincnt++) {
1396 trace_rcu_utilization("Start CPU kthread@rcu_wait"); 1422 trace_rcu_utilization("Start CPU kthread@rcu_wait");
1397 local_bh_disable(); 1423 local_bh_disable();
1398 *statusp = RCU_KTHREAD_RUNNING; 1424 *statusp = RCU_KTHREAD_RUNNING;
1399 this_cpu_inc(rcu_cpu_kthread_loops); 1425 this_cpu_inc(rcu_cpu_kthread_loops);
1400 local_irq_disable(); 1426 local_irq_disable();
1401 work = *workp; 1427 work = *workp;
1402 *workp = 0; 1428 *workp = 0;
1403 local_irq_enable(); 1429 local_irq_enable();
1404 if (work) 1430 if (work)
1405 rcu_kthread_do_work(); 1431 rcu_kthread_do_work();
1406 local_bh_enable(); 1432 local_bh_enable();
1407 if (*workp == 0) { 1433 if (*workp == 0) {
1408 trace_rcu_utilization("End CPU kthread@rcu_wait"); 1434 trace_rcu_utilization("End CPU kthread@rcu_wait");
1409 *statusp = RCU_KTHREAD_WAITING; 1435 *statusp = RCU_KTHREAD_WAITING;
1410 return; 1436 return;
1411 } 1437 }
1412 } 1438 }
1413 *statusp = RCU_KTHREAD_YIELDING; 1439 *statusp = RCU_KTHREAD_YIELDING;
1414 trace_rcu_utilization("Start CPU kthread@rcu_yield"); 1440 trace_rcu_utilization("Start CPU kthread@rcu_yield");
1415 schedule_timeout_interruptible(2); 1441 schedule_timeout_interruptible(2);
1416 trace_rcu_utilization("End CPU kthread@rcu_yield"); 1442 trace_rcu_utilization("End CPU kthread@rcu_yield");
1417 *statusp = RCU_KTHREAD_WAITING; 1443 *statusp = RCU_KTHREAD_WAITING;
1418 } 1444 }
1419 1445
1420 /* 1446 /*
1421 * Set the per-rcu_node kthread's affinity to cover all CPUs that are 1447 * Set the per-rcu_node kthread's affinity to cover all CPUs that are
1422 * served by the rcu_node in question. The CPU hotplug lock is still 1448 * served by the rcu_node in question. The CPU hotplug lock is still
1423 * held, so the value of rnp->qsmaskinit will be stable. 1449 * held, so the value of rnp->qsmaskinit will be stable.
1424 * 1450 *
1425 * We don't include outgoingcpu in the affinity set; use -1 if there is 1451 * We don't include outgoingcpu in the affinity set; use -1 if there is
1426 * no outgoing CPU. If there are no CPUs left in the affinity set, 1452 * no outgoing CPU. If there are no CPUs left in the affinity set,
1427 * this function allows the kthread to execute on any CPU. 1453 * this function allows the kthread to execute on any CPU.
1428 */ 1454 */
1429 static void rcu_boost_kthread_setaffinity(struct rcu_node *rnp, int outgoingcpu) 1455 static void rcu_boost_kthread_setaffinity(struct rcu_node *rnp, int outgoingcpu)
1430 { 1456 {
1431 struct task_struct *t = rnp->boost_kthread_task; 1457 struct task_struct *t = rnp->boost_kthread_task;
1432 unsigned long mask = rnp->qsmaskinit; 1458 unsigned long mask = rnp->qsmaskinit;
1433 cpumask_var_t cm; 1459 cpumask_var_t cm;
1434 int cpu; 1460 int cpu;
1435 1461
1436 if (!t) 1462 if (!t)
1437 return; 1463 return;
1438 if (!zalloc_cpumask_var(&cm, GFP_KERNEL)) 1464 if (!zalloc_cpumask_var(&cm, GFP_KERNEL))
1439 return; 1465 return;
1440 for (cpu = rnp->grplo; cpu <= rnp->grphi; cpu++, mask >>= 1) 1466 for (cpu = rnp->grplo; cpu <= rnp->grphi; cpu++, mask >>= 1)
1441 if ((mask & 0x1) && cpu != outgoingcpu) 1467 if ((mask & 0x1) && cpu != outgoingcpu)
1442 cpumask_set_cpu(cpu, cm); 1468 cpumask_set_cpu(cpu, cm);
1443 if (cpumask_weight(cm) == 0) { 1469 if (cpumask_weight(cm) == 0) {
1444 cpumask_setall(cm); 1470 cpumask_setall(cm);
1445 for (cpu = rnp->grplo; cpu <= rnp->grphi; cpu++) 1471 for (cpu = rnp->grplo; cpu <= rnp->grphi; cpu++)
1446 cpumask_clear_cpu(cpu, cm); 1472 cpumask_clear_cpu(cpu, cm);
1447 WARN_ON_ONCE(cpumask_weight(cm) == 0); 1473 WARN_ON_ONCE(cpumask_weight(cm) == 0);
1448 } 1474 }
1449 set_cpus_allowed_ptr(t, cm); 1475 set_cpus_allowed_ptr(t, cm);
1450 free_cpumask_var(cm); 1476 free_cpumask_var(cm);
1451 } 1477 }
1452 1478
1453 static struct smp_hotplug_thread rcu_cpu_thread_spec = { 1479 static struct smp_hotplug_thread rcu_cpu_thread_spec = {
1454 .store = &rcu_cpu_kthread_task, 1480 .store = &rcu_cpu_kthread_task,
1455 .thread_should_run = rcu_cpu_kthread_should_run, 1481 .thread_should_run = rcu_cpu_kthread_should_run,
1456 .thread_fn = rcu_cpu_kthread, 1482 .thread_fn = rcu_cpu_kthread,
1457 .thread_comm = "rcuc/%u", 1483 .thread_comm = "rcuc/%u",
1458 .setup = rcu_cpu_kthread_setup, 1484 .setup = rcu_cpu_kthread_setup,
1459 .park = rcu_cpu_kthread_park, 1485 .park = rcu_cpu_kthread_park,
1460 }; 1486 };
1461 1487
1462 /* 1488 /*
1463 * Spawn all kthreads -- called as soon as the scheduler is running. 1489 * Spawn all kthreads -- called as soon as the scheduler is running.
1464 */ 1490 */
1465 static int __init rcu_spawn_kthreads(void) 1491 static int __init rcu_spawn_kthreads(void)
1466 { 1492 {
1467 struct rcu_node *rnp; 1493 struct rcu_node *rnp;
1468 int cpu; 1494 int cpu;
1469 1495
1470 rcu_scheduler_fully_active = 1; 1496 rcu_scheduler_fully_active = 1;
1471 for_each_possible_cpu(cpu) 1497 for_each_possible_cpu(cpu)
1472 per_cpu(rcu_cpu_has_work, cpu) = 0; 1498 per_cpu(rcu_cpu_has_work, cpu) = 0;
1473 BUG_ON(smpboot_register_percpu_thread(&rcu_cpu_thread_spec)); 1499 BUG_ON(smpboot_register_percpu_thread(&rcu_cpu_thread_spec));
1474 rnp = rcu_get_root(rcu_state); 1500 rnp = rcu_get_root(rcu_state);
1475 (void)rcu_spawn_one_boost_kthread(rcu_state, rnp); 1501 (void)rcu_spawn_one_boost_kthread(rcu_state, rnp);
1476 if (NUM_RCU_NODES > 1) { 1502 if (NUM_RCU_NODES > 1) {
1477 rcu_for_each_leaf_node(rcu_state, rnp) 1503 rcu_for_each_leaf_node(rcu_state, rnp)
1478 (void)rcu_spawn_one_boost_kthread(rcu_state, rnp); 1504 (void)rcu_spawn_one_boost_kthread(rcu_state, rnp);
1479 } 1505 }
1480 return 0; 1506 return 0;
1481 } 1507 }
1482 early_initcall(rcu_spawn_kthreads); 1508 early_initcall(rcu_spawn_kthreads);
1483 1509
1484 static void __cpuinit rcu_prepare_kthreads(int cpu) 1510 static void __cpuinit rcu_prepare_kthreads(int cpu)
1485 { 1511 {
1486 struct rcu_data *rdp = per_cpu_ptr(rcu_state->rda, cpu); 1512 struct rcu_data *rdp = per_cpu_ptr(rcu_state->rda, cpu);
1487 struct rcu_node *rnp = rdp->mynode; 1513 struct rcu_node *rnp = rdp->mynode;
1488 1514
1489 /* Fire up the incoming CPU's kthread and leaf rcu_node kthread. */ 1515 /* Fire up the incoming CPU's kthread and leaf rcu_node kthread. */
1490 if (rcu_scheduler_fully_active) 1516 if (rcu_scheduler_fully_active)
1491 (void)rcu_spawn_one_boost_kthread(rcu_state, rnp); 1517 (void)rcu_spawn_one_boost_kthread(rcu_state, rnp);
1492 } 1518 }
1493 1519
1494 #else /* #ifdef CONFIG_RCU_BOOST */ 1520 #else /* #ifdef CONFIG_RCU_BOOST */
1495 1521
1496 static void rcu_initiate_boost(struct rcu_node *rnp, unsigned long flags) 1522 static void rcu_initiate_boost(struct rcu_node *rnp, unsigned long flags)
1497 { 1523 {
1498 raw_spin_unlock_irqrestore(&rnp->lock, flags); 1524 raw_spin_unlock_irqrestore(&rnp->lock, flags);
1499 } 1525 }
1500 1526
1501 static void invoke_rcu_callbacks_kthread(void) 1527 static void invoke_rcu_callbacks_kthread(void)
1502 { 1528 {
1503 WARN_ON_ONCE(1); 1529 WARN_ON_ONCE(1);
1504 } 1530 }
1505 1531
1506 static bool rcu_is_callbacks_kthread(void) 1532 static bool rcu_is_callbacks_kthread(void)
1507 { 1533 {
1508 return false; 1534 return false;
1509 } 1535 }
1510 1536
1511 static void rcu_preempt_boost_start_gp(struct rcu_node *rnp) 1537 static void rcu_preempt_boost_start_gp(struct rcu_node *rnp)
1512 { 1538 {
1513 } 1539 }
1514 1540
1515 static void rcu_boost_kthread_setaffinity(struct rcu_node *rnp, int outgoingcpu) 1541 static void rcu_boost_kthread_setaffinity(struct rcu_node *rnp, int outgoingcpu)
1516 { 1542 {
1517 } 1543 }
1518 1544
1519 static int __init rcu_scheduler_really_started(void) 1545 static int __init rcu_scheduler_really_started(void)
1520 { 1546 {
1521 rcu_scheduler_fully_active = 1; 1547 rcu_scheduler_fully_active = 1;
1522 return 0; 1548 return 0;
1523 } 1549 }
1524 early_initcall(rcu_scheduler_really_started); 1550 early_initcall(rcu_scheduler_really_started);
1525 1551
1526 static void __cpuinit rcu_prepare_kthreads(int cpu) 1552 static void __cpuinit rcu_prepare_kthreads(int cpu)
1527 { 1553 {
1528 } 1554 }
1529 1555
1530 #endif /* #else #ifdef CONFIG_RCU_BOOST */ 1556 #endif /* #else #ifdef CONFIG_RCU_BOOST */
1531 1557
1532 #if !defined(CONFIG_RCU_FAST_NO_HZ) 1558 #if !defined(CONFIG_RCU_FAST_NO_HZ)
1533 1559
1534 /* 1560 /*
1535 * Check to see if any future RCU-related work will need to be done 1561 * Check to see if any future RCU-related work will need to be done
1536 * by the current CPU, even if none need be done immediately, returning 1562 * by the current CPU, even if none need be done immediately, returning
1537 * 1 if so. This function is part of the RCU implementation; it is -not- 1563 * 1 if so. This function is part of the RCU implementation; it is -not-
1538 * an exported member of the RCU API. 1564 * an exported member of the RCU API.
1539 * 1565 *
1540 * Because we do not have RCU_FAST_NO_HZ, just check whether this CPU needs 1566 * Because we do not have RCU_FAST_NO_HZ, just check whether this CPU needs
1541 * any flavor of RCU. 1567 * any flavor of RCU.
1542 */ 1568 */
1543 int rcu_needs_cpu(int cpu, unsigned long *delta_jiffies) 1569 int rcu_needs_cpu(int cpu, unsigned long *delta_jiffies)
1544 { 1570 {
1545 *delta_jiffies = ULONG_MAX; 1571 *delta_jiffies = ULONG_MAX;
1546 return rcu_cpu_has_callbacks(cpu, NULL); 1572 return rcu_cpu_has_callbacks(cpu, NULL);
1547 } 1573 }
1548 1574
1549 /* 1575 /*
1550 * Because we do not have RCU_FAST_NO_HZ, don't bother cleaning up 1576 * Because we do not have RCU_FAST_NO_HZ, don't bother cleaning up
1551 * after it. 1577 * after it.
1552 */ 1578 */
1553 static void rcu_cleanup_after_idle(int cpu) 1579 static void rcu_cleanup_after_idle(int cpu)
1554 { 1580 {
1555 } 1581 }
1556 1582
1557 /* 1583 /*
1558 * Do the idle-entry grace-period work, which, because CONFIG_RCU_FAST_NO_HZ=n, 1584 * Do the idle-entry grace-period work, which, because CONFIG_RCU_FAST_NO_HZ=n,
1559 * is nothing. 1585 * is nothing.
1560 */ 1586 */
1561 static void rcu_prepare_for_idle(int cpu) 1587 static void rcu_prepare_for_idle(int cpu)
1562 { 1588 {
1563 } 1589 }
1564 1590
1565 /* 1591 /*
1566 * Don't bother keeping a running count of the number of RCU callbacks 1592 * Don't bother keeping a running count of the number of RCU callbacks
1567 * posted because CONFIG_RCU_FAST_NO_HZ=n. 1593 * posted because CONFIG_RCU_FAST_NO_HZ=n.
1568 */ 1594 */
1569 static void rcu_idle_count_callbacks_posted(void) 1595 static void rcu_idle_count_callbacks_posted(void)
1570 { 1596 {
1571 } 1597 }
1572 1598
1573 #else /* #if !defined(CONFIG_RCU_FAST_NO_HZ) */ 1599 #else /* #if !defined(CONFIG_RCU_FAST_NO_HZ) */
1574 1600
1575 /* 1601 /*
1576 * This code is invoked when a CPU goes idle, at which point we want 1602 * This code is invoked when a CPU goes idle, at which point we want
1577 * to have the CPU do everything required for RCU so that it can enter 1603 * to have the CPU do everything required for RCU so that it can enter
1578 * the energy-efficient dyntick-idle mode. This is handled by a 1604 * the energy-efficient dyntick-idle mode. This is handled by a
1579 * state machine implemented by rcu_prepare_for_idle() below. 1605 * state machine implemented by rcu_prepare_for_idle() below.
1580 * 1606 *
1581 * The following two preprocessor symbols control this state machine: 1607 * The following two preprocessor symbols control this state machine:
1582 * 1608 *
1583 * RCU_IDLE_GP_DELAY gives the number of jiffies that a CPU is permitted 1609 * RCU_IDLE_GP_DELAY gives the number of jiffies that a CPU is permitted
1584 * to sleep in dyntick-idle mode with RCU callbacks pending. This 1610 * to sleep in dyntick-idle mode with RCU callbacks pending. This
1585 * is sized to be roughly one RCU grace period. Those energy-efficiency 1611 * is sized to be roughly one RCU grace period. Those energy-efficiency
1586 * benchmarkers who might otherwise be tempted to set this to a large 1612 * benchmarkers who might otherwise be tempted to set this to a large
1587 * number, be warned: Setting RCU_IDLE_GP_DELAY too high can hang your 1613 * number, be warned: Setting RCU_IDLE_GP_DELAY too high can hang your
1588 * system. And if you are -that- concerned about energy efficiency, 1614 * system. And if you are -that- concerned about energy efficiency,
1589 * just power the system down and be done with it! 1615 * just power the system down and be done with it!
1590 * RCU_IDLE_LAZY_GP_DELAY gives the number of jiffies that a CPU is 1616 * RCU_IDLE_LAZY_GP_DELAY gives the number of jiffies that a CPU is
1591 * permitted to sleep in dyntick-idle mode with only lazy RCU 1617 * permitted to sleep in dyntick-idle mode with only lazy RCU
1592 * callbacks pending. Setting this too high can OOM your system. 1618 * callbacks pending. Setting this too high can OOM your system.
1593 * 1619 *
1594 * The values below work well in practice. If future workloads require 1620 * The values below work well in practice. If future workloads require
1595 * adjustment, they can be converted into kernel config parameters, though 1621 * adjustment, they can be converted into kernel config parameters, though
1596 * making the state machine smarter might be a better option. 1622 * making the state machine smarter might be a better option.
1597 */ 1623 */
1598 #define RCU_IDLE_GP_DELAY 4 /* Roughly one grace period. */ 1624 #define RCU_IDLE_GP_DELAY 4 /* Roughly one grace period. */
1599 #define RCU_IDLE_LAZY_GP_DELAY (6 * HZ) /* Roughly six seconds. */ 1625 #define RCU_IDLE_LAZY_GP_DELAY (6 * HZ) /* Roughly six seconds. */
1600 1626
1601 static int rcu_idle_gp_delay = RCU_IDLE_GP_DELAY; 1627 static int rcu_idle_gp_delay = RCU_IDLE_GP_DELAY;
1602 module_param(rcu_idle_gp_delay, int, 0644); 1628 module_param(rcu_idle_gp_delay, int, 0644);
1603 static int rcu_idle_lazy_gp_delay = RCU_IDLE_LAZY_GP_DELAY; 1629 static int rcu_idle_lazy_gp_delay = RCU_IDLE_LAZY_GP_DELAY;
1604 module_param(rcu_idle_lazy_gp_delay, int, 0644); 1630 module_param(rcu_idle_lazy_gp_delay, int, 0644);
1605 1631
1606 extern int tick_nohz_enabled; 1632 extern int tick_nohz_enabled;
1607 1633
1608 /* 1634 /*
1609 * Try to advance callbacks for all flavors of RCU on the current CPU. 1635 * Try to advance callbacks for all flavors of RCU on the current CPU.
1610 * Afterwards, if there are any callbacks ready for immediate invocation, 1636 * Afterwards, if there are any callbacks ready for immediate invocation,
1611 * return true. 1637 * return true.
1612 */ 1638 */
1613 static bool rcu_try_advance_all_cbs(void) 1639 static bool rcu_try_advance_all_cbs(void)
1614 { 1640 {
1615 bool cbs_ready = false; 1641 bool cbs_ready = false;
1616 struct rcu_data *rdp; 1642 struct rcu_data *rdp;
1617 struct rcu_node *rnp; 1643 struct rcu_node *rnp;
1618 struct rcu_state *rsp; 1644 struct rcu_state *rsp;
1619 1645
1620 for_each_rcu_flavor(rsp) { 1646 for_each_rcu_flavor(rsp) {
1621 rdp = this_cpu_ptr(rsp->rda); 1647 rdp = this_cpu_ptr(rsp->rda);
1622 rnp = rdp->mynode; 1648 rnp = rdp->mynode;
1623 1649
1624 /* 1650 /*
1625 * Don't bother checking unless a grace period has 1651 * Don't bother checking unless a grace period has
1626 * completed since we last checked and there are 1652 * completed since we last checked and there are
1627 * callbacks not yet ready to invoke. 1653 * callbacks not yet ready to invoke.
1628 */ 1654 */
1629 if (rdp->completed != rnp->completed && 1655 if (rdp->completed != rnp->completed &&
1630 rdp->nxttail[RCU_DONE_TAIL] != rdp->nxttail[RCU_NEXT_TAIL]) 1656 rdp->nxttail[RCU_DONE_TAIL] != rdp->nxttail[RCU_NEXT_TAIL])
1631 rcu_process_gp_end(rsp, rdp); 1657 rcu_process_gp_end(rsp, rdp);
1632 1658
1633 if (cpu_has_callbacks_ready_to_invoke(rdp)) 1659 if (cpu_has_callbacks_ready_to_invoke(rdp))
1634 cbs_ready = true; 1660 cbs_ready = true;
1635 } 1661 }
1636 return cbs_ready; 1662 return cbs_ready;
1637 } 1663 }
1638 1664
1639 /* 1665 /*
1640 * Allow the CPU to enter dyntick-idle mode unless it has callbacks ready 1666 * Allow the CPU to enter dyntick-idle mode unless it has callbacks ready
1641 * to invoke. If the CPU has callbacks, try to advance them. Tell the 1667 * to invoke. If the CPU has callbacks, try to advance them. Tell the
1642 * caller to set the timeout based on whether or not there are non-lazy 1668 * caller to set the timeout based on whether or not there are non-lazy
1643 * callbacks. 1669 * callbacks.
1644 * 1670 *
1645 * The caller must have disabled interrupts. 1671 * The caller must have disabled interrupts.
1646 */ 1672 */
1647 int rcu_needs_cpu(int cpu, unsigned long *dj) 1673 int rcu_needs_cpu(int cpu, unsigned long *dj)
1648 { 1674 {
1649 struct rcu_dynticks *rdtp = &per_cpu(rcu_dynticks, cpu); 1675 struct rcu_dynticks *rdtp = &per_cpu(rcu_dynticks, cpu);
1650 1676
1651 /* Snapshot to detect later posting of non-lazy callback. */ 1677 /* Snapshot to detect later posting of non-lazy callback. */
1652 rdtp->nonlazy_posted_snap = rdtp->nonlazy_posted; 1678 rdtp->nonlazy_posted_snap = rdtp->nonlazy_posted;
1653 1679
1654 /* If no callbacks, RCU doesn't need the CPU. */ 1680 /* If no callbacks, RCU doesn't need the CPU. */
1655 if (!rcu_cpu_has_callbacks(cpu, &rdtp->all_lazy)) { 1681 if (!rcu_cpu_has_callbacks(cpu, &rdtp->all_lazy)) {
1656 *dj = ULONG_MAX; 1682 *dj = ULONG_MAX;
1657 return 0; 1683 return 0;
1658 } 1684 }
1659 1685
1660 /* Attempt to advance callbacks. */ 1686 /* Attempt to advance callbacks. */
1661 if (rcu_try_advance_all_cbs()) { 1687 if (rcu_try_advance_all_cbs()) {
1662 /* Some ready to invoke, so initiate later invocation. */ 1688 /* Some ready to invoke, so initiate later invocation. */
1663 invoke_rcu_core(); 1689 invoke_rcu_core();
1664 return 1; 1690 return 1;
1665 } 1691 }
1666 rdtp->last_accelerate = jiffies; 1692 rdtp->last_accelerate = jiffies;
1667 1693
1668 /* Request timer delay depending on laziness, and round. */ 1694 /* Request timer delay depending on laziness, and round. */
1669 if (!rdtp->all_lazy) { 1695 if (!rdtp->all_lazy) {
1670 *dj = round_up(rcu_idle_gp_delay + jiffies, 1696 *dj = round_up(rcu_idle_gp_delay + jiffies,
1671 rcu_idle_gp_delay) - jiffies; 1697 rcu_idle_gp_delay) - jiffies;
1672 } else { 1698 } else {
1673 *dj = round_jiffies(rcu_idle_lazy_gp_delay + jiffies) - jiffies; 1699 *dj = round_jiffies(rcu_idle_lazy_gp_delay + jiffies) - jiffies;
1674 } 1700 }
1675 return 0; 1701 return 0;
1676 } 1702 }
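
As a rough illustration of the rounding above (a minimal userspace sketch, not part of this patch: round_up_pow2() stands in for the kernel's round_up() in the power-of-two case, and the jiffies values are made up), note how rounding each CPU's wakeup to a multiple of the delay batches the resulting timer expirations so that idle CPUs tend to wake together:

#include <stdio.h>

/* Power-of-two round-up, standing in for the kernel's round_up(). */
static unsigned long round_up_pow2(unsigned long x, unsigned long y)
{
	return (x + y - 1) & ~(y - 1);
}

int main(void)
{
	unsigned long gp_delay = 4;	/* RCU_IDLE_GP_DELAY */
	unsigned long now[] = { 1001, 1002, 1003, 1004 };	/* made-up jiffies */

	for (int i = 0; i < 4; i++) {
		unsigned long dj = round_up_pow2(now[i] + gp_delay, gp_delay)
				   - now[i];
		/* All four requests expire on the same 4-jiffy boundary. */
		printf("now=%lu -> sleep %lu jiffies (wake at %lu)\n",
		       now[i], dj, now[i] + dj);
	}
	return 0;
}
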
1677 1703
1678 /* 1704 /*
1679 * Prepare a CPU for idle from an RCU perspective. The first major task 1705 * Prepare a CPU for idle from an RCU perspective. The first major task
1680 * is to sense whether nohz mode has been enabled or disabled via sysfs. 1706 * is to sense whether nohz mode has been enabled or disabled via sysfs.
1681 * The second major task is to check to see if a non-lazy callback has 1707 * The second major task is to check to see if a non-lazy callback has
1682 * arrived at a CPU that previously had only lazy callbacks. The third 1708 * arrived at a CPU that previously had only lazy callbacks. The third
1683 * major task is to accelerate (that is, assign grace-period numbers to) 1709 * major task is to accelerate (that is, assign grace-period numbers to)
1684 * any recently arrived callbacks. 1710 * any recently arrived callbacks.
1685 * 1711 *
1686 * The caller must have disabled interrupts. 1712 * The caller must have disabled interrupts.
1687 */ 1713 */
1688 static void rcu_prepare_for_idle(int cpu) 1714 static void rcu_prepare_for_idle(int cpu)
1689 { 1715 {
1690 struct rcu_data *rdp; 1716 struct rcu_data *rdp;
1691 struct rcu_dynticks *rdtp = &per_cpu(rcu_dynticks, cpu); 1717 struct rcu_dynticks *rdtp = &per_cpu(rcu_dynticks, cpu);
1692 struct rcu_node *rnp; 1718 struct rcu_node *rnp;
1693 struct rcu_state *rsp; 1719 struct rcu_state *rsp;
1694 int tne; 1720 int tne;
1695 1721
1696 /* Handle nohz enablement switches conservatively. */ 1722 /* Handle nohz enablement switches conservatively. */
1697 tne = ACCESS_ONCE(tick_nohz_enabled); 1723 tne = ACCESS_ONCE(tick_nohz_enabled);
1698 if (tne != rdtp->tick_nohz_enabled_snap) { 1724 if (tne != rdtp->tick_nohz_enabled_snap) {
1699 if (rcu_cpu_has_callbacks(cpu, NULL)) 1725 if (rcu_cpu_has_callbacks(cpu, NULL))
1700 invoke_rcu_core(); /* force nohz to see update. */ 1726 invoke_rcu_core(); /* force nohz to see update. */
1701 rdtp->tick_nohz_enabled_snap = tne; 1727 rdtp->tick_nohz_enabled_snap = tne;
1702 return; 1728 return;
1703 } 1729 }
1704 if (!tne) 1730 if (!tne)
1705 return; 1731 return;
1706 1732
1707 /* If this is a no-CBs CPU, no callbacks, just return. */ 1733 /* If this is a no-CBs CPU, no callbacks, just return. */
1708 if (rcu_is_nocb_cpu(cpu)) 1734 if (rcu_is_nocb_cpu(cpu))
1709 return; 1735 return;
1710 1736
1711 /* 1737 /*
1712 * If a non-lazy callback arrived at a CPU having only lazy 1738 * If a non-lazy callback arrived at a CPU having only lazy
1713 * callbacks, invoke RCU core for the side-effect of recalculating 1739 * callbacks, invoke RCU core for the side-effect of recalculating
1714 * idle duration on re-entry to idle. 1740 * idle duration on re-entry to idle.
1715 */ 1741 */
1716 if (rdtp->all_lazy && 1742 if (rdtp->all_lazy &&
1717 rdtp->nonlazy_posted != rdtp->nonlazy_posted_snap) { 1743 rdtp->nonlazy_posted != rdtp->nonlazy_posted_snap) {
1718 invoke_rcu_core(); 1744 invoke_rcu_core();
1719 return; 1745 return;
1720 } 1746 }
1721 1747
1722 /* 1748 /*
1723 * If we have not yet accelerated this jiffy, accelerate all 1749 * If we have not yet accelerated this jiffy, accelerate all
1724 * callbacks on this CPU. 1750 * callbacks on this CPU.
1725 */ 1751 */
1726 if (rdtp->last_accelerate == jiffies) 1752 if (rdtp->last_accelerate == jiffies)
1727 return; 1753 return;
1728 rdtp->last_accelerate = jiffies; 1754 rdtp->last_accelerate = jiffies;
1729 for_each_rcu_flavor(rsp) { 1755 for_each_rcu_flavor(rsp) {
1730 rdp = per_cpu_ptr(rsp->rda, cpu); 1756 rdp = per_cpu_ptr(rsp->rda, cpu);
1731 if (!*rdp->nxttail[RCU_DONE_TAIL]) 1757 if (!*rdp->nxttail[RCU_DONE_TAIL])
1732 continue; 1758 continue;
1733 rnp = rdp->mynode; 1759 rnp = rdp->mynode;
1734 raw_spin_lock(&rnp->lock); /* irqs already disabled. */ 1760 raw_spin_lock(&rnp->lock); /* irqs already disabled. */
1735 rcu_accelerate_cbs(rsp, rnp, rdp); 1761 rcu_accelerate_cbs(rsp, rnp, rdp);
1736 raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */ 1762 raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */
1737 } 1763 }
1738 } 1764 }
1739 1765
1740 /* 1766 /*
1741 * Clean up for exit from idle. Attempt to advance callbacks based on 1767 * Clean up for exit from idle. Attempt to advance callbacks based on
1742 * any grace periods that elapsed while the CPU was idle, and if any 1768 * any grace periods that elapsed while the CPU was idle, and if any
1743 * callbacks are now ready to invoke, initiate invocation. 1769 * callbacks are now ready to invoke, initiate invocation.
1744 */ 1770 */
1745 static void rcu_cleanup_after_idle(int cpu) 1771 static void rcu_cleanup_after_idle(int cpu)
1746 { 1772 {
1747 struct rcu_data *rdp; 1773 struct rcu_data *rdp;
1748 struct rcu_state *rsp; 1774 struct rcu_state *rsp;
1749 1775
1750 if (rcu_is_nocb_cpu(cpu)) 1776 if (rcu_is_nocb_cpu(cpu))
1751 return; 1777 return;
1752 rcu_try_advance_all_cbs(); 1778 rcu_try_advance_all_cbs();
1753 for_each_rcu_flavor(rsp) { 1779 for_each_rcu_flavor(rsp) {
1754 rdp = per_cpu_ptr(rsp->rda, cpu); 1780 rdp = per_cpu_ptr(rsp->rda, cpu);
1755 if (cpu_has_callbacks_ready_to_invoke(rdp)) 1781 if (cpu_has_callbacks_ready_to_invoke(rdp))
1756 invoke_rcu_core(); 1782 invoke_rcu_core();
1757 } 1783 }
1758 } 1784 }
1759 1785
1760 /* 1786 /*
1761 * Keep a running count of the number of non-lazy callbacks posted 1787 * Keep a running count of the number of non-lazy callbacks posted
1762 * on this CPU. This running counter (which is never decremented) allows 1788 * on this CPU. This running counter (which is never decremented) allows
1763 * rcu_prepare_for_idle() to detect when something out of the idle loop 1789 * rcu_prepare_for_idle() to detect when something out of the idle loop
1764 * posts a callback, even if an equal number of callbacks are invoked. 1790 * posts a callback, even if an equal number of callbacks are invoked.
1765 * Of course, callbacks should only be posted from within a trace event 1791 * Of course, callbacks should only be posted from within a trace event
1766 * designed to be called from idle or from within RCU_NONIDLE(). 1792 * designed to be called from idle or from within RCU_NONIDLE().
1767 */ 1793 */
1768 static void rcu_idle_count_callbacks_posted(void) 1794 static void rcu_idle_count_callbacks_posted(void)
1769 { 1795 {
1770 __this_cpu_add(rcu_dynticks.nonlazy_posted, 1); 1796 __this_cpu_add(rcu_dynticks.nonlazy_posted, 1);
1771 } 1797 }
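
The snapshot taken in rcu_needs_cpu() and rechecked in rcu_prepare_for_idle() turns this never-decremented counter into an edge detector for "a non-lazy callback was posted after idle entry". A minimal userspace sketch of that snapshot-compare idiom (illustrative only; the names mirror the ->nonlazy_posted fields but nothing here is kernel code):

#include <stdio.h>

static unsigned long nonlazy_posted;		/* only ever incremented */
static unsigned long nonlazy_posted_snap;	/* taken at idle entry */

static void post_nonlazy_callback(void)
{
	nonlazy_posted++;
}

int main(void)
{
	nonlazy_posted_snap = nonlazy_posted;	/* as in rcu_needs_cpu() */
	post_nonlazy_callback();		/* e.g. from RCU_NONIDLE() code */
	if (nonlazy_posted != nonlazy_posted_snap)	/* as in rcu_prepare_for_idle() */
		printf("non-lazy callback arrived since idle entry\n");
	return 0;
}
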
1772 1798
1773 /* 1799 /*
1774 * Data for flushing lazy RCU callbacks at OOM time. 1800 * Data for flushing lazy RCU callbacks at OOM time.
1775 */ 1801 */
1776 static atomic_t oom_callback_count; 1802 static atomic_t oom_callback_count;
1777 static DECLARE_WAIT_QUEUE_HEAD(oom_callback_wq); 1803 static DECLARE_WAIT_QUEUE_HEAD(oom_callback_wq);
1778 1804
1779 /* 1805 /*
1780 * RCU OOM callback -- decrement the outstanding count and deliver the 1806 * RCU OOM callback -- decrement the outstanding count and deliver the
1781 * wake-up if we are the last one. 1807 * wake-up if we are the last one.
1782 */ 1808 */
1783 static void rcu_oom_callback(struct rcu_head *rhp) 1809 static void rcu_oom_callback(struct rcu_head *rhp)
1784 { 1810 {
1785 if (atomic_dec_and_test(&oom_callback_count)) 1811 if (atomic_dec_and_test(&oom_callback_count))
1786 wake_up(&oom_callback_wq); 1812 wake_up(&oom_callback_wq);
1787 } 1813 }
1788 1814
1789 /* 1815 /*
1790 * Post an rcu_oom_notify callback on the current CPU if it has at 1816 * Post an rcu_oom_notify callback on the current CPU if it has at
1791 * least one lazy callback. This will unnecessarily post callbacks 1817 * least one lazy callback. This will unnecessarily post callbacks
1792 * to CPUs that already have a non-lazy callback at the end of their 1818 * to CPUs that already have a non-lazy callback at the end of their
1793 * callback list, but this is an infrequent operation, so accept some 1819 * callback list, but this is an infrequent operation, so accept some
1794 * extra overhead to keep things simple. 1820 * extra overhead to keep things simple.
1795 */ 1821 */
1796 static void rcu_oom_notify_cpu(void *unused) 1822 static void rcu_oom_notify_cpu(void *unused)
1797 { 1823 {
1798 struct rcu_state *rsp; 1824 struct rcu_state *rsp;
1799 struct rcu_data *rdp; 1825 struct rcu_data *rdp;
1800 1826
1801 for_each_rcu_flavor(rsp) { 1827 for_each_rcu_flavor(rsp) {
1802 rdp = __this_cpu_ptr(rsp->rda); 1828 rdp = __this_cpu_ptr(rsp->rda);
1803 if (rdp->qlen_lazy != 0) { 1829 if (rdp->qlen_lazy != 0) {
1804 atomic_inc(&oom_callback_count); 1830 atomic_inc(&oom_callback_count);
1805 rsp->call(&rdp->oom_head, rcu_oom_callback); 1831 rsp->call(&rdp->oom_head, rcu_oom_callback);
1806 } 1832 }
1807 } 1833 }
1808 } 1834 }
1809 1835
1810 /* 1836 /*
1811 * If low on memory, ensure that each CPU has a non-lazy callback. 1837 * If low on memory, ensure that each CPU has a non-lazy callback.
1812 * This will wake up CPUs that have only lazy callbacks, in turn 1838 * This will wake up CPUs that have only lazy callbacks, in turn
1813 * ensuring that they free up the corresponding memory in a timely manner. 1839 * ensuring that they free up the corresponding memory in a timely manner.
1814 * Because an uncertain amount of memory will be freed in some uncertain 1840 * Because an uncertain amount of memory will be freed in some uncertain
1815 * timeframe, we do not claim to have freed anything. 1841 * timeframe, we do not claim to have freed anything.
1816 */ 1842 */
1817 static int rcu_oom_notify(struct notifier_block *self, 1843 static int rcu_oom_notify(struct notifier_block *self,
1818 unsigned long notused, void *nfreed) 1844 unsigned long notused, void *nfreed)
1819 { 1845 {
1820 int cpu; 1846 int cpu;
1821 1847
1822 /* Wait for callbacks from earlier instance to complete. */ 1848 /* Wait for callbacks from earlier instance to complete. */
1823 wait_event(oom_callback_wq, atomic_read(&oom_callback_count) == 0); 1849 wait_event(oom_callback_wq, atomic_read(&oom_callback_count) == 0);
1824 1850
1825 /* 1851 /*
1826 * Prevent premature wakeup: ensure that all increments happen 1852 * Prevent premature wakeup: ensure that all increments happen
1827 * before there is a chance of the counter reaching zero. 1853 * before there is a chance of the counter reaching zero.
1828 */ 1854 */
1829 atomic_set(&oom_callback_count, 1); 1855 atomic_set(&oom_callback_count, 1);
1830 1856
1831 get_online_cpus(); 1857 get_online_cpus();
1832 for_each_online_cpu(cpu) { 1858 for_each_online_cpu(cpu) {
1833 smp_call_function_single(cpu, rcu_oom_notify_cpu, NULL, 1); 1859 smp_call_function_single(cpu, rcu_oom_notify_cpu, NULL, 1);
1834 cond_resched(); 1860 cond_resched();
1835 } 1861 }
1836 put_online_cpus(); 1862 put_online_cpus();
1837 1863
1838 /* Unconditionally decrement: no need to wake ourselves up. */ 1864 /* Unconditionally decrement: no need to wake ourselves up. */
1839 atomic_dec(&oom_callback_count); 1865 atomic_dec(&oom_callback_count);
1840 1866
1841 return NOTIFY_OK; 1867 return NOTIFY_OK;
1842 } 1868 }
1843 1869
1844 static struct notifier_block rcu_oom_nb = { 1870 static struct notifier_block rcu_oom_nb = {
1845 .notifier_call = rcu_oom_notify 1871 .notifier_call = rcu_oom_notify
1846 }; 1872 };
1847 1873
1848 static int __init rcu_register_oom_notifier(void) 1874 static int __init rcu_register_oom_notifier(void)
1849 { 1875 {
1850 register_oom_notifier(&rcu_oom_nb); 1876 register_oom_notifier(&rcu_oom_nb);
1851 return 0; 1877 return 0;
1852 } 1878 }
1853 early_initcall(rcu_register_oom_notifier); 1879 early_initcall(rcu_register_oom_notifier);
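
The notifier above relies on biasing oom_callback_count to 1 so that the wait_event() cannot be satisfied until the final unconditional atomic_dec(), no matter how quickly individual callbacks complete. A single-threaded sketch of just that counting invariant (plain ints instead of atomic_t, no waitqueue; purely illustrative):

#include <stdio.h>

static int oom_callback_count;

/* Stands in for rcu_oom_notify_cpu(): one increment per posted callback. */
static void post_callback(void) { oom_callback_count++; }

/* Stands in for rcu_oom_callback(): decrement, report whether to wake the waiter. */
static int callback_done(void) { return --oom_callback_count == 0; }

int main(void)
{
	oom_callback_count = 1;			/* bias: cannot reach zero early */

	post_callback();			/* CPU 0 posts its callback... */
	if (callback_done())			/* ...which happens to run at once */
		printf("premature wakeup\n");	/* never fires, thanks to the bias */

	post_callback();			/* CPU 1 posts its callback */
	callback_done();			/* CPU 1's callback runs */

	if (callback_done())			/* final unconditional decrement */
		printf("all OOM callbacks finished; waiter proceeds\n");
	return 0;
}
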
1854 1880
1855 #endif /* #else #if !defined(CONFIG_RCU_FAST_NO_HZ) */ 1881 #endif /* #else #if !defined(CONFIG_RCU_FAST_NO_HZ) */
1856 1882
1857 #ifdef CONFIG_RCU_CPU_STALL_INFO 1883 #ifdef CONFIG_RCU_CPU_STALL_INFO
1858 1884
1859 #ifdef CONFIG_RCU_FAST_NO_HZ 1885 #ifdef CONFIG_RCU_FAST_NO_HZ
1860 1886
1861 static void print_cpu_stall_fast_no_hz(char *cp, int cpu) 1887 static void print_cpu_stall_fast_no_hz(char *cp, int cpu)
1862 { 1888 {
1863 struct rcu_dynticks *rdtp = &per_cpu(rcu_dynticks, cpu); 1889 struct rcu_dynticks *rdtp = &per_cpu(rcu_dynticks, cpu);
1864 unsigned long nlpd = rdtp->nonlazy_posted - rdtp->nonlazy_posted_snap; 1890 unsigned long nlpd = rdtp->nonlazy_posted - rdtp->nonlazy_posted_snap;
1865 1891
1866 sprintf(cp, "last_accelerate: %04lx/%04lx, nonlazy_posted: %ld, %c%c", 1892 sprintf(cp, "last_accelerate: %04lx/%04lx, nonlazy_posted: %ld, %c%c",
1867 rdtp->last_accelerate & 0xffff, jiffies & 0xffff, 1893 rdtp->last_accelerate & 0xffff, jiffies & 0xffff,
1868 ulong2long(nlpd), 1894 ulong2long(nlpd),
1869 rdtp->all_lazy ? 'L' : '.', 1895 rdtp->all_lazy ? 'L' : '.',
1870 rdtp->tick_nohz_enabled_snap ? '.' : 'D'); 1896 rdtp->tick_nohz_enabled_snap ? '.' : 'D');
1871 } 1897 }
1872 1898
1873 #else /* #ifdef CONFIG_RCU_FAST_NO_HZ */ 1899 #else /* #ifdef CONFIG_RCU_FAST_NO_HZ */
1874 1900
1875 static void print_cpu_stall_fast_no_hz(char *cp, int cpu) 1901 static void print_cpu_stall_fast_no_hz(char *cp, int cpu)
1876 { 1902 {
1877 *cp = '\0'; 1903 *cp = '\0';
1878 } 1904 }
1879 1905
1880 #endif /* #else #ifdef CONFIG_RCU_FAST_NO_HZ */ 1906 #endif /* #else #ifdef CONFIG_RCU_FAST_NO_HZ */
1881 1907
1882 /* Initiate the stall-info list. */ 1908 /* Initiate the stall-info list. */
1883 static void print_cpu_stall_info_begin(void) 1909 static void print_cpu_stall_info_begin(void)
1884 { 1910 {
1885 pr_cont("\n"); 1911 pr_cont("\n");
1886 } 1912 }
1887 1913
1888 /* 1914 /*
1889 * Print out diagnostic information for the specified stalled CPU. 1915 * Print out diagnostic information for the specified stalled CPU.
1890 * 1916 *
1891 * If the specified CPU is aware of the current RCU grace period 1917 * If the specified CPU is aware of the current RCU grace period
1892 * (flavor specified by rsp), then print the number of scheduling 1918 * (flavor specified by rsp), then print the number of scheduling
1893 * clock interrupts the CPU has taken during the time that it has 1919 * clock interrupts the CPU has taken during the time that it has
1894 * been aware. Otherwise, print the number of RCU grace periods 1920 * been aware. Otherwise, print the number of RCU grace periods
1895 * that this CPU is ignorant of, for example, "1" if the CPU was 1921 * that this CPU is ignorant of, for example, "1" if the CPU was
1896 * aware of the previous grace period. 1922 * aware of the previous grace period.
1897 * 1923 *
1898 * Also print out idle and (if CONFIG_RCU_FAST_NO_HZ) idle-entry info. 1924 * Also print out idle and (if CONFIG_RCU_FAST_NO_HZ) idle-entry info.
1899 */ 1925 */
1900 static void print_cpu_stall_info(struct rcu_state *rsp, int cpu) 1926 static void print_cpu_stall_info(struct rcu_state *rsp, int cpu)
1901 { 1927 {
1902 char fast_no_hz[72]; 1928 char fast_no_hz[72];
1903 struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu); 1929 struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu);
1904 struct rcu_dynticks *rdtp = rdp->dynticks; 1930 struct rcu_dynticks *rdtp = rdp->dynticks;
1905 char *ticks_title; 1931 char *ticks_title;
1906 unsigned long ticks_value; 1932 unsigned long ticks_value;
1907 1933
1908 if (rsp->gpnum == rdp->gpnum) { 1934 if (rsp->gpnum == rdp->gpnum) {
1909 ticks_title = "ticks this GP"; 1935 ticks_title = "ticks this GP";
1910 ticks_value = rdp->ticks_this_gp; 1936 ticks_value = rdp->ticks_this_gp;
1911 } else { 1937 } else {
1912 ticks_title = "GPs behind"; 1938 ticks_title = "GPs behind";
1913 ticks_value = rsp->gpnum - rdp->gpnum; 1939 ticks_value = rsp->gpnum - rdp->gpnum;
1914 } 1940 }
1915 print_cpu_stall_fast_no_hz(fast_no_hz, cpu); 1941 print_cpu_stall_fast_no_hz(fast_no_hz, cpu);
1916 pr_err("\t%d: (%lu %s) idle=%03x/%llx/%d softirq=%u/%u %s\n", 1942 pr_err("\t%d: (%lu %s) idle=%03x/%llx/%d softirq=%u/%u %s\n",
1917 cpu, ticks_value, ticks_title, 1943 cpu, ticks_value, ticks_title,
1918 atomic_read(&rdtp->dynticks) & 0xfff, 1944 atomic_read(&rdtp->dynticks) & 0xfff,
1919 rdtp->dynticks_nesting, rdtp->dynticks_nmi_nesting, 1945 rdtp->dynticks_nesting, rdtp->dynticks_nmi_nesting,
1920 rdp->softirq_snap, kstat_softirqs_cpu(RCU_SOFTIRQ, cpu), 1946 rdp->softirq_snap, kstat_softirqs_cpu(RCU_SOFTIRQ, cpu),
1921 fast_no_hz); 1947 fast_no_hz);
1922 } 1948 }
1923 1949
1924 /* Terminate the stall-info list. */ 1950 /* Terminate the stall-info list. */
1925 static void print_cpu_stall_info_end(void) 1951 static void print_cpu_stall_info_end(void)
1926 { 1952 {
1927 pr_err("\t"); 1953 pr_err("\t");
1928 } 1954 }
1929 1955
1930 /* Zero ->ticks_this_gp for all flavors of RCU. */ 1956 /* Zero ->ticks_this_gp for all flavors of RCU. */
1931 static void zero_cpu_stall_ticks(struct rcu_data *rdp) 1957 static void zero_cpu_stall_ticks(struct rcu_data *rdp)
1932 { 1958 {
1933 rdp->ticks_this_gp = 0; 1959 rdp->ticks_this_gp = 0;
1934 rdp->softirq_snap = kstat_softirqs_cpu(RCU_SOFTIRQ, smp_processor_id()); 1960 rdp->softirq_snap = kstat_softirqs_cpu(RCU_SOFTIRQ, smp_processor_id());
1935 } 1961 }
1936 1962
1937 /* Increment ->ticks_this_gp for all flavors of RCU. */ 1963 /* Increment ->ticks_this_gp for all flavors of RCU. */
1938 static void increment_cpu_stall_ticks(void) 1964 static void increment_cpu_stall_ticks(void)
1939 { 1965 {
1940 struct rcu_state *rsp; 1966 struct rcu_state *rsp;
1941 1967
1942 for_each_rcu_flavor(rsp) 1968 for_each_rcu_flavor(rsp)
1943 __this_cpu_ptr(rsp->rda)->ticks_this_gp++; 1969 __this_cpu_ptr(rsp->rda)->ticks_this_gp++;
1944 } 1970 }
1945 1971
1946 #else /* #ifdef CONFIG_RCU_CPU_STALL_INFO */ 1972 #else /* #ifdef CONFIG_RCU_CPU_STALL_INFO */
1947 1973
1948 static void print_cpu_stall_info_begin(void) 1974 static void print_cpu_stall_info_begin(void)
1949 { 1975 {
1950 pr_cont(" {"); 1976 pr_cont(" {");
1951 } 1977 }
1952 1978
1953 static void print_cpu_stall_info(struct rcu_state *rsp, int cpu) 1979 static void print_cpu_stall_info(struct rcu_state *rsp, int cpu)
1954 { 1980 {
1955 pr_cont(" %d", cpu); 1981 pr_cont(" %d", cpu);
1956 } 1982 }
1957 1983
1958 static void print_cpu_stall_info_end(void) 1984 static void print_cpu_stall_info_end(void)
1959 { 1985 {
1960 pr_cont("} "); 1986 pr_cont("} ");
1961 } 1987 }
1962 1988
1963 static void zero_cpu_stall_ticks(struct rcu_data *rdp) 1989 static void zero_cpu_stall_ticks(struct rcu_data *rdp)
1964 { 1990 {
1965 } 1991 }
1966 1992
1967 static void increment_cpu_stall_ticks(void) 1993 static void increment_cpu_stall_ticks(void)
1968 { 1994 {
1969 } 1995 }
1970 1996
1971 #endif /* #else #ifdef CONFIG_RCU_CPU_STALL_INFO */ 1997 #endif /* #else #ifdef CONFIG_RCU_CPU_STALL_INFO */
1972 1998
1973 #ifdef CONFIG_RCU_NOCB_CPU 1999 #ifdef CONFIG_RCU_NOCB_CPU
1974 2000
1975 /* 2001 /*
1976 * Offload callback processing from the boot-time-specified set of CPUs 2002 * Offload callback processing from the boot-time-specified set of CPUs
1977 * specified by rcu_nocb_mask. For each CPU in the set, there is a 2003 * specified by rcu_nocb_mask. For each CPU in the set, there is a
1978 * kthread created that pulls the callbacks from the corresponding CPU, 2004 * kthread created that pulls the callbacks from the corresponding CPU,
1979 * waits for a grace period to elapse, and invokes the callbacks. 2005 * waits for a grace period to elapse, and invokes the callbacks.
1980 * The no-CBs CPUs do a wake_up() on their kthread when they insert 2006 * The no-CBs CPUs do a wake_up() on their kthread when they insert
1981 * a callback into any empty list, unless the rcu_nocb_poll boot parameter 2007 * a callback into any empty list, unless the rcu_nocb_poll boot parameter
1982 * has been specified, in which case each kthread actively polls its 2008 * has been specified, in which case each kthread actively polls its
1983 * CPU. (Which isn't so great for energy efficiency, but which does 2009 * CPU. (Which isn't so great for energy efficiency, but which does
1984 * reduce RCU's overhead on that CPU.) 2010 * reduce RCU's overhead on that CPU.)
1985 * 2011 *
1986 * This is intended to be used in conjunction with Frederic Weisbecker's 2012 * This is intended to be used in conjunction with Frederic Weisbecker's
1987 * adaptive-idle work, which would seriously reduce OS jitter on CPUs 2013 * adaptive-idle work, which would seriously reduce OS jitter on CPUs
1988 * running CPU-bound user-mode computations. 2014 * running CPU-bound user-mode computations.
1989 * 2015 *
1990 * Offloading of callback processing could also in theory be used as 2016 * Offloading of callback processing could also in theory be used as
1991 * an energy-efficiency measure because CPUs with no RCU callbacks 2017 * an energy-efficiency measure because CPUs with no RCU callbacks
1992 * queued are more aggressive about entering dyntick-idle mode. 2018 * queued are more aggressive about entering dyntick-idle mode.
1993 */ 2019 */
1994 2020
1995 2021
1996 /* Parse the boot-time rcu_nocb_mask CPU list from the kernel parameters. */ 2022 /* Parse the boot-time rcu_nocb_mask CPU list from the kernel parameters. */
1997 static int __init rcu_nocb_setup(char *str) 2023 static int __init rcu_nocb_setup(char *str)
1998 { 2024 {
1999 alloc_bootmem_cpumask_var(&rcu_nocb_mask); 2025 alloc_bootmem_cpumask_var(&rcu_nocb_mask);
2000 have_rcu_nocb_mask = true; 2026 have_rcu_nocb_mask = true;
2001 cpulist_parse(str, rcu_nocb_mask); 2027 cpulist_parse(str, rcu_nocb_mask);
2002 return 1; 2028 return 1;
2003 } 2029 }
2004 __setup("rcu_nocbs=", rcu_nocb_setup); 2030 __setup("rcu_nocbs=", rcu_nocb_setup);
2005 2031
2006 static int __init parse_rcu_nocb_poll(char *arg) 2032 static int __init parse_rcu_nocb_poll(char *arg)
2007 { 2033 {
2008 rcu_nocb_poll = 1; 2034 rcu_nocb_poll = 1;
2009 return 0; 2035 return 0;
2010 } 2036 }
2011 early_param("rcu_nocb_poll", parse_rcu_nocb_poll); 2037 early_param("rcu_nocb_poll", parse_rcu_nocb_poll);
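
As a usage illustration (an example command line, not part of this patch): booting with rcu_nocbs=1-7 offloads callback processing for CPUs 1-7 to their rcuo kthreads, and adding rcu_nocb_poll makes those kthreads poll their CPUs instead of waiting for wake-ups, reducing overhead on the offloaded CPUs at some cost in energy efficiency.
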
2012 2038
2013 /* 2039 /*
2014 * Do any no-CBs CPUs need another grace period? 2040 * Do any no-CBs CPUs need another grace period?
2015 * 2041 *
2016 * Interrupts must be disabled. If the caller does not hold the root 2042 * Interrupts must be disabled. If the caller does not hold the root
2017 * rcu_node structure's ->lock, the results are advisory only. 2043 * rcu_node structure's ->lock, the results are advisory only.
2018 */ 2044 */
2019 static int rcu_nocb_needs_gp(struct rcu_state *rsp) 2045 static int rcu_nocb_needs_gp(struct rcu_state *rsp)
2020 { 2046 {
2021 struct rcu_node *rnp = rcu_get_root(rsp); 2047 struct rcu_node *rnp = rcu_get_root(rsp);
2022 2048
2023 return rnp->need_future_gp[(ACCESS_ONCE(rnp->completed) + 1) & 0x1]; 2049 return rnp->need_future_gp[(ACCESS_ONCE(rnp->completed) + 1) & 0x1];
2024 } 2050 }
2025 2051
2026 /* 2052 /*
2027 * Wake up any no-CBs CPUs' kthreads that were waiting on the just-ended 2053 * Wake up any no-CBs CPUs' kthreads that were waiting on the just-ended
2028 * grace period. 2054 * grace period.
2029 */ 2055 */
2030 static void rcu_nocb_gp_cleanup(struct rcu_state *rsp, struct rcu_node *rnp) 2056 static void rcu_nocb_gp_cleanup(struct rcu_state *rsp, struct rcu_node *rnp)
2031 { 2057 {
2032 wake_up_all(&rnp->nocb_gp_wq[rnp->completed & 0x1]); 2058 wake_up_all(&rnp->nocb_gp_wq[rnp->completed & 0x1]);
2033 } 2059 }
2034 2060
2035 /* 2061 /*
2036 * Set the root rcu_node structure's ->need_future_gp field 2062 * Set the root rcu_node structure's ->need_future_gp field
2037 * based on the sum of those of all rcu_node structures. This does 2063 * based on the sum of those of all rcu_node structures. This does
2038 * double-count the root rcu_node structure's requests, but this 2064 * double-count the root rcu_node structure's requests, but this
2039 * is necessary to handle the possibility of a rcu_nocb_kthread() 2065 * is necessary to handle the possibility of a rcu_nocb_kthread()
2040 * having awakened during the time that the rcu_node structures 2066 * having awakened during the time that the rcu_node structures
2041 * were being updated for the end of the previous grace period. 2067 * were being updated for the end of the previous grace period.
2042 */ 2068 */
2043 static void rcu_nocb_gp_set(struct rcu_node *rnp, int nrq) 2069 static void rcu_nocb_gp_set(struct rcu_node *rnp, int nrq)
2044 { 2070 {
2045 rnp->need_future_gp[(rnp->completed + 1) & 0x1] += nrq; 2071 rnp->need_future_gp[(rnp->completed + 1) & 0x1] += nrq;
2046 } 2072 }
2047 2073
2048 static void rcu_init_one_nocb(struct rcu_node *rnp) 2074 static void rcu_init_one_nocb(struct rcu_node *rnp)
2049 { 2075 {
2050 init_waitqueue_head(&rnp->nocb_gp_wq[0]); 2076 init_waitqueue_head(&rnp->nocb_gp_wq[0]);
2051 init_waitqueue_head(&rnp->nocb_gp_wq[1]); 2077 init_waitqueue_head(&rnp->nocb_gp_wq[1]);
2052 } 2078 }
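
The two wait queues initialized here are indexed by grace-period parity: rcu_nocb_needs_gp() and rcu_nocb_gp_set() look at slot (completed + 1) & 0x1 for the upcoming grace period, while rcu_nocb_gp_cleanup() wakes slot completed & 0x1 once that grace period has ended, so the two slots alternate roles. A minimal sketch of the indexing arithmetic (userspace, made-up grace-period numbers, illustrative only):

#include <stdio.h>

int main(void)
{
	unsigned long completed = 41;	/* hypothetical last completed GP */

	/* Waiters needing the next grace period park on slot (completed + 1) & 0x1. */
	printf("waiters for GP %lu use slot %lu\n",
	       completed + 1, (completed + 1) & 0x1);

	/* That grace period ends: cleanup wakes its slot, the other slot refills. */
	completed++;
	printf("GP %lu done: wake slot %lu; slot %lu now collects new waiters\n",
	       completed, completed & 0x1, (completed + 1) & 0x1);
	return 0;
}
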
2053 2079
2054 /* Is the specified CPU a no-CBs CPU? */ 2080 /* Is the specified CPU a no-CBs CPU? */
2055 bool rcu_is_nocb_cpu(int cpu) 2081 bool rcu_is_nocb_cpu(int cpu)
2056 { 2082 {
2057 if (have_rcu_nocb_mask) 2083 if (have_rcu_nocb_mask)
2058 return cpumask_test_cpu(cpu, rcu_nocb_mask); 2084 return cpumask_test_cpu(cpu, rcu_nocb_mask);
2059 return false; 2085 return false;
2060 } 2086 }
2061 2087
2062 /* 2088 /*
2063 * Enqueue the specified string of rcu_head structures onto the specified 2089 * Enqueue the specified string of rcu_head structures onto the specified
2064 * CPU's no-CBs lists. The CPU is specified by rdp, the head of the 2090 * CPU's no-CBs lists. The CPU is specified by rdp, the head of the
2065 * string by rhp, and the tail of the string by rhtp. The non-lazy/lazy 2091 * string by rhp, and the tail of the string by rhtp. The non-lazy/lazy
2066 * counts are supplied by rhcount and rhcount_lazy. 2092 * counts are supplied by rhcount and rhcount_lazy.
2067 * 2093 *
2068 * If warranted, also wake up the kthread servicing this CPU's queues. 2094 * If warranted, also wake up the kthread servicing this CPU's queues.
2069 */ 2095 */
2070 static void __call_rcu_nocb_enqueue(struct rcu_data *rdp, 2096 static void __call_rcu_nocb_enqueue(struct rcu_data *rdp,
2071 struct rcu_head *rhp, 2097 struct rcu_head *rhp,
2072 struct rcu_head **rhtp, 2098 struct rcu_head **rhtp,
2073 int rhcount, int rhcount_lazy) 2099 int rhcount, int rhcount_lazy)
2074 { 2100 {
2075 int len; 2101 int len;
2076 struct rcu_head **old_rhpp; 2102 struct rcu_head **old_rhpp;
2077 struct task_struct *t; 2103 struct task_struct *t;
2078 2104
2079 /* Enqueue the callback on the nocb list and update counts. */ 2105 /* Enqueue the callback on the nocb list and update counts. */
2080 old_rhpp = xchg(&rdp->nocb_tail, rhtp); 2106 old_rhpp = xchg(&rdp->nocb_tail, rhtp);
2081 ACCESS_ONCE(*old_rhpp) = rhp; 2107 ACCESS_ONCE(*old_rhpp) = rhp;
2082 atomic_long_add(rhcount, &rdp->nocb_q_count); 2108 atomic_long_add(rhcount, &rdp->nocb_q_count);
2083 atomic_long_add(rhcount_lazy, &rdp->nocb_q_count_lazy); 2109 atomic_long_add(rhcount_lazy, &rdp->nocb_q_count_lazy);
2084 2110
2085 /* If we are not being polled and there is a kthread, awaken it ... */ 2111 /* If we are not being polled and there is a kthread, awaken it ... */
2086 t = ACCESS_ONCE(rdp->nocb_kthread); 2112 t = ACCESS_ONCE(rdp->nocb_kthread);
2087 if (rcu_nocb_poll | !t) 2113 if (rcu_nocb_poll | !t)
2088 return; 2114 return;
2089 len = atomic_long_read(&rdp->nocb_q_count); 2115 len = atomic_long_read(&rdp->nocb_q_count);
2090 if (old_rhpp == &rdp->nocb_head) { 2116 if (old_rhpp == &rdp->nocb_head) {
2091 wake_up(&rdp->nocb_wq); /* ... only if queue was empty ... */ 2117 wake_up(&rdp->nocb_wq); /* ... only if queue was empty ... */
2092 rdp->qlen_last_fqs_check = 0; 2118 rdp->qlen_last_fqs_check = 0;
2093 } else if (len > rdp->qlen_last_fqs_check + qhimark) { 2119 } else if (len > rdp->qlen_last_fqs_check + qhimark) {
2094 wake_up_process(t); /* ... or if many callbacks queued. */ 2120 wake_up_process(t); /* ... or if many callbacks queued. */
2095 rdp->qlen_last_fqs_check = LONG_MAX / 2; 2121 rdp->qlen_last_fqs_check = LONG_MAX / 2;
2096 } 2122 }
2097 return; 2123 return;
2098 } 2124 }
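
The enqueue above is lock-free: xchg() atomically repoints ->nocb_tail at the new element's ->next field, and only then is the element published through the old tail pointer, which is why rcu_nocb_kthread() below can briefly observe a NULL ->next and must wait for that store to land. A minimal single-threaded userspace analogue of the append (C11 atomics standing in for the kernel's xchg()/ACCESS_ONCE(); it shows only the pointer dance, not the concurrency):

#include <stdatomic.h>
#include <stddef.h>
#include <stdio.h>

struct cb {
	struct cb *next;
	int id;
};

static struct cb *nocb_head;
/* Tail: address of the last ->next slot (or of nocb_head while empty). */
static _Atomic(struct cb **) nocb_tail = &nocb_head;

static void nocb_enqueue(struct cb *new)
{
	struct cb **old_tail;

	new->next = NULL;
	/* Claim the old tail slot atomically, then publish the new element. */
	old_tail = atomic_exchange(&nocb_tail, &new->next);
	*old_tail = new;	/* a consumer may spin briefly waiting for this store */
}

int main(void)
{
	struct cb a = { .id = 1 }, b = { .id = 2 };

	nocb_enqueue(&a);
	nocb_enqueue(&b);
	for (struct cb *p = nocb_head; p; p = p->next)
		printf("queued cb %d\n", p->id);
	return 0;
}
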
2099 2125
2100 /* 2126 /*
2101 * This is a helper for __call_rcu(), which invokes this when the normal 2127 * This is a helper for __call_rcu(), which invokes this when the normal
2102 * callback queue is inoperable. If this is not a no-CBs CPU, this 2128 * callback queue is inoperable. If this is not a no-CBs CPU, this
2103 * function returns failure back to __call_rcu(), which can complain 2129 * function returns failure back to __call_rcu(), which can complain
2104 * appropriately. 2130 * appropriately.
2105 * 2131 *
2106 * Otherwise, this function queues the callback where the corresponding 2132 * Otherwise, this function queues the callback where the corresponding
2107 * "rcuo" kthread can find it. 2133 * "rcuo" kthread can find it.
2108 */ 2134 */
2109 static bool __call_rcu_nocb(struct rcu_data *rdp, struct rcu_head *rhp, 2135 static bool __call_rcu_nocb(struct rcu_data *rdp, struct rcu_head *rhp,
2110 bool lazy) 2136 bool lazy)
2111 { 2137 {
2112 2138
2113 if (!rcu_is_nocb_cpu(rdp->cpu)) 2139 if (!rcu_is_nocb_cpu(rdp->cpu))
2114 return 0; 2140 return 0;
2115 __call_rcu_nocb_enqueue(rdp, rhp, &rhp->next, 1, lazy); 2141 __call_rcu_nocb_enqueue(rdp, rhp, &rhp->next, 1, lazy);
2116 if (__is_kfree_rcu_offset((unsigned long)rhp->func)) 2142 if (__is_kfree_rcu_offset((unsigned long)rhp->func))
2117 trace_rcu_kfree_callback(rdp->rsp->name, rhp, 2143 trace_rcu_kfree_callback(rdp->rsp->name, rhp,
2118 (unsigned long)rhp->func, 2144 (unsigned long)rhp->func,
2119 rdp->qlen_lazy, rdp->qlen); 2145 rdp->qlen_lazy, rdp->qlen);
2120 else 2146 else
2121 trace_rcu_callback(rdp->rsp->name, rhp, 2147 trace_rcu_callback(rdp->rsp->name, rhp,
2122 rdp->qlen_lazy, rdp->qlen); 2148 rdp->qlen_lazy, rdp->qlen);
2123 return 1; 2149 return 1;
2124 } 2150 }
2125 2151
2126 /* 2152 /*
2127 * Adopt orphaned callbacks on a no-CBs CPU, or return 0 if this is 2153 * Adopt orphaned callbacks on a no-CBs CPU, or return 0 if this is
2128 * not a no-CBs CPU. 2154 * not a no-CBs CPU.
2129 */ 2155 */
2130 static bool __maybe_unused rcu_nocb_adopt_orphan_cbs(struct rcu_state *rsp, 2156 static bool __maybe_unused rcu_nocb_adopt_orphan_cbs(struct rcu_state *rsp,
2131 struct rcu_data *rdp) 2157 struct rcu_data *rdp)
2132 { 2158 {
2133 long ql = rsp->qlen; 2159 long ql = rsp->qlen;
2134 long qll = rsp->qlen_lazy; 2160 long qll = rsp->qlen_lazy;
2135 2161
2136 /* If this is not a no-CBs CPU, tell the caller to do it the old way. */ 2162 /* If this is not a no-CBs CPU, tell the caller to do it the old way. */
2137 if (!rcu_is_nocb_cpu(smp_processor_id())) 2163 if (!rcu_is_nocb_cpu(smp_processor_id()))
2138 return 0; 2164 return 0;
2139 rsp->qlen = 0; 2165 rsp->qlen = 0;
2140 rsp->qlen_lazy = 0; 2166 rsp->qlen_lazy = 0;
2141 2167
2142 /* First, enqueue the donelist, if any. This preserves CB ordering. */ 2168 /* First, enqueue the donelist, if any. This preserves CB ordering. */
2143 if (rsp->orphan_donelist != NULL) { 2169 if (rsp->orphan_donelist != NULL) {
2144 __call_rcu_nocb_enqueue(rdp, rsp->orphan_donelist, 2170 __call_rcu_nocb_enqueue(rdp, rsp->orphan_donelist,
2145 rsp->orphan_donetail, ql, qll); 2171 rsp->orphan_donetail, ql, qll);
2146 ql = qll = 0; 2172 ql = qll = 0;
2147 rsp->orphan_donelist = NULL; 2173 rsp->orphan_donelist = NULL;
2148 rsp->orphan_donetail = &rsp->orphan_donelist; 2174 rsp->orphan_donetail = &rsp->orphan_donelist;
2149 } 2175 }
2150 if (rsp->orphan_nxtlist != NULL) { 2176 if (rsp->orphan_nxtlist != NULL) {
2151 __call_rcu_nocb_enqueue(rdp, rsp->orphan_nxtlist, 2177 __call_rcu_nocb_enqueue(rdp, rsp->orphan_nxtlist,
2152 rsp->orphan_nxttail, ql, qll); 2178 rsp->orphan_nxttail, ql, qll);
2153 ql = qll = 0; 2179 ql = qll = 0;
2154 rsp->orphan_nxtlist = NULL; 2180 rsp->orphan_nxtlist = NULL;
2155 rsp->orphan_nxttail = &rsp->orphan_nxtlist; 2181 rsp->orphan_nxttail = &rsp->orphan_nxtlist;
2156 } 2182 }
2157 return 1; 2183 return 1;
2158 } 2184 }
2159 2185
2160 /* 2186 /*
2161 * If necessary, kick off a new grace period, and either way wait 2187 * If necessary, kick off a new grace period, and either way wait
2162 * for a subsequent grace period to complete. 2188 * for a subsequent grace period to complete.
2163 */ 2189 */
2164 static void rcu_nocb_wait_gp(struct rcu_data *rdp) 2190 static void rcu_nocb_wait_gp(struct rcu_data *rdp)
2165 { 2191 {
2166 unsigned long c; 2192 unsigned long c;
2167 bool d; 2193 bool d;
2168 unsigned long flags; 2194 unsigned long flags;
2169 struct rcu_node *rnp = rdp->mynode; 2195 struct rcu_node *rnp = rdp->mynode;
2170 2196
2171 raw_spin_lock_irqsave(&rnp->lock, flags); 2197 raw_spin_lock_irqsave(&rnp->lock, flags);
2172 c = rcu_start_future_gp(rnp, rdp); 2198 c = rcu_start_future_gp(rnp, rdp);
2173 raw_spin_unlock_irqrestore(&rnp->lock, flags); 2199 raw_spin_unlock_irqrestore(&rnp->lock, flags);
2174 2200
2175 /* 2201 /*
2176 * Wait for the grace period. Do so interruptibly to avoid messing 2202 * Wait for the grace period. Do so interruptibly to avoid messing
2177 * up the load average. 2203 * up the load average.
2178 */ 2204 */
2179 trace_rcu_future_gp(rnp, rdp, c, "StartWait"); 2205 trace_rcu_future_gp(rnp, rdp, c, "StartWait");
2180 for (;;) { 2206 for (;;) {
2181 wait_event_interruptible( 2207 wait_event_interruptible(
2182 rnp->nocb_gp_wq[c & 0x1], 2208 rnp->nocb_gp_wq[c & 0x1],
2183 (d = ULONG_CMP_GE(ACCESS_ONCE(rnp->completed), c))); 2209 (d = ULONG_CMP_GE(ACCESS_ONCE(rnp->completed), c)));
2184 if (likely(d)) 2210 if (likely(d))
2185 break; 2211 break;
2186 flush_signals(current); 2212 flush_signals(current);
2187 trace_rcu_future_gp(rnp, rdp, c, "ResumeWait"); 2213 trace_rcu_future_gp(rnp, rdp, c, "ResumeWait");
2188 } 2214 }
2189 trace_rcu_future_gp(rnp, rdp, c, "EndWait"); 2215 trace_rcu_future_gp(rnp, rdp, c, "EndWait");
2190 smp_mb(); /* Ensure that CB invocation happens after GP end. */ 2216 smp_mb(); /* Ensure that CB invocation happens after GP end. */
2191 } 2217 }
2192 2218
2193 /* 2219 /*
2194 * Per-rcu_data kthread, but only for no-CBs CPUs. Each kthread invokes 2220 * Per-rcu_data kthread, but only for no-CBs CPUs. Each kthread invokes
2195 * callbacks queued by the corresponding no-CBs CPU. 2221 * callbacks queued by the corresponding no-CBs CPU.
2196 */ 2222 */
2197 static int rcu_nocb_kthread(void *arg) 2223 static int rcu_nocb_kthread(void *arg)
2198 { 2224 {
2199 int c, cl; 2225 int c, cl;
2200 struct rcu_head *list; 2226 struct rcu_head *list;
2201 struct rcu_head *next; 2227 struct rcu_head *next;
2202 struct rcu_head **tail; 2228 struct rcu_head **tail;
2203 struct rcu_data *rdp = arg; 2229 struct rcu_data *rdp = arg;
2204 2230
2205 /* Each pass through this loop invokes one batch of callbacks */ 2231 /* Each pass through this loop invokes one batch of callbacks */
2206 for (;;) { 2232 for (;;) {
2207 /* If not polling, wait for next batch of callbacks. */ 2233 /* If not polling, wait for next batch of callbacks. */
2208 if (!rcu_nocb_poll) 2234 if (!rcu_nocb_poll)
2209 wait_event_interruptible(rdp->nocb_wq, rdp->nocb_head); 2235 wait_event_interruptible(rdp->nocb_wq, rdp->nocb_head);
2210 list = ACCESS_ONCE(rdp->nocb_head); 2236 list = ACCESS_ONCE(rdp->nocb_head);
2211 if (!list) { 2237 if (!list) {
2212 schedule_timeout_interruptible(1); 2238 schedule_timeout_interruptible(1);
2213 flush_signals(current); 2239 flush_signals(current);
2214 continue; 2240 continue;
2215 } 2241 }
2216 2242
2217 /* 2243 /*
2218 * Extract queued callbacks, update counts, and wait 2244 * Extract queued callbacks, update counts, and wait
2219 * for a grace period to elapse. 2245 * for a grace period to elapse.
2220 */ 2246 */
2221 ACCESS_ONCE(rdp->nocb_head) = NULL; 2247 ACCESS_ONCE(rdp->nocb_head) = NULL;
2222 tail = xchg(&rdp->nocb_tail, &rdp->nocb_head); 2248 tail = xchg(&rdp->nocb_tail, &rdp->nocb_head);
2223 c = atomic_long_xchg(&rdp->nocb_q_count, 0); 2249 c = atomic_long_xchg(&rdp->nocb_q_count, 0);
2224 cl = atomic_long_xchg(&rdp->nocb_q_count_lazy, 0); 2250 cl = atomic_long_xchg(&rdp->nocb_q_count_lazy, 0);
2225 ACCESS_ONCE(rdp->nocb_p_count) += c; 2251 ACCESS_ONCE(rdp->nocb_p_count) += c;
2226 ACCESS_ONCE(rdp->nocb_p_count_lazy) += cl; 2252 ACCESS_ONCE(rdp->nocb_p_count_lazy) += cl;
2227 rcu_nocb_wait_gp(rdp); 2253 rcu_nocb_wait_gp(rdp);
2228 2254
2229 /* Each pass through the following loop invokes a callback. */ 2255 /* Each pass through the following loop invokes a callback. */
2230 trace_rcu_batch_start(rdp->rsp->name, cl, c, -1); 2256 trace_rcu_batch_start(rdp->rsp->name, cl, c, -1);
2231 c = cl = 0; 2257 c = cl = 0;
2232 while (list) { 2258 while (list) {
2233 next = list->next; 2259 next = list->next;
2234 /* Wait for enqueuing to complete, if needed. */ 2260 /* Wait for enqueuing to complete, if needed. */
2235 while (next == NULL && &list->next != tail) { 2261 while (next == NULL && &list->next != tail) {
2236 schedule_timeout_interruptible(1); 2262 schedule_timeout_interruptible(1);
2237 next = list->next; 2263 next = list->next;
2238 } 2264 }
2239 debug_rcu_head_unqueue(list); 2265 debug_rcu_head_unqueue(list);
2240 local_bh_disable(); 2266 local_bh_disable();
2241 if (__rcu_reclaim(rdp->rsp->name, list)) 2267 if (__rcu_reclaim(rdp->rsp->name, list))
2242 cl++; 2268 cl++;
2243 c++; 2269 c++;
2244 local_bh_enable(); 2270 local_bh_enable();
2245 list = next; 2271 list = next;
2246 } 2272 }
2247 trace_rcu_batch_end(rdp->rsp->name, c, !!list, 0, 0, 1); 2273 trace_rcu_batch_end(rdp->rsp->name, c, !!list, 0, 0, 1);
2248 ACCESS_ONCE(rdp->nocb_p_count) -= c; 2274 ACCESS_ONCE(rdp->nocb_p_count) -= c;
2249 ACCESS_ONCE(rdp->nocb_p_count_lazy) -= cl; 2275 ACCESS_ONCE(rdp->nocb_p_count_lazy) -= cl;
2250 rdp->n_nocbs_invoked += c; 2276 rdp->n_nocbs_invoked += c;
2251 } 2277 }
2252 return 0; 2278 return 0;
2253 } 2279 }
2254 2280
2255 /* Initialize per-rcu_data variables for no-CBs CPUs. */ 2281 /* Initialize per-rcu_data variables for no-CBs CPUs. */
2256 static void __init rcu_boot_init_nocb_percpu_data(struct rcu_data *rdp) 2282 static void __init rcu_boot_init_nocb_percpu_data(struct rcu_data *rdp)
2257 { 2283 {
2258 rdp->nocb_tail = &rdp->nocb_head; 2284 rdp->nocb_tail = &rdp->nocb_head;
2259 init_waitqueue_head(&rdp->nocb_wq); 2285 init_waitqueue_head(&rdp->nocb_wq);
2260 } 2286 }
2261 2287
2262 /* Create a kthread for each RCU flavor for each no-CBs CPU. */ 2288 /* Create a kthread for each RCU flavor for each no-CBs CPU. */
2263 static void __init rcu_spawn_nocb_kthreads(struct rcu_state *rsp) 2289 static void __init rcu_spawn_nocb_kthreads(struct rcu_state *rsp)
2264 { 2290 {
2265 int cpu; 2291 int cpu;
2266 struct rcu_data *rdp; 2292 struct rcu_data *rdp;
2267 struct task_struct *t; 2293 struct task_struct *t;
2268 2294
2269 if (rcu_nocb_mask == NULL) 2295 if (rcu_nocb_mask == NULL)
2270 return; 2296 return;
2271 for_each_cpu(cpu, rcu_nocb_mask) { 2297 for_each_cpu(cpu, rcu_nocb_mask) {
2272 rdp = per_cpu_ptr(rsp->rda, cpu); 2298 rdp = per_cpu_ptr(rsp->rda, cpu);
2273 t = kthread_run(rcu_nocb_kthread, rdp, 2299 t = kthread_run(rcu_nocb_kthread, rdp,
2274 "rcuo%c/%d", rsp->abbr, cpu); 2300 "rcuo%c/%d", rsp->abbr, cpu);
2275 BUG_ON(IS_ERR(t)); 2301 BUG_ON(IS_ERR(t));
2276 ACCESS_ONCE(rdp->nocb_kthread) = t; 2302 ACCESS_ONCE(rdp->nocb_kthread) = t;
2277 } 2303 }
2278 } 2304 }
2279 2305
2280 /* Prevent __call_rcu() from enqueuing callbacks on no-CBs CPUs */ 2306 /* Prevent __call_rcu() from enqueuing callbacks on no-CBs CPUs */
2281 static bool init_nocb_callback_list(struct rcu_data *rdp) 2307 static bool init_nocb_callback_list(struct rcu_data *rdp)
2282 { 2308 {
2283 if (rcu_nocb_mask == NULL || 2309 if (rcu_nocb_mask == NULL ||
2284 !cpumask_test_cpu(rdp->cpu, rcu_nocb_mask)) 2310 !cpumask_test_cpu(rdp->cpu, rcu_nocb_mask))
2285 return false; 2311 return false;
2286 rdp->nxttail[RCU_NEXT_TAIL] = NULL; 2312 rdp->nxttail[RCU_NEXT_TAIL] = NULL;
2287 return true; 2313 return true;
2288 } 2314 }
2289 2315
2290 #else /* #ifdef CONFIG_RCU_NOCB_CPU */ 2316 #else /* #ifdef CONFIG_RCU_NOCB_CPU */
2291 2317
2292 static int rcu_nocb_needs_gp(struct rcu_state *rsp) 2318 static int rcu_nocb_needs_gp(struct rcu_state *rsp)
2293 { 2319 {
2294 return 0; 2320 return 0;
2295 } 2321 }
2296 2322
2297 static void rcu_nocb_gp_cleanup(struct rcu_state *rsp, struct rcu_node *rnp) 2323 static void rcu_nocb_gp_cleanup(struct rcu_state *rsp, struct rcu_node *rnp)
2298 { 2324 {
2299 } 2325 }
2300 2326
2301 static void rcu_nocb_gp_set(struct rcu_node *rnp, int nrq) 2327 static void rcu_nocb_gp_set(struct rcu_node *rnp, int nrq)
2302 { 2328 {
2303 } 2329 }
2304 2330
2305 static void rcu_init_one_nocb(struct rcu_node *rnp) 2331 static void rcu_init_one_nocb(struct rcu_node *rnp)
2306 { 2332 {
2307 } 2333 }
2308 2334
2309 static bool __call_rcu_nocb(struct rcu_data *rdp, struct rcu_head *rhp, 2335 static bool __call_rcu_nocb(struct rcu_data *rdp, struct rcu_head *rhp,
2310 bool lazy) 2336 bool lazy)
2311 { 2337 {
2312 return 0; 2338 return 0;
2313 } 2339 }
2314 2340
2315 static bool __maybe_unused rcu_nocb_adopt_orphan_cbs(struct rcu_state *rsp, 2341 static bool __maybe_unused rcu_nocb_adopt_orphan_cbs(struct rcu_state *rsp,
2316 struct rcu_data *rdp) 2342 struct rcu_data *rdp)
2317 { 2343 {
2318 return 0; 2344 return 0;
2319 } 2345 }
2320 2346
2321 static void __init rcu_boot_init_nocb_percpu_data(struct rcu_data *rdp) 2347 static void __init rcu_boot_init_nocb_percpu_data(struct rcu_data *rdp)
2322 { 2348 {
2323 } 2349 }
2324 2350
2325 static void __init rcu_spawn_nocb_kthreads(struct rcu_state *rsp) 2351 static void __init rcu_spawn_nocb_kthreads(struct rcu_state *rsp)
2326 { 2352 {
2327 } 2353 }
2328 2354
2329 static bool init_nocb_callback_list(struct rcu_data *rdp) 2355 static bool init_nocb_callback_list(struct rcu_data *rdp)
2330 { 2356 {
2331 return false; 2357 return false;
2332 } 2358 }
2333 2359
2334 #endif /* #else #ifdef CONFIG_RCU_NOCB_CPU */ 2360 #endif /* #else #ifdef CONFIG_RCU_NOCB_CPU */
2335 2361
2336 /* 2362 /*
2337 * An adaptive-ticks CPU can potentially execute in kernel mode for an 2363 * An adaptive-ticks CPU can potentially execute in kernel mode for an
2338 * arbitrarily long period of time with the scheduling-clock tick turned 2364 * arbitrarily long period of time with the scheduling-clock tick turned
2339 * off. RCU will be paying attention to this CPU because it is in the 2365 * off. RCU will be paying attention to this CPU because it is in the
2340 * kernel, but the CPU cannot be guaranteed to be executing the RCU state 2366 * kernel, but the CPU cannot be guaranteed to be executing the RCU state
2341 * machine because the scheduling-clock tick has been disabled. Therefore, 2367 * machine because the scheduling-clock tick has been disabled. Therefore,
2342 * if an adaptive-ticks CPU is failing to respond to the current grace 2368 * if an adaptive-ticks CPU is failing to respond to the current grace
2343 * period and has not been idle from an RCU perspective, kick it. 2369 * period and has not been idle from an RCU perspective, kick it.
2344 */ 2370 */
2345 static void rcu_kick_nohz_cpu(int cpu) 2371 static void rcu_kick_nohz_cpu(int cpu)
2346 { 2372 {
2347 #ifdef CONFIG_NO_HZ_FULL 2373 #ifdef CONFIG_NO_HZ_FULL
2348 if (tick_nohz_full_cpu(cpu)) 2374 if (tick_nohz_full_cpu(cpu))
2349 smp_send_reschedule(cpu); 2375 smp_send_reschedule(cpu);
2350 #endif /* #ifdef CONFIG_NO_HZ_FULL */ 2376 #endif /* #ifdef CONFIG_NO_HZ_FULL */
2351 } 2377 }
2352 2378