psci_checker.c 12.7 KB
edit raw blame history



1

2

3

4

5

6

7

8

9

10

11

12

13

14

15

16

17

18

19

20

21

22

23

24

25

26

27

28

29

30

31

32

33

34

35

36

37

38

39

40

41

42

43

44

45

46

47

48

49

50

51

52

53

54

55

56

57

58

59

60

61

62

63

64

65

66

67

68

69

70

71

72

73

74

75

76

77

78

79

80

81

82

83

84

85

86

87

88

89

90

91

92

93

94

95

96

97

98

99

100

101

102

103

104

105

106

107

108

109

110

111

112

113

114

115

116

117

118

119

120

121

122

123

124

125

126

127

128

129

130

131

132

133

134

135

136

137

138

139

140

141

142

143

144

145

146

147

148

149

150

151

152

153

154

155

156

157

158

159

160

161

162

163

164

165

166

167

168

169

170

171

172

173

174

175

176

177

178

179

180

181

182

183

184

185

186

187

188

189

190

191

192

193

194

195

196

197

198

199

200

201

202

203

204

205

206

207

208

209

210

211

212

213

214

215

216

217

218

219

220

221

222

223

224

225

226

227

228

229

230

231

232

233

234

235

236

237

238

239

240

241

242

243

244

245

246

247

248

249

250

251

252

253

254

255

256

257

258

259

260

261

262

263

264

265

266

267

268

269

270

271

272

273

274

275

276

277

278

279

280

281

282

283

284

285

286

287

288

289

290

291

292

293

294

295

296

297

298

299

300

301

302

303

304

305

306

307

308

309

310

311

312

313

314

315

316

317

318

319

320

321

322

323

324

325

326

327

328

329

330

331

332

333

334

335

336

337

338

339

340

341

342

343

344

345

346

347

348

349

350

351

352

353

354

355

356

357

358

359

360

361

362

363

364

365

366

367

368

369

370

371

372

373

374

375

376

377

378

379

380

381

382

383

384

385

386

387

388

389

390

391

392

393

394

395

396

397

398

399

400

401

402

403

404

405

406

407

408

409

410

411

412

413

414

415

416

417

418

419

420

421

422

423

424

425

426

427

428

429

430

431

432

433

434

435

436

437

438

439

440

441

442

443

444

445

446

447

448

449

450

451

452

453

454

455

456

457

458

459

460

461

462

463

464

465

466

467

468

469

470

471

472

473

474

475

476

477

478

479

480

481

482

483

484

485

486

487

488

489

490

491


/*
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * Copyright (C) 2016 ARM Limited
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/atomic.h>
#include <linux/completion.h>
#include <linux/cpu.h>
#include <linux/cpuidle.h>
#include <linux/cpu_pm.h>
#include <linux/kernel.h>
#include <linux/kthread.h>
#include <uapi/linux/sched/types.h>
#include <linux/module.h>
#include <linux/preempt.h>
#include <linux/psci.h>
#include <linux/slab.h>
#include <linux/tick.h>
#include <linux/topology.h>

#include <asm/cpuidle.h>

#include <uapi/linux/psci.h>

#define NUM_SUSPEND_CYCLE (10)

static unsigned int nb_available_cpus;
static int tos_resident_cpu = -1;

static atomic_t nb_active_threads;
static struct completion suspend_threads_started =
	COMPLETION_INITIALIZER(suspend_threads_started);
static struct completion suspend_threads_done =
	COMPLETION_INITIALIZER(suspend_threads_done);

/*
 * We assume that PSCI operations are used if they are available. This is not
 * necessarily true on arm64, since the decision is based on the
 * "enable-method" property of each CPU in the DT, but given that there is no
 * arch-specific way to check this, we assume that the DT is sensible.
 */
static int psci_ops_check(void)
{
	int migrate_type = -1;
	int cpu;

	if (!(psci_ops.cpu_off && psci_ops.cpu_on && psci_ops.cpu_suspend)) {
		pr_warn("Missing PSCI operations, aborting tests\n");
		return -EOPNOTSUPP;
	}

	if (psci_ops.migrate_info_type)
		migrate_type = psci_ops.migrate_info_type();

	if (migrate_type == PSCI_0_2_TOS_UP_MIGRATE ||
	    migrate_type == PSCI_0_2_TOS_UP_NO_MIGRATE) {
		/* There is a UP Trusted OS, find on which core it resides. */
		for_each_online_cpu(cpu)
			if (psci_tos_resident_on(cpu)) {
				tos_resident_cpu = cpu;
				break;
			}
		if (tos_resident_cpu == -1)
			pr_warn("UP Trusted OS resides on no online CPU\n");
	}

	return 0;
}

static int find_clusters(const struct cpumask *cpus,
			 const struct cpumask **clusters)
{
	unsigned int nb = 0;
	cpumask_var_t tmp;

	if (!alloc_cpumask_var(&tmp, GFP_KERNEL))
		return -ENOMEM;
	cpumask_copy(tmp, cpus);

	while (!cpumask_empty(tmp)) {
		const struct cpumask *cluster =
			topology_core_cpumask(cpumask_any(tmp));

		clusters[nb++] = cluster;
		cpumask_andnot(tmp, tmp, cluster);
	}

	free_cpumask_var(tmp);
	return nb;
}

/*
 * offlined_cpus is a temporary array but passing it as an argument avoids
 * multiple allocations.
 */
static unsigned int down_and_up_cpus(const struct cpumask *cpus,
				     struct cpumask *offlined_cpus)
{
	int cpu;
	int err = 0;

	cpumask_clear(offlined_cpus);

	/* Try to power down all CPUs in the mask. */
	for_each_cpu(cpu, cpus) {
		int ret = cpu_down(cpu);

		/*
		 * cpu_down() checks the number of online CPUs before the TOS
		 * resident CPU.
		 */
		if (cpumask_weight(offlined_cpus) + 1 == nb_available_cpus) {
			if (ret != -EBUSY) {
				pr_err("Unexpected return code %d while trying "
				       "to power down last online CPU %d\n",
				       ret, cpu);
				++err;
			}
		} else if (cpu == tos_resident_cpu) {
			if (ret != -EPERM) {
				pr_err("Unexpected return code %d while trying "
				       "to power down TOS resident CPU %d\n",
				       ret, cpu);
				++err;
			}
		} else if (ret != 0) {
			pr_err("Error occurred (%d) while trying "
			       "to power down CPU %d\n", ret, cpu);
			++err;
		}

		if (ret == 0)
			cpumask_set_cpu(cpu, offlined_cpus);
	}

	/* Try to power up all the CPUs that have been offlined. */
	for_each_cpu(cpu, offlined_cpus) {
		int ret = cpu_up(cpu);

		if (ret != 0) {
			pr_err("Error occurred (%d) while trying "
			       "to power up CPU %d\n", ret, cpu);
			++err;
		} else {
			cpumask_clear_cpu(cpu, offlined_cpus);
		}
	}

	/*
	 * Something went bad at some point and some CPUs could not be turned
	 * back on.
	 */
	WARN_ON(!cpumask_empty(offlined_cpus) ||
		num_online_cpus() != nb_available_cpus);

	return err;
}

static int hotplug_tests(void)
{
	int err;
	cpumask_var_t offlined_cpus;
	int i, nb_cluster;
	const struct cpumask **clusters;
	char *page_buf;

	err = -ENOMEM;
	if (!alloc_cpumask_var(&offlined_cpus, GFP_KERNEL))
		return err;
	/* We may have up to nb_available_cpus clusters. */
	clusters = kmalloc_array(nb_available_cpus, sizeof(*clusters),
				 GFP_KERNEL);
	if (!clusters)
		goto out_free_cpus;
	page_buf = (char *)__get_free_page(GFP_KERNEL);
	if (!page_buf)
		goto out_free_clusters;

	err = 0;
	nb_cluster = find_clusters(cpu_online_mask, clusters);

	/*
	 * Of course the last CPU cannot be powered down and cpu_down() should
	 * refuse doing that.
	 */
	pr_info("Trying to turn off and on again all CPUs\n");
	err += down_and_up_cpus(cpu_online_mask, offlined_cpus);

	/*
	 * Take down CPUs by cluster this time. When the last CPU is turned
	 * off, the cluster itself should shut down.
	 */
	for (i = 0; i < nb_cluster; ++i) {
		int cluster_id =
			topology_physical_package_id(cpumask_any(clusters[i]));
		ssize_t len = cpumap_print_to_pagebuf(true, page_buf,
						      clusters[i]);
		/* Remove trailing newline. */
		page_buf[len - 1] = '\0';
		pr_info("Trying to turn off and on again cluster %d "
			"(CPUs %s)\n", cluster_id, page_buf);
		err += down_and_up_cpus(clusters[i], offlined_cpus);
	}

	free_page((unsigned long)page_buf);
out_free_clusters:
	kfree(clusters);
out_free_cpus:
	free_cpumask_var(offlined_cpus);
	return err;
}

static void dummy_callback(unsigned long ignored) {}

static int suspend_cpu(int index, bool broadcast)
{
	int ret;

	arch_cpu_idle_enter();

	if (broadcast) {
		/*
		 * The local timer will be shut down, we need to enter tick
		 * broadcast.
		 */
		ret = tick_broadcast_enter();
		if (ret) {
			/*
			 * In the absence of hardware broadcast mechanism,
			 * this CPU might be used to broadcast wakeups, which
			 * may be why entering tick broadcast has failed.
			 * There is little the kernel can do to work around
			 * that, so enter WFI instead (idle state 0).
			 */
			cpu_do_idle();
			ret = 0;
			goto out_arch_exit;
		}
	}

	/*
	 * Replicate the common ARM cpuidle enter function
	 * (arm_enter_idle_state).
	 */
	ret = CPU_PM_CPU_IDLE_ENTER(arm_cpuidle_suspend, index);

	if (broadcast)
		tick_broadcast_exit();

out_arch_exit:
	arch_cpu_idle_exit();

	return ret;
}

static int suspend_test_thread(void *arg)
{
	int cpu = (long)arg;
	int i, nb_suspend = 0, nb_shallow_sleep = 0, nb_err = 0;
	struct sched_param sched_priority = { .sched_priority = MAX_RT_PRIO-1 };
	struct cpuidle_device *dev;
	struct cpuidle_driver *drv;
	/* No need for an actual callback, we just want to wake up the CPU. */
	struct timer_list wakeup_timer;

	/* Wait for the main thread to give the start signal. */
	wait_for_completion(&suspend_threads_started);

	/* Set maximum priority to preempt all other threads on this CPU. */
	if (sched_setscheduler_nocheck(current, SCHED_FIFO, &sched_priority))
		pr_warn("Failed to set suspend thread scheduler on CPU %d\n",
			cpu);

	dev = this_cpu_read(cpuidle_devices);
	drv = cpuidle_get_cpu_driver(dev);

	pr_info("CPU %d entering suspend cycles, states 1 through %d\n",
		cpu, drv->state_count - 1);

	setup_timer_on_stack(&wakeup_timer, dummy_callback, 0);
	for (i = 0; i < NUM_SUSPEND_CYCLE; ++i) {
		int index;
		/*
		 * Test all possible states, except 0 (which is usually WFI and
		 * doesn't use PSCI).
		 */
		for (index = 1; index < drv->state_count; ++index) {
			struct cpuidle_state *state = &drv->states[index];
			bool broadcast = state->flags & CPUIDLE_FLAG_TIMER_STOP;
			int ret;

			/*
			 * Set the timer to wake this CPU up in some time (which
			 * should be largely sufficient for entering suspend).
			 * If the local tick is disabled when entering suspend,
			 * suspend_cpu() takes care of switching to a broadcast
			 * tick, so the timer will still wake us up.
			 */
			mod_timer(&wakeup_timer, jiffies +
				  usecs_to_jiffies(state->target_residency));

			/* IRQs must be disabled during suspend operations. */
			local_irq_disable();

			ret = suspend_cpu(index, broadcast);

			/*
			 * We have woken up. Re-enable IRQs to handle any
			 * pending interrupt, do not wait until the end of the
			 * loop.
			 */
			local_irq_enable();

			if (ret == index) {
				++nb_suspend;
			} else if (ret >= 0) {
				/* We did not enter the expected state. */
				++nb_shallow_sleep;
			} else {
				pr_err("Failed to suspend CPU %d: error %d "
				       "(requested state %d, cycle %d)\n",
				       cpu, ret, index, i);
				++nb_err;
			}
		}
	}

	/*
	 * Disable the timer to make sure that the timer will not trigger
	 * later.
	 */
	del_timer(&wakeup_timer);

	if (atomic_dec_return_relaxed(&nb_active_threads) == 0)
		complete(&suspend_threads_done);

	/* Give up on RT scheduling and wait for termination. */
	sched_priority.sched_priority = 0;
	if (sched_setscheduler_nocheck(current, SCHED_NORMAL, &sched_priority))
		pr_warn("Failed to set suspend thread scheduler on CPU %d\n",
			cpu);
	for (;;) {
		/* Needs to be set first to avoid missing a wakeup. */
		set_current_state(TASK_INTERRUPTIBLE);
		if (kthread_should_stop()) {
			__set_current_state(TASK_RUNNING);
			break;
		}
		schedule();
	}

	pr_info("CPU %d suspend test results: success %d, shallow states %d, errors %d\n",
		cpu, nb_suspend, nb_shallow_sleep, nb_err);

	return nb_err;
}

static int suspend_tests(void)
{
	int i, cpu, err = 0;
	struct task_struct **threads;
	int nb_threads = 0;

	threads = kmalloc_array(nb_available_cpus, sizeof(*threads),
				GFP_KERNEL);
	if (!threads)
		return -ENOMEM;

	/*
	 * Stop cpuidle to prevent the idle tasks from entering a deep sleep
	 * mode, as it might interfere with the suspend threads on other CPUs.
	 * This does not prevent the suspend threads from using cpuidle (only
	 * the idle tasks check this status). Take the idle lock so that
	 * the cpuidle driver and device look-up can be carried out safely.
	 */
	cpuidle_pause_and_lock();

	for_each_online_cpu(cpu) {
		struct task_struct *thread;
		/* Check that cpuidle is available on that CPU. */
		struct cpuidle_device *dev = per_cpu(cpuidle_devices, cpu);
		struct cpuidle_driver *drv = cpuidle_get_cpu_driver(dev);

		if (!dev || !drv) {
			pr_warn("cpuidle not available on CPU %d, ignoring\n",
				cpu);
			continue;
		}

		thread = kthread_create_on_cpu(suspend_test_thread,
					       (void *)(long)cpu, cpu,
					       "psci_suspend_test");
		if (IS_ERR(thread))
			pr_err("Failed to create kthread on CPU %d\n", cpu);
		else
			threads[nb_threads++] = thread;
	}

	if (nb_threads < 1) {
		err = -ENODEV;
		goto out;
	}

	atomic_set(&nb_active_threads, nb_threads);

	/*
	 * Wake up the suspend threads. To avoid the main thread being preempted
	 * before all the threads have been unparked, the suspend threads will
	 * wait for the completion of suspend_threads_started.
	 */
	for (i = 0; i < nb_threads; ++i)
		wake_up_process(threads[i]);
	complete_all(&suspend_threads_started);

	wait_for_completion(&suspend_threads_done);


	/* Stop and destroy all threads, get return status. */
	for (i = 0; i < nb_threads; ++i)
		err += kthread_stop(threads[i]);
 out:
	cpuidle_resume_and_unlock();
	kfree(threads);
	return err;
}

static int __init psci_checker(void)
{
	int ret;

	/*
	 * Since we're in an initcall, we assume that all the CPUs that all
	 * CPUs that can be onlined have been onlined.
	 *
	 * The tests assume that hotplug is enabled but nobody else is using it,
	 * otherwise the results will be unpredictable. However, since there
	 * is no userspace yet in initcalls, that should be fine, as long as
	 * no torture test is running at the same time (see Kconfig).
	 */
	nb_available_cpus = num_online_cpus();

	/* Check PSCI operations are set up and working. */
	ret = psci_ops_check();
	if (ret)
		return ret;

	pr_info("PSCI checker started using %u CPUs\n", nb_available_cpus);

	pr_info("Starting hotplug tests\n");
	ret = hotplug_tests();
	if (ret == 0)
		pr_info("Hotplug tests passed OK\n");
	else if (ret > 0)
		pr_err("%d error(s) encountered in hotplug tests\n", ret);
	else {
		pr_err("Out of memory\n");
		return ret;
	}

	pr_info("Starting suspend tests (%d cycles per state)\n",
		NUM_SUSPEND_CYCLE);
	ret = suspend_tests();
	if (ret == 0)
		pr_info("Suspend tests passed OK\n");
	else if (ret > 0)
		pr_err("%d error(s) encountered in suspend tests\n", ret);
	else {
		switch (ret) {
		case -ENOMEM:
			pr_err("Out of memory\n");
			break;
		case -ENODEV:
			pr_warn("Could not start suspend tests on any CPU\n");
			break;
		}
	}

	pr_info("PSCI checker completed\n");
	return ret < 0 ? ret : 0;
}
late_initcall(psci_checker);